xref: /dragonfly/contrib/gcc-8.0/libgomp/target.c (revision 3d33658b)
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file contains the support for offloading.  */
27 
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h>  /* For PRIu64.  */
38 #endif
39 #include <string.h>
40 #include <assert.h>
41 #include <errno.h>
42 
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47 
48 static void gomp_target_init (void);
49 
50 /* The whole initialization code for offloading plugins is only run once.  */
51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 
53 /* Mutex for offload image registration.  */
54 static gomp_mutex_t register_lock;
55 
56 /* This structure describes an offload image.
57    It contains the type of the target device, a pointer to the host table
58    descriptor, and a pointer to the target data.  */
59 struct offload_image_descr {
60   unsigned version;
61   enum offload_target_type type;
62   const void *host_table;
63   const void *target_data;
64 };
65 
66 /* Array of descriptors of offload images.  */
67 static struct offload_image_descr *offload_images;
68 
69 /* Total number of offload images.  */
70 static int num_offload_images;
71 
72 /* Array of descriptors for all available devices.  */
73 static struct gomp_device_descr *devices;
74 
75 /* Total number of available devices.  */
76 static int num_devices;
77 
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
79 static int num_devices_openmp;
80 
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
82 
83 static void *
84 gomp_realloc_unlock (void *old, size_t size)
85 {
86   void *ret = realloc (old, size);
87   if (ret == NULL)
88     {
89       gomp_mutex_unlock (&register_lock);
90       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91     }
92   return ret;
93 }
94 
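/* Run the offloading initialization code (gomp_target_init) exactly once,
   no matter how many threads reach this point.  */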
95 attribute_hidden void
96 gomp_init_targets_once (void)
97 {
98   (void) pthread_once (&gomp_is_initialized, gomp_target_init);
99 }
100 
101 attribute_hidden int
102 gomp_get_num_devices (void)
103 {
104   gomp_init_targets_once ();
105   return num_devices_openmp;
106 }
107 
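/* Translate DEVICE_ID (or the default-device-var ICV if DEVICE_ID is
   GOMP_DEVICE_ICV) into a device descriptor.  Return NULL if the id is out
   of range or the device has already been finalized; initialize the device
   on first use.  */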
108 static struct gomp_device_descr *
109 resolve_device (int device_id)
110 {
111   if (device_id == GOMP_DEVICE_ICV)
112     {
113       struct gomp_task_icv *icv = gomp_icv (false);
114       device_id = icv->default_device_var;
115     }
116 
117   if (device_id < 0 || device_id >= gomp_get_num_devices ())
118     return NULL;
119 
120   gomp_mutex_lock (&devices[device_id].lock);
121   if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
122     gomp_init_device (&devices[device_id]);
123   else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
124     {
125       gomp_mutex_unlock (&devices[device_id].lock);
126       return NULL;
127     }
128   gomp_mutex_unlock (&devices[device_id].lock);
129 
130   return &devices[device_id];
131 }
132 
133 
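/* Look KEY up in MEM_MAP.  A zero-length key (host_start == host_end) could
   never intersect a mapped region, so in that case also try the key widened
   to cover the byte at host_start, then the byte just before it, before
   falling back to the exact lookup.  */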
134 static inline splay_tree_key
135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
136 {
137   if (key->host_start != key->host_end)
138     return splay_tree_lookup (mem_map, key);
139 
140   key->host_end++;
141   splay_tree_key n = splay_tree_lookup (mem_map, key);
142   key->host_end--;
143   if (n)
144     return n;
145   key->host_start--;
146   n = splay_tree_lookup (mem_map, key);
147   key->host_start++;
148   if (n)
149     return n;
150   return splay_tree_lookup (mem_map, key);
151 }
152 
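/* Like gomp_map_lookup, but for a zero-length key only try the widening to
   the byte at host_start (used for zero-length array sections).  */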
153 static inline splay_tree_key
154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155 {
156   if (key->host_start != key->host_end)
157     return splay_tree_lookup (mem_map, key);
158 
159   key->host_end++;
160   splay_tree_key n = splay_tree_lookup (mem_map, key);
161   key->host_end--;
162   return n;
163 }
164 
165 static inline void
166 gomp_device_copy (struct gomp_device_descr *devicep,
167 		  bool (*copy_func) (int, void *, const void *, size_t),
168 		  const char *dst, void *dstaddr,
169 		  const char *src, const void *srcaddr,
170 		  size_t size)
171 {
172   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
173     {
174       gomp_mutex_unlock (&devicep->lock);
175       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
177     }
178 }
179 
180 /* Infrastructure for coalescing adjacent or nearly adjacent (in device
181    addresses) host-to-device memory transfers.  */
182 
183 struct gomp_coalesce_buf
184 {
185   /* Buffer into which gomp_copy_host2dev will memcpy data and from which
186      it will be copied to the device.  */
187   void *buf;
188   struct target_mem_desc *tgt;
189   /* Array with offsets, chunks[2 * i] is the starting offset and
190      chunks[2 * i + 1] the ending offset relative to tgt->tgt_start device address
191      of chunks which are to be copied to buf and later copied to device.  */
192   size_t *chunks;
193   /* Number of chunks in chunks array, or -1 if coalesce buffering should not
194      be performed.  */
195   long chunk_cnt;
196   /* During construction of chunks array, how many memory regions are within
197      the last chunk.  If there is just one memory region for a chunk, we copy
198      it directly to device rather than going through buf.  */
199   long use_cnt;
200 };
201 
202 /* Maximum size of a memory region considered for coalescing.  Larger
203    copies are performed directly.  */
204 #define MAX_COALESCE_BUF_SIZE	(32 * 1024)
205 
206 /* Maximum size of a gap in between regions to consider them being copied
207    within the same chunk.  All the device offsets considered are within
208    newly allocated device memory, so it isn't fatal if we copy some padding
209    in between from host to device.  The gaps come either from alignment
210    padding or from memory regions which are not supposed to be copied from
211    host to device (e.g. map(alloc:), map(from:) etc.).  */
212 #define MAX_COALESCE_BUF_GAP	(4 * 1024)
213 
214 /* Add a region with device offset START (relative to tgt->tgt_start) and length LEN to CBUF.  */
215 
216 static inline void
217 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
218 {
219   if (len > MAX_COALESCE_BUF_SIZE || len == 0)
220     return;
221   if (cbuf->chunk_cnt)
222     {
223       if (cbuf->chunk_cnt < 0)
224 	return;
225       if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
226 	{
227 	  cbuf->chunk_cnt = -1;
228 	  return;
229 	}
230       if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
231 	{
232 	  cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
233 	  cbuf->use_cnt++;
234 	  return;
235 	}
236       /* If the last chunk is only used by one mapping, discard it,
237 	 as it will be one host to device copy anyway and
238 	 memcpying it around will only waste cycles.  */
239       if (cbuf->use_cnt == 1)
240 	cbuf->chunk_cnt--;
241     }
242   cbuf->chunks[2 * cbuf->chunk_cnt] = start;
243   cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
244   cbuf->chunk_cnt++;
245   cbuf->use_cnt = 1;
246 }
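
/* For illustration, with the 4 KiB MAX_COALESCE_BUF_GAP above, the calls

     gomp_coalesce_buf_add (&cbuf, 0, 100);     // chunk [0, 100)
     gomp_coalesce_buf_add (&cbuf, 104, 96);    // gap of 4: grows to [0, 200)
     gomp_coalesce_buf_add (&cbuf, 8192, 128);  // gap too large: new chunk

   leave CBUF with the two chunks [0, 200) and [8192, 8320), so the first two
   regions are staged through cbuf->buf and sent in a single transfer.  */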
247 
248 /* Return true for mapping kinds which need to copy data from the
249    host to device for regions that weren't previously mapped.  */
250 
251 static inline bool
252 gomp_to_device_kind_p (int kind)
253 {
254   switch (kind)
255     {
256     case GOMP_MAP_ALLOC:
257     case GOMP_MAP_FROM:
258     case GOMP_MAP_FORCE_ALLOC:
259     case GOMP_MAP_ALWAYS_FROM:
260       return false;
261     default:
262       return true;
263     }
264 }
265 
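/* Copy SZ bytes from host address H to device address D.  If CBUF is
   non-NULL and D falls inside one of its chunks (found by binary search over
   the sorted chunk offsets), stage the data in the coalescing buffer instead
   of issuing an immediate device copy.  */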
266 static void
267 gomp_copy_host2dev (struct gomp_device_descr *devicep,
268 		    void *d, const void *h, size_t sz,
269 		    struct gomp_coalesce_buf *cbuf)
270 {
271   if (cbuf)
272     {
273       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
274       if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
275 	{
276 	  long first = 0;
277 	  long last = cbuf->chunk_cnt - 1;
278 	  while (first <= last)
279 	    {
280 	      long middle = (first + last) >> 1;
281 	      if (cbuf->chunks[2 * middle + 1] <= doff)
282 		first = middle + 1;
283 	      else if (cbuf->chunks[2 * middle] <= doff)
284 		{
285 		  if (doff + sz > cbuf->chunks[2 * middle + 1])
286 		    gomp_fatal ("internal libgomp cbuf error");
287 		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
288 			  h, sz);
289 		  return;
290 		}
291 	      else
292 		last = middle - 1;
293 	    }
294 	}
295     }
296   gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
297 }
298 
299 static void
300 gomp_copy_dev2host (struct gomp_device_descr *devicep,
301 		    void *h, const void *d, size_t sz)
302 {
303   gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
304 }
305 
306 static void
307 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
308 {
309   if (!devicep->free_func (devicep->target_id, devptr))
310     {
311       gomp_mutex_unlock (&devicep->lock);
312       gomp_fatal ("error in freeing device memory block at %p", devptr);
313     }
314 }
315 
316 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
317    gomp_map_0len_lookup found oldn for newn.
318    Helper function of gomp_map_vars.  */
319 
320 static inline void
321 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
322 			splay_tree_key newn, struct target_var_desc *tgt_var,
323 			unsigned char kind, struct gomp_coalesce_buf *cbuf)
324 {
325   tgt_var->key = oldn;
326   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
327   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
328   tgt_var->offset = newn->host_start - oldn->host_start;
329   tgt_var->length = newn->host_end - newn->host_start;
330 
331   if ((kind & GOMP_MAP_FLAG_FORCE)
332       || oldn->host_start > newn->host_start
333       || oldn->host_end < newn->host_end)
334     {
335       gomp_mutex_unlock (&devicep->lock);
336       gomp_fatal ("Trying to map into device [%p..%p) object when "
337 		  "[%p..%p) is already mapped",
338 		  (void *) newn->host_start, (void *) newn->host_end,
339 		  (void *) oldn->host_start, (void *) oldn->host_end);
340     }
341 
342   if (GOMP_MAP_ALWAYS_TO_P (kind))
343     gomp_copy_host2dev (devicep,
344 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
345 				  + newn->host_start - oldn->host_start),
346 			(void *) newn->host_start,
347 			newn->host_end - newn->host_start, cbuf);
348 
349   if (oldn->refcount != REFCOUNT_INFINITY)
350     oldn->refcount++;
351 }
352 
353 static int
354 get_kind (bool short_mapkind, void *kinds, int idx)
355 {
356   return short_mapkind ? ((unsigned short *) kinds)[idx]
357 		       : ((unsigned char *) kinds)[idx];
358 }
359 
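/* Initialize the pointer stored at device address TGT->tgt_start
   + TARGET_OFFSET.  HOST_PTR is the host value of the pointer; NULL is
   translated to NULL.  Otherwise BIAS is added before looking the pointed-to
   object up in the device's memory map and subtracted again from the
   resulting device address before that is written to the device.  */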
360 static void
361 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
362 		  uintptr_t target_offset, uintptr_t bias,
363 		  struct gomp_coalesce_buf *cbuf)
364 {
365   struct gomp_device_descr *devicep = tgt->device_descr;
366   struct splay_tree_s *mem_map = &devicep->mem_map;
367   struct splay_tree_key_s cur_node;
368 
369   cur_node.host_start = host_ptr;
370   if (cur_node.host_start == (uintptr_t) NULL)
371     {
372       cur_node.tgt_offset = (uintptr_t) NULL;
373       gomp_copy_host2dev (devicep,
374 			  (void *) (tgt->tgt_start + target_offset),
375 			  (void *) &cur_node.tgt_offset,
376 			  sizeof (void *), cbuf);
377       return;
378     }
379   /* Add bias to the pointer value.  */
380   cur_node.host_start += bias;
381   cur_node.host_end = cur_node.host_start;
382   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
383   if (n == NULL)
384     {
385       gomp_mutex_unlock (&devicep->lock);
386       gomp_fatal ("Pointer target of array section wasn't mapped");
387     }
388   cur_node.host_start -= n->host_start;
389   cur_node.tgt_offset
390     = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
391   /* At this point tgt_offset is the target address of the
392      array section.  Now subtract the bias to get the value we
393      want to initialize the pointer with.  */
394   cur_node.tgt_offset -= bias;
395   gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
396 		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
397 }
398 
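/* Map the I-th element of a GOMP_MAP_STRUCT mapping whose enclosing
   structure, starting at element FIRST, is already mapped on the device as
   N.  Fail with gomp_fatal if the element does not belong to the same
   mapping as the rest of the structure.  */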
399 static void
400 gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
401 			  size_t first, size_t i, void **hostaddrs,
402 			  size_t *sizes, void *kinds,
403 			  struct gomp_coalesce_buf *cbuf)
404 {
405   struct gomp_device_descr *devicep = tgt->device_descr;
406   struct splay_tree_s *mem_map = &devicep->mem_map;
407   struct splay_tree_key_s cur_node;
408   int kind;
409   const bool short_mapkind = true;
410   const int typemask = short_mapkind ? 0xff : 0x7;
411 
412   cur_node.host_start = (uintptr_t) hostaddrs[i];
413   cur_node.host_end = cur_node.host_start + sizes[i];
414   splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
415   kind = get_kind (short_mapkind, kinds, i);
416   if (n2
417       && n2->tgt == n->tgt
418       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
419     {
420       gomp_map_vars_existing (devicep, n2, &cur_node,
421 			      &tgt->list[i], kind & typemask, cbuf);
422       return;
423     }
424   if (sizes[i] == 0)
425     {
426       if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
427 	{
428 	  cur_node.host_start--;
429 	  n2 = splay_tree_lookup (mem_map, &cur_node);
430 	  cur_node.host_start++;
431 	  if (n2
432 	      && n2->tgt == n->tgt
433 	      && n2->host_start - n->host_start
434 		 == n2->tgt_offset - n->tgt_offset)
435 	    {
436 	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
437 				      kind & typemask, cbuf);
438 	      return;
439 	    }
440 	}
441       cur_node.host_end++;
442       n2 = splay_tree_lookup (mem_map, &cur_node);
443       cur_node.host_end--;
444       if (n2
445 	  && n2->tgt == n->tgt
446 	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
447 	{
448 	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
449 				  kind & typemask, cbuf);
450 	  return;
451 	}
452     }
453   gomp_mutex_unlock (&devicep->lock);
454   gomp_fatal ("Trying to map into device [%p..%p) structure element when "
455 	      "other mapped elements from the same structure weren't mapped "
456 	      "together with it", (void *) cur_node.host_start,
457 	      (void *) cur_node.host_end);
458 }
459 
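/* Compute the device address to pass for the I-th mapping.  Entries without
   a splay tree key encode special cases in tgt->list[i].offset: ~0 passes
   hostaddrs[i] through unchanged (it may already have been rewritten to a
   device address), ~1 passes a NULL pointer, and ~2 computes the address
   relative to the following list entry (used for GOMP_MAP_STRUCT).  */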
460 static inline uintptr_t
461 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
462 {
463   if (tgt->list[i].key != NULL)
464     return tgt->list[i].key->tgt->tgt_start
465 	   + tgt->list[i].key->tgt_offset
466 	   + tgt->list[i].offset;
467   if (tgt->list[i].offset == ~(uintptr_t) 0)
468     return (uintptr_t) hostaddrs[i];
469   if (tgt->list[i].offset == ~(uintptr_t) 1)
470     return 0;
471   if (tgt->list[i].offset == ~(uintptr_t) 2)
472     return tgt->list[i + 1].key->tgt->tgt_start
473 	   + tgt->list[i + 1].key->tgt_offset
474 	   + tgt->list[i + 1].offset
475 	   + (uintptr_t) hostaddrs[i]
476 	   - (uintptr_t) hostaddrs[i + 1];
477   return tgt->tgt_start + tgt->list[i].offset;
478 }
479 
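/* Map MAPNUM host regions described by HOSTADDRS, SIZES and KINDS onto
   DEVICEP, reusing existing mappings where possible and allocating a single
   contiguous block of device memory for the rest.  SHORT_MAPKIND selects
   16-bit over 8-bit map kinds, PRAGMA_KIND identifies the originating
   construct, and DEVADDRS, if non-NULL, supplies already allocated device
   memory for a single mapping.  The returned descriptor is freed by
   gomp_unmap_vars or gomp_exit_data.  */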
480 attribute_hidden struct target_mem_desc *
481 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
482 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
483 	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
484 {
485   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
486   bool has_firstprivate = false;
487   const int rshift = short_mapkind ? 8 : 3;
488   const int typemask = short_mapkind ? 0xff : 0x7;
489   struct splay_tree_s *mem_map = &devicep->mem_map;
490   struct splay_tree_key_s cur_node;
491   struct target_mem_desc *tgt
492     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
493   tgt->list_count = mapnum;
494   tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
495   tgt->device_descr = devicep;
496   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
497 
498   if (mapnum == 0)
499     {
500       tgt->tgt_start = 0;
501       tgt->tgt_end = 0;
502       return tgt;
503     }
504 
505   tgt_align = sizeof (void *);
506   tgt_size = 0;
507   cbuf.chunks = NULL;
508   cbuf.chunk_cnt = -1;
509   cbuf.use_cnt = 0;
510   cbuf.buf = NULL;
511   if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
512     {
513       cbuf.chunks
514 	= (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
515       cbuf.chunk_cnt = 0;
516     }
517   if (pragma_kind == GOMP_MAP_VARS_TARGET)
518     {
519       size_t align = 4 * sizeof (void *);
520       tgt_align = align;
521       tgt_size = mapnum * sizeof (void *);
522       cbuf.chunk_cnt = 1;
523       cbuf.use_cnt = 1 + (mapnum > 1);
524       cbuf.chunks[0] = 0;
525       cbuf.chunks[1] = tgt_size;
526     }
527 
528   gomp_mutex_lock (&devicep->lock);
529   if (devicep->state == GOMP_DEVICE_FINALIZED)
530     {
531       gomp_mutex_unlock (&devicep->lock);
532       free (tgt);
533       return NULL;
534     }
535 
536   for (i = 0; i < mapnum; i++)
537     {
538       int kind = get_kind (short_mapkind, kinds, i);
539       if (hostaddrs[i] == NULL
540 	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
541 	{
542 	  tgt->list[i].key = NULL;
543 	  tgt->list[i].offset = ~(uintptr_t) 0;
544 	  continue;
545 	}
546       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
547 	{
548 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
549 	  cur_node.host_end = cur_node.host_start;
550 	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
551 	  if (n == NULL)
552 	    {
553 	      gomp_mutex_unlock (&devicep->lock);
554 	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
555 	    }
556 	  cur_node.host_start -= n->host_start;
557 	  hostaddrs[i]
558 	    = (void *) (n->tgt->tgt_start + n->tgt_offset
559 			+ cur_node.host_start);
560 	  tgt->list[i].key = NULL;
561 	  tgt->list[i].offset = ~(uintptr_t) 0;
562 	  continue;
563 	}
564       else if ((kind & typemask) == GOMP_MAP_STRUCT)
565 	{
566 	  size_t first = i + 1;
567 	  size_t last = i + sizes[i];
568 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
569 	  cur_node.host_end = (uintptr_t) hostaddrs[last]
570 			      + sizes[last];
571 	  tgt->list[i].key = NULL;
572 	  tgt->list[i].offset = ~(uintptr_t) 2;
573 	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
574 	  if (n == NULL)
575 	    {
576 	      size_t align = (size_t) 1 << (kind >> rshift);
577 	      if (tgt_align < align)
578 		tgt_align = align;
579 	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
580 	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
581 	      tgt_size += cur_node.host_end - cur_node.host_start;
582 	      not_found_cnt += last - i;
583 	      for (i = first; i <= last; i++)
584 		{
585 		  tgt->list[i].key = NULL;
586 		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
587 					     & typemask))
588 		    gomp_coalesce_buf_add (&cbuf,
589 					   tgt_size - cur_node.host_end
590 					   + (uintptr_t) hostaddrs[i],
591 					   sizes[i]);
592 		}
593 	      i--;
594 	      continue;
595 	    }
596 	  for (i = first; i <= last; i++)
597 	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
598 				      sizes, kinds, NULL);
599 	  i--;
600 	  continue;
601 	}
602       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
603 	{
604 	  tgt->list[i].key = NULL;
605 	  tgt->list[i].offset = ~(uintptr_t) 1;
606 	  has_firstprivate = true;
607 	  continue;
608 	}
609       cur_node.host_start = (uintptr_t) hostaddrs[i];
610       if (!GOMP_MAP_POINTER_P (kind & typemask))
611 	cur_node.host_end = cur_node.host_start + sizes[i];
612       else
613 	cur_node.host_end = cur_node.host_start + sizeof (void *);
614       if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
615 	{
616 	  tgt->list[i].key = NULL;
617 
618 	  size_t align = (size_t) 1 << (kind >> rshift);
619 	  if (tgt_align < align)
620 	    tgt_align = align;
621 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
622 	  gomp_coalesce_buf_add (&cbuf, tgt_size,
623 				 cur_node.host_end - cur_node.host_start);
624 	  tgt_size += cur_node.host_end - cur_node.host_start;
625 	  has_firstprivate = true;
626 	  continue;
627 	}
628       splay_tree_key n;
629       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
630 	{
631 	  n = gomp_map_0len_lookup (mem_map, &cur_node);
632 	  if (!n)
633 	    {
634 	      tgt->list[i].key = NULL;
635 	      tgt->list[i].offset = ~(uintptr_t) 1;
636 	      continue;
637 	    }
638 	}
639       else
640 	n = splay_tree_lookup (mem_map, &cur_node);
641       if (n && n->refcount != REFCOUNT_LINK)
642 	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
643 				kind & typemask, NULL);
644       else
645 	{
646 	  tgt->list[i].key = NULL;
647 
648 	  size_t align = (size_t) 1 << (kind >> rshift);
649 	  not_found_cnt++;
650 	  if (tgt_align < align)
651 	    tgt_align = align;
652 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
653 	  if (gomp_to_device_kind_p (kind & typemask))
654 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
655 				   cur_node.host_end - cur_node.host_start);
656 	  tgt_size += cur_node.host_end - cur_node.host_start;
657 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
658 	    {
659 	      size_t j;
660 	      for (j = i + 1; j < mapnum; j++)
661 		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
662 					 & typemask))
663 		  break;
664 		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
665 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
666 			     > cur_node.host_end))
667 		  break;
668 		else
669 		  {
670 		    tgt->list[j].key = NULL;
671 		    i++;
672 		  }
673 	    }
674 	}
675     }
676 
677   if (devaddrs)
678     {
679       if (mapnum != 1)
680 	{
681 	  gomp_mutex_unlock (&devicep->lock);
682 	  gomp_fatal ("unexpected aggregation");
683 	}
684       tgt->to_free = devaddrs[0];
685       tgt->tgt_start = (uintptr_t) tgt->to_free;
686       tgt->tgt_end = tgt->tgt_start + sizes[0];
687     }
688   else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
689     {
690       /* Allocate a tgt_size block of device memory aligned to tgt_align.  */
691       /* FIXME: Perhaps change the interface to allocate properly aligned
692 	 memory.  */
693       tgt->to_free = devicep->alloc_func (devicep->target_id,
694 					  tgt_size + tgt_align - 1);
695       if (!tgt->to_free)
696 	{
697 	  gomp_mutex_unlock (&devicep->lock);
698 	  gomp_fatal ("device memory allocation fail");
699 	}
700 
701       tgt->tgt_start = (uintptr_t) tgt->to_free;
702       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
703       tgt->tgt_end = tgt->tgt_start + tgt_size;
704 
705       if (cbuf.use_cnt == 1)
706 	cbuf.chunk_cnt--;
707       if (cbuf.chunk_cnt > 0)
708 	{
709 	  cbuf.buf
710 	    = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
711 	  if (cbuf.buf)
712 	    {
713 	      cbuf.tgt = tgt;
714 	      cbufp = &cbuf;
715 	    }
716 	}
717     }
718   else
719     {
720       tgt->to_free = NULL;
721       tgt->tgt_start = 0;
722       tgt->tgt_end = 0;
723     }
724 
725   tgt_size = 0;
726   if (pragma_kind == GOMP_MAP_VARS_TARGET)
727     tgt_size = mapnum * sizeof (void *);
728 
729   tgt->array = NULL;
730   if (not_found_cnt || has_firstprivate)
731     {
732       if (not_found_cnt)
733 	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
734       splay_tree_node array = tgt->array;
735       size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
736       uintptr_t field_tgt_base = 0;
737 
738       for (i = 0; i < mapnum; i++)
739 	if (tgt->list[i].key == NULL)
740 	  {
741 	    int kind = get_kind (short_mapkind, kinds, i);
742 	    if (hostaddrs[i] == NULL)
743 	      continue;
744 	    switch (kind & typemask)
745 	      {
746 		size_t align, len, first, last;
747 		splay_tree_key n;
748 	      case GOMP_MAP_FIRSTPRIVATE:
749 		align = (size_t) 1 << (kind >> rshift);
750 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
751 		tgt->list[i].offset = tgt_size;
752 		len = sizes[i];
753 		gomp_copy_host2dev (devicep,
754 				    (void *) (tgt->tgt_start + tgt_size),
755 				    (void *) hostaddrs[i], len, cbufp);
756 		tgt_size += len;
757 		continue;
758 	      case GOMP_MAP_FIRSTPRIVATE_INT:
759 	      case GOMP_MAP_USE_DEVICE_PTR:
760 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
761 		continue;
762 	      case GOMP_MAP_STRUCT:
763 		first = i + 1;
764 		last = i + sizes[i];
765 		cur_node.host_start = (uintptr_t) hostaddrs[i];
766 		cur_node.host_end = (uintptr_t) hostaddrs[last]
767 				    + sizes[last];
768 		if (tgt->list[first].key != NULL)
769 		  continue;
770 		n = splay_tree_lookup (mem_map, &cur_node);
771 		if (n == NULL)
772 		  {
773 		    size_t align = (size_t) 1 << (kind >> rshift);
774 		    tgt_size -= (uintptr_t) hostaddrs[first]
775 				- (uintptr_t) hostaddrs[i];
776 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
777 		    tgt_size += (uintptr_t) hostaddrs[first]
778 				- (uintptr_t) hostaddrs[i];
779 		    field_tgt_base = (uintptr_t) hostaddrs[first];
780 		    field_tgt_offset = tgt_size;
781 		    field_tgt_clear = last;
782 		    tgt_size += cur_node.host_end
783 				- (uintptr_t) hostaddrs[first];
784 		    continue;
785 		  }
786 		for (i = first; i <= last; i++)
787 		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
788 					    sizes, kinds, cbufp);
789 		i--;
790 		continue;
791 	      case GOMP_MAP_ALWAYS_POINTER:
792 		cur_node.host_start = (uintptr_t) hostaddrs[i];
793 		cur_node.host_end = cur_node.host_start + sizeof (void *);
794 		n = splay_tree_lookup (mem_map, &cur_node);
795 		if (n == NULL
796 		    || n->host_start > cur_node.host_start
797 		    || n->host_end < cur_node.host_end)
798 		  {
799 		    gomp_mutex_unlock (&devicep->lock);
800 		    gomp_fatal ("always pointer not mapped");
801 		  }
802 		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
803 		    != GOMP_MAP_ALWAYS_POINTER)
804 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
805 		if (cur_node.tgt_offset)
806 		  cur_node.tgt_offset -= sizes[i];
807 		gomp_copy_host2dev (devicep,
808 				    (void *) (n->tgt->tgt_start
809 					      + n->tgt_offset
810 					      + cur_node.host_start
811 					      - n->host_start),
812 				    (void *) &cur_node.tgt_offset,
813 				    sizeof (void *), cbufp);
814 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
815 				      + cur_node.host_start - n->host_start;
816 		continue;
817 	      default:
818 		break;
819 	      }
820 	    splay_tree_key k = &array->key;
821 	    k->host_start = (uintptr_t) hostaddrs[i];
822 	    if (!GOMP_MAP_POINTER_P (kind & typemask))
823 	      k->host_end = k->host_start + sizes[i];
824 	    else
825 	      k->host_end = k->host_start + sizeof (void *);
826 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
827 	    if (n && n->refcount != REFCOUNT_LINK)
828 	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
829 				      kind & typemask, cbufp);
830 	    else
831 	      {
832 		k->link_key = NULL;
833 		if (n && n->refcount == REFCOUNT_LINK)
834 		  {
835 		    /* Replace target address of the pointer with target address
836 		       of mapped object in the splay tree.  */
837 		    splay_tree_remove (mem_map, n);
838 		    k->link_key = n;
839 		  }
840 		size_t align = (size_t) 1 << (kind >> rshift);
841 		tgt->list[i].key = k;
842 		k->tgt = tgt;
843 		if (field_tgt_clear != ~(size_t) 0)
844 		  {
845 		    k->tgt_offset = k->host_start - field_tgt_base
846 				    + field_tgt_offset;
847 		    if (i == field_tgt_clear)
848 		      field_tgt_clear = ~(size_t) 0;
849 		  }
850 		else
851 		  {
852 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
853 		    k->tgt_offset = tgt_size;
854 		    tgt_size += k->host_end - k->host_start;
855 		  }
856 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
857 		tgt->list[i].always_copy_from
858 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
859 		tgt->list[i].offset = 0;
860 		tgt->list[i].length = k->host_end - k->host_start;
861 		k->refcount = 1;
862 		tgt->refcount++;
863 		array->left = NULL;
864 		array->right = NULL;
865 		splay_tree_insert (mem_map, array);
866 		switch (kind & typemask)
867 		  {
868 		  case GOMP_MAP_ALLOC:
869 		  case GOMP_MAP_FROM:
870 		  case GOMP_MAP_FORCE_ALLOC:
871 		  case GOMP_MAP_FORCE_FROM:
872 		  case GOMP_MAP_ALWAYS_FROM:
873 		    break;
874 		  case GOMP_MAP_TO:
875 		  case GOMP_MAP_TOFROM:
876 		  case GOMP_MAP_FORCE_TO:
877 		  case GOMP_MAP_FORCE_TOFROM:
878 		  case GOMP_MAP_ALWAYS_TO:
879 		  case GOMP_MAP_ALWAYS_TOFROM:
880 		    gomp_copy_host2dev (devicep,
881 					(void *) (tgt->tgt_start
882 						  + k->tgt_offset),
883 					(void *) k->host_start,
884 					k->host_end - k->host_start, cbufp);
885 		    break;
886 		  case GOMP_MAP_POINTER:
887 		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
888 				      k->tgt_offset, sizes[i], cbufp);
889 		    break;
890 		  case GOMP_MAP_TO_PSET:
891 		    gomp_copy_host2dev (devicep,
892 					(void *) (tgt->tgt_start
893 						  + k->tgt_offset),
894 					(void *) k->host_start,
895 					k->host_end - k->host_start, cbufp);
896 
897 		    for (j = i + 1; j < mapnum; j++)
898 		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
899 							 j)
900 					       & typemask))
901 			break;
902 		      else if ((uintptr_t) hostaddrs[j] < k->host_start
903 			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
904 				   > k->host_end))
905 			break;
906 		      else
907 			{
908 			  tgt->list[j].key = k;
909 			  tgt->list[j].copy_from = false;
910 			  tgt->list[j].always_copy_from = false;
911 			  if (k->refcount != REFCOUNT_INFINITY)
912 			    k->refcount++;
913 			  gomp_map_pointer (tgt,
914 					    (uintptr_t) *(void **) hostaddrs[j],
915 					    k->tgt_offset
916 					    + ((uintptr_t) hostaddrs[j]
917 					       - k->host_start),
918 					    sizes[j], cbufp);
919 			  i++;
920 			}
921 		    break;
922 		  case GOMP_MAP_FORCE_PRESENT:
923 		    {
924 		      /* We already looked up the memory region above and it
925 			 was missing.  */
926 		      size_t size = k->host_end - k->host_start;
927 		      gomp_mutex_unlock (&devicep->lock);
928 #ifdef HAVE_INTTYPES_H
929 		      gomp_fatal ("present clause: !acc_is_present (%p, "
930 				  "%"PRIu64" (0x%"PRIx64"))",
931 				  (void *) k->host_start,
932 				  (uint64_t) size, (uint64_t) size);
933 #else
934 		      gomp_fatal ("present clause: !acc_is_present (%p, "
935 				  "%lu (0x%lx))", (void *) k->host_start,
936 				  (unsigned long) size, (unsigned long) size);
937 #endif
938 		    }
939 		    break;
940 		  case GOMP_MAP_FORCE_DEVICEPTR:
941 		    assert (k->host_end - k->host_start == sizeof (void *));
942 		    gomp_copy_host2dev (devicep,
943 					(void *) (tgt->tgt_start
944 						  + k->tgt_offset),
945 					(void *) k->host_start,
946 					sizeof (void *), cbufp);
947 		    break;
948 		  default:
949 		    gomp_mutex_unlock (&devicep->lock);
950 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
951 				kind);
952 		  }
953 
954 		if (k->link_key)
955 		  {
956 		    /* Set link pointer on target to the device address of the
957 		       mapped object.  */
958 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
959 		    devicep->host2dev_func (devicep->target_id,
960 					    (void *) n->tgt_offset,
961 					    &tgt_addr, sizeof (void *));
962 		  }
963 		array++;
964 	      }
965 	  }
966     }
967 
968   if (pragma_kind == GOMP_MAP_VARS_TARGET)
969     {
970       for (i = 0; i < mapnum; i++)
971 	{
972 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
973 	  gomp_copy_host2dev (devicep,
974 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
975 			      (void *) &cur_node.tgt_offset, sizeof (void *),
976 			      cbufp);
977 	}
978     }
979 
980   if (cbufp)
981     {
982       long c = 0;
983       for (c = 0; c < cbuf.chunk_cnt; ++c)
984 	gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
985 			    (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
986 			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
987       free (cbuf.buf);
988     }
989 
990   /* If the variable from "omp target enter data" map-list was already mapped,
991      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
992      gomp_exit_data.  */
993   if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
994     {
995       free (tgt);
996       tgt = NULL;
997     }
998 
999   gomp_mutex_unlock (&devicep->lock);
1000   return tgt;
1001 }
1002 
1003 static void
1004 gomp_unmap_tgt (struct target_mem_desc *tgt)
1005 {
1006   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
1007   if (tgt->tgt_end)
1008     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1009 
1010   free (tgt->array);
1011   free (tgt);
1012 }
1013 
1014 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
1015    variables back from device to host; if it is false, it is assumed that this
1016    has been done already.  */
1017 
1018 attribute_hidden void
1019 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1020 {
1021   struct gomp_device_descr *devicep = tgt->device_descr;
1022 
1023   if (tgt->list_count == 0)
1024     {
1025       free (tgt);
1026       return;
1027     }
1028 
1029   gomp_mutex_lock (&devicep->lock);
1030   if (devicep->state == GOMP_DEVICE_FINALIZED)
1031     {
1032       gomp_mutex_unlock (&devicep->lock);
1033       free (tgt->array);
1034       free (tgt);
1035       return;
1036     }
1037 
1038   size_t i;
1039   for (i = 0; i < tgt->list_count; i++)
1040     {
1041       splay_tree_key k = tgt->list[i].key;
1042       if (k == NULL)
1043 	continue;
1044 
1045       bool do_unmap = false;
1046       if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1047 	k->refcount--;
1048       else if (k->refcount == 1)
1049 	{
1050 	  k->refcount--;
1051 	  do_unmap = true;
1052 	}
1053 
1054       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1055 	  || tgt->list[i].always_copy_from)
1056 	gomp_copy_dev2host (devicep,
1057 			    (void *) (k->host_start + tgt->list[i].offset),
1058 			    (void *) (k->tgt->tgt_start + k->tgt_offset
1059 				      + tgt->list[i].offset),
1060 			    tgt->list[i].length);
1061       if (do_unmap)
1062 	{
1063 	  splay_tree_remove (&devicep->mem_map, k);
1064 	  if (k->link_key)
1065 	    splay_tree_insert (&devicep->mem_map,
1066 			       (splay_tree_node) k->link_key);
1067 	  if (k->tgt->refcount > 1)
1068 	    k->tgt->refcount--;
1069 	  else
1070 	    gomp_unmap_tgt (k->tgt);
1071 	}
1072     }
1073 
1074   if (tgt->refcount > 1)
1075     tgt->refcount--;
1076   else
1077     gomp_unmap_tgt (tgt);
1078 
1079   gomp_mutex_unlock (&devicep->lock);
1080 }
1081 
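/* For each of the MAPNUM host regions with a non-zero size that is present
   in DEVICEP's memory map, copy data to and/or from the device according to
   the corresponding entry in KINDS.  A region extending beyond its mapping
   is a fatal error.  */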
1082 static void
1083 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1084 	     size_t *sizes, void *kinds, bool short_mapkind)
1085 {
1086   size_t i;
1087   struct splay_tree_key_s cur_node;
1088   const int typemask = short_mapkind ? 0xff : 0x7;
1089 
1090   if (!devicep)
1091     return;
1092 
1093   if (mapnum == 0)
1094     return;
1095 
1096   gomp_mutex_lock (&devicep->lock);
1097   if (devicep->state == GOMP_DEVICE_FINALIZED)
1098     {
1099       gomp_mutex_unlock (&devicep->lock);
1100       return;
1101     }
1102 
1103   for (i = 0; i < mapnum; i++)
1104     if (sizes[i])
1105       {
1106 	cur_node.host_start = (uintptr_t) hostaddrs[i];
1107 	cur_node.host_end = cur_node.host_start + sizes[i];
1108 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1109 	if (n)
1110 	  {
1111 	    int kind = get_kind (short_mapkind, kinds, i);
1112 	    if (n->host_start > cur_node.host_start
1113 		|| n->host_end < cur_node.host_end)
1114 	      {
1115 		gomp_mutex_unlock (&devicep->lock);
1116 		gomp_fatal ("Trying to update [%p..%p) object when "
1117 			    "only [%p..%p) is mapped",
1118 			    (void *) cur_node.host_start,
1119 			    (void *) cur_node.host_end,
1120 			    (void *) n->host_start,
1121 			    (void *) n->host_end);
1122 	      }
1123 
1124 
1125 	    void *hostaddr = (void *) cur_node.host_start;
1126 	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1127 				      + cur_node.host_start - n->host_start);
1128 	    size_t size = cur_node.host_end - cur_node.host_start;
1129 
1130 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
1131 	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
1132 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1133 	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
1134 	  }
1135       }
1136   gomp_mutex_unlock (&devicep->lock);
1137 }
1138 
1139 /* Load the image pointed to by TARGET_DATA to the device specified by
1140    DEVICEP, and insert into the splay tree the mappings between addresses
1141    from HOST_TABLE and from the loaded target image.  We rely on the host
1142    and device compilers emitting variables and functions in the same order.  */
1143 
1144 static void
1145 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1146 			   const void *host_table, const void *target_data,
1147 			   bool is_register_lock)
1148 {
1149   void **host_func_table = ((void ***) host_table)[0];
1150   void **host_funcs_end  = ((void ***) host_table)[1];
1151   void **host_var_table  = ((void ***) host_table)[2];
1152   void **host_vars_end   = ((void ***) host_table)[3];
1153 
1154   /* The func table contains only addresses, the var table contains addresses
1155      and corresponding sizes.  */
1156   int num_funcs = host_funcs_end - host_func_table;
1157   int num_vars  = (host_vars_end - host_var_table) / 2;
1158 
1159   /* Load image to device and get target addresses for the image.  */
1160   struct addr_pair *target_table = NULL;
1161   int i, num_target_entries;
1162 
1163   num_target_entries
1164     = devicep->load_image_func (devicep->target_id, version,
1165 				target_data, &target_table);
1166 
1167   if (num_target_entries != num_funcs + num_vars)
1168     {
1169       gomp_mutex_unlock (&devicep->lock);
1170       if (is_register_lock)
1171 	gomp_mutex_unlock (&register_lock);
1172       gomp_fatal ("Cannot map target functions or variables"
1173 		  " (expected %u, have %u)", num_funcs + num_vars,
1174 		  num_target_entries);
1175     }
1176 
1177   /* Insert host-target address mapping into splay tree.  */
1178   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1179   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1180   tgt->refcount = REFCOUNT_INFINITY;
1181   tgt->tgt_start = 0;
1182   tgt->tgt_end = 0;
1183   tgt->to_free = NULL;
1184   tgt->prev = NULL;
1185   tgt->list_count = 0;
1186   tgt->device_descr = devicep;
1187   splay_tree_node array = tgt->array;
1188 
1189   for (i = 0; i < num_funcs; i++)
1190     {
1191       splay_tree_key k = &array->key;
1192       k->host_start = (uintptr_t) host_func_table[i];
1193       k->host_end = k->host_start + 1;
1194       k->tgt = tgt;
1195       k->tgt_offset = target_table[i].start;
1196       k->refcount = REFCOUNT_INFINITY;
1197       k->link_key = NULL;
1198       array->left = NULL;
1199       array->right = NULL;
1200       splay_tree_insert (&devicep->mem_map, array);
1201       array++;
1202     }
1203 
1204   /* Most significant bit of the size in host and target tables marks
1205      "omp declare target link" variables.  */
1206   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1207   const uintptr_t size_mask = ~link_bit;
1208 
1209   for (i = 0; i < num_vars; i++)
1210     {
1211       struct addr_pair *target_var = &target_table[num_funcs + i];
1212       uintptr_t target_size = target_var->end - target_var->start;
1213 
1214       if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1215 	{
1216 	  gomp_mutex_unlock (&devicep->lock);
1217 	  if (is_register_lock)
1218 	    gomp_mutex_unlock (&register_lock);
1219 	  gomp_fatal ("Cannot map target variables (size mismatch)");
1220 	}
1221 
1222       splay_tree_key k = &array->key;
1223       k->host_start = (uintptr_t) host_var_table[i * 2];
1224       k->host_end
1225 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1226       k->tgt = tgt;
1227       k->tgt_offset = target_var->start;
1228       k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1229       k->link_key = NULL;
1230       array->left = NULL;
1231       array->right = NULL;
1232       splay_tree_insert (&devicep->mem_map, array);
1233       array++;
1234     }
1235 
1236   free (target_table);
1237 }
1238 
1239 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
1240    The device must be locked.  */
1241 
1242 static void
1243 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1244 			       unsigned version,
1245 			       const void *host_table, const void *target_data)
1246 {
1247   void **host_func_table = ((void ***) host_table)[0];
1248   void **host_funcs_end  = ((void ***) host_table)[1];
1249   void **host_var_table  = ((void ***) host_table)[2];
1250   void **host_vars_end   = ((void ***) host_table)[3];
1251 
1252   /* The func table contains only addresses, the var table contains addresses
1253      and corresponding sizes.  */
1254   int num_funcs = host_funcs_end - host_func_table;
1255   int num_vars  = (host_vars_end - host_var_table) / 2;
1256 
1257   struct splay_tree_key_s k;
1258   splay_tree_key node = NULL;
1259 
1260   /* Find the mapping at the start of the node array.  */
1261   if (num_funcs || num_vars)
1262     {
1263       k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1264 		      : (uintptr_t) host_var_table[0]);
1265       k.host_end = k.host_start + 1;
1266       node = splay_tree_lookup (&devicep->mem_map, &k);
1267     }
1268 
1269   if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1270     {
1271       gomp_mutex_unlock (&devicep->lock);
1272       gomp_fatal ("image unload fail");
1273     }
1274 
1275   /* Remove mappings from splay tree.  */
1276   int i;
1277   for (i = 0; i < num_funcs; i++)
1278     {
1279       k.host_start = (uintptr_t) host_func_table[i];
1280       k.host_end = k.host_start + 1;
1281       splay_tree_remove (&devicep->mem_map, &k);
1282     }
1283 
1284   /* Most significant bit of the size in host and target tables marks
1285      "omp declare target link" variables.  */
1286   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1287   const uintptr_t size_mask = ~link_bit;
1288   bool is_tgt_unmapped = false;
1289 
1290   for (i = 0; i < num_vars; i++)
1291     {
1292       k.host_start = (uintptr_t) host_var_table[i * 2];
1293       k.host_end
1294 	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1295 
1296       if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1297 	splay_tree_remove (&devicep->mem_map, &k);
1298       else
1299 	{
1300 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1301 	  splay_tree_remove (&devicep->mem_map, n);
1302 	  if (n->link_key)
1303 	    {
1304 	      if (n->tgt->refcount > 1)
1305 		n->tgt->refcount--;
1306 	      else
1307 		{
1308 		  is_tgt_unmapped = true;
1309 		  gomp_unmap_tgt (n->tgt);
1310 		}
1311 	    }
1312 	}
1313     }
1314 
1315   if (node && !is_tgt_unmapped)
1316     {
1317       free (node->tgt);
1318       free (node);
1319     }
1320 }
1321 
1322 /* This function should be called from every offload image when it is loaded.
1323    It gets the descriptor of the host func and var tables HOST_TABLE, the
1324    TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1325 
1326 void
1327 GOMP_offload_register_ver (unsigned version, const void *host_table,
1328 			   int target_type, const void *target_data)
1329 {
1330   int i;
1331 
1332   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1333     gomp_fatal ("Library too old for offload (version %u < %u)",
1334 		GOMP_VERSION, GOMP_VERSION_LIB (version));
1335 
1336   gomp_mutex_lock (&register_lock);
1337 
1338   /* Load image to all initialized devices.  */
1339   for (i = 0; i < num_devices; i++)
1340     {
1341       struct gomp_device_descr *devicep = &devices[i];
1342       gomp_mutex_lock (&devicep->lock);
1343       if (devicep->type == target_type
1344 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1345 	gomp_load_image_to_device (devicep, version,
1346 				   host_table, target_data, true);
1347       gomp_mutex_unlock (&devicep->lock);
1348     }
1349 
1350   /* Insert the image into the array of pending images.  */
1351   offload_images
1352     = gomp_realloc_unlock (offload_images,
1353 			   (num_offload_images + 1)
1354 			   * sizeof (struct offload_image_descr));
1355   offload_images[num_offload_images].version = version;
1356   offload_images[num_offload_images].type = target_type;
1357   offload_images[num_offload_images].host_table = host_table;
1358   offload_images[num_offload_images].target_data = target_data;
1359 
1360   num_offload_images++;
1361   gomp_mutex_unlock (&register_lock);
1362 }
1363 
1364 void
1365 GOMP_offload_register (const void *host_table, int target_type,
1366 		       const void *target_data)
1367 {
1368   GOMP_offload_register_ver (0, host_table, target_type, target_data);
1369 }
1370 
1371 /* This function should be called from every offload image when it is unloaded.
1372    It gets the descriptor of the host func and var tables HOST_TABLE, the
1373    TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1374 
1375 void
1376 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1377 			     int target_type, const void *target_data)
1378 {
1379   int i;
1380 
1381   gomp_mutex_lock (&register_lock);
1382 
1383   /* Unload image from all initialized devices.  */
1384   for (i = 0; i < num_devices; i++)
1385     {
1386       struct gomp_device_descr *devicep = &devices[i];
1387       gomp_mutex_lock (&devicep->lock);
1388       if (devicep->type == target_type
1389 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1390 	gomp_unload_image_from_device (devicep, version,
1391 				       host_table, target_data);
1392       gomp_mutex_unlock (&devicep->lock);
1393     }
1394 
1395   /* Remove image from array of pending images.  */
1396   for (i = 0; i < num_offload_images; i++)
1397     if (offload_images[i].target_data == target_data)
1398       {
1399 	offload_images[i] = offload_images[--num_offload_images];
1400 	break;
1401       }
1402 
1403   gomp_mutex_unlock (&register_lock);
1404 }
1405 
1406 void
1407 GOMP_offload_unregister (const void *host_table, int target_type,
1408 			 const void *target_data)
1409 {
1410   GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1411 }
1412 
1413 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
1414    must be locked on entry, and remains locked on return.  */
1415 
1416 attribute_hidden void
1417 gomp_init_device (struct gomp_device_descr *devicep)
1418 {
1419   int i;
1420   if (!devicep->init_device_func (devicep->target_id))
1421     {
1422       gomp_mutex_unlock (&devicep->lock);
1423       gomp_fatal ("device initialization failed");
1424     }
1425 
1426   /* Load to the device all images registered so far.  */
1427   for (i = 0; i < num_offload_images; i++)
1428     {
1429       struct offload_image_descr *image = &offload_images[i];
1430       if (image->type == devicep->type)
1431 	gomp_load_image_to_device (devicep, image->version,
1432 				   image->host_table, image->target_data,
1433 				   false);
1434     }
1435 
1436   devicep->state = GOMP_DEVICE_INITIALIZED;
1437 }
1438 
1439 attribute_hidden void
1440 gomp_unload_device (struct gomp_device_descr *devicep)
1441 {
1442   if (devicep->state == GOMP_DEVICE_INITIALIZED)
1443     {
1444       unsigned i;
1445 
1446       /* Unload from the device all images currently registered.  */
1447       for (i = 0; i < num_offload_images; i++)
1448 	{
1449 	  struct offload_image_descr *image = &offload_images[i];
1450 	  if (image->type == devicep->type)
1451 	    gomp_unload_image_from_device (devicep, image->version,
1452 					   image->host_table,
1453 					   image->target_data);
1454 	}
1455     }
1456 }
1457 
1458 /* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
1459    locked on return.  */
1460 
1461 attribute_hidden void
1462 gomp_free_memmap (struct splay_tree_s *mem_map)
1463 {
1464   while (mem_map->root)
1465     {
1466       struct target_mem_desc *tgt = mem_map->root->key.tgt;
1467 
1468       splay_tree_remove (mem_map, &mem_map->root->key);
1469       free (tgt->array);
1470       free (tgt);
1471     }
1472 }
1473 
1474 /* Host fallback for GOMP_target{,_ext} routines.  */
1475 
1476 static void
1477 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1478 {
1479   struct gomp_thread old_thr, *thr = gomp_thread ();
1480   old_thr = *thr;
1481   memset (thr, '\0', sizeof (*thr));
1482   if (gomp_places_list)
1483     {
1484       thr->place = old_thr.place;
1485       thr->ts.place_partition_len = gomp_places_list_len;
1486     }
1487   fn (hostaddrs);
1488   gomp_free_thread (thr);
1489   *thr = old_thr;
1490 }
1491 
1492 /* Calculate alignment and size requirements of a private copy of data shared
1493    as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE.  */
1494 
1495 static inline void
1496 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1497 				     unsigned short *kinds, size_t *tgt_align,
1498 				     size_t *tgt_size)
1499 {
1500   size_t i;
1501   for (i = 0; i < mapnum; i++)
1502     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1503       {
1504 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1505 	if (*tgt_align < align)
1506 	  *tgt_align = align;
1507 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1508 	*tgt_size += sizes[i];
1509       }
1510 }
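
/* Note the rounding idiom above: (x + align - 1) & ~(align - 1) rounds X up
   to the next multiple of the power-of-two ALIGN.  For example, for
   *tgt_size == 13 and align == 8, (13 + 7) & ~7 == 20 & ~7 == 16.  */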
1511 
1512 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
1513 
1514 static inline void
1515 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1516 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
1517 			size_t tgt_size)
1518 {
1519   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1520   if (al)
1521     tgt += tgt_align - al;
1522   tgt_size = 0;
1523   size_t i;
1524   for (i = 0; i < mapnum; i++)
1525     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1526       {
1527 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1528 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
1529 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1530 	hostaddrs[i] = tgt + tgt_size;
1531 	tgt_size = tgt_size + sizes[i];
1532       }
1533 }
1534 
1535 /* Helper function of GOMP_target{,_ext} routines.  */
1536 
1537 static void *
1538 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1539 			 void (*host_fn) (void *))
1540 {
1541   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1542     return (void *) host_fn;
1543   else
1544     {
1545       gomp_mutex_lock (&devicep->lock);
1546       if (devicep->state == GOMP_DEVICE_FINALIZED)
1547 	{
1548 	  gomp_mutex_unlock (&devicep->lock);
1549 	  return NULL;
1550 	}
1551 
1552       struct splay_tree_key_s k;
1553       k.host_start = (uintptr_t) host_fn;
1554       k.host_end = k.host_start + 1;
1555       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1556       gomp_mutex_unlock (&devicep->lock);
1557       if (tgt_fn == NULL)
1558 	return NULL;
1559 
1560       return (void *) tgt_fn->tgt_offset;
1561     }
1562 }
1563 
1564 /* Called when encountering a target directive.  If DEVICE
1565    is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
1566    GOMP_DEVICE_HOST_FALLBACK (or any value
1567    larger than the last available hw device), use the host fallback.
1568    FN is the address of the host code, UNUSED is part of the current ABI, but
1569    we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
1570    with MAPNUM entries, containing the addresses of the host objects,
1571    the sizes of the host objects (resp., for pointer kinds, the pointer bias,
1572    with an assumed size of sizeof (void *)) and the map kinds.  */
1573 
1574 void
1575 GOMP_target (int device, void (*fn) (void *), const void *unused,
1576 	     size_t mapnum, void **hostaddrs, size_t *sizes,
1577 	     unsigned char *kinds)
1578 {
1579   struct gomp_device_descr *devicep = resolve_device (device);
1580 
1581   void *fn_addr;
1582   if (devicep == NULL
1583       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1584       /* All shared memory devices should use the GOMP_target_ext function.  */
1585       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1586       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1587     return gomp_target_fallback (fn, hostaddrs);
1588 
1589   struct target_mem_desc *tgt_vars
1590     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1591 		     GOMP_MAP_VARS_TARGET);
1592   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1593 		     NULL);
1594   gomp_unmap_vars (tgt_vars, true);
1595 }
1596 
1597 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1598    and several arguments have been added:
1599    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1600    DEPEND is an array of dependencies, see GOMP_task for details.
1601 
1602    ARGS is a pointer to an array consisting of a variable number of both
1603    device-independent and device-specific arguments, each of which can take
1604    one or two elements: the first specifies for which device it is intended,
1605    the type, and optionally also the value.  If the value is not present in
1606    the first element, the whole second element is the actual value.  The last
1607    element of the array is a single NULL.  Among the device-independent
1608    arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
1609 
1610    NUM_TEAMS is positive if GOMP_teams will be called in the body with
1611    that value, or 1 if the teams construct is not present, or 0 if the
1612    teams construct does not have a num_teams clause and so the choice is
1613    implementation defined, and -1 if it can't be determined on the host
1614    what value GOMP_teams will have on the device.
1615    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1616    body with that value, or 0 if the teams construct does not have a
1617    thread_limit clause or the teams construct is not present, or -1 if it
1618    can't be determined on the host what value GOMP_teams will have on the device.  */
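
/* As an illustration only (this assumes the GOMP_TARGET_ARG_* encoding from
   gomp-constants.h), an ARGS array requesting num_teams == 4 for all devices,
   with the value small enough to be stored inline in the first element,
   might look like:

     void *args[]
       = { (void *) (GOMP_TARGET_ARG_DEVICE_ALL
		     | GOMP_TARGET_ARG_ID_NUM_TEAMS
		     | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
	   NULL };  */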
1619 
1620 void
1621 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1622 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1623 		 unsigned int flags, void **depend, void **args)
1624 {
1625   struct gomp_device_descr *devicep = resolve_device (device);
1626   size_t tgt_align = 0, tgt_size = 0;
1627   bool fpc_done = false;
1628 
1629   if (flags & GOMP_TARGET_FLAG_NOWAIT)
1630     {
1631       struct gomp_thread *thr = gomp_thread ();
1632       /* Create a team if we don't have any around, as nowait
1633 	 target tasks make sense to run asynchronously even when
1634 	 outside of any parallel.  */
1635       if (__builtin_expect (thr->ts.team == NULL, 0))
1636 	{
1637 	  struct gomp_team *team = gomp_new_team (1);
1638 	  struct gomp_task *task = thr->task;
1639 	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1640 	  team->prev_ts = thr->ts;
1641 	  thr->ts.team = team;
1642 	  thr->ts.team_id = 0;
1643 	  thr->ts.work_share = &team->work_shares[0];
1644 	  thr->ts.last_work_share = NULL;
1645 #ifdef HAVE_SYNC_BUILTINS
1646 	  thr->ts.single_count = 0;
1647 #endif
1648 	  thr->ts.static_trip = 0;
1649 	  thr->task = &team->implicit_task[0];
1650 	  gomp_init_task (thr->task, NULL, icv);
1651 	  if (task)
1652 	    {
1653 	      thr->task = task;
1654 	      gomp_end_task ();
1655 	      free (task);
1656 	      thr->task = &team->implicit_task[0];
1657 	    }
1658 	  else
1659 	    pthread_setspecific (gomp_thread_destructor, thr);
1660 	}
1661       if (thr->ts.team
1662 	  && !thr->task->final_task)
1663 	{
1664 	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1665 				   sizes, kinds, flags, depend, args,
1666 				   GOMP_TARGET_TASK_BEFORE_MAP);
1667 	  return;
1668 	}
1669     }
1670 
1671   /* If there are depend clauses, but nowait is not present
1672      (or we are in a final task), block the parent task until the
1673      dependencies are resolved and then just continue with the rest
1674      of the function as if it is a merged task.  */
1675   if (depend != NULL)
1676     {
1677       struct gomp_thread *thr = gomp_thread ();
1678       if (thr->task && thr->task->depend_hash)
1679 	{
1680 	  /* If we might need to wait, copy firstprivate now.  */
1681 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1682 					       &tgt_align, &tgt_size);
1683 	  if (tgt_align)
1684 	    {
1685 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1686 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1687 				      tgt_align, tgt_size);
1688 	    }
1689 	  fpc_done = true;
1690 	  gomp_task_maybe_wait_for_dependencies (depend);
1691 	}
1692     }
1693 
1694   void *fn_addr;
1695   if (devicep == NULL
1696       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1697       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1698       || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1699     {
1700       if (!fpc_done)
1701 	{
1702 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1703 					       &tgt_align, &tgt_size);
1704 	  if (tgt_align)
1705 	    {
1706 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1707 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1708 				      tgt_align, tgt_size);
1709 	    }
1710 	}
1711       gomp_target_fallback (fn, hostaddrs);
1712       return;
1713     }
1714 
1715   struct target_mem_desc *tgt_vars;
1716   if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1717     {
1718       if (!fpc_done)
1719 	{
1720 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1721 					       &tgt_align, &tgt_size);
1722 	  if (tgt_align)
1723 	    {
1724 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1725 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1726 				      tgt_align, tgt_size);
1727 	    }
1728 	}
1729       tgt_vars = NULL;
1730     }
1731   else
1732     tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1733 			      true, GOMP_MAP_VARS_TARGET);
1734   devicep->run_func (devicep->target_id, fn_addr,
1735 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1736 		     args);
1737   if (tgt_vars)
1738     gomp_unmap_vars (tgt_vars, true);
1739 }
1740 
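/* For illustration only, assuming the GOMP_TARGET_ARG_* encoding from
   gomp-constants.h: an ARGS array that requests num_teams 4 and
   thread_limit 64 for all devices, with both values small enough to be
   embedded directly in the first element, could be built as

     void *args[3];
     args[0] = (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
				     | GOMP_TARGET_ARG_NUM_TEAMS
				     | (4 << GOMP_TARGET_ARG_VALUE_SHIFT));
     args[1] = (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
				     | GOMP_TARGET_ARG_THREAD_LIMIT
				     | (64 << GOMP_TARGET_ARG_VALUE_SHIFT));
     args[2] = NULL;

   A value too large for the remaining bits would instead set
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM and occupy a whole second element.  */
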
1741 /* Host fallback for GOMP_target_data{,_ext} routines.  */
1742 
1743 static void
1744 gomp_target_data_fallback (void)
1745 {
1746   struct gomp_task_icv *icv = gomp_icv (false);
1747   if (icv->target_data)
1748     {
1749       /* Even when doing a host fallback, if there are any active
1750          #pragma omp target data constructs, we need to remember the
1751          new #pragma omp target data, otherwise GOMP_target_end_data
1752          would get out of sync.  */
1753       struct target_mem_desc *tgt
1754 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1755 			 GOMP_MAP_VARS_DATA);
1756       tgt->prev = icv->target_data;
1757       icv->target_data = tgt;
1758     }
1759 }
1760 
1761 void
1762 GOMP_target_data (int device, const void *unused, size_t mapnum,
1763 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
1764 {
1765   struct gomp_device_descr *devicep = resolve_device (device);
1766 
1767   if (devicep == NULL
1768       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1769       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1770     return gomp_target_data_fallback ();
1771 
1772   struct target_mem_desc *tgt
1773     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1774 		     GOMP_MAP_VARS_DATA);
1775   struct gomp_task_icv *icv = gomp_icv (true);
1776   tgt->prev = icv->target_data;
1777   icv->target_data = tgt;
1778 }
1779 
1780 void
1781 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1782 		      size_t *sizes, unsigned short *kinds)
1783 {
1784   struct gomp_device_descr *devicep = resolve_device (device);
1785 
1786   if (devicep == NULL
1787       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1788       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1789     return gomp_target_data_fallback ();
1790 
1791   struct target_mem_desc *tgt
1792     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1793 		     GOMP_MAP_VARS_DATA);
1794   struct gomp_task_icv *icv = gomp_icv (true);
1795   tgt->prev = icv->target_data;
1796   icv->target_data = tgt;
1797 }
1798 
1799 void
1800 GOMP_target_end_data (void)
1801 {
1802   struct gomp_task_icv *icv = gomp_icv (false);
1803   if (icv->target_data)
1804     {
1805       struct target_mem_desc *tgt = icv->target_data;
1806       icv->target_data = tgt->prev;
1807       gomp_unmap_vars (tgt, true);
1808     }
1809 }
1810 
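/* Illustrative sketch (not part of the library): a structured

     #pragma omp target data map(to: a[0:n])
     { ... }

   is lowered into a GOMP_target_data_ext call on entry, which pushes the
   new mapping onto the ICV's target_data chain, and a matching
   GOMP_target_end_data call on exit, which pops the chain and unmaps.  */
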
1811 void
1812 GOMP_target_update (int device, const void *unused, size_t mapnum,
1813 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
1814 {
1815   struct gomp_device_descr *devicep = resolve_device (device);
1816 
1817   if (devicep == NULL
1818       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1819       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1820     return;
1821 
1822   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1823 }
1824 
1825 void
1826 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1827 			size_t *sizes, unsigned short *kinds,
1828 			unsigned int flags, void **depend)
1829 {
1830   struct gomp_device_descr *devicep = resolve_device (device);
1831 
1832   /* If there are depend clauses, but nowait is not present,
1833      block the parent task until the dependencies are resolved
1834      and then just continue with the rest of the function as if it
1835      is a merged task.  Until we are able to schedule tasks during
1836      variable mapping or unmapping, ignore nowait if depend clauses
1837      are not present.  */
1838   if (depend != NULL)
1839     {
1840       struct gomp_thread *thr = gomp_thread ();
1841       if (thr->task && thr->task->depend_hash)
1842 	{
1843 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1844 	      && thr->ts.team
1845 	      && !thr->task->final_task)
1846 	    {
1847 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1848 					   mapnum, hostaddrs, sizes, kinds,
1849 					   flags | GOMP_TARGET_FLAG_UPDATE,
1850 					   depend, NULL, GOMP_TARGET_TASK_DATA))
1851 		return;
1852 	    }
1853 	  else
1854 	    {
1855 	      struct gomp_team *team = thr->ts.team;
1856 	      /* If parallel or taskgroup has been cancelled, don't start new
1857 		 tasks.  */
1858 	      if (team
1859 		  && (gomp_team_barrier_cancelled (&team->barrier)
1860 		      || (thr->task->taskgroup
1861 			  && thr->task->taskgroup->cancelled)))
1862 		return;
1863 
1864 	      gomp_task_maybe_wait_for_dependencies (depend);
1865 	    }
1866 	}
1867     }
1868 
1869   if (devicep == NULL
1870       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1871       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1872     return;
1873 
1874   struct gomp_thread *thr = gomp_thread ();
1875   struct gomp_team *team = thr->ts.team;
1876   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
1877   if (team
1878       && (gomp_team_barrier_cancelled (&team->barrier)
1879 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1880     return;
1881 
1882   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1883 }
1884 
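/* For example, a directive such as

     #pragma omp target update from(x[0:n]) depend(inout: x) nowait

   reaches GOMP_target_update_ext above with GOMP_TARGET_FLAG_NOWAIT set and
   a non-NULL DEPEND array, so (given an enclosing team) it can be queued as
   a GOMP_TARGET_TASK_DATA target task instead of updating synchronously.  */
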
1885 static void
1886 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1887 		void **hostaddrs, size_t *sizes, unsigned short *kinds)
1888 {
1889   const int typemask = 0xff;
1890   size_t i;
1891   gomp_mutex_lock (&devicep->lock);
1892   if (devicep->state == GOMP_DEVICE_FINALIZED)
1893     {
1894       gomp_mutex_unlock (&devicep->lock);
1895       return;
1896     }
1897 
1898   for (i = 0; i < mapnum; i++)
1899     {
1900       struct splay_tree_key_s cur_node;
1901       unsigned char kind = kinds[i] & typemask;
1902       switch (kind)
1903 	{
1904 	case GOMP_MAP_FROM:
1905 	case GOMP_MAP_ALWAYS_FROM:
1906 	case GOMP_MAP_DELETE:
1907 	case GOMP_MAP_RELEASE:
1908 	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1909 	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1910 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
1911 	  cur_node.host_end = cur_node.host_start + sizes[i];
1912 	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1913 			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1914 	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1915 	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
1916 	  if (!k)
1917 	    continue;
1918 
1919 	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1920 	    k->refcount--;
1921 	  if ((kind == GOMP_MAP_DELETE
1922 	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1923 	      && k->refcount != REFCOUNT_INFINITY)
1924 	    k->refcount = 0;
1925 
1926 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1927 	      || kind == GOMP_MAP_ALWAYS_FROM)
1928 	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1929 				(void *) (k->tgt->tgt_start + k->tgt_offset
1930 					  + cur_node.host_start
1931 					  - k->host_start),
1932 				cur_node.host_end - cur_node.host_start);
1933 	  if (k->refcount == 0)
1934 	    {
1935 	      splay_tree_remove (&devicep->mem_map, k);
1936 	      if (k->link_key)
1937 		splay_tree_insert (&devicep->mem_map,
1938 				   (splay_tree_node) k->link_key);
1939 	      if (k->tgt->refcount > 1)
1940 		k->tgt->refcount--;
1941 	      else
1942 		gomp_unmap_tgt (k->tgt);
1943 	    }
1944 
1945 	  break;
1946 	default:
1947 	  gomp_mutex_unlock (&devicep->lock);
1948 	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1949 		      kind);
1950 	}
1951     }
1952 
1953   gomp_mutex_unlock (&devicep->lock);
1954 }
1955 
1956 void
1957 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1958 			     size_t *sizes, unsigned short *kinds,
1959 			     unsigned int flags, void **depend)
1960 {
1961   struct gomp_device_descr *devicep = resolve_device (device);
1962 
1963   /* If there are depend clauses, but nowait is not present,
1964      block the parent task until the dependencies are resolved
1965      and then just continue with the rest of the function as if it
1966      is a merged task.  Until we are able to schedule tasks during
1967      variable mapping or unmapping, ignore nowait if depend clauses
1968      are not present.  */
1969   if (depend != NULL)
1970     {
1971       struct gomp_thread *thr = gomp_thread ();
1972       if (thr->task && thr->task->depend_hash)
1973 	{
1974 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1975 	      && thr->ts.team
1976 	      && !thr->task->final_task)
1977 	    {
1978 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1979 					   mapnum, hostaddrs, sizes, kinds,
1980 					   flags, depend, NULL,
1981 					   GOMP_TARGET_TASK_DATA))
1982 		return;
1983 	    }
1984 	  else
1985 	    {
1986 	      struct gomp_team *team = thr->ts.team;
1987 	      /* If parallel or taskgroup has been cancelled, don't start new
1988 		 tasks.  */
1989 	      if (team
1990 		  && (gomp_team_barrier_cancelled (&team->barrier)
1991 		      || (thr->task->taskgroup
1992 			  && thr->task->taskgroup->cancelled)))
1993 		return;
1994 
1995 	      gomp_task_maybe_wait_for_dependencies (depend);
1996 	    }
1997 	}
1998     }
1999 
2000   if (devicep == NULL
2001       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2002       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2003     return;
2004 
2005   struct gomp_thread *thr = gomp_thread ();
2006   struct gomp_team *team = thr->ts.team;
2007   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2008   if (team
2009       && (gomp_team_barrier_cancelled (&team->barrier)
2010 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
2011     return;
2012 
2013   size_t i;
2014   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2015     for (i = 0; i < mapnum; i++)
2016       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2017 	{
2018 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2019 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2020 	  i += sizes[i];
2021 	}
2022       else
2023 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2024 		       true, GOMP_MAP_VARS_ENTER_DATA);
2025   else
2026     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2027 }
2028 
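/* Illustrative sketch (not part of the library): both

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target exit data map(from: a[0:n])

   funnel into GOMP_target_enter_exit_data above.  The exit form sets
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS and therefore takes the
   gomp_exit_data path; the enter form maps each clause (with whole
   GOMP_MAP_STRUCT groups mapped together) using GOMP_MAP_VARS_ENTER_DATA.  */
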
2029 bool
2030 gomp_target_task_fn (void *data)
2031 {
2032   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2033   struct gomp_device_descr *devicep = ttask->devicep;
2034 
2035   if (ttask->fn != NULL)
2036     {
2037       void *fn_addr;
2038       if (devicep == NULL
2039 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2040 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2041 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2042 	{
2043 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
2044 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2045 	  return false;
2046 	}
2047 
2048       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2049 	{
2050 	  if (ttask->tgt)
2051 	    gomp_unmap_vars (ttask->tgt, true);
2052 	  return false;
2053 	}
2054 
2055       void *actual_arguments;
2056       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2057 	{
2058 	  ttask->tgt = NULL;
2059 	  actual_arguments = ttask->hostaddrs;
2060 	}
2061       else
2062 	{
2063 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2064 				      NULL, ttask->sizes, ttask->kinds, true,
2065 				      GOMP_MAP_VARS_TARGET);
2066 	  actual_arguments = (void *) ttask->tgt->tgt_start;
2067 	}
2068       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2069 
2070       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2071 			       ttask->args, (void *) ttask);
2072       return true;
2073     }
2074   else if (devicep == NULL
2075 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2076 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2077     return false;
2078 
2079   size_t i;
2080   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2081     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2082 		 ttask->kinds, true);
2083   else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2084     for (i = 0; i < ttask->mapnum; i++)
2085       if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2086 	{
2087 	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2088 			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2089 			 GOMP_MAP_VARS_ENTER_DATA);
2090 	  i += ttask->sizes[i];
2091 	}
2092       else
2093 	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2094 		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2095   else
2096     gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2097 		    ttask->kinds);
2098   return false;
2099 }
2100 
2101 void
2102 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2103 {
2104   if (thread_limit)
2105     {
2106       struct gomp_task_icv *icv = gomp_icv (true);
2107       icv->thread_limit_var
2108 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2109     }
2110   (void) num_teams;
2111 }
2112 
2113 void *
2114 omp_target_alloc (size_t size, int device_num)
2115 {
2116   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2117     return malloc (size);
2118 
2119   if (device_num < 0)
2120     return NULL;
2121 
2122   struct gomp_device_descr *devicep = resolve_device (device_num);
2123   if (devicep == NULL)
2124     return NULL;
2125 
2126   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2127       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2128     return malloc (size);
2129 
2130   gomp_mutex_lock (&devicep->lock);
2131   void *ret = devicep->alloc_func (devicep->target_id, size);
2132   gomp_mutex_unlock (&devicep->lock);
2133   return ret;
2134 }
2135 
2136 void
2137 omp_target_free (void *device_ptr, int device_num)
2138 {
2139   if (device_ptr == NULL)
2140     return;
2141 
2142   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2143     {
2144       free (device_ptr);
2145       return;
2146     }
2147 
2148   if (device_num < 0)
2149     return;
2150 
2151   struct gomp_device_descr *devicep = resolve_device (device_num);
2152   if (devicep == NULL)
2153     return;
2154 
2155   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2156       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2157     {
2158       free (device_ptr);
2159       return;
2160     }
2161 
2162   gomp_mutex_lock (&devicep->lock);
2163   gomp_free_device_memory (devicep, device_ptr);
2164   gomp_mutex_unlock (&devicep->lock);
2165 }
2166 
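/* A minimal usage sketch for the two routines above (omp_get_default_device
   is declared in <omp.h>):

     int dev = omp_get_default_device ();
     void *p = omp_target_alloc (1024, dev);
     if (p != NULL)
       {
	 ...use p, e.g. in target regions via is_device_ptr...
	 omp_target_free (p, dev);
       }
 */
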
2167 int
2168 omp_target_is_present (void *ptr, int device_num)
2169 {
2170   if (ptr == NULL)
2171     return 1;
2172 
2173   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2174     return 1;
2175 
2176   if (device_num < 0)
2177     return 0;
2178 
2179   struct gomp_device_descr *devicep = resolve_device (device_num);
2180   if (devicep == NULL)
2181     return 0;
2182 
2183   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2184       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2185     return 1;
2186 
2187   gomp_mutex_lock (&devicep->lock);
2188   struct splay_tree_s *mem_map = &devicep->mem_map;
2189   struct splay_tree_key_s cur_node;
2190 
2191   cur_node.host_start = (uintptr_t) ptr;
2192   cur_node.host_end = cur_node.host_start;
2193   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2194   int ret = n != NULL;
2195   gomp_mutex_unlock (&devicep->lock);
2196   return ret;
2197 }
2198 
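/* A usage sketch for the routine above: with a[0:n] mapped onto DEV by an
   enclosing target data region,

     int present = omp_target_is_present (&a[0], dev);

   returns nonzero, since the lookup finds &a[0] inside a mapped block
   (zero-length array sections are matched via gomp_map_0len_lookup).  */
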
2199 int
2200 omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2201 		   size_t src_offset, int dst_device_num, int src_device_num)
2202 {
2203   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2204   bool ret;
2205 
2206   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2207     {
2208       if (dst_device_num < 0)
2209 	return EINVAL;
2210 
2211       dst_devicep = resolve_device (dst_device_num);
2212       if (dst_devicep == NULL)
2213 	return EINVAL;
2214 
2215       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2216 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2217 	dst_devicep = NULL;
2218     }
2219   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2220     {
2221       if (src_device_num < 0)
2222 	return EINVAL;
2223 
2224       src_devicep = resolve_device (src_device_num);
2225       if (src_devicep == NULL)
2226 	return EINVAL;
2227 
2228       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2229 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2230 	src_devicep = NULL;
2231     }
2232   if (src_devicep == NULL && dst_devicep == NULL)
2233     {
2234       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2235       return 0;
2236     }
2237   if (src_devicep == NULL)
2238     {
2239       gomp_mutex_lock (&dst_devicep->lock);
2240       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2241 					(char *) dst + dst_offset,
2242 					(char *) src + src_offset, length);
2243       gomp_mutex_unlock (&dst_devicep->lock);
2244       return (ret ? 0 : EINVAL);
2245     }
2246   if (dst_devicep == NULL)
2247     {
2248       gomp_mutex_lock (&src_devicep->lock);
2249       ret = src_devicep->dev2host_func (src_devicep->target_id,
2250 					(char *) dst + dst_offset,
2251 					(char *) src + src_offset, length);
2252       gomp_mutex_unlock (&src_devicep->lock);
2253       return (ret ? 0 : EINVAL);
2254     }
2255   if (src_devicep == dst_devicep)
2256     {
2257       gomp_mutex_lock (&src_devicep->lock);
2258       ret = src_devicep->dev2dev_func (src_devicep->target_id,
2259 				       (char *) dst + dst_offset,
2260 				       (char *) src + src_offset, length);
2261       gomp_mutex_unlock (&src_devicep->lock);
2262       return (ret ? 0 : EINVAL);
2263     }
2264   return EINVAL;
2265 }
2266 
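/* A usage sketch for the routine above: copy N bytes of host buffer SRC
   into a device allocation, using omp_get_initial_device () as the host
   device number:

     void *dst = omp_target_alloc (n, dev);
     int err = omp_target_memcpy (dst, src, n, 0, 0, dev,
				  omp_get_initial_device ());

   A return of 0 means success; EINVAL reports an invalid device number or
   a failed transfer.  */
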
2267 static int
2268 omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
2269 			       int num_dims, const size_t *volume,
2270 			       const size_t *dst_offsets,
2271 			       const size_t *src_offsets,
2272 			       const size_t *dst_dimensions,
2273 			       const size_t *src_dimensions,
2274 			       struct gomp_device_descr *dst_devicep,
2275 			       struct gomp_device_descr *src_devicep)
2276 {
2277   size_t dst_slice = element_size;
2278   size_t src_slice = element_size;
2279   size_t j, dst_off, src_off, length;
2280   int i, ret;
2281 
2282   if (num_dims == 1)
2283     {
2284       if (__builtin_mul_overflow (element_size, volume[0], &length)
2285 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2286 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2287 	return EINVAL;
2288       if (dst_devicep == NULL && src_devicep == NULL)
2289 	{
2290 	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
2291 	  ret = 1;
2292 	}
2293       else if (src_devicep == NULL)
2294 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2295 					  (char *) dst + dst_off,
2296 					  (char *) src + src_off, length);
2297       else if (dst_devicep == NULL)
2298 	ret = src_devicep->dev2host_func (src_devicep->target_id,
2299 					  (char *) dst + dst_off,
2300 					  (char *) src + src_off, length);
2301       else if (src_devicep == dst_devicep)
2302 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
2303 					 (char *) dst + dst_off,
2304 					 (char *) src + src_off, length);
2305       else
2306 	ret = 0;
2307       return ret ? 0 : EINVAL;
2308     }
2309 
2310   /* FIXME: it would be nice to have some plugin function to handle
2311      num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
2312      be handled in the generic recursion below, and for host-to-host copies
2313      the generic recursion should be used even for num_dims >= 2.  */
2314 
2315   for (i = 1; i < num_dims; i++)
2316     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2317 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2318       return EINVAL;
2319   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2320       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2321     return EINVAL;
2322   for (j = 0; j < volume[0]; j++)
2323     {
2324       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2325 					   (char *) src + src_off,
2326 					   element_size, num_dims - 1,
2327 					   volume + 1, dst_offsets + 1,
2328 					   src_offsets + 1, dst_dimensions + 1,
2329 					   src_dimensions + 1, dst_devicep,
2330 					   src_devicep);
2331       if (ret)
2332 	return ret;
2333       dst_off += dst_slice;
2334       src_off += src_slice;
2335     }
2336   return 0;
2337 }
2338 
2339 int
2340 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2341 			int num_dims, const size_t *volume,
2342 			const size_t *dst_offsets,
2343 			const size_t *src_offsets,
2344 			const size_t *dst_dimensions,
2345 			const size_t *src_dimensions,
2346 			int dst_device_num, int src_device_num)
2347 {
2348   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2349 
2350   if (!dst && !src)	/* A NULL/NULL query reports supported dimensions.  */
2351     return INT_MAX;
2352 
2353   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2354     {
2355       if (dst_device_num < 0)
2356 	return EINVAL;
2357 
2358       dst_devicep = resolve_device (dst_device_num);
2359       if (dst_devicep == NULL)
2360 	return EINVAL;
2361 
2362       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2363 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2364 	dst_devicep = NULL;
2365     }
2366   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2367     {
2368       if (src_device_num < 0)
2369 	return EINVAL;
2370 
2371       src_devicep = resolve_device (src_device_num);
2372       if (src_devicep == NULL)
2373 	return EINVAL;
2374 
2375       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2376 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2377 	src_devicep = NULL;
2378     }
2379 
2380   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2381     return EINVAL;
2382 
2383   if (src_devicep)
2384     gomp_mutex_lock (&src_devicep->lock);
2385   else if (dst_devicep)
2386     gomp_mutex_lock (&dst_devicep->lock);
2387   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2388 					   volume, dst_offsets, src_offsets,
2389 					   dst_dimensions, src_dimensions,
2390 					   dst_devicep, src_devicep);
2391   if (src_devicep)
2392     gomp_mutex_unlock (&src_devicep->lock);
2393   else if (dst_devicep)
2394     gomp_mutex_unlock (&dst_devicep->lock);
2395   return ret;
2396 }
2397 
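/* A usage sketch for the routine above: copy a 2x3 sub-rectangle starting
   at row 1, column 2 between two 4x5 arrays of int on SRC_DEV and DST_DEV
   (both host, both the same device, or one of each):

     size_t volume[2]  = { 2, 3 };
     size_t offsets[2] = { 1, 2 };
     size_t dims[2]    = { 4, 5 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (int), 2, volume,
				       offsets, offsets, dims, dims,
				       dst_dev, src_dev);

   The worker above then copies volume[1] elements per row, advancing by one
   dst/src slice (a whole row) per iteration of the outer dimension.  */
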
2398 int
2399 omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2400 			  size_t device_offset, int device_num)
2401 {
2402   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2403     return EINVAL;
2404 
2405   if (device_num < 0)
2406     return EINVAL;
2407 
2408   struct gomp_device_descr *devicep = resolve_device (device_num);
2409   if (devicep == NULL)
2410     return EINVAL;
2411 
2412   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2413       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2414     return EINVAL;
2415 
2416   gomp_mutex_lock (&devicep->lock);
2417 
2418   struct splay_tree_s *mem_map = &devicep->mem_map;
2419   struct splay_tree_key_s cur_node;
2420   int ret = EINVAL;
2421 
2422   cur_node.host_start = (uintptr_t) host_ptr;
2423   cur_node.host_end = cur_node.host_start + size;
2424   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2425   if (n)
2426     {
2427       if (n->tgt->tgt_start + n->tgt_offset
2428 	  == (uintptr_t) device_ptr + device_offset
2429 	  && n->host_start <= cur_node.host_start
2430 	  && n->host_end >= cur_node.host_end)
2431 	ret = 0;
2432     }
2433   else
2434     {
2435       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2436       tgt->array = gomp_malloc (sizeof (*tgt->array));
2437       tgt->refcount = 1;
2438       tgt->tgt_start = 0;
2439       tgt->tgt_end = 0;
2440       tgt->to_free = NULL;
2441       tgt->prev = NULL;
2442       tgt->list_count = 0;
2443       tgt->device_descr = devicep;
2444       splay_tree_node array = tgt->array;
2445       splay_tree_key k = &array->key;
2446       k->host_start = cur_node.host_start;
2447       k->host_end = cur_node.host_end;
2448       k->tgt = tgt;
2449       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2450       k->refcount = REFCOUNT_INFINITY;
2451       array->left = NULL;
2452       array->right = NULL;
2453       splay_tree_insert (&devicep->mem_map, array);
2454       ret = 0;
2455     }
2456   gomp_mutex_unlock (&devicep->lock);
2457   return ret;
2458 }
2459 
2460 int
2461 omp_target_disassociate_ptr (void *ptr, int device_num)
2462 {
2463   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2464     return EINVAL;
2465 
2466   if (device_num < 0)
2467     return EINVAL;
2468 
2469   struct gomp_device_descr *devicep = resolve_device (device_num);
2470   if (devicep == NULL)
2471     return EINVAL;
2472 
2473   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2474     return EINVAL;
2475 
2476   gomp_mutex_lock (&devicep->lock);
2477 
2478   struct splay_tree_s *mem_map = &devicep->mem_map;
2479   struct splay_tree_key_s cur_node;
2480   int ret = EINVAL;
2481 
2482   cur_node.host_start = (uintptr_t) ptr;
2483   cur_node.host_end = cur_node.host_start;
2484   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2485   if (n
2486       && n->host_start == cur_node.host_start
2487       && n->refcount == REFCOUNT_INFINITY
2488       && n->tgt->tgt_start == 0
2489       && n->tgt->to_free == NULL
2490       && n->tgt->refcount == 1
2491       && n->tgt->list_count == 0)
2492     {
2493       splay_tree_remove (&devicep->mem_map, n);
2494       gomp_unmap_tgt (n->tgt);
2495       ret = 0;
2496     }
2497 
2498   gomp_mutex_unlock (&devicep->lock);
2499   return ret;
2500 }
2501 
2502 #ifdef PLUGIN_SUPPORT
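/* A usage sketch pairing the two routines above: publish an existing device
   allocation as the device counterpart of a host buffer, then undo it:

     void *d = omp_target_alloc (n, dev);
     if (omp_target_associate_ptr (buf, d, n, 0, dev) == 0)
       {
	 ...target regions on DEV now treat buf[0..n-1] as present,
	    using d without any implicit copies...
	 omp_target_disassociate_ptr (buf, dev);
       }
     omp_target_free (d, dev);
 */
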
2503 
2504 /* This function tries to load a plugin for DEVICE.  The name of the
2505    plugin is passed in PLUGIN_NAME.
2506    The handles of the found functions are stored in the corresponding fields
2507    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
2508 
2509 static bool
2510 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2511 			     const char *plugin_name)
2512 {
2513   const char *err = NULL, *last_missing = NULL;
2514 
2515   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2516   if (!plugin_handle)
2517     goto dl_fail;
2518 
2519   /* Check if all required functions are available in the plugin and store
2520      their handles.  None of the symbols can legitimately be NULL,
2521      so we don't need to check dlerror all the time.  */
2522 #define DLSYM(f)							\
2523   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
2524     goto dl_fail
2525   /* Similar, but missing functions are not an error.  Evaluates to false
2526      if the lookup failed, true otherwise.  */
2527 #define DLSYM_OPT(f, n)							\
2528   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
2529    || (last_missing = #n, 0))
2530 
2531   DLSYM (version);
2532   if (device->version_func () != GOMP_VERSION)
2533     {
2534       err = "plugin version mismatch";
2535       goto fail;
2536     }
2537 
2538   DLSYM (get_name);
2539   DLSYM (get_caps);
2540   DLSYM (get_type);
2541   DLSYM (get_num_devices);
2542   DLSYM (init_device);
2543   DLSYM (fini_device);
2544   DLSYM (load_image);
2545   DLSYM (unload_image);
2546   DLSYM (alloc);
2547   DLSYM (free);
2548   DLSYM (dev2host);
2549   DLSYM (host2dev);
2550   device->capabilities = device->get_caps_func ();
2551   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2552     {
2553       DLSYM (run);
2554       DLSYM (async_run);
2555       DLSYM_OPT (can_run, can_run);
2556       DLSYM (dev2dev);
2557     }
2558   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2559     {
2560       if (!DLSYM_OPT (openacc.exec, openacc_exec)
2561 	  || !DLSYM_OPT (openacc.register_async_cleanup,
2562 			 openacc_register_async_cleanup)
2563 	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2564 	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2565 	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2566 	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2567 	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2568 	  || !DLSYM_OPT (openacc.async_wait_all_async,
2569 			 openacc_async_wait_all_async)
2570 	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2571 	  || !DLSYM_OPT (openacc.create_thread_data,
2572 			 openacc_create_thread_data)
2573 	  || !DLSYM_OPT (openacc.destroy_thread_data,
2574 			 openacc_destroy_thread_data))
2575 	{
2576 	  /* Require all the OpenACC handlers if we have
2577 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
2578 	  err = "plugin missing OpenACC handler function";
2579 	  goto fail;
2580 	}
2581 
2582       unsigned cuda = 0;
2583       cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2584 			 openacc_cuda_get_current_device);
2585       cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2586 			 openacc_cuda_get_current_context);
2587       cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2588       cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2589       if (cuda && cuda != 4)
2590 	{
2591 	  /* Make sure all the CUDA functions are there if any of them are.  */
2592 	  err = "plugin missing OpenACC CUDA handler function";
2593 	  goto fail;
2594 	}
2595     }
2596 #undef DLSYM
2597 #undef DLSYM_OPT
2598 
2599   return 1;
2600 
2601  dl_fail:
2602   err = dlerror ();
2603  fail:
2604   gomp_error ("while loading %s: %s", plugin_name, err);
2605   if (last_missing)
2606     gomp_error ("missing function was %s", last_missing);
2607   if (plugin_handle)
2608     dlclose (plugin_handle);
2609 
2610   return 0;
2611 }
2612 
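/* For illustration, the skeleton of a plugin satisfying the mandatory DLSYM
   lookups above ("mytgt" is a made-up target name; every symbol is exported
   with C linkage from libgomp-plugin-mytgt.so.1):

     unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; }
     const char *GOMP_OFFLOAD_get_name (void) { return "mytgt"; }
     unsigned GOMP_OFFLOAD_get_caps (void)
     { return GOMP_OFFLOAD_CAP_OPENMP_400; }
     int GOMP_OFFLOAD_get_num_devices (void) { return 1; }
     ...

   with get_type, init_device, fini_device, load_image, unload_image, alloc,
   free, dev2host and host2dev defined likewise, plus run, async_run and
   dev2dev once the OpenMP capability bit is advertised.  */
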
2613 /* This function finalizes all initialized devices.  */
2614 
2615 static void
2616 gomp_target_fini (void)
2617 {
2618   int i;
2619   for (i = 0; i < num_devices; i++)
2620     {
2621       bool ret = true;
2622       struct gomp_device_descr *devicep = &devices[i];
2623       gomp_mutex_lock (&devicep->lock);
2624       if (devicep->state == GOMP_DEVICE_INITIALIZED)
2625 	{
2626 	  ret = devicep->fini_device_func (devicep->target_id);
2627 	  devicep->state = GOMP_DEVICE_FINALIZED;
2628 	}
2629       gomp_mutex_unlock (&devicep->lock);
2630       if (!ret)
2631 	gomp_fatal ("device finalization failed");
2632     }
2633 }
2634 
2635 /* This function initializes the runtime needed for offloading.
2636    It parses the list of offload targets and tries to load the plugins for
2637    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2638    will be set, and the array DEVICES initialized, containing descriptors for
2639    the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
2640    followed by the others.  */
2641 
2642 static void
2643 gomp_target_init (void)
2644 {
2645   const char *prefix = "libgomp-plugin-";
2646   const char *suffix = SONAME_SUFFIX (1);
2647   const char *cur, *next;
2648   char *plugin_name;
2649   int i, new_num_devices;
2650 
2651   num_devices = 0;
2652   devices = NULL;
2653 
2654   cur = OFFLOAD_TARGETS;
2655   if (*cur)
2656     do
2657       {
2658 	struct gomp_device_descr current_device;
2659 	size_t prefix_len, suffix_len, cur_len;
2660 
2661 	next = strchr (cur, ',');
2662 
2663 	prefix_len = strlen (prefix);
2664 	cur_len = next ? next - cur : strlen (cur);
2665 	suffix_len = strlen (suffix);
2666 
2667 	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2668 	if (!plugin_name)
2669 	  {
2670 	    num_devices = 0;
2671 	    break;
2672 	  }
2673 
2674 	memcpy (plugin_name, prefix, prefix_len);
2675 	memcpy (plugin_name + prefix_len, cur, cur_len);
2676 	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2677 
2678 	if (gomp_load_plugin_for_device (&current_device, plugin_name))
2679 	  {
2680 	    new_num_devices = current_device.get_num_devices_func ();
2681 	    if (new_num_devices >= 1)
2682 	      {
2683 		/* Augment DEVICES and NUM_DEVICES.  */
2684 
2685 		devices = realloc (devices, (num_devices + new_num_devices)
2686 				   * sizeof (struct gomp_device_descr));
2687 		if (!devices)
2688 		  {
2689 		    num_devices = 0;
2690 		    free (plugin_name);
2691 		    break;
2692 		  }
2693 
2694 		current_device.name = current_device.get_name_func ();
2695 		/* current_device.capabilities has already been set.  */
2696 		current_device.type = current_device.get_type_func ();
2697 		current_device.mem_map.root = NULL;
2698 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
2699 		current_device.openacc.data_environ = NULL;
2700 		for (i = 0; i < new_num_devices; i++)
2701 		  {
2702 		    current_device.target_id = i;
2703 		    devices[num_devices] = current_device;
2704 		    gomp_mutex_init (&devices[num_devices].lock);
2705 		    num_devices++;
2706 		  }
2707 	      }
2708 	  }
2709 
2710 	free (plugin_name);
2711 	cur = next + 1;
2712       }
2713     while (next);
2714 
2715   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2716      NUM_DEVICES_OPENMP.  */
2717   struct gomp_device_descr *devices_s
2718     = malloc (num_devices * sizeof (struct gomp_device_descr));
2719   if (!devices_s)
2720     {
2721       num_devices = 0;
2722       free (devices);
2723       devices = NULL;
2724     }
2725   num_devices_openmp = 0;
2726   for (i = 0; i < num_devices; i++)
2727     if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2728       devices_s[num_devices_openmp++] = devices[i];
2729   int num_devices_after_openmp = num_devices_openmp;
2730   for (i = 0; i < num_devices; i++)
2731     if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2732       devices_s[num_devices_after_openmp++] = devices[i];
2733   free (devices);
2734   devices = devices_s;
2735 
2736   for (i = 0; i < num_devices; i++)
2737     {
2738       /* The 'devices' array can be moved (by the realloc call) until we have
2739 	 found all the plugins, so registering with the OpenACC runtime (which
2740 	 takes a copy of the pointer argument) must be delayed until now.  */
2741       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2742 	goacc_register (&devices[i]);
2743     }
2744 
2745   if (atexit (gomp_target_fini) != 0)
2746     gomp_fatal ("atexit failed");
2747 }
2748 
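/* For example, were OFFLOAD_TARGETS the string "nvptx,hsa", the loop above
   would try to dlopen "libgomp-plugin-nvptx.so.1" and
   "libgomp-plugin-hsa.so.1" (SONAME_SUFFIX (1) supplying the ".so.1" part
   on ELF targets); each plugin that loads and reports at least one device
   contributes that many entries to DEVICES.  */
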
2749 #else /* PLUGIN_SUPPORT */
2750 /* If dlfcn.h is unavailable we always fall back to host execution.
2751    The GOMP_target* routines are just stubs for this case.  */
2752 static void
2753 gomp_target_init (void)
2754 {
2755 }
2756 #endif /* PLUGIN_SUPPORT */
2757