/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


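/* Look up KEY in MEM_MAP.  For a zero-length key (host_start == host_end),
   first probe for a mapping containing the address itself, then for one
   ending exactly at it, and finally fall back to an exact zero-length
   lookup.  */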
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
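
/* As a sketch of the coalescing policy (the numbers below are illustrative
   only): with a 4 KiB MAX_COALESCE_BUF_GAP, two to-device regions at device
   offsets [0..100) and [612..712) end up in a single chunk [0..712); the
   512-byte gap is simply copied along with them.  A further region starting
   8 KiB later opens a new chunk, and any region larger than
   MAX_COALESCE_BUF_SIZE bypasses the buffer entirely.  */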

/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

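/* Copy SZ bytes from host address H to device address D.  If CBUF is
   non-NULL and D falls inside one of its chunks, only memcpy into the
   coalescing buffer; the actual host-to-device transfer is performed later
   by gomp_map_vars_internal.  Otherwise copy directly, asynchronously if AQ
   is non-NULL.  */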
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

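/* Copy SZ bytes from device address D back to host address H,
   asynchronously if AQ is non-NULL.  */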
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

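/* Return the mapping kind for entry IDX of KINDS, which is an array of
   unsigned shorts if SHORT_MAPKIND, and of unsigned chars otherwise.  */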
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

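/* Initialize the pointer stored at device offset TARGET_OFFSET of TGT: a
   NULL host pointer translates to a NULL device pointer; anything else
   (adjusted by BIAS) must already be mapped, and its device address (minus
   BIAS) is written.  */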
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

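/* Increment the attachment count for the pointer at ATTACH_TO inside the
   mapped region N.  On the first attachment, the pointer's target (located
   via ATTACH_TO's host value plus BIAS, which must already be mapped) has
   its device address written over the device copy of the pointer.  */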
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}

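/* Decrement the attachment count for the pointer at DETACH_FROM inside the
   mapped region N (forcing it to zero if FINALIZE).  On the last detachment
   the device copy of the pointer is restored to its host value.  */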
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

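/* Return the device address to use for entry I of TGT's list: the mapped
   object's device address if one exists, otherwise a value derived from the
   recorded offset (an inlined host value, a struct base computed from the
   following entry, or an offset into the newly allocated block).  */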
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

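/* Map MAPNUM host regions described by HOSTADDRS/SIZES/KINDS onto DEVICEP.
   This works in two passes: the first pass resolves already-mapped regions
   and computes the size and alignment of a single device block for the
   missing ones (feeding candidate regions to the coalescing buffer); that
   block is then allocated, and the second pass inserts the new splay-tree
   entries and performs the host-to-device copies, staged through the
   coalescing buffer where profitable.  */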
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence skip the entry, including its MAP_POINTER
		 if present.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily test
		   'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above. Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */
		    }
		  else
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  gomp_attach_pointer (devicep, aq, mem_map, n,
				       (uintptr_t) hostaddrs[i], sizes[i],
				       cbufp);
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  tgt->list[j].is_attach = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

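/* Release the device memory block backing TGT (if any) and TGT's host
   bookkeeping.  */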
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

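/* Drop one reference to the target_mem_desc in PTR, unmapping it when the
   last reference goes away.  Return true iff it was unmapped.  */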
static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (&devicep->mem_map,
			   (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

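/* Implement the "target update" data movement: for each of the MAPNUM
   regions present in DEVICEP's memory map, copy to and/or from the device
   according to the region's mapping kind.  */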
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

1563 /* Load the image pointed to by TARGET_DATA to the device specified by
1564    DEVICEP, and insert into the splay tree the mappings between addresses
1565    from HOST_TABLE and from the loaded target image.  We rely on the host
1566    and device compilers emitting variables and functions in the same order.  */
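
/* Illustrative sketch (hypothetical symbol names, not ABI documentation):
   the host table consists of four pointers delimiting two arrays, e.g.

     extern void *__offload_func_table[], *__offload_funcs_end[];
     extern void *__offload_var_table[], *__offload_vars_end[];
     const void *host_table[] = { __offload_func_table, __offload_funcs_end,
				  __offload_var_table, __offload_vars_end };

   The function table holds host function addresses, the variable table
   (address, size) pairs.  The symbol names above are made up; the compiler
   emits equivalent tables.  */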
1567 
1568 static void
1569 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1570 			   const void *host_table, const void *target_data,
1571 			   bool is_register_lock)
1572 {
1573   void **host_func_table = ((void ***) host_table)[0];
1574   void **host_funcs_end  = ((void ***) host_table)[1];
1575   void **host_var_table  = ((void ***) host_table)[2];
1576   void **host_vars_end   = ((void ***) host_table)[3];
1577 
1578   /* The func table contains only addresses, the var table contains addresses
1579      and corresponding sizes.  */
1580   int num_funcs = host_funcs_end - host_func_table;
1581   int num_vars  = (host_vars_end - host_var_table) / 2;
1582 
1583   /* Load image to device and get target addresses for the image.  */
1584   struct addr_pair *target_table = NULL;
1585   int i, num_target_entries;
1586 
1587   num_target_entries
1588     = devicep->load_image_func (devicep->target_id, version,
1589 				target_data, &target_table);
1590 
1591   if (num_target_entries != num_funcs + num_vars)
1592     {
1593       gomp_mutex_unlock (&devicep->lock);
1594       if (is_register_lock)
1595 	gomp_mutex_unlock (&register_lock);
1596       gomp_fatal ("Cannot map target functions or variables"
1597 		  " (expected %u, have %u)", num_funcs + num_vars,
1598 		  num_target_entries);
1599     }
1600 
1601   /* Insert host-target address mapping into splay tree.  */
1602   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1603   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1604   tgt->refcount = REFCOUNT_INFINITY;
1605   tgt->tgt_start = 0;
1606   tgt->tgt_end = 0;
1607   tgt->to_free = NULL;
1608   tgt->prev = NULL;
1609   tgt->list_count = 0;
1610   tgt->device_descr = devicep;
1611   splay_tree_node array = tgt->array;
1612 
1613   for (i = 0; i < num_funcs; i++)
1614     {
1615       splay_tree_key k = &array->key;
1616       k->host_start = (uintptr_t) host_func_table[i];
1617       k->host_end = k->host_start + 1;
1618       k->tgt = tgt;
1619       k->tgt_offset = target_table[i].start;
1620       k->refcount = REFCOUNT_INFINITY;
1621       k->dynamic_refcount = 0;
1622       k->aux = NULL;
1623       array->left = NULL;
1624       array->right = NULL;
1625       splay_tree_insert (&devicep->mem_map, array);
1626       array++;
1627     }
1628 
1629   /* Most significant bit of the size in host and target tables marks
1630      "omp declare target link" variables.  */
1631   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1632   const uintptr_t size_mask = ~link_bit;
1633 
1634   for (i = 0; i < num_vars; i++)
1635     {
1636       struct addr_pair *target_var = &target_table[num_funcs + i];
1637       uintptr_t target_size = target_var->end - target_var->start;
1638       bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
1639 
1640       if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
1641 	{
1642 	  gomp_mutex_unlock (&devicep->lock);
1643 	  if (is_register_lock)
1644 	    gomp_mutex_unlock (&register_lock);
1645 	  gomp_fatal ("Cannot map target variables (size mismatch)");
1646 	}
1647 
1648       splay_tree_key k = &array->key;
1649       k->host_start = (uintptr_t) host_var_table[i * 2];
1650       k->host_end
1651 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1652       k->tgt = tgt;
1653       k->tgt_offset = target_var->start;
1654       k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1655       k->dynamic_refcount = 0;
1656       k->aux = NULL;
1657       array->left = NULL;
1658       array->right = NULL;
1659       splay_tree_insert (&devicep->mem_map, array);
1660       array++;
1661     }
1662 
1663   free (target_table);
1664 }
1665 
1666 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
1667    The device must be locked.  */
1668 
1669 static void
1670 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1671 			       unsigned version,
1672 			       const void *host_table, const void *target_data)
1673 {
1674   void **host_func_table = ((void ***) host_table)[0];
1675   void **host_funcs_end  = ((void ***) host_table)[1];
1676   void **host_var_table  = ((void ***) host_table)[2];
1677   void **host_vars_end   = ((void ***) host_table)[3];
1678 
1679   /* The func table contains only addresses, the var table contains addresses
1680      and corresponding sizes.  */
1681   int num_funcs = host_funcs_end - host_func_table;
1682   int num_vars  = (host_vars_end - host_var_table) / 2;
1683 
1684   struct splay_tree_key_s k;
1685   splay_tree_key node = NULL;
1686 
1687   /* Find mapping at start of node array.  */
1688   if (num_funcs || num_vars)
1689     {
1690       k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1691 		      : (uintptr_t) host_var_table[0]);
1692       k.host_end = k.host_start + 1;
1693       node = splay_tree_lookup (&devicep->mem_map, &k);
1694     }
1695 
1696   if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1697     {
1698       gomp_mutex_unlock (&devicep->lock);
1699       gomp_fatal ("image unload failed");
1700     }
1701 
1702   /* Remove mappings from splay tree.  */
1703   int i;
1704   for (i = 0; i < num_funcs; i++)
1705     {
1706       k.host_start = (uintptr_t) host_func_table[i];
1707       k.host_end = k.host_start + 1;
1708       splay_tree_remove (&devicep->mem_map, &k);
1709     }
1710 
1711   /* Most significant bit of the size in host and target tables marks
1712      "omp declare target link" variables.  */
1713   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1714   const uintptr_t size_mask = ~link_bit;
1715   bool is_tgt_unmapped = false;
1716 
1717   for (i = 0; i < num_vars; i++)
1718     {
1719       k.host_start = (uintptr_t) host_var_table[i * 2];
1720       k.host_end
1721 	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1722 
1723       if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1724 	splay_tree_remove (&devicep->mem_map, &k);
1725       else
1726 	{
1727 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1728 	  is_tgt_unmapped = gomp_remove_var (devicep, n);
1729 	}
1730     }
1731 
1732   if (node && !is_tgt_unmapped)
1733     {
1734       free (node->tgt);
1735       free (node);
1736     }
1737 }
1738 
1739 /* This function should be called from every offload image as it is loaded.
1740    It gets the descriptor of the host func and var tables HOST_TABLE, the
1741    TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1742 
1743 void
1744 GOMP_offload_register_ver (unsigned version, const void *host_table,
1745 			   int target_type, const void *target_data)
1746 {
1747   int i;
1748 
1749   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1750     gomp_fatal ("Library too old for offload (version %u < %u)",
1751 		GOMP_VERSION, GOMP_VERSION_LIB (version));
1752 
1753   gomp_mutex_lock (&register_lock);
1754 
1755   /* Load image to all initialized devices.  */
1756   for (i = 0; i < num_devices; i++)
1757     {
1758       struct gomp_device_descr *devicep = &devices[i];
1759       gomp_mutex_lock (&devicep->lock);
1760       if (devicep->type == target_type
1761 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1762 	gomp_load_image_to_device (devicep, version,
1763 				   host_table, target_data, true);
1764       gomp_mutex_unlock (&devicep->lock);
1765     }
1766 
1767   /* Insert image into the array of pending images.  */
1768   offload_images
1769     = gomp_realloc_unlock (offload_images,
1770 			   (num_offload_images + 1)
1771 			   * sizeof (struct offload_image_descr));
1772   offload_images[num_offload_images].version = version;
1773   offload_images[num_offload_images].type = target_type;
1774   offload_images[num_offload_images].host_table = host_table;
1775   offload_images[num_offload_images].target_data = target_data;
1776 
1777   num_offload_images++;
1778   gomp_mutex_unlock (&register_lock);
1779 }
1780 
1781 void
1782 GOMP_offload_register (const void *host_table, int target_type,
1783 		       const void *target_data)
1784 {
1785   GOMP_offload_register_ver (0, host_table, target_type, target_data);
1786 }
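
/* For illustration: offload images are typically registered from a static
   constructor emitted by the compiler, roughly along the lines of

     __attribute__((constructor))
     static void
     register_image (void)
     {
       GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION,
						     GOMP_VERSION_NVIDIA_PTX),
				  &__offload_host_table,
				  GOMP_DEVICE_NVIDIA_PTX, &__ptx_image_data);
     }

   where __offload_host_table and __ptx_image_data are hypothetical names
   standing in for the compiler-generated host table and target image blob.  */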
1787 
1788 /* This function should be called from every offload image as it is
1789    unloaded.  It gets the descriptor of the host func and var tables
1790    HOST_TABLE, the TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1791 
1792 void
1793 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1794 			     int target_type, const void *target_data)
1795 {
1796   int i;
1797 
1798   gomp_mutex_lock (&register_lock);
1799 
1800   /* Unload image from all initialized devices.  */
1801   for (i = 0; i < num_devices; i++)
1802     {
1803       struct gomp_device_descr *devicep = &devices[i];
1804       gomp_mutex_lock (&devicep->lock);
1805       if (devicep->type == target_type
1806 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1807 	gomp_unload_image_from_device (devicep, version,
1808 				       host_table, target_data);
1809       gomp_mutex_unlock (&devicep->lock);
1810     }
1811 
1812   /* Remove image from array of pending images.  */
1813   for (i = 0; i < num_offload_images; i++)
1814     if (offload_images[i].target_data == target_data)
1815       {
1816 	offload_images[i] = offload_images[--num_offload_images];
1817 	break;
1818       }
1819 
1820   gomp_mutex_unlock (&register_lock);
1821 }
1822 
1823 void
1824 GOMP_offload_unregister (const void *host_table, int target_type,
1825 			 const void *target_data)
1826 {
1827   GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1828 }
1829 
1830 /* This function initializes the target device specified by DEVICEP.  DEVICEP
1831    must be locked on entry, and remains locked on return.  */
1832 
1833 attribute_hidden void
1834 gomp_init_device (struct gomp_device_descr *devicep)
1835 {
1836   int i;
1837   if (!devicep->init_device_func (devicep->target_id))
1838     {
1839       gomp_mutex_unlock (&devicep->lock);
1840       gomp_fatal ("device initialization failed");
1841     }
1842 
1843   /* Load to the device all images registered so far.  */
1844   for (i = 0; i < num_offload_images; i++)
1845     {
1846       struct offload_image_descr *image = &offload_images[i];
1847       if (image->type == devicep->type)
1848 	gomp_load_image_to_device (devicep, image->version,
1849 				   image->host_table, image->target_data,
1850 				   false);
1851     }
1852 
1853   /* Initialize OpenACC asynchronous queues.  */
1854   goacc_init_asyncqueues (devicep);
1855 
1856   devicep->state = GOMP_DEVICE_INITIALIZED;
1857 }
1858 
1859 /* This function finalizes the target device specified by DEVICEP.  DEVICEP
1860    must be locked on entry, and remains locked on return.  */
1861 
1862 attribute_hidden bool
1863 gomp_fini_device (struct gomp_device_descr *devicep)
1864 {
1865   bool ret = goacc_fini_asyncqueues (devicep);
1866   ret &= devicep->fini_device_func (devicep->target_id);
1867   devicep->state = GOMP_DEVICE_FINALIZED;
1868   return ret;
1869 }
1870 
1871 attribute_hidden void
1872 gomp_unload_device (struct gomp_device_descr *devicep)
1873 {
1874   if (devicep->state == GOMP_DEVICE_INITIALIZED)
1875     {
1876       unsigned i;
1877 
1878       /* Unload from the device all images registered so far.  */
1879       for (i = 0; i < num_offload_images; i++)
1880 	{
1881 	  struct offload_image_descr *image = &offload_images[i];
1882 	  if (image->type == devicep->type)
1883 	    gomp_unload_image_from_device (devicep, image->version,
1884 					   image->host_table,
1885 					   image->target_data);
1886 	}
1887     }
1888 }
1889 
1890 /* Host fallback for GOMP_target{,_ext} routines.  */
1891 
1892 static void
1893 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1894 {
1895   struct gomp_thread old_thr, *thr = gomp_thread ();
1896   old_thr = *thr;
1897   memset (thr, '\0', sizeof (*thr));
1898   if (gomp_places_list)
1899     {
1900       thr->place = old_thr.place;
1901       thr->ts.place_partition_len = gomp_places_list_len;
1902     }
1903   fn (hostaddrs);
1904   gomp_free_thread (thr);
1905   *thr = old_thr;
1906 }
1907 
1908 /* Calculate alignment and size requirements of a private copy of data shared
1909    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
1910 
1911 static inline void
1912 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1913 				     unsigned short *kinds, size_t *tgt_align,
1914 				     size_t *tgt_size)
1915 {
1916   size_t i;
1917   for (i = 0; i < mapnum; i++)
1918     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1919       {
1920 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1921 	if (*tgt_align < align)
1922 	  *tgt_align = align;
1923 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1924 	*tgt_size += sizes[i];
1925       }
1926 }
1927 
1928 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
1929 
1930 static inline void
1931 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1932 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
1933 			size_t tgt_size)
1934 {
1935   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1936   if (al)
1937     tgt += tgt_align - al;
1938   tgt_size = 0;
1939   size_t i;
1940   for (i = 0; i < mapnum; i++)
1941     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1942       {
1943 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1944 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
1945 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1946 	hostaddrs[i] = tgt + tgt_size;
1947 	tgt_size = tgt_size + sizes[i];
1948       }
1949 }
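
/* Worked example (illustrative): the low byte of kinds[i] holds the map
   kind and the high byte the base-2 logarithm of the required alignment.
   A firstprivate variable with 8-byte alignment (1 << 3) would thus be
   encoded as

     unsigned short kind = (3 << 8) | GOMP_MAP_FIRSTPRIVATE;

   The two helpers above first size a single block that can hold all such
   private copies suitably aligned, then pack the copies into it and
   redirect hostaddrs[i] at them.  */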
1950 
1951 /* Helper function of GOMP_target{,_ext} routines.  */
1952 
1953 static void *
1954 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1955 			 void (*host_fn) (void *))
1956 {
1957   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1958     return (void *) host_fn;
1959   else
1960     {
1961       gomp_mutex_lock (&devicep->lock);
1962       if (devicep->state == GOMP_DEVICE_FINALIZED)
1963 	{
1964 	  gomp_mutex_unlock (&devicep->lock);
1965 	  return NULL;
1966 	}
1967 
1968       struct splay_tree_key_s k;
1969       k.host_start = (uintptr_t) host_fn;
1970       k.host_end = k.host_start + 1;
1971       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1972       gomp_mutex_unlock (&devicep->lock);
1973       if (tgt_fn == NULL)
1974 	return NULL;
1975 
1976       return (void *) tgt_fn->tgt_offset;
1977     }
1978 }
1979 
1980 /* Called when encountering a target directive.  If DEVICE is
1981    GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
1982    GOMP_DEVICE_HOST_FALLBACK (or any value
1983    larger than the last available hw device), use the host fallback.
1984    FN is the address of the host code, UNUSED is part of the current ABI,
1985    but we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
1986    with MAPNUM entries, with addresses of the host objects,
1987    sizes of the host objects (resp. for pointer kind the pointer bias
1988    and assumed sizeof (void *) size) and kinds.  */
1989 
1990 void
1991 GOMP_target (int device, void (*fn) (void *), const void *unused,
1992 	     size_t mapnum, void **hostaddrs, size_t *sizes,
1993 	     unsigned char *kinds)
1994 {
1995   struct gomp_device_descr *devicep = resolve_device (device);
1996 
1997   void *fn_addr;
1998   if (devicep == NULL
1999       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2000       /* All shared memory devices should use the GOMP_target_ext function.  */
2001       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2002       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2003     return gomp_target_fallback (fn, hostaddrs);
2004 
2005   struct target_mem_desc *tgt_vars
2006     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2007 		     GOMP_MAP_VARS_TARGET);
2008   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2009 		     NULL);
2010   gomp_unmap_vars (tgt_vars, true);
2011 }
2012 
2013 static inline unsigned int
2014 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2015 {
2016   /* If we cannot run asynchronously, simply ignore nowait.  */
2017   if (devicep != NULL && devicep->async_run_func == NULL)
2018     flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2019 
2020   return flags;
2021 }
2022 
2023 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2024    and several arguments have been added:
2025    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2026    DEPEND is array of dependencies, see GOMP_task for details.
2027 
2028    ARGS is a pointer to an array consisting of a variable number of both
2029    device-independent and device-specific arguments; each argument takes
2030    one or two elements, where the first specifies for which device it is
2031    intended, the type, and optionally also the value.  If the value is not
2032    present in the first element, the whole second element is the actual
2033    value.  The last element of the array is a single NULL.  Among the
2034    device-independent ones are, for example, NUM_TEAMS and THREAD_LIMIT.
2035 
2036    NUM_TEAMS is positive if GOMP_teams will be called in the body with
2037    that value, or 1 if the teams construct is not present, or 0 if the
2038    teams construct does not have a num_teams clause (so the choice is
2039    implementation defined), and -1 if it cannot be determined on the host
2040    what value GOMP_teams will have on the device.
2041    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2042    body with that value, or 0 if the teams construct does not have a
2043    thread_limit clause or the teams construct is not present, and -1 if it
2044    cannot be determined on the host what value GOMP_teams will have on the device.  */
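
/* A minimal sketch of the ARGS encoding (hypothetical values): a single
   device-independent argument carrying NUM_TEAMS == 4 in its value bits,
   followed by the terminating NULL.

     void *args[]
       = { (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
				 | GOMP_TARGET_ARG_NUM_TEAMS
				 | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
	   NULL };  */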
2045 
2046 void
2047 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2048 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2049 		 unsigned int flags, void **depend, void **args)
2050 {
2051   struct gomp_device_descr *devicep = resolve_device (device);
2052   size_t tgt_align = 0, tgt_size = 0;
2053   bool fpc_done = false;
2054 
2055   flags = clear_unsupported_flags (devicep, flags);
2056 
2057   if (flags & GOMP_TARGET_FLAG_NOWAIT)
2058     {
2059       struct gomp_thread *thr = gomp_thread ();
2060       /* Create a team if we don't have any around, as nowait
2061 	 target tasks make sense to run asynchronously even when
2062 	 outside of any parallel.  */
2063       if (__builtin_expect (thr->ts.team == NULL, 0))
2064 	{
2065 	  struct gomp_team *team = gomp_new_team (1);
2066 	  struct gomp_task *task = thr->task;
2067 	  struct gomp_task **implicit_task = &task;
2068 	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2069 	  team->prev_ts = thr->ts;
2070 	  thr->ts.team = team;
2071 	  thr->ts.team_id = 0;
2072 	  thr->ts.work_share = &team->work_shares[0];
2073 	  thr->ts.last_work_share = NULL;
2074 #ifdef HAVE_SYNC_BUILTINS
2075 	  thr->ts.single_count = 0;
2076 #endif
2077 	  thr->ts.static_trip = 0;
2078 	  thr->task = &team->implicit_task[0];
2079 	  gomp_init_task (thr->task, NULL, icv);
2080 	  while (*implicit_task
2081 		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2082 	    implicit_task = &(*implicit_task)->parent;
2083 	  if (*implicit_task)
2084 	    {
2085 	      thr->task = *implicit_task;
2086 	      gomp_end_task ();
2087 	      free (*implicit_task);
2088 	      thr->task = &team->implicit_task[0];
2089 	    }
2090 	  else
2091 	    pthread_setspecific (gomp_thread_destructor, thr);
2092 	  if (implicit_task != &task)
2093 	    {
2094 	      *implicit_task = thr->task;
2095 	      thr->task = task;
2096 	    }
2097 	}
2098       if (thr->ts.team
2099 	  && !thr->task->final_task)
2100 	{
2101 	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2102 				   sizes, kinds, flags, depend, args,
2103 				   GOMP_TARGET_TASK_BEFORE_MAP);
2104 	  return;
2105 	}
2106     }
2107 
2108   /* If there are depend clauses, but nowait is not present
2109      (or we are in a final task), block the parent task until the
2110      dependencies are resolved and then just continue with the rest
2111      of the function as if it were a merged task.  */
2112   if (depend != NULL)
2113     {
2114       struct gomp_thread *thr = gomp_thread ();
2115       if (thr->task && thr->task->depend_hash)
2116 	{
2117 	  /* If we might need to wait, copy firstprivate now.  */
2118 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2119 					       &tgt_align, &tgt_size);
2120 	  if (tgt_align)
2121 	    {
2122 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2123 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2124 				      tgt_align, tgt_size);
2125 	    }
2126 	  fpc_done = true;
2127 	  gomp_task_maybe_wait_for_dependencies (depend);
2128 	}
2129     }
2130 
2131   void *fn_addr;
2132   if (devicep == NULL
2133       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2134       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2135       || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2136     {
2137       if (!fpc_done)
2138 	{
2139 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2140 					       &tgt_align, &tgt_size);
2141 	  if (tgt_align)
2142 	    {
2143 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2144 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2145 				      tgt_align, tgt_size);
2146 	    }
2147 	}
2148       gomp_target_fallback (fn, hostaddrs);
2149       return;
2150     }
2151 
2152   struct target_mem_desc *tgt_vars;
2153   if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2154     {
2155       if (!fpc_done)
2156 	{
2157 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2158 					       &tgt_align, &tgt_size);
2159 	  if (tgt_align)
2160 	    {
2161 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2162 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2163 				      tgt_align, tgt_size);
2164 	    }
2165 	}
2166       tgt_vars = NULL;
2167     }
2168   else
2169     tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2170 			      true, GOMP_MAP_VARS_TARGET);
2171   devicep->run_func (devicep->target_id, fn_addr,
2172 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2173 		     args);
2174   if (tgt_vars)
2175     gomp_unmap_vars (tgt_vars, true);
2176 }
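
/* For orientation (illustrative; the exact lowering is compiler internal):
   a construct such as

     #pragma omp target map(tofrom: x)
     x++;

   is outlined into a function FN and becomes roughly

     GOMP_target_ext (GOMP_DEVICE_ICV, fn, 1, hostaddrs, sizes, kinds,
		      0, NULL, NULL);

   with hostaddrs[0] == &x and kinds[0] encoding GOMP_MAP_TOFROM.  */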
2177 
2178 /* Host fallback for GOMP_target_data{,_ext} routines.  */
2179 
2180 static void
2181 gomp_target_data_fallback (void)
2182 {
2183   struct gomp_task_icv *icv = gomp_icv (false);
2184   if (icv->target_data)
2185     {
2186       /* Even when doing a host fallback, if there are any active
2187          #pragma omp target data constructs, we need to remember the
2188          new #pragma omp target data, otherwise GOMP_target_end_data
2189          would get out of sync.  */
2190       struct target_mem_desc *tgt
2191 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2192 			 GOMP_MAP_VARS_DATA);
2193       tgt->prev = icv->target_data;
2194       icv->target_data = tgt;
2195     }
2196 }
2197 
2198 void
2199 GOMP_target_data (int device, const void *unused, size_t mapnum,
2200 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
2201 {
2202   struct gomp_device_descr *devicep = resolve_device (device);
2203 
2204   if (devicep == NULL
2205       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2206       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2207     return gomp_target_data_fallback ();
2208 
2209   struct target_mem_desc *tgt
2210     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2211 		     GOMP_MAP_VARS_DATA);
2212   struct gomp_task_icv *icv = gomp_icv (true);
2213   tgt->prev = icv->target_data;
2214   icv->target_data = tgt;
2215 }
2216 
2217 void
2218 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2219 		      size_t *sizes, unsigned short *kinds)
2220 {
2221   struct gomp_device_descr *devicep = resolve_device (device);
2222 
2223   if (devicep == NULL
2224       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2225       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2226     return gomp_target_data_fallback ();
2227 
2228   struct target_mem_desc *tgt
2229     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2230 		     GOMP_MAP_VARS_DATA);
2231   struct gomp_task_icv *icv = gomp_icv (true);
2232   tgt->prev = icv->target_data;
2233   icv->target_data = tgt;
2234 }
2235 
2236 void
2237 GOMP_target_end_data (void)
2238 {
2239   struct gomp_task_icv *icv = gomp_icv (false);
2240   if (icv->target_data)
2241     {
2242       struct target_mem_desc *tgt = icv->target_data;
2243       icv->target_data = tgt->prev;
2244       gomp_unmap_vars (tgt, true);
2245     }
2246 }
2247 
2248 void
2249 GOMP_target_update (int device, const void *unused, size_t mapnum,
2250 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
2251 {
2252   struct gomp_device_descr *devicep = resolve_device (device);
2253 
2254   if (devicep == NULL
2255       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2256       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2257     return;
2258 
2259   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2260 }
2261 
2262 void
2263 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2264 			size_t *sizes, unsigned short *kinds,
2265 			unsigned int flags, void **depend)
2266 {
2267   struct gomp_device_descr *devicep = resolve_device (device);
2268 
2269   /* If there are depend clauses, but nowait is not present,
2270      block the parent task until the dependencies are resolved
2271      and then just continue with the rest of the function as if it
2272      were a merged task.  Until we are able to schedule tasks during
2273      variable mapping or unmapping, ignore nowait if depend clauses
2274      are not present.  */
2275   if (depend != NULL)
2276     {
2277       struct gomp_thread *thr = gomp_thread ();
2278       if (thr->task && thr->task->depend_hash)
2279 	{
2280 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2281 	      && thr->ts.team
2282 	      && !thr->task->final_task)
2283 	    {
2284 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2285 					   mapnum, hostaddrs, sizes, kinds,
2286 					   flags | GOMP_TARGET_FLAG_UPDATE,
2287 					   depend, NULL, GOMP_TARGET_TASK_DATA))
2288 		return;
2289 	    }
2290 	  else
2291 	    {
2292 	      struct gomp_team *team = thr->ts.team;
2293 	      /* If parallel or taskgroup has been cancelled, don't start new
2294 		 tasks.  */
2295 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
2296 		{
2297 		  if (gomp_team_barrier_cancelled (&team->barrier))
2298 		    return;
2299 		  if (thr->task->taskgroup)
2300 		    {
2301 		      if (thr->task->taskgroup->cancelled)
2302 			return;
2303 		      if (thr->task->taskgroup->workshare
2304 			  && thr->task->taskgroup->prev
2305 			  && thr->task->taskgroup->prev->cancelled)
2306 			return;
2307 		    }
2308 		}
2309 
2310 	      gomp_task_maybe_wait_for_dependencies (depend);
2311 	    }
2312 	}
2313     }
2314 
2315   if (devicep == NULL
2316       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2317       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2318     return;
2319 
2320   struct gomp_thread *thr = gomp_thread ();
2321   struct gomp_team *team = thr->ts.team;
2322   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2323   if (__builtin_expect (gomp_cancel_var, 0) && team)
2324     {
2325       if (gomp_team_barrier_cancelled (&team->barrier))
2326 	return;
2327       if (thr->task->taskgroup)
2328 	{
2329 	  if (thr->task->taskgroup->cancelled)
2330 	    return;
2331 	  if (thr->task->taskgroup->workshare
2332 	      && thr->task->taskgroup->prev
2333 	      && thr->task->taskgroup->prev->cancelled)
2334 	    return;
2335 	}
2336     }
2337 
2338   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2339 }
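
/* For example (illustrative), '#pragma omp target update to(x)' is lowered
   to a call resembling

     GOMP_target_update_ext (GOMP_DEVICE_ICV, 1, hostaddrs, sizes, kinds,
			     0, NULL);

   with kinds[0] encoding a to-kind, so gomp_update copies the host copy
   of x to the device.  */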
2340 
2341 static void
2342 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2343 		void **hostaddrs, size_t *sizes, unsigned short *kinds)
2344 {
2345   const int typemask = 0xff;
2346   size_t i;
2347   gomp_mutex_lock (&devicep->lock);
2348   if (devicep->state == GOMP_DEVICE_FINALIZED)
2349     {
2350       gomp_mutex_unlock (&devicep->lock);
2351       return;
2352     }
2353 
2354   for (i = 0; i < mapnum; i++)
2355     {
2356       struct splay_tree_key_s cur_node;
2357       unsigned char kind = kinds[i] & typemask;
2358       switch (kind)
2359 	{
2360 	case GOMP_MAP_FROM:
2361 	case GOMP_MAP_ALWAYS_FROM:
2362 	case GOMP_MAP_DELETE:
2363 	case GOMP_MAP_RELEASE:
2364 	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2365 	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2366 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
2367 	  cur_node.host_end = cur_node.host_start + sizes[i];
2368 	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2369 			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2370 	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2371 	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
2372 	  if (!k)
2373 	    continue;
2374 
2375 	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2376 	    k->refcount--;
2377 	  if ((kind == GOMP_MAP_DELETE
2378 	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2379 	      && k->refcount != REFCOUNT_INFINITY)
2380 	    k->refcount = 0;
2381 
2382 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2383 	      || kind == GOMP_MAP_ALWAYS_FROM)
2384 	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2385 				(void *) (k->tgt->tgt_start + k->tgt_offset
2386 					  + cur_node.host_start
2387 					  - k->host_start),
2388 				cur_node.host_end - cur_node.host_start);
2389 	  if (k->refcount == 0)
2390 	    gomp_remove_var (devicep, k);
2391 
2392 	  break;
2393 	default:
2394 	  gomp_mutex_unlock (&devicep->lock);
2395 	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2396 		      kind);
2397 	}
2398     }
2399 
2400   gomp_mutex_unlock (&devicep->lock);
2401 }
2402 
2403 void
2404 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2405 			     size_t *sizes, unsigned short *kinds,
2406 			     unsigned int flags, void **depend)
2407 {
2408   struct gomp_device_descr *devicep = resolve_device (device);
2409 
2410   /* If there are depend clauses, but nowait is not present,
2411      block the parent task until the dependencies are resolved
2412      and then just continue with the rest of the function as if it
2413      were a merged task.  Until we are able to schedule tasks during
2414      variable mapping or unmapping, ignore nowait if depend clauses
2415      are not present.  */
2416   if (depend != NULL)
2417     {
2418       struct gomp_thread *thr = gomp_thread ();
2419       if (thr->task && thr->task->depend_hash)
2420 	{
2421 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2422 	      && thr->ts.team
2423 	      && !thr->task->final_task)
2424 	    {
2425 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2426 					   mapnum, hostaddrs, sizes, kinds,
2427 					   flags, depend, NULL,
2428 					   GOMP_TARGET_TASK_DATA))
2429 		return;
2430 	    }
2431 	  else
2432 	    {
2433 	      struct gomp_team *team = thr->ts.team;
2434 	      /* If parallel or taskgroup has been cancelled, don't start new
2435 		 tasks.  */
2436 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
2437 		{
2438 		  if (gomp_team_barrier_cancelled (&team->barrier))
2439 		    return;
2440 		  if (thr->task->taskgroup)
2441 		    {
2442 		      if (thr->task->taskgroup->cancelled)
2443 			return;
2444 		      if (thr->task->taskgroup->workshare
2445 			  && thr->task->taskgroup->prev
2446 			  && thr->task->taskgroup->prev->cancelled)
2447 			return;
2448 		    }
2449 		}
2450 
2451 	      gomp_task_maybe_wait_for_dependencies (depend);
2452 	    }
2453 	}
2454     }
2455 
2456   if (devicep == NULL
2457       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2458       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2459     return;
2460 
2461   struct gomp_thread *thr = gomp_thread ();
2462   struct gomp_team *team = thr->ts.team;
2463   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2464   if (__builtin_expect (gomp_cancel_var, 0) && team)
2465     {
2466       if (gomp_team_barrier_cancelled (&team->barrier))
2467 	return;
2468       if (thr->task->taskgroup)
2469 	{
2470 	  if (thr->task->taskgroup->cancelled)
2471 	    return;
2472 	  if (thr->task->taskgroup->workshare
2473 	      && thr->task->taskgroup->prev
2474 	      && thr->task->taskgroup->prev->cancelled)
2475 	    return;
2476 	}
2477     }
2478 
2479   /* The variables are mapped separately such that they can be released
2480      independently.  */
2481   size_t i, j;
2482   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2483     for (i = 0; i < mapnum; i++)
2484       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2485 	{
2486 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2487 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2488 	  i += sizes[i];
2489 	}
2490       else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2491 	{
2492 	  for (j = i + 1; j < mapnum; j++)
2493 	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff))
2494 	      break;
2495 	  gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
2496 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2497 	  i += j - i - 1;
2498 	}
2499       else
2500 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2501 		       true, GOMP_MAP_VARS_ENTER_DATA);
2502   else
2503     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2504 }
2505 
2506 bool
2507 gomp_target_task_fn (void *data)
2508 {
2509   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2510   struct gomp_device_descr *devicep = ttask->devicep;
2511 
2512   if (ttask->fn != NULL)
2513     {
2514       void *fn_addr;
2515       if (devicep == NULL
2516 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2517 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2518 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2519 	{
2520 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
2521 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2522 	  return false;
2523 	}
2524 
2525       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2526 	{
2527 	  if (ttask->tgt)
2528 	    gomp_unmap_vars (ttask->tgt, true);
2529 	  return false;
2530 	}
2531 
2532       void *actual_arguments;
2533       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2534 	{
2535 	  ttask->tgt = NULL;
2536 	  actual_arguments = ttask->hostaddrs;
2537 	}
2538       else
2539 	{
2540 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2541 				      NULL, ttask->sizes, ttask->kinds, true,
2542 				      GOMP_MAP_VARS_TARGET);
2543 	  actual_arguments = (void *) ttask->tgt->tgt_start;
2544 	}
2545       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2546 
2547       assert (devicep->async_run_func);
2548       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2549 			       ttask->args, (void *) ttask);
2550       return true;
2551     }
2552   else if (devicep == NULL
2553 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2554 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2555     return false;
2556 
2557   size_t i;
2558   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2559     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2560 		 ttask->kinds, true);
2561   else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2562     for (i = 0; i < ttask->mapnum; i++)
2563       if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2564 	{
2565 	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2566 			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2567 			 GOMP_MAP_VARS_ENTER_DATA);
2568 	  i += ttask->sizes[i];
2569 	}
2570       else
2571 	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2572 		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2573   else
2574     gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2575 		    ttask->kinds);
2576   return false;
2577 }
2578 
2579 void
2580 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2581 {
2582   if (thread_limit)
2583     {
2584       struct gomp_task_icv *icv = gomp_icv (true);
2585       icv->thread_limit_var
2586 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2587     }
2588   (void) num_teams;
2589 }
2590 
2591 void *
2592 omp_target_alloc (size_t size, int device_num)
2593 {
2594   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2595     return malloc (size);
2596 
2597   if (device_num < 0)
2598     return NULL;
2599 
2600   struct gomp_device_descr *devicep = resolve_device (device_num);
2601   if (devicep == NULL)
2602     return NULL;
2603 
2604   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2605       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2606     return malloc (size);
2607 
2608   gomp_mutex_lock (&devicep->lock);
2609   void *ret = devicep->alloc_func (devicep->target_id, size);
2610   gomp_mutex_unlock (&devicep->lock);
2611   return ret;
2612 }
2613 
2614 void
2615 omp_target_free (void *device_ptr, int device_num)
2616 {
2617   if (device_ptr == NULL)
2618     return;
2619 
2620   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2621     {
2622       free (device_ptr);
2623       return;
2624     }
2625 
2626   if (device_num < 0)
2627     return;
2628 
2629   struct gomp_device_descr *devicep = resolve_device (device_num);
2630   if (devicep == NULL)
2631     return;
2632 
2633   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2634       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2635     {
2636       free (device_ptr);
2637       return;
2638     }
2639 
2640   gomp_mutex_lock (&devicep->lock);
2641   gomp_free_device_memory (devicep, device_ptr);
2642   gomp_mutex_unlock (&devicep->lock);
2643 }
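
/* Usage sketch (illustrative; DEV stands for a valid non-shared-memory
   target device number):

     void *p = omp_target_alloc (1024, DEV);
     if (p != NULL)
       {
	 ...use p in target regions via an is_device_ptr clause...
	 omp_target_free (p, DEV);
       }
 */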
2644 
2645 int
2646 omp_target_is_present (const void *ptr, int device_num)
2647 {
2648   if (ptr == NULL)
2649     return 1;
2650 
2651   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2652     return 1;
2653 
2654   if (device_num < 0)
2655     return 0;
2656 
2657   struct gomp_device_descr *devicep = resolve_device (device_num);
2658   if (devicep == NULL)
2659     return 0;
2660 
2661   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2662       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2663     return 1;
2664 
2665   gomp_mutex_lock (&devicep->lock);
2666   struct splay_tree_s *mem_map = &devicep->mem_map;
2667   struct splay_tree_key_s cur_node;
2668 
2669   cur_node.host_start = (uintptr_t) ptr;
2670   cur_node.host_end = cur_node.host_start;
2671   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2672   int ret = n != NULL;
2673   gomp_mutex_unlock (&devicep->lock);
2674   return ret;
2675 }
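
/* E.g. (illustrative): after '#pragma omp target enter data map(to: x)'
   for device DEV, omp_target_is_present (&x, DEV) returns nonzero until
   that mapping is released again.  */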
2676 
2677 int
2678 omp_target_memcpy (void *dst, const void *src, size_t length,
2679 		   size_t dst_offset, size_t src_offset, int dst_device_num,
2680 		   int src_device_num)
2681 {
2682   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2683   bool ret;
2684 
2685   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2686     {
2687       if (dst_device_num < 0)
2688 	return EINVAL;
2689 
2690       dst_devicep = resolve_device (dst_device_num);
2691       if (dst_devicep == NULL)
2692 	return EINVAL;
2693 
2694       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2695 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2696 	dst_devicep = NULL;
2697     }
2698   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2699     {
2700       if (src_device_num < 0)
2701 	return EINVAL;
2702 
2703       src_devicep = resolve_device (src_device_num);
2704       if (src_devicep == NULL)
2705 	return EINVAL;
2706 
2707       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2708 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2709 	src_devicep = NULL;
2710     }
2711   if (src_devicep == NULL && dst_devicep == NULL)
2712     {
2713       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2714       return 0;
2715     }
2716   if (src_devicep == NULL)
2717     {
2718       gomp_mutex_lock (&dst_devicep->lock);
2719       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2720 					(char *) dst + dst_offset,
2721 					(char *) src + src_offset, length);
2722       gomp_mutex_unlock (&dst_devicep->lock);
2723       return (ret ? 0 : EINVAL);
2724     }
2725   if (dst_devicep == NULL)
2726     {
2727       gomp_mutex_lock (&src_devicep->lock);
2728       ret = src_devicep->dev2host_func (src_devicep->target_id,
2729 					(char *) dst + dst_offset,
2730 					(char *) src + src_offset, length);
2731       gomp_mutex_unlock (&src_devicep->lock);
2732       return (ret ? 0 : EINVAL);
2733     }
2734   if (src_devicep == dst_devicep)
2735     {
2736       gomp_mutex_lock (&src_devicep->lock);
2737       ret = src_devicep->dev2dev_func (src_devicep->target_id,
2738 				       (char *) dst + dst_offset,
2739 				       (char *) src + src_offset, length);
2740       gomp_mutex_unlock (&src_devicep->lock);
2741       return (ret ? 0 : EINVAL);
2742     }
2743   return EINVAL;
2744 }
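
/* Usage sketch (illustrative; HOST_DEV and DEV stand for valid host and
   target device numbers, and offsets are in bytes):

     double host_buf[256] = { 0 };
     void *dev_buf = omp_target_alloc (sizeof host_buf, DEV);
     int err = omp_target_memcpy (dev_buf, host_buf, sizeof host_buf,
				  0, 0, DEV, HOST_DEV);

   A return value of 0 indicates success, EINVAL an invalid device or a
   failed transfer.  */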
2745 
2746 static int
2747 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2748 			       int num_dims, const size_t *volume,
2749 			       const size_t *dst_offsets,
2750 			       const size_t *src_offsets,
2751 			       const size_t *dst_dimensions,
2752 			       const size_t *src_dimensions,
2753 			       struct gomp_device_descr *dst_devicep,
2754 			       struct gomp_device_descr *src_devicep)
2755 {
2756   size_t dst_slice = element_size;
2757   size_t src_slice = element_size;
2758   size_t j, dst_off, src_off, length;
2759   int i, ret;
2760 
2761   if (num_dims == 1)
2762     {
2763       if (__builtin_mul_overflow (element_size, volume[0], &length)
2764 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2765 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2766 	return EINVAL;
2767       if (dst_devicep == NULL && src_devicep == NULL)
2768 	{
2769 	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2770 		  length);
2771 	  ret = 1;
2772 	}
2773       else if (src_devicep == NULL)
2774 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2775 					  (char *) dst + dst_off,
2776 					  (const char *) src + src_off,
2777 					  length);
2778       else if (dst_devicep == NULL)
2779 	ret = src_devicep->dev2host_func (src_devicep->target_id,
2780 					  (char *) dst + dst_off,
2781 					  (const char *) src + src_off,
2782 					  length);
2783       else if (src_devicep == dst_devicep)
2784 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
2785 					 (char *) dst + dst_off,
2786 					 (const char *) src + src_off,
2787 					 length);
2788       else
2789 	ret = 0;
2790       return ret ? 0 : EINVAL;
2791     }
2792 
2793   /* FIXME: it would be nice to have some plugin function to handle
2794      num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
2795      be handled in the generic recursion below, and for host-host it
2796      should be used even for any num_dims >= 2.  */
2797 
2798   for (i = 1; i < num_dims; i++)
2799     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2800 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2801       return EINVAL;
2802   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2803       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2804     return EINVAL;
2805   for (j = 0; j < volume[0]; j++)
2806     {
2807       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2808 					   (const char *) src + src_off,
2809 					   element_size, num_dims - 1,
2810 					   volume + 1, dst_offsets + 1,
2811 					   src_offsets + 1, dst_dimensions + 1,
2812 					   src_dimensions + 1, dst_devicep,
2813 					   src_devicep);
2814       if (ret)
2815 	return ret;
2816       dst_off += dst_slice;
2817       src_off += src_slice;
2818     }
2819   return 0;
2820 }
2821 
2822 int
2823 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2824 			int num_dims, const size_t *volume,
2825 			const size_t *dst_offsets,
2826 			const size_t *src_offsets,
2827 			const size_t *dst_dimensions,
2828 			const size_t *src_dimensions,
2829 			int dst_device_num, int src_device_num)
2830 {
2831   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2832 
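  /* Per the OpenMP spec, a call with both DST and SRC NULL queries the
     number of dimensions supported; this implementation imposes no real
     limit.  */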
2833   if (!dst && !src)
2834     return INT_MAX;
2835 
2836   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2837     {
2838       if (dst_device_num < 0)
2839 	return EINVAL;
2840 
2841       dst_devicep = resolve_device (dst_device_num);
2842       if (dst_devicep == NULL)
2843 	return EINVAL;
2844 
2845       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2846 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2847 	dst_devicep = NULL;
2848     }
2849   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2850     {
2851       if (src_device_num < 0)
2852 	return EINVAL;
2853 
2854       src_devicep = resolve_device (src_device_num);
2855       if (src_devicep == NULL)
2856 	return EINVAL;
2857 
2858       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2859 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2860 	src_devicep = NULL;
2861     }
2862 
2863   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2864     return EINVAL;
2865 
2866   if (src_devicep)
2867     gomp_mutex_lock (&src_devicep->lock);
2868   else if (dst_devicep)
2869     gomp_mutex_lock (&dst_devicep->lock);
2870   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2871 					   volume, dst_offsets, src_offsets,
2872 					   dst_dimensions, src_dimensions,
2873 					   dst_devicep, src_devicep);
2874   if (src_devicep)
2875     gomp_mutex_unlock (&src_devicep->lock);
2876   else if (dst_devicep)
2877     gomp_mutex_unlock (&dst_devicep->lock);
2878   return ret;
2879 }
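
/* Worked 2-D example (illustrative; HOST_DEV stands for the host device
   number): copy a 2x3 sub-block starting at row 1, column 2 of a 4x8
   source matrix of doubles into the top-left corner of an equally shaped
   destination matrix:

     size_t volume[2] = { 2, 3 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dst_dims[2] = { 4, 8 };
     size_t src_dims[2] = { 4, 8 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
				       dst_offsets, src_offsets, dst_dims,
				       src_dims, HOST_DEV, HOST_DEV);

   The worker above recurses on the outermost dimension and performs
   contiguous copies only at the innermost one.  */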
2880 
2881 int
2882 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2883 			  size_t size, size_t device_offset, int device_num)
2884 {
2885   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2886     return EINVAL;
2887 
2888   if (device_num < 0)
2889     return EINVAL;
2890 
2891   struct gomp_device_descr *devicep = resolve_device (device_num);
2892   if (devicep == NULL)
2893     return EINVAL;
2894 
2895   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2896       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2897     return EINVAL;
2898 
2899   gomp_mutex_lock (&devicep->lock);
2900 
2901   struct splay_tree_s *mem_map = &devicep->mem_map;
2902   struct splay_tree_key_s cur_node;
2903   int ret = EINVAL;
2904 
2905   cur_node.host_start = (uintptr_t) host_ptr;
2906   cur_node.host_end = cur_node.host_start + size;
2907   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2908   if (n)
2909     {
2910       if (n->tgt->tgt_start + n->tgt_offset
2911 	  == (uintptr_t) device_ptr + device_offset
2912 	  && n->host_start <= cur_node.host_start
2913 	  && n->host_end >= cur_node.host_end)
2914 	ret = 0;
2915     }
2916   else
2917     {
2918       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2919       tgt->array = gomp_malloc (sizeof (*tgt->array));
2920       tgt->refcount = 1;
2921       tgt->tgt_start = 0;
2922       tgt->tgt_end = 0;
2923       tgt->to_free = NULL;
2924       tgt->prev = NULL;
2925       tgt->list_count = 0;
2926       tgt->device_descr = devicep;
2927       splay_tree_node array = tgt->array;
2928       splay_tree_key k = &array->key;
2929       k->host_start = cur_node.host_start;
2930       k->host_end = cur_node.host_end;
2931       k->tgt = tgt;
2932       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2933       k->refcount = REFCOUNT_INFINITY;
2934       k->dynamic_refcount = 0;
2935       k->aux = NULL;
2936       array->left = NULL;
2937       array->right = NULL;
2938       splay_tree_insert (&devicep->mem_map, array);
2939       ret = 0;
2940     }
2941   gomp_mutex_unlock (&devicep->lock);
2942   return ret;
2943 }
2944 
2945 int
2946 omp_target_disassociate_ptr (const void *ptr, int device_num)
2947 {
2948   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2949     return EINVAL;
2950 
2951   if (device_num < 0)
2952     return EINVAL;
2953 
2954   struct gomp_device_descr *devicep = resolve_device (device_num);
2955   if (devicep == NULL)
2956     return EINVAL;
2957 
2958   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2959     return EINVAL;
2960 
2961   gomp_mutex_lock (&devicep->lock);
2962 
2963   struct splay_tree_s *mem_map = &devicep->mem_map;
2964   struct splay_tree_key_s cur_node;
2965   int ret = EINVAL;
2966 
2967   cur_node.host_start = (uintptr_t) ptr;
2968   cur_node.host_end = cur_node.host_start;
2969   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2970   if (n
2971       && n->host_start == cur_node.host_start
2972       && n->refcount == REFCOUNT_INFINITY
2973       && n->tgt->tgt_start == 0
2974       && n->tgt->to_free == NULL
2975       && n->tgt->refcount == 1
2976       && n->tgt->list_count == 0)
2977     {
2978       splay_tree_remove (&devicep->mem_map, n);
2979       gomp_unmap_tgt (n->tgt);
2980       ret = 0;
2981     }
2982 
2983   gomp_mutex_unlock (&devicep->lock);
2984   return ret;
2985 }
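
/* Usage sketch (illustrative; DEV stands for a valid non-shared-memory
   target device number): associate an existing device allocation with a
   host object so mapped regions reuse it, then undo the association.

     int host_obj[64];
     void *dev_mem = omp_target_alloc (sizeof host_obj, DEV);
     if (omp_target_associate_ptr (host_obj, dev_mem, sizeof host_obj,
				   0, DEV) == 0)
       {
	 ...map host_obj in target constructs as usual...
	 omp_target_disassociate_ptr (host_obj, DEV);
       }
     omp_target_free (dev_mem, DEV);
 */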
2986 
2987 int
2988 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2989 {
2990   (void) kind;
2991   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2992     return gomp_pause_host ();
2993   if (device_num < 0 || device_num >= gomp_get_num_devices ())
2994     return -1;
2995   /* Do nothing for target devices for now.  */
2996   return 0;
2997 }
2998 
2999 int
3000 omp_pause_resource_all (omp_pause_resource_t kind)
3001 {
3002   (void) kind;
3003   if (gomp_pause_host ())
3004     return -1;
3005   /* Do nothing for target devices for now.  */
3006   return 0;
3007 }
3008 
3009 ialias (omp_pause_resource)
3010 ialias (omp_pause_resource_all)
3011 
3012 #ifdef PLUGIN_SUPPORT
3013 
3014 /* This function tries to load a plugin for DEVICE.  The name of the plugin
3015    is passed in PLUGIN_NAME.
3016    The handles of the found functions are stored in the corresponding fields
3017    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
3018 
3019 static bool
3020 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3021 			     const char *plugin_name)
3022 {
3023   const char *err = NULL, *last_missing = NULL;
3024 
3025   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3026   if (!plugin_handle)
3027     goto dl_fail;
3028 
3029   /* Check if all required functions are available in the plugin and store
3030      their handles.  None of the symbols can legitimately be NULL,
3031      so we don't need to check dlerror all the time.  */
3032 #define DLSYM(f)							\
3033   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
3034     goto dl_fail
3035   /* Similar, but missing functions are not an error.  Evaluates to false
3036      if the lookup failed, true otherwise.  */
3037 #define DLSYM_OPT(f, n)							\
3038   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
3039    || (last_missing = #n, 0))
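
/* For instance, DLSYM (alloc) below resolves the plugin symbol
   "GOMP_OFFLOAD_alloc" into device->alloc_func and fails hard if it is
   missing, while DLSYM_OPT (can_run, can_run) merely records the miss in
   last_missing.  */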
3040 
3041   DLSYM (version);
3042   if (device->version_func () != GOMP_VERSION)
3043     {
3044       err = "plugin version mismatch";
3045       goto fail;
3046     }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return true;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return false;
}

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins and tries to load them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP will be set,
   and the array DEVICES initialized, containing descriptors for the
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

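  /* OFFLOAD_PLUGINS is a comma-separated list of plugin names fixed at
     configure time, e.g. "nvptx,hsa"; each element is expanded below into a
     DSO name of the form "libgomp-plugin-<name>" plus the platform's
     shared-object suffix.  */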
  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
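	/* E.g. with CUR pointing at "nvptx" and SONAME_SUFFIX (1) yielding
	   ".so.1" on ELF targets, PLUGIN_NAME becomes
	   "libgomp-plugin-nvptx.so.1".  */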

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
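		/* TARGET_ID is the ordinal within this plugin; the
		   user-visible OpenMP device number is the index in DEVICES
		   after the partitioning further below.  */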
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
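  /* The two passes above form a stable partition: the relative order within
     each capability group is preserved, so device numbering stays consistent
     with the plugin load order.  */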
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   The GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */