/* Copyright (C) 2013-2018 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
26*38fd1498Szrj 
/* This file handles OpenACC constructs.  */
28*38fd1498Szrj 
#include "openacc.h"
#include "libgomp.h"
#include "libgomp_g.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdarg.h>
#include <assert.h>
40*38fd1498Szrj 
41*38fd1498Szrj static int
find_pset(int pos,size_t mapnum,unsigned short * kinds)42*38fd1498Szrj find_pset (int pos, size_t mapnum, unsigned short *kinds)
43*38fd1498Szrj {
44*38fd1498Szrj   if (pos + 1 >= mapnum)
45*38fd1498Szrj     return 0;
46*38fd1498Szrj 
47*38fd1498Szrj   unsigned char kind = kinds[pos+1] & 0xff;
48*38fd1498Szrj 
49*38fd1498Szrj   return kind == GOMP_MAP_TO_PSET;
50*38fd1498Szrj }
51*38fd1498Szrj 
/* Forward declaration: goacc_wait is defined later in this file but is
   called by entry points that appear before its definition.  */
static void goacc_wait (int async, int num_waits, va_list *ap);
53*38fd1498Szrj 
54*38fd1498Szrj 
55*38fd1498Szrj /* Launch a possibly offloaded function on DEVICE.  FN is the host fn
56*38fd1498Szrj    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
57*38fd1498Szrj    blocks to be copied to/from the device.  Varadic arguments are
58*38fd1498Szrj    keyed optional parameters terminated with a zero.  */
59*38fd1498Szrj 
60*38fd1498Szrj void
GOACC_parallel_keyed(int device,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,...)61*38fd1498Szrj GOACC_parallel_keyed (int device, void (*fn) (void *),
62*38fd1498Szrj 		      size_t mapnum, void **hostaddrs, size_t *sizes,
63*38fd1498Szrj 		      unsigned short *kinds, ...)
64*38fd1498Szrj {
65*38fd1498Szrj   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
66*38fd1498Szrj   va_list ap;
67*38fd1498Szrj   struct goacc_thread *thr;
68*38fd1498Szrj   struct gomp_device_descr *acc_dev;
69*38fd1498Szrj   struct target_mem_desc *tgt;
70*38fd1498Szrj   void **devaddrs;
71*38fd1498Szrj   unsigned int i;
72*38fd1498Szrj   struct splay_tree_key_s k;
73*38fd1498Szrj   splay_tree_key tgt_fn_key;
74*38fd1498Szrj   void (*tgt_fn);
75*38fd1498Szrj   int async = GOMP_ASYNC_SYNC;
76*38fd1498Szrj   unsigned dims[GOMP_DIM_MAX];
77*38fd1498Szrj   unsigned tag;
78*38fd1498Szrj 
79*38fd1498Szrj #ifdef HAVE_INTTYPES_H
80*38fd1498Szrj   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
81*38fd1498Szrj 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
82*38fd1498Szrj #else
83*38fd1498Szrj   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84*38fd1498Szrj 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
85*38fd1498Szrj #endif
86*38fd1498Szrj   goacc_lazy_initialize ();
87*38fd1498Szrj 
88*38fd1498Szrj   thr = goacc_thread ();
89*38fd1498Szrj   acc_dev = thr->dev;
90*38fd1498Szrj 
91*38fd1498Szrj   /* Host fallback if "if" clause is false or if the current device is set to
92*38fd1498Szrj      the host.  */
93*38fd1498Szrj   if (host_fallback)
94*38fd1498Szrj     {
95*38fd1498Szrj       goacc_save_and_set_bind (acc_device_host);
96*38fd1498Szrj       fn (hostaddrs);
97*38fd1498Szrj       goacc_restore_bind ();
98*38fd1498Szrj       return;
99*38fd1498Szrj     }
100*38fd1498Szrj   else if (acc_device_type (acc_dev->type) == acc_device_host)
101*38fd1498Szrj     {
102*38fd1498Szrj       fn (hostaddrs);
103*38fd1498Szrj       return;
104*38fd1498Szrj     }
105*38fd1498Szrj 
106*38fd1498Szrj   /* Default: let the runtime choose.  */
107*38fd1498Szrj   for (i = 0; i != GOMP_DIM_MAX; i++)
108*38fd1498Szrj     dims[i] = 0;
109*38fd1498Szrj 
110*38fd1498Szrj   va_start (ap, kinds);
111*38fd1498Szrj   /* TODO: This will need amending when device_type is implemented.  */
112*38fd1498Szrj   while ((tag = va_arg (ap, unsigned)) != 0)
113*38fd1498Szrj     {
114*38fd1498Szrj       if (GOMP_LAUNCH_DEVICE (tag))
115*38fd1498Szrj 	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
116*38fd1498Szrj 		    GOMP_LAUNCH_DEVICE (tag));
117*38fd1498Szrj 
118*38fd1498Szrj       switch (GOMP_LAUNCH_CODE (tag))
119*38fd1498Szrj 	{
120*38fd1498Szrj 	case GOMP_LAUNCH_DIM:
121*38fd1498Szrj 	  {
122*38fd1498Szrj 	    unsigned mask = GOMP_LAUNCH_OP (tag);
123*38fd1498Szrj 
124*38fd1498Szrj 	    for (i = 0; i != GOMP_DIM_MAX; i++)
125*38fd1498Szrj 	      if (mask & GOMP_DIM_MASK (i))
126*38fd1498Szrj 		dims[i] = va_arg (ap, unsigned);
127*38fd1498Szrj 	  }
128*38fd1498Szrj 	  break;
129*38fd1498Szrj 
130*38fd1498Szrj 	case GOMP_LAUNCH_ASYNC:
131*38fd1498Szrj 	  {
132*38fd1498Szrj 	    /* Small constant values are encoded in the operand.  */
133*38fd1498Szrj 	    async = GOMP_LAUNCH_OP (tag);
134*38fd1498Szrj 
135*38fd1498Szrj 	    if (async == GOMP_LAUNCH_OP_MAX)
136*38fd1498Szrj 	      async = va_arg (ap, unsigned);
137*38fd1498Szrj 	    break;
138*38fd1498Szrj 	  }
139*38fd1498Szrj 
140*38fd1498Szrj 	case GOMP_LAUNCH_WAIT:
141*38fd1498Szrj 	  {
142*38fd1498Szrj 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
143*38fd1498Szrj 
144*38fd1498Szrj 	    if (num_waits)
145*38fd1498Szrj 	      goacc_wait (async, num_waits, &ap);
146*38fd1498Szrj 	    break;
147*38fd1498Szrj 	  }
148*38fd1498Szrj 
149*38fd1498Szrj 	default:
150*38fd1498Szrj 	  gomp_fatal ("unrecognized offload code '%d',"
151*38fd1498Szrj 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
152*38fd1498Szrj 	}
153*38fd1498Szrj     }
154*38fd1498Szrj   va_end (ap);
155*38fd1498Szrj 
156*38fd1498Szrj   acc_dev->openacc.async_set_async_func (async);
157*38fd1498Szrj 
158*38fd1498Szrj   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
159*38fd1498Szrj     {
160*38fd1498Szrj       k.host_start = (uintptr_t) fn;
161*38fd1498Szrj       k.host_end = k.host_start + 1;
162*38fd1498Szrj       gomp_mutex_lock (&acc_dev->lock);
163*38fd1498Szrj       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
164*38fd1498Szrj       gomp_mutex_unlock (&acc_dev->lock);
165*38fd1498Szrj 
166*38fd1498Szrj       if (tgt_fn_key == NULL)
167*38fd1498Szrj 	gomp_fatal ("target function wasn't mapped");
168*38fd1498Szrj 
169*38fd1498Szrj       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
170*38fd1498Szrj     }
171*38fd1498Szrj   else
172*38fd1498Szrj     tgt_fn = (void (*)) fn;
173*38fd1498Szrj 
174*38fd1498Szrj   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
175*38fd1498Szrj 		       GOMP_MAP_VARS_OPENACC);
176*38fd1498Szrj 
177*38fd1498Szrj   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
178*38fd1498Szrj   for (i = 0; i < mapnum; i++)
179*38fd1498Szrj     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
180*38fd1498Szrj 			    + tgt->list[i].key->tgt_offset);
181*38fd1498Szrj 
182*38fd1498Szrj   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
183*38fd1498Szrj 			      async, dims, tgt);
184*38fd1498Szrj 
185*38fd1498Szrj   /* If running synchronously, unmap immediately.  */
186*38fd1498Szrj   if (async < acc_async_noval)
187*38fd1498Szrj     gomp_unmap_vars (tgt, true);
188*38fd1498Szrj   else
189*38fd1498Szrj     tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
190*38fd1498Szrj 
191*38fd1498Szrj   acc_dev->openacc.async_set_async_func (acc_async_sync);
192*38fd1498Szrj }
193*38fd1498Szrj 
194*38fd1498Szrj /* Legacy entry point, only provide host execution.  */
195*38fd1498Szrj 
196*38fd1498Szrj void
GOACC_parallel(int device,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int num_gangs,int num_workers,int vector_length,int async,int num_waits,...)197*38fd1498Szrj GOACC_parallel (int device, void (*fn) (void *),
198*38fd1498Szrj 		size_t mapnum, void **hostaddrs, size_t *sizes,
199*38fd1498Szrj 		unsigned short *kinds,
200*38fd1498Szrj 		int num_gangs, int num_workers, int vector_length,
201*38fd1498Szrj 		int async, int num_waits, ...)
202*38fd1498Szrj {
203*38fd1498Szrj   goacc_save_and_set_bind (acc_device_host);
204*38fd1498Szrj   fn (hostaddrs);
205*38fd1498Szrj   goacc_restore_bind ();
206*38fd1498Szrj }
207*38fd1498Szrj 
208*38fd1498Szrj void
GOACC_data_start(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)209*38fd1498Szrj GOACC_data_start (int device, size_t mapnum,
210*38fd1498Szrj 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
211*38fd1498Szrj {
212*38fd1498Szrj   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
213*38fd1498Szrj   struct target_mem_desc *tgt;
214*38fd1498Szrj 
215*38fd1498Szrj #ifdef HAVE_INTTYPES_H
216*38fd1498Szrj   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
217*38fd1498Szrj 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
218*38fd1498Szrj #else
219*38fd1498Szrj   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
220*38fd1498Szrj 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
221*38fd1498Szrj #endif
222*38fd1498Szrj 
223*38fd1498Szrj   goacc_lazy_initialize ();
224*38fd1498Szrj 
225*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
226*38fd1498Szrj   struct gomp_device_descr *acc_dev = thr->dev;
227*38fd1498Szrj 
228*38fd1498Szrj   /* Host fallback or 'do nothing'.  */
229*38fd1498Szrj   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
230*38fd1498Szrj       || host_fallback)
231*38fd1498Szrj     {
232*38fd1498Szrj       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
233*38fd1498Szrj 			   GOMP_MAP_VARS_OPENACC);
234*38fd1498Szrj       tgt->prev = thr->mapped_data;
235*38fd1498Szrj       thr->mapped_data = tgt;
236*38fd1498Szrj 
237*38fd1498Szrj       return;
238*38fd1498Szrj     }
239*38fd1498Szrj 
240*38fd1498Szrj   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
241*38fd1498Szrj   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
242*38fd1498Szrj 		       GOMP_MAP_VARS_OPENACC);
243*38fd1498Szrj   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
244*38fd1498Szrj   tgt->prev = thr->mapped_data;
245*38fd1498Szrj   thr->mapped_data = tgt;
246*38fd1498Szrj }
247*38fd1498Szrj 
248*38fd1498Szrj void
GOACC_data_end(void)249*38fd1498Szrj GOACC_data_end (void)
250*38fd1498Szrj {
251*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
252*38fd1498Szrj   struct target_mem_desc *tgt = thr->mapped_data;
253*38fd1498Szrj 
254*38fd1498Szrj   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
255*38fd1498Szrj   thr->mapped_data = tgt->prev;
256*38fd1498Szrj   gomp_unmap_vars (tgt, true);
257*38fd1498Szrj   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
258*38fd1498Szrj }
259*38fd1498Szrj 
260*38fd1498Szrj void
GOACC_enter_exit_data(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)261*38fd1498Szrj GOACC_enter_exit_data (int device, size_t mapnum,
262*38fd1498Szrj 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
263*38fd1498Szrj 		       int async, int num_waits, ...)
264*38fd1498Szrj {
265*38fd1498Szrj   struct goacc_thread *thr;
266*38fd1498Szrj   struct gomp_device_descr *acc_dev;
267*38fd1498Szrj   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
268*38fd1498Szrj   bool data_enter = false;
269*38fd1498Szrj   size_t i;
270*38fd1498Szrj 
271*38fd1498Szrj   goacc_lazy_initialize ();
272*38fd1498Szrj 
273*38fd1498Szrj   thr = goacc_thread ();
274*38fd1498Szrj   acc_dev = thr->dev;
275*38fd1498Szrj 
276*38fd1498Szrj   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
277*38fd1498Szrj       || host_fallback)
278*38fd1498Szrj     return;
279*38fd1498Szrj 
280*38fd1498Szrj   if (num_waits)
281*38fd1498Szrj     {
282*38fd1498Szrj       va_list ap;
283*38fd1498Szrj 
284*38fd1498Szrj       va_start (ap, num_waits);
285*38fd1498Szrj       goacc_wait (async, num_waits, &ap);
286*38fd1498Szrj       va_end (ap);
287*38fd1498Szrj     }
288*38fd1498Szrj 
289*38fd1498Szrj   acc_dev->openacc.async_set_async_func (async);
290*38fd1498Szrj 
291*38fd1498Szrj   /* Determine if this is an "acc enter data".  */
292*38fd1498Szrj   for (i = 0; i < mapnum; ++i)
293*38fd1498Szrj     {
294*38fd1498Szrj       unsigned char kind = kinds[i] & 0xff;
295*38fd1498Szrj 
296*38fd1498Szrj       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
297*38fd1498Szrj 	continue;
298*38fd1498Szrj 
299*38fd1498Szrj       if (kind == GOMP_MAP_FORCE_ALLOC
300*38fd1498Szrj 	  || kind == GOMP_MAP_FORCE_PRESENT
301*38fd1498Szrj 	  || kind == GOMP_MAP_FORCE_TO)
302*38fd1498Szrj 	{
303*38fd1498Szrj 	  data_enter = true;
304*38fd1498Szrj 	  break;
305*38fd1498Szrj 	}
306*38fd1498Szrj 
307*38fd1498Szrj       if (kind == GOMP_MAP_DELETE
308*38fd1498Szrj 	  || kind == GOMP_MAP_FORCE_FROM)
309*38fd1498Szrj 	break;
310*38fd1498Szrj 
311*38fd1498Szrj       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
312*38fd1498Szrj 		      kind);
313*38fd1498Szrj     }
314*38fd1498Szrj 
315*38fd1498Szrj   if (data_enter)
316*38fd1498Szrj     {
317*38fd1498Szrj       for (i = 0; i < mapnum; i++)
318*38fd1498Szrj 	{
319*38fd1498Szrj 	  unsigned char kind = kinds[i] & 0xff;
320*38fd1498Szrj 
321*38fd1498Szrj 	  /* Scan for PSETs.  */
322*38fd1498Szrj 	  int psets = find_pset (i, mapnum, kinds);
323*38fd1498Szrj 
324*38fd1498Szrj 	  if (!psets)
325*38fd1498Szrj 	    {
326*38fd1498Szrj 	      switch (kind)
327*38fd1498Szrj 		{
328*38fd1498Szrj 		case GOMP_MAP_POINTER:
329*38fd1498Szrj 		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
330*38fd1498Szrj 					&kinds[i]);
331*38fd1498Szrj 		  break;
332*38fd1498Szrj 		case GOMP_MAP_FORCE_ALLOC:
333*38fd1498Szrj 		  acc_create (hostaddrs[i], sizes[i]);
334*38fd1498Szrj 		  break;
335*38fd1498Szrj 		case GOMP_MAP_FORCE_PRESENT:
336*38fd1498Szrj 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
337*38fd1498Szrj 		  break;
338*38fd1498Szrj 		case GOMP_MAP_FORCE_TO:
339*38fd1498Szrj 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
340*38fd1498Szrj 		  break;
341*38fd1498Szrj 		default:
342*38fd1498Szrj 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
343*38fd1498Szrj 			      kind);
344*38fd1498Szrj 		  break;
345*38fd1498Szrj 		}
346*38fd1498Szrj 	    }
347*38fd1498Szrj 	  else
348*38fd1498Szrj 	    {
349*38fd1498Szrj 	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
350*38fd1498Szrj 	      /* Increment 'i' by two because OpenACC requires fortran
351*38fd1498Szrj 		 arrays to be contiguous, so each PSET is associated with
352*38fd1498Szrj 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
353*38fd1498Szrj 		 one MAP_POINTER.  */
354*38fd1498Szrj 	      i += 2;
355*38fd1498Szrj 	    }
356*38fd1498Szrj 	}
357*38fd1498Szrj     }
358*38fd1498Szrj   else
359*38fd1498Szrj     for (i = 0; i < mapnum; ++i)
360*38fd1498Szrj       {
361*38fd1498Szrj 	unsigned char kind = kinds[i] & 0xff;
362*38fd1498Szrj 
363*38fd1498Szrj 	int psets = find_pset (i, mapnum, kinds);
364*38fd1498Szrj 
365*38fd1498Szrj 	if (!psets)
366*38fd1498Szrj 	  {
367*38fd1498Szrj 	    switch (kind)
368*38fd1498Szrj 	      {
369*38fd1498Szrj 	      case GOMP_MAP_POINTER:
370*38fd1498Szrj 		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
371*38fd1498Szrj 					 == GOMP_MAP_FORCE_FROM,
372*38fd1498Szrj 					 async, 1);
373*38fd1498Szrj 		break;
374*38fd1498Szrj 	      case GOMP_MAP_DELETE:
375*38fd1498Szrj 		acc_delete (hostaddrs[i], sizes[i]);
376*38fd1498Szrj 		break;
377*38fd1498Szrj 	      case GOMP_MAP_FORCE_FROM:
378*38fd1498Szrj 		acc_copyout (hostaddrs[i], sizes[i]);
379*38fd1498Szrj 		break;
380*38fd1498Szrj 	      default:
381*38fd1498Szrj 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
382*38fd1498Szrj 			    kind);
383*38fd1498Szrj 		break;
384*38fd1498Szrj 	      }
385*38fd1498Szrj 	  }
386*38fd1498Szrj 	else
387*38fd1498Szrj 	  {
388*38fd1498Szrj 	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
389*38fd1498Szrj 				     == GOMP_MAP_FORCE_FROM, async, 3);
390*38fd1498Szrj 	    /* See the above comment.  */
391*38fd1498Szrj 	    i += 2;
392*38fd1498Szrj 	  }
393*38fd1498Szrj       }
394*38fd1498Szrj 
395*38fd1498Szrj   acc_dev->openacc.async_set_async_func (acc_async_sync);
396*38fd1498Szrj }
397*38fd1498Szrj 
398*38fd1498Szrj static void
goacc_wait(int async,int num_waits,va_list * ap)399*38fd1498Szrj goacc_wait (int async, int num_waits, va_list *ap)
400*38fd1498Szrj {
401*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
402*38fd1498Szrj   struct gomp_device_descr *acc_dev = thr->dev;
403*38fd1498Szrj 
404*38fd1498Szrj   while (num_waits--)
405*38fd1498Szrj     {
406*38fd1498Szrj       int qid = va_arg (*ap, int);
407*38fd1498Szrj 
408*38fd1498Szrj       if (acc_async_test (qid))
409*38fd1498Szrj 	continue;
410*38fd1498Szrj 
411*38fd1498Szrj       if (async == acc_async_sync)
412*38fd1498Szrj 	acc_wait (qid);
413*38fd1498Szrj       else if (qid == async)
414*38fd1498Szrj 	;/* If we're waiting on the same asynchronous queue as we're
415*38fd1498Szrj 	    launching on, the queue itself will order work as
416*38fd1498Szrj 	    required, so there's no need to wait explicitly.  */
417*38fd1498Szrj       else
418*38fd1498Szrj 	acc_dev->openacc.async_wait_async_func (qid, async);
419*38fd1498Szrj     }
420*38fd1498Szrj }
421*38fd1498Szrj 
422*38fd1498Szrj void
GOACC_update(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)423*38fd1498Szrj GOACC_update (int device, size_t mapnum,
424*38fd1498Szrj 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
425*38fd1498Szrj 	      int async, int num_waits, ...)
426*38fd1498Szrj {
427*38fd1498Szrj   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
428*38fd1498Szrj   size_t i;
429*38fd1498Szrj 
430*38fd1498Szrj   goacc_lazy_initialize ();
431*38fd1498Szrj 
432*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
433*38fd1498Szrj   struct gomp_device_descr *acc_dev = thr->dev;
434*38fd1498Szrj 
435*38fd1498Szrj   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
436*38fd1498Szrj       || host_fallback)
437*38fd1498Szrj     return;
438*38fd1498Szrj 
439*38fd1498Szrj   if (num_waits)
440*38fd1498Szrj     {
441*38fd1498Szrj       va_list ap;
442*38fd1498Szrj 
443*38fd1498Szrj       va_start (ap, num_waits);
444*38fd1498Szrj       goacc_wait (async, num_waits, &ap);
445*38fd1498Szrj       va_end (ap);
446*38fd1498Szrj     }
447*38fd1498Szrj 
448*38fd1498Szrj   acc_dev->openacc.async_set_async_func (async);
449*38fd1498Szrj 
450*38fd1498Szrj   for (i = 0; i < mapnum; ++i)
451*38fd1498Szrj     {
452*38fd1498Szrj       unsigned char kind = kinds[i] & 0xff;
453*38fd1498Szrj 
454*38fd1498Szrj       switch (kind)
455*38fd1498Szrj 	{
456*38fd1498Szrj 	case GOMP_MAP_POINTER:
457*38fd1498Szrj 	case GOMP_MAP_TO_PSET:
458*38fd1498Szrj 	  break;
459*38fd1498Szrj 
460*38fd1498Szrj 	case GOMP_MAP_FORCE_TO:
461*38fd1498Szrj 	  acc_update_device (hostaddrs[i], sizes[i]);
462*38fd1498Szrj 	  break;
463*38fd1498Szrj 
464*38fd1498Szrj 	case GOMP_MAP_FORCE_FROM:
465*38fd1498Szrj 	  acc_update_self (hostaddrs[i], sizes[i]);
466*38fd1498Szrj 	  break;
467*38fd1498Szrj 
468*38fd1498Szrj 	default:
469*38fd1498Szrj 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
470*38fd1498Szrj 	  break;
471*38fd1498Szrj 	}
472*38fd1498Szrj     }
473*38fd1498Szrj 
474*38fd1498Szrj   acc_dev->openacc.async_set_async_func (acc_async_sync);
475*38fd1498Szrj }
476*38fd1498Szrj 
477*38fd1498Szrj void
GOACC_wait(int async,int num_waits,...)478*38fd1498Szrj GOACC_wait (int async, int num_waits, ...)
479*38fd1498Szrj {
480*38fd1498Szrj   if (num_waits)
481*38fd1498Szrj     {
482*38fd1498Szrj       va_list ap;
483*38fd1498Szrj 
484*38fd1498Szrj       va_start (ap, num_waits);
485*38fd1498Szrj       goacc_wait (async, num_waits, &ap);
486*38fd1498Szrj       va_end (ap);
487*38fd1498Szrj     }
488*38fd1498Szrj   else if (async == acc_async_sync)
489*38fd1498Szrj     acc_wait_all ();
490*38fd1498Szrj   else if (async == acc_async_noval)
491*38fd1498Szrj     goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
492*38fd1498Szrj }
493*38fd1498Szrj 
/* Return the number of threads; this implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
499*38fd1498Szrj 
/* Return the calling thread's number; this implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
505*38fd1498Szrj 
506*38fd1498Szrj void
GOACC_declare(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)507*38fd1498Szrj GOACC_declare (int device, size_t mapnum,
508*38fd1498Szrj 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
509*38fd1498Szrj {
510*38fd1498Szrj   int i;
511*38fd1498Szrj 
512*38fd1498Szrj   for (i = 0; i < mapnum; i++)
513*38fd1498Szrj     {
514*38fd1498Szrj       unsigned char kind = kinds[i] & 0xff;
515*38fd1498Szrj 
516*38fd1498Szrj       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
517*38fd1498Szrj 	continue;
518*38fd1498Szrj 
519*38fd1498Szrj       switch (kind)
520*38fd1498Szrj 	{
521*38fd1498Szrj 	  case GOMP_MAP_FORCE_ALLOC:
522*38fd1498Szrj 	  case GOMP_MAP_FORCE_FROM:
523*38fd1498Szrj 	  case GOMP_MAP_FORCE_TO:
524*38fd1498Szrj 	  case GOMP_MAP_POINTER:
525*38fd1498Szrj 	  case GOMP_MAP_DELETE:
526*38fd1498Szrj 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
527*38fd1498Szrj 				   &kinds[i], 0, 0);
528*38fd1498Szrj 	    break;
529*38fd1498Szrj 
530*38fd1498Szrj 	  case GOMP_MAP_FORCE_DEVICEPTR:
531*38fd1498Szrj 	    break;
532*38fd1498Szrj 
533*38fd1498Szrj 	  case GOMP_MAP_ALLOC:
534*38fd1498Szrj 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
535*38fd1498Szrj 	      GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
536*38fd1498Szrj 				     &kinds[i], 0, 0);
537*38fd1498Szrj 	    break;
538*38fd1498Szrj 
539*38fd1498Szrj 	  case GOMP_MAP_TO:
540*38fd1498Szrj 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
541*38fd1498Szrj 				   &kinds[i], 0, 0);
542*38fd1498Szrj 
543*38fd1498Szrj 	    break;
544*38fd1498Szrj 
545*38fd1498Szrj 	  case GOMP_MAP_FROM:
546*38fd1498Szrj 	    kinds[i] = GOMP_MAP_FORCE_FROM;
547*38fd1498Szrj 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
548*38fd1498Szrj 				   &kinds[i], 0, 0);
549*38fd1498Szrj 	    break;
550*38fd1498Szrj 
551*38fd1498Szrj 	  case GOMP_MAP_FORCE_PRESENT:
552*38fd1498Szrj 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
553*38fd1498Szrj 	      gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
554*38fd1498Szrj 			  (unsigned long) sizes[i]);
555*38fd1498Szrj 	    break;
556*38fd1498Szrj 
557*38fd1498Szrj 	  default:
558*38fd1498Szrj 	    assert (0);
559*38fd1498Szrj 	    break;
560*38fd1498Szrj 	}
561*38fd1498Szrj     }
562*38fd1498Szrj }
563