1*38fd1498Szrj /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2*38fd1498Szrj
3*38fd1498Szrj Contributed by Mentor Embedded.
4*38fd1498Szrj
5*38fd1498Szrj This file is part of the GNU Offloading and Multi Processing Library
6*38fd1498Szrj (libgomp).
7*38fd1498Szrj
8*38fd1498Szrj Libgomp is free software; you can redistribute it and/or modify it
9*38fd1498Szrj under the terms of the GNU General Public License as published by
10*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
11*38fd1498Szrj any later version.
12*38fd1498Szrj
13*38fd1498Szrj Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15*38fd1498Szrj FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16*38fd1498Szrj more details.
17*38fd1498Szrj
18*38fd1498Szrj Under Section 7 of GPL version 3, you are granted additional
19*38fd1498Szrj permissions described in the GCC Runtime Library Exception, version
20*38fd1498Szrj 3.1, as published by the Free Software Foundation.
21*38fd1498Szrj
22*38fd1498Szrj You should have received a copy of the GNU General Public License and
23*38fd1498Szrj a copy of the GCC Runtime Library Exception along with this program;
24*38fd1498Szrj see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25*38fd1498Szrj <http://www.gnu.org/licenses/>. */
26*38fd1498Szrj
27*38fd1498Szrj /* This file handles OpenACC constructs. */
28*38fd1498Szrj
29*38fd1498Szrj #include "openacc.h"
30*38fd1498Szrj #include "libgomp.h"
31*38fd1498Szrj #include "libgomp_g.h"
32*38fd1498Szrj #include "gomp-constants.h"
33*38fd1498Szrj #include "oacc-int.h"
34*38fd1498Szrj #ifdef HAVE_INTTYPES_H
35*38fd1498Szrj # include <inttypes.h> /* For PRIu64. */
36*38fd1498Szrj #endif
37*38fd1498Szrj #include <string.h>
38*38fd1498Szrj #include <stdarg.h>
39*38fd1498Szrj #include <assert.h>
40*38fd1498Szrj
41*38fd1498Szrj static int
find_pset(int pos,size_t mapnum,unsigned short * kinds)42*38fd1498Szrj find_pset (int pos, size_t mapnum, unsigned short *kinds)
43*38fd1498Szrj {
44*38fd1498Szrj if (pos + 1 >= mapnum)
45*38fd1498Szrj return 0;
46*38fd1498Szrj
47*38fd1498Szrj unsigned char kind = kinds[pos+1] & 0xff;
48*38fd1498Szrj
49*38fd1498Szrj return kind == GOMP_MAP_TO_PSET;
50*38fd1498Szrj }
51*38fd1498Szrj
52*38fd1498Szrj static void goacc_wait (int async, int num_waits, va_list *ap);
53*38fd1498Szrj
54*38fd1498Szrj
55*38fd1498Szrj /* Launch a possibly offloaded function on DEVICE. FN is the host fn
56*38fd1498Szrj address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
57*38fd1498Szrj blocks to be copied to/from the device. Varadic arguments are
58*38fd1498Szrj keyed optional parameters terminated with a zero. */
59*38fd1498Szrj
60*38fd1498Szrj void
GOACC_parallel_keyed(int device,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,...)61*38fd1498Szrj GOACC_parallel_keyed (int device, void (*fn) (void *),
62*38fd1498Szrj size_t mapnum, void **hostaddrs, size_t *sizes,
63*38fd1498Szrj unsigned short *kinds, ...)
64*38fd1498Szrj {
65*38fd1498Szrj bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
66*38fd1498Szrj va_list ap;
67*38fd1498Szrj struct goacc_thread *thr;
68*38fd1498Szrj struct gomp_device_descr *acc_dev;
69*38fd1498Szrj struct target_mem_desc *tgt;
70*38fd1498Szrj void **devaddrs;
71*38fd1498Szrj unsigned int i;
72*38fd1498Szrj struct splay_tree_key_s k;
73*38fd1498Szrj splay_tree_key tgt_fn_key;
74*38fd1498Szrj void (*tgt_fn);
75*38fd1498Szrj int async = GOMP_ASYNC_SYNC;
76*38fd1498Szrj unsigned dims[GOMP_DIM_MAX];
77*38fd1498Szrj unsigned tag;
78*38fd1498Szrj
79*38fd1498Szrj #ifdef HAVE_INTTYPES_H
80*38fd1498Szrj gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
81*38fd1498Szrj __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
82*38fd1498Szrj #else
83*38fd1498Szrj gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84*38fd1498Szrj __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
85*38fd1498Szrj #endif
86*38fd1498Szrj goacc_lazy_initialize ();
87*38fd1498Szrj
88*38fd1498Szrj thr = goacc_thread ();
89*38fd1498Szrj acc_dev = thr->dev;
90*38fd1498Szrj
91*38fd1498Szrj /* Host fallback if "if" clause is false or if the current device is set to
92*38fd1498Szrj the host. */
93*38fd1498Szrj if (host_fallback)
94*38fd1498Szrj {
95*38fd1498Szrj goacc_save_and_set_bind (acc_device_host);
96*38fd1498Szrj fn (hostaddrs);
97*38fd1498Szrj goacc_restore_bind ();
98*38fd1498Szrj return;
99*38fd1498Szrj }
100*38fd1498Szrj else if (acc_device_type (acc_dev->type) == acc_device_host)
101*38fd1498Szrj {
102*38fd1498Szrj fn (hostaddrs);
103*38fd1498Szrj return;
104*38fd1498Szrj }
105*38fd1498Szrj
106*38fd1498Szrj /* Default: let the runtime choose. */
107*38fd1498Szrj for (i = 0; i != GOMP_DIM_MAX; i++)
108*38fd1498Szrj dims[i] = 0;
109*38fd1498Szrj
110*38fd1498Szrj va_start (ap, kinds);
111*38fd1498Szrj /* TODO: This will need amending when device_type is implemented. */
112*38fd1498Szrj while ((tag = va_arg (ap, unsigned)) != 0)
113*38fd1498Szrj {
114*38fd1498Szrj if (GOMP_LAUNCH_DEVICE (tag))
115*38fd1498Szrj gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
116*38fd1498Szrj GOMP_LAUNCH_DEVICE (tag));
117*38fd1498Szrj
118*38fd1498Szrj switch (GOMP_LAUNCH_CODE (tag))
119*38fd1498Szrj {
120*38fd1498Szrj case GOMP_LAUNCH_DIM:
121*38fd1498Szrj {
122*38fd1498Szrj unsigned mask = GOMP_LAUNCH_OP (tag);
123*38fd1498Szrj
124*38fd1498Szrj for (i = 0; i != GOMP_DIM_MAX; i++)
125*38fd1498Szrj if (mask & GOMP_DIM_MASK (i))
126*38fd1498Szrj dims[i] = va_arg (ap, unsigned);
127*38fd1498Szrj }
128*38fd1498Szrj break;
129*38fd1498Szrj
130*38fd1498Szrj case GOMP_LAUNCH_ASYNC:
131*38fd1498Szrj {
132*38fd1498Szrj /* Small constant values are encoded in the operand. */
133*38fd1498Szrj async = GOMP_LAUNCH_OP (tag);
134*38fd1498Szrj
135*38fd1498Szrj if (async == GOMP_LAUNCH_OP_MAX)
136*38fd1498Szrj async = va_arg (ap, unsigned);
137*38fd1498Szrj break;
138*38fd1498Szrj }
139*38fd1498Szrj
140*38fd1498Szrj case GOMP_LAUNCH_WAIT:
141*38fd1498Szrj {
142*38fd1498Szrj unsigned num_waits = GOMP_LAUNCH_OP (tag);
143*38fd1498Szrj
144*38fd1498Szrj if (num_waits)
145*38fd1498Szrj goacc_wait (async, num_waits, &ap);
146*38fd1498Szrj break;
147*38fd1498Szrj }
148*38fd1498Szrj
149*38fd1498Szrj default:
150*38fd1498Szrj gomp_fatal ("unrecognized offload code '%d',"
151*38fd1498Szrj " libgomp is too old", GOMP_LAUNCH_CODE (tag));
152*38fd1498Szrj }
153*38fd1498Szrj }
154*38fd1498Szrj va_end (ap);
155*38fd1498Szrj
156*38fd1498Szrj acc_dev->openacc.async_set_async_func (async);
157*38fd1498Szrj
158*38fd1498Szrj if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
159*38fd1498Szrj {
160*38fd1498Szrj k.host_start = (uintptr_t) fn;
161*38fd1498Szrj k.host_end = k.host_start + 1;
162*38fd1498Szrj gomp_mutex_lock (&acc_dev->lock);
163*38fd1498Szrj tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
164*38fd1498Szrj gomp_mutex_unlock (&acc_dev->lock);
165*38fd1498Szrj
166*38fd1498Szrj if (tgt_fn_key == NULL)
167*38fd1498Szrj gomp_fatal ("target function wasn't mapped");
168*38fd1498Szrj
169*38fd1498Szrj tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
170*38fd1498Szrj }
171*38fd1498Szrj else
172*38fd1498Szrj tgt_fn = (void (*)) fn;
173*38fd1498Szrj
174*38fd1498Szrj tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
175*38fd1498Szrj GOMP_MAP_VARS_OPENACC);
176*38fd1498Szrj
177*38fd1498Szrj devaddrs = gomp_alloca (sizeof (void *) * mapnum);
178*38fd1498Szrj for (i = 0; i < mapnum; i++)
179*38fd1498Szrj devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
180*38fd1498Szrj + tgt->list[i].key->tgt_offset);
181*38fd1498Szrj
182*38fd1498Szrj acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
183*38fd1498Szrj async, dims, tgt);
184*38fd1498Szrj
185*38fd1498Szrj /* If running synchronously, unmap immediately. */
186*38fd1498Szrj if (async < acc_async_noval)
187*38fd1498Szrj gomp_unmap_vars (tgt, true);
188*38fd1498Szrj else
189*38fd1498Szrj tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
190*38fd1498Szrj
191*38fd1498Szrj acc_dev->openacc.async_set_async_func (acc_async_sync);
192*38fd1498Szrj }
193*38fd1498Szrj
194*38fd1498Szrj /* Legacy entry point, only provide host execution. */
195*38fd1498Szrj
196*38fd1498Szrj void
GOACC_parallel(int device,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int num_gangs,int num_workers,int vector_length,int async,int num_waits,...)197*38fd1498Szrj GOACC_parallel (int device, void (*fn) (void *),
198*38fd1498Szrj size_t mapnum, void **hostaddrs, size_t *sizes,
199*38fd1498Szrj unsigned short *kinds,
200*38fd1498Szrj int num_gangs, int num_workers, int vector_length,
201*38fd1498Szrj int async, int num_waits, ...)
202*38fd1498Szrj {
203*38fd1498Szrj goacc_save_and_set_bind (acc_device_host);
204*38fd1498Szrj fn (hostaddrs);
205*38fd1498Szrj goacc_restore_bind ();
206*38fd1498Szrj }
207*38fd1498Szrj
208*38fd1498Szrj void
GOACC_data_start(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)209*38fd1498Szrj GOACC_data_start (int device, size_t mapnum,
210*38fd1498Szrj void **hostaddrs, size_t *sizes, unsigned short *kinds)
211*38fd1498Szrj {
212*38fd1498Szrj bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
213*38fd1498Szrj struct target_mem_desc *tgt;
214*38fd1498Szrj
215*38fd1498Szrj #ifdef HAVE_INTTYPES_H
216*38fd1498Szrj gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
217*38fd1498Szrj __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
218*38fd1498Szrj #else
219*38fd1498Szrj gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
220*38fd1498Szrj __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
221*38fd1498Szrj #endif
222*38fd1498Szrj
223*38fd1498Szrj goacc_lazy_initialize ();
224*38fd1498Szrj
225*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
226*38fd1498Szrj struct gomp_device_descr *acc_dev = thr->dev;
227*38fd1498Szrj
228*38fd1498Szrj /* Host fallback or 'do nothing'. */
229*38fd1498Szrj if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
230*38fd1498Szrj || host_fallback)
231*38fd1498Szrj {
232*38fd1498Szrj tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
233*38fd1498Szrj GOMP_MAP_VARS_OPENACC);
234*38fd1498Szrj tgt->prev = thr->mapped_data;
235*38fd1498Szrj thr->mapped_data = tgt;
236*38fd1498Szrj
237*38fd1498Szrj return;
238*38fd1498Szrj }
239*38fd1498Szrj
240*38fd1498Szrj gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
241*38fd1498Szrj tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
242*38fd1498Szrj GOMP_MAP_VARS_OPENACC);
243*38fd1498Szrj gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
244*38fd1498Szrj tgt->prev = thr->mapped_data;
245*38fd1498Szrj thr->mapped_data = tgt;
246*38fd1498Szrj }
247*38fd1498Szrj
248*38fd1498Szrj void
GOACC_data_end(void)249*38fd1498Szrj GOACC_data_end (void)
250*38fd1498Szrj {
251*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
252*38fd1498Szrj struct target_mem_desc *tgt = thr->mapped_data;
253*38fd1498Szrj
254*38fd1498Szrj gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
255*38fd1498Szrj thr->mapped_data = tgt->prev;
256*38fd1498Szrj gomp_unmap_vars (tgt, true);
257*38fd1498Szrj gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
258*38fd1498Szrj }
259*38fd1498Szrj
260*38fd1498Szrj void
GOACC_enter_exit_data(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)261*38fd1498Szrj GOACC_enter_exit_data (int device, size_t mapnum,
262*38fd1498Szrj void **hostaddrs, size_t *sizes, unsigned short *kinds,
263*38fd1498Szrj int async, int num_waits, ...)
264*38fd1498Szrj {
265*38fd1498Szrj struct goacc_thread *thr;
266*38fd1498Szrj struct gomp_device_descr *acc_dev;
267*38fd1498Szrj bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
268*38fd1498Szrj bool data_enter = false;
269*38fd1498Szrj size_t i;
270*38fd1498Szrj
271*38fd1498Szrj goacc_lazy_initialize ();
272*38fd1498Szrj
273*38fd1498Szrj thr = goacc_thread ();
274*38fd1498Szrj acc_dev = thr->dev;
275*38fd1498Szrj
276*38fd1498Szrj if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
277*38fd1498Szrj || host_fallback)
278*38fd1498Szrj return;
279*38fd1498Szrj
280*38fd1498Szrj if (num_waits)
281*38fd1498Szrj {
282*38fd1498Szrj va_list ap;
283*38fd1498Szrj
284*38fd1498Szrj va_start (ap, num_waits);
285*38fd1498Szrj goacc_wait (async, num_waits, &ap);
286*38fd1498Szrj va_end (ap);
287*38fd1498Szrj }
288*38fd1498Szrj
289*38fd1498Szrj acc_dev->openacc.async_set_async_func (async);
290*38fd1498Szrj
291*38fd1498Szrj /* Determine if this is an "acc enter data". */
292*38fd1498Szrj for (i = 0; i < mapnum; ++i)
293*38fd1498Szrj {
294*38fd1498Szrj unsigned char kind = kinds[i] & 0xff;
295*38fd1498Szrj
296*38fd1498Szrj if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
297*38fd1498Szrj continue;
298*38fd1498Szrj
299*38fd1498Szrj if (kind == GOMP_MAP_FORCE_ALLOC
300*38fd1498Szrj || kind == GOMP_MAP_FORCE_PRESENT
301*38fd1498Szrj || kind == GOMP_MAP_FORCE_TO)
302*38fd1498Szrj {
303*38fd1498Szrj data_enter = true;
304*38fd1498Szrj break;
305*38fd1498Szrj }
306*38fd1498Szrj
307*38fd1498Szrj if (kind == GOMP_MAP_DELETE
308*38fd1498Szrj || kind == GOMP_MAP_FORCE_FROM)
309*38fd1498Szrj break;
310*38fd1498Szrj
311*38fd1498Szrj gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
312*38fd1498Szrj kind);
313*38fd1498Szrj }
314*38fd1498Szrj
315*38fd1498Szrj if (data_enter)
316*38fd1498Szrj {
317*38fd1498Szrj for (i = 0; i < mapnum; i++)
318*38fd1498Szrj {
319*38fd1498Szrj unsigned char kind = kinds[i] & 0xff;
320*38fd1498Szrj
321*38fd1498Szrj /* Scan for PSETs. */
322*38fd1498Szrj int psets = find_pset (i, mapnum, kinds);
323*38fd1498Szrj
324*38fd1498Szrj if (!psets)
325*38fd1498Szrj {
326*38fd1498Szrj switch (kind)
327*38fd1498Szrj {
328*38fd1498Szrj case GOMP_MAP_POINTER:
329*38fd1498Szrj gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
330*38fd1498Szrj &kinds[i]);
331*38fd1498Szrj break;
332*38fd1498Szrj case GOMP_MAP_FORCE_ALLOC:
333*38fd1498Szrj acc_create (hostaddrs[i], sizes[i]);
334*38fd1498Szrj break;
335*38fd1498Szrj case GOMP_MAP_FORCE_PRESENT:
336*38fd1498Szrj acc_present_or_copyin (hostaddrs[i], sizes[i]);
337*38fd1498Szrj break;
338*38fd1498Szrj case GOMP_MAP_FORCE_TO:
339*38fd1498Szrj acc_present_or_copyin (hostaddrs[i], sizes[i]);
340*38fd1498Szrj break;
341*38fd1498Szrj default:
342*38fd1498Szrj gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
343*38fd1498Szrj kind);
344*38fd1498Szrj break;
345*38fd1498Szrj }
346*38fd1498Szrj }
347*38fd1498Szrj else
348*38fd1498Szrj {
349*38fd1498Szrj gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
350*38fd1498Szrj /* Increment 'i' by two because OpenACC requires fortran
351*38fd1498Szrj arrays to be contiguous, so each PSET is associated with
352*38fd1498Szrj one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
353*38fd1498Szrj one MAP_POINTER. */
354*38fd1498Szrj i += 2;
355*38fd1498Szrj }
356*38fd1498Szrj }
357*38fd1498Szrj }
358*38fd1498Szrj else
359*38fd1498Szrj for (i = 0; i < mapnum; ++i)
360*38fd1498Szrj {
361*38fd1498Szrj unsigned char kind = kinds[i] & 0xff;
362*38fd1498Szrj
363*38fd1498Szrj int psets = find_pset (i, mapnum, kinds);
364*38fd1498Szrj
365*38fd1498Szrj if (!psets)
366*38fd1498Szrj {
367*38fd1498Szrj switch (kind)
368*38fd1498Szrj {
369*38fd1498Szrj case GOMP_MAP_POINTER:
370*38fd1498Szrj gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
371*38fd1498Szrj == GOMP_MAP_FORCE_FROM,
372*38fd1498Szrj async, 1);
373*38fd1498Szrj break;
374*38fd1498Szrj case GOMP_MAP_DELETE:
375*38fd1498Szrj acc_delete (hostaddrs[i], sizes[i]);
376*38fd1498Szrj break;
377*38fd1498Szrj case GOMP_MAP_FORCE_FROM:
378*38fd1498Szrj acc_copyout (hostaddrs[i], sizes[i]);
379*38fd1498Szrj break;
380*38fd1498Szrj default:
381*38fd1498Szrj gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
382*38fd1498Szrj kind);
383*38fd1498Szrj break;
384*38fd1498Szrj }
385*38fd1498Szrj }
386*38fd1498Szrj else
387*38fd1498Szrj {
388*38fd1498Szrj gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
389*38fd1498Szrj == GOMP_MAP_FORCE_FROM, async, 3);
390*38fd1498Szrj /* See the above comment. */
391*38fd1498Szrj i += 2;
392*38fd1498Szrj }
393*38fd1498Szrj }
394*38fd1498Szrj
395*38fd1498Szrj acc_dev->openacc.async_set_async_func (acc_async_sync);
396*38fd1498Szrj }
397*38fd1498Szrj
398*38fd1498Szrj static void
goacc_wait(int async,int num_waits,va_list * ap)399*38fd1498Szrj goacc_wait (int async, int num_waits, va_list *ap)
400*38fd1498Szrj {
401*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
402*38fd1498Szrj struct gomp_device_descr *acc_dev = thr->dev;
403*38fd1498Szrj
404*38fd1498Szrj while (num_waits--)
405*38fd1498Szrj {
406*38fd1498Szrj int qid = va_arg (*ap, int);
407*38fd1498Szrj
408*38fd1498Szrj if (acc_async_test (qid))
409*38fd1498Szrj continue;
410*38fd1498Szrj
411*38fd1498Szrj if (async == acc_async_sync)
412*38fd1498Szrj acc_wait (qid);
413*38fd1498Szrj else if (qid == async)
414*38fd1498Szrj ;/* If we're waiting on the same asynchronous queue as we're
415*38fd1498Szrj launching on, the queue itself will order work as
416*38fd1498Szrj required, so there's no need to wait explicitly. */
417*38fd1498Szrj else
418*38fd1498Szrj acc_dev->openacc.async_wait_async_func (qid, async);
419*38fd1498Szrj }
420*38fd1498Szrj }
421*38fd1498Szrj
422*38fd1498Szrj void
GOACC_update(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)423*38fd1498Szrj GOACC_update (int device, size_t mapnum,
424*38fd1498Szrj void **hostaddrs, size_t *sizes, unsigned short *kinds,
425*38fd1498Szrj int async, int num_waits, ...)
426*38fd1498Szrj {
427*38fd1498Szrj bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
428*38fd1498Szrj size_t i;
429*38fd1498Szrj
430*38fd1498Szrj goacc_lazy_initialize ();
431*38fd1498Szrj
432*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
433*38fd1498Szrj struct gomp_device_descr *acc_dev = thr->dev;
434*38fd1498Szrj
435*38fd1498Szrj if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
436*38fd1498Szrj || host_fallback)
437*38fd1498Szrj return;
438*38fd1498Szrj
439*38fd1498Szrj if (num_waits)
440*38fd1498Szrj {
441*38fd1498Szrj va_list ap;
442*38fd1498Szrj
443*38fd1498Szrj va_start (ap, num_waits);
444*38fd1498Szrj goacc_wait (async, num_waits, &ap);
445*38fd1498Szrj va_end (ap);
446*38fd1498Szrj }
447*38fd1498Szrj
448*38fd1498Szrj acc_dev->openacc.async_set_async_func (async);
449*38fd1498Szrj
450*38fd1498Szrj for (i = 0; i < mapnum; ++i)
451*38fd1498Szrj {
452*38fd1498Szrj unsigned char kind = kinds[i] & 0xff;
453*38fd1498Szrj
454*38fd1498Szrj switch (kind)
455*38fd1498Szrj {
456*38fd1498Szrj case GOMP_MAP_POINTER:
457*38fd1498Szrj case GOMP_MAP_TO_PSET:
458*38fd1498Szrj break;
459*38fd1498Szrj
460*38fd1498Szrj case GOMP_MAP_FORCE_TO:
461*38fd1498Szrj acc_update_device (hostaddrs[i], sizes[i]);
462*38fd1498Szrj break;
463*38fd1498Szrj
464*38fd1498Szrj case GOMP_MAP_FORCE_FROM:
465*38fd1498Szrj acc_update_self (hostaddrs[i], sizes[i]);
466*38fd1498Szrj break;
467*38fd1498Szrj
468*38fd1498Szrj default:
469*38fd1498Szrj gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
470*38fd1498Szrj break;
471*38fd1498Szrj }
472*38fd1498Szrj }
473*38fd1498Szrj
474*38fd1498Szrj acc_dev->openacc.async_set_async_func (acc_async_sync);
475*38fd1498Szrj }
476*38fd1498Szrj
477*38fd1498Szrj void
GOACC_wait(int async,int num_waits,...)478*38fd1498Szrj GOACC_wait (int async, int num_waits, ...)
479*38fd1498Szrj {
480*38fd1498Szrj if (num_waits)
481*38fd1498Szrj {
482*38fd1498Szrj va_list ap;
483*38fd1498Szrj
484*38fd1498Szrj va_start (ap, num_waits);
485*38fd1498Szrj goacc_wait (async, num_waits, &ap);
486*38fd1498Szrj va_end (ap);
487*38fd1498Szrj }
488*38fd1498Szrj else if (async == acc_async_sync)
489*38fd1498Szrj acc_wait_all ();
490*38fd1498Szrj else if (async == acc_async_noval)
491*38fd1498Szrj goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
492*38fd1498Szrj }
493*38fd1498Szrj
/* Return the number of threads; this implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  enum { reported_thread_count = 1 };

  return reported_thread_count;
}
499*38fd1498Szrj
/* Return the number of the calling thread; this implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  enum { reported_thread_index = 0 };

  return reported_thread_index;
}
505*38fd1498Szrj
506*38fd1498Szrj void
GOACC_declare(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)507*38fd1498Szrj GOACC_declare (int device, size_t mapnum,
508*38fd1498Szrj void **hostaddrs, size_t *sizes, unsigned short *kinds)
509*38fd1498Szrj {
510*38fd1498Szrj int i;
511*38fd1498Szrj
512*38fd1498Szrj for (i = 0; i < mapnum; i++)
513*38fd1498Szrj {
514*38fd1498Szrj unsigned char kind = kinds[i] & 0xff;
515*38fd1498Szrj
516*38fd1498Szrj if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
517*38fd1498Szrj continue;
518*38fd1498Szrj
519*38fd1498Szrj switch (kind)
520*38fd1498Szrj {
521*38fd1498Szrj case GOMP_MAP_FORCE_ALLOC:
522*38fd1498Szrj case GOMP_MAP_FORCE_FROM:
523*38fd1498Szrj case GOMP_MAP_FORCE_TO:
524*38fd1498Szrj case GOMP_MAP_POINTER:
525*38fd1498Szrj case GOMP_MAP_DELETE:
526*38fd1498Szrj GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
527*38fd1498Szrj &kinds[i], 0, 0);
528*38fd1498Szrj break;
529*38fd1498Szrj
530*38fd1498Szrj case GOMP_MAP_FORCE_DEVICEPTR:
531*38fd1498Szrj break;
532*38fd1498Szrj
533*38fd1498Szrj case GOMP_MAP_ALLOC:
534*38fd1498Szrj if (!acc_is_present (hostaddrs[i], sizes[i]))
535*38fd1498Szrj GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
536*38fd1498Szrj &kinds[i], 0, 0);
537*38fd1498Szrj break;
538*38fd1498Szrj
539*38fd1498Szrj case GOMP_MAP_TO:
540*38fd1498Szrj GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
541*38fd1498Szrj &kinds[i], 0, 0);
542*38fd1498Szrj
543*38fd1498Szrj break;
544*38fd1498Szrj
545*38fd1498Szrj case GOMP_MAP_FROM:
546*38fd1498Szrj kinds[i] = GOMP_MAP_FORCE_FROM;
547*38fd1498Szrj GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
548*38fd1498Szrj &kinds[i], 0, 0);
549*38fd1498Szrj break;
550*38fd1498Szrj
551*38fd1498Szrj case GOMP_MAP_FORCE_PRESENT:
552*38fd1498Szrj if (!acc_is_present (hostaddrs[i], sizes[i]))
553*38fd1498Szrj gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
554*38fd1498Szrj (unsigned long) sizes[i]);
555*38fd1498Szrj break;
556*38fd1498Szrj
557*38fd1498Szrj default:
558*38fd1498Szrj assert (0);
559*38fd1498Szrj break;
560*38fd1498Szrj }
561*38fd1498Szrj }
562*38fd1498Szrj }
563