1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27 /* This file handles OpenACC constructs. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
40
41 static int
find_pset(int pos,size_t mapnum,unsigned short * kinds)42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
43 {
44 if (pos + 1 >= mapnum)
45 return 0;
46
47 unsigned char kind = kinds[pos+1] & 0xff;
48
49 return kind == GOMP_MAP_TO_PSET;
50 }
51
52 static void goacc_wait (int async, int num_waits, va_list *ap);
53
54
55 /* Launch a possibly offloaded function on DEVICE. FN is the host fn
56 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
57 blocks to be copied to/from the device. Varadic arguments are
58 keyed optional parameters terminated with a zero. */
59
60 void
GOACC_parallel_keyed(int device,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,...)61 GOACC_parallel_keyed (int device, void (*fn) (void *),
62 size_t mapnum, void **hostaddrs, size_t *sizes,
63 unsigned short *kinds, ...)
64 {
65 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
66 va_list ap;
67 struct goacc_thread *thr;
68 struct gomp_device_descr *acc_dev;
69 struct target_mem_desc *tgt;
70 void **devaddrs;
71 unsigned int i;
72 struct splay_tree_key_s k;
73 splay_tree_key tgt_fn_key;
74 void (*tgt_fn);
75 int async = GOMP_ASYNC_SYNC;
76 unsigned dims[GOMP_DIM_MAX];
77 unsigned tag;
78
79 #ifdef HAVE_INTTYPES_H
80 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
81 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
82 #else
83 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
85 #endif
86 goacc_lazy_initialize ();
87
88 thr = goacc_thread ();
89 acc_dev = thr->dev;
90
91 /* Host fallback if "if" clause is false or if the current device is set to
92 the host. */
93 if (host_fallback)
94 {
95 goacc_save_and_set_bind (acc_device_host);
96 fn (hostaddrs);
97 goacc_restore_bind ();
98 return;
99 }
100 else if (acc_device_type (acc_dev->type) == acc_device_host)
101 {
102 fn (hostaddrs);
103 return;
104 }
105
106 /* Default: let the runtime choose. */
107 for (i = 0; i != GOMP_DIM_MAX; i++)
108 dims[i] = 0;
109
110 va_start (ap, kinds);
111 /* TODO: This will need amending when device_type is implemented. */
112 while ((tag = va_arg (ap, unsigned)) != 0)
113 {
114 if (GOMP_LAUNCH_DEVICE (tag))
115 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
116 GOMP_LAUNCH_DEVICE (tag));
117
118 switch (GOMP_LAUNCH_CODE (tag))
119 {
120 case GOMP_LAUNCH_DIM:
121 {
122 unsigned mask = GOMP_LAUNCH_OP (tag);
123
124 for (i = 0; i != GOMP_DIM_MAX; i++)
125 if (mask & GOMP_DIM_MASK (i))
126 dims[i] = va_arg (ap, unsigned);
127 }
128 break;
129
130 case GOMP_LAUNCH_ASYNC:
131 {
132 /* Small constant values are encoded in the operand. */
133 async = GOMP_LAUNCH_OP (tag);
134
135 if (async == GOMP_LAUNCH_OP_MAX)
136 async = va_arg (ap, unsigned);
137 break;
138 }
139
140 case GOMP_LAUNCH_WAIT:
141 {
142 unsigned num_waits = GOMP_LAUNCH_OP (tag);
143
144 if (num_waits)
145 goacc_wait (async, num_waits, &ap);
146 break;
147 }
148
149 default:
150 gomp_fatal ("unrecognized offload code '%d',"
151 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
152 }
153 }
154 va_end (ap);
155
156 acc_dev->openacc.async_set_async_func (async);
157
158 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
159 {
160 k.host_start = (uintptr_t) fn;
161 k.host_end = k.host_start + 1;
162 gomp_mutex_lock (&acc_dev->lock);
163 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
164 gomp_mutex_unlock (&acc_dev->lock);
165
166 if (tgt_fn_key == NULL)
167 gomp_fatal ("target function wasn't mapped");
168
169 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
170 }
171 else
172 tgt_fn = (void (*)) fn;
173
174 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
175 GOMP_MAP_VARS_OPENACC);
176
177 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
178 for (i = 0; i < mapnum; i++)
179 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
180 + tgt->list[i].key->tgt_offset);
181
182 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
183 async, dims, tgt);
184
185 /* If running synchronously, unmap immediately. */
186 if (async < acc_async_noval)
187 gomp_unmap_vars (tgt, true);
188 else
189 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
190
191 acc_dev->openacc.async_set_async_func (acc_async_sync);
192 }
193
194 /* Legacy entry point, only provide host execution. */
195
196 void
GOACC_parallel(int device,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int num_gangs,int num_workers,int vector_length,int async,int num_waits,...)197 GOACC_parallel (int device, void (*fn) (void *),
198 size_t mapnum, void **hostaddrs, size_t *sizes,
199 unsigned short *kinds,
200 int num_gangs, int num_workers, int vector_length,
201 int async, int num_waits, ...)
202 {
203 goacc_save_and_set_bind (acc_device_host);
204 fn (hostaddrs);
205 goacc_restore_bind ();
206 }
207
208 void
GOACC_data_start(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)209 GOACC_data_start (int device, size_t mapnum,
210 void **hostaddrs, size_t *sizes, unsigned short *kinds)
211 {
212 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
213 struct target_mem_desc *tgt;
214
215 #ifdef HAVE_INTTYPES_H
216 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
217 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
218 #else
219 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
220 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
221 #endif
222
223 goacc_lazy_initialize ();
224
225 struct goacc_thread *thr = goacc_thread ();
226 struct gomp_device_descr *acc_dev = thr->dev;
227
228 /* Host fallback or 'do nothing'. */
229 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
230 || host_fallback)
231 {
232 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
233 GOMP_MAP_VARS_OPENACC);
234 tgt->prev = thr->mapped_data;
235 thr->mapped_data = tgt;
236
237 return;
238 }
239
240 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
241 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
242 GOMP_MAP_VARS_OPENACC);
243 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
244 tgt->prev = thr->mapped_data;
245 thr->mapped_data = tgt;
246 }
247
248 void
GOACC_data_end(void)249 GOACC_data_end (void)
250 {
251 struct goacc_thread *thr = goacc_thread ();
252 struct target_mem_desc *tgt = thr->mapped_data;
253
254 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
255 thr->mapped_data = tgt->prev;
256 gomp_unmap_vars (tgt, true);
257 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
258 }
259
260 void
GOACC_enter_exit_data(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)261 GOACC_enter_exit_data (int device, size_t mapnum,
262 void **hostaddrs, size_t *sizes, unsigned short *kinds,
263 int async, int num_waits, ...)
264 {
265 struct goacc_thread *thr;
266 struct gomp_device_descr *acc_dev;
267 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
268 bool data_enter = false;
269 size_t i;
270
271 goacc_lazy_initialize ();
272
273 thr = goacc_thread ();
274 acc_dev = thr->dev;
275
276 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
277 || host_fallback)
278 return;
279
280 if (num_waits)
281 {
282 va_list ap;
283
284 va_start (ap, num_waits);
285 goacc_wait (async, num_waits, &ap);
286 va_end (ap);
287 }
288
289 acc_dev->openacc.async_set_async_func (async);
290
291 /* Determine if this is an "acc enter data". */
292 for (i = 0; i < mapnum; ++i)
293 {
294 unsigned char kind = kinds[i] & 0xff;
295
296 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
297 continue;
298
299 if (kind == GOMP_MAP_FORCE_ALLOC
300 || kind == GOMP_MAP_FORCE_PRESENT
301 || kind == GOMP_MAP_FORCE_TO)
302 {
303 data_enter = true;
304 break;
305 }
306
307 if (kind == GOMP_MAP_DELETE
308 || kind == GOMP_MAP_FORCE_FROM)
309 break;
310
311 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
312 kind);
313 }
314
315 if (data_enter)
316 {
317 for (i = 0; i < mapnum; i++)
318 {
319 unsigned char kind = kinds[i] & 0xff;
320
321 /* Scan for PSETs. */
322 int psets = find_pset (i, mapnum, kinds);
323
324 if (!psets)
325 {
326 switch (kind)
327 {
328 case GOMP_MAP_POINTER:
329 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
330 &kinds[i]);
331 break;
332 case GOMP_MAP_FORCE_ALLOC:
333 acc_create (hostaddrs[i], sizes[i]);
334 break;
335 case GOMP_MAP_FORCE_PRESENT:
336 acc_present_or_copyin (hostaddrs[i], sizes[i]);
337 break;
338 case GOMP_MAP_FORCE_TO:
339 acc_present_or_copyin (hostaddrs[i], sizes[i]);
340 break;
341 default:
342 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
343 kind);
344 break;
345 }
346 }
347 else
348 {
349 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
350 /* Increment 'i' by two because OpenACC requires fortran
351 arrays to be contiguous, so each PSET is associated with
352 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
353 one MAP_POINTER. */
354 i += 2;
355 }
356 }
357 }
358 else
359 for (i = 0; i < mapnum; ++i)
360 {
361 unsigned char kind = kinds[i] & 0xff;
362
363 int psets = find_pset (i, mapnum, kinds);
364
365 if (!psets)
366 {
367 switch (kind)
368 {
369 case GOMP_MAP_POINTER:
370 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
371 == GOMP_MAP_FORCE_FROM,
372 async, 1);
373 break;
374 case GOMP_MAP_DELETE:
375 acc_delete (hostaddrs[i], sizes[i]);
376 break;
377 case GOMP_MAP_FORCE_FROM:
378 acc_copyout (hostaddrs[i], sizes[i]);
379 break;
380 default:
381 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
382 kind);
383 break;
384 }
385 }
386 else
387 {
388 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
389 == GOMP_MAP_FORCE_FROM, async, 3);
390 /* See the above comment. */
391 i += 2;
392 }
393 }
394
395 acc_dev->openacc.async_set_async_func (acc_async_sync);
396 }
397
398 static void
goacc_wait(int async,int num_waits,va_list * ap)399 goacc_wait (int async, int num_waits, va_list *ap)
400 {
401 struct goacc_thread *thr = goacc_thread ();
402 struct gomp_device_descr *acc_dev = thr->dev;
403
404 while (num_waits--)
405 {
406 int qid = va_arg (*ap, int);
407
408 if (acc_async_test (qid))
409 continue;
410
411 if (async == acc_async_sync)
412 acc_wait (qid);
413 else if (qid == async)
414 ;/* If we're waiting on the same asynchronous queue as we're
415 launching on, the queue itself will order work as
416 required, so there's no need to wait explicitly. */
417 else
418 acc_dev->openacc.async_wait_async_func (qid, async);
419 }
420 }
421
422 void
GOACC_update(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)423 GOACC_update (int device, size_t mapnum,
424 void **hostaddrs, size_t *sizes, unsigned short *kinds,
425 int async, int num_waits, ...)
426 {
427 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
428 size_t i;
429
430 goacc_lazy_initialize ();
431
432 struct goacc_thread *thr = goacc_thread ();
433 struct gomp_device_descr *acc_dev = thr->dev;
434
435 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
436 || host_fallback)
437 return;
438
439 if (num_waits)
440 {
441 va_list ap;
442
443 va_start (ap, num_waits);
444 goacc_wait (async, num_waits, &ap);
445 va_end (ap);
446 }
447
448 acc_dev->openacc.async_set_async_func (async);
449
450 for (i = 0; i < mapnum; ++i)
451 {
452 unsigned char kind = kinds[i] & 0xff;
453
454 switch (kind)
455 {
456 case GOMP_MAP_POINTER:
457 case GOMP_MAP_TO_PSET:
458 break;
459
460 case GOMP_MAP_FORCE_TO:
461 acc_update_device (hostaddrs[i], sizes[i]);
462 break;
463
464 case GOMP_MAP_FORCE_FROM:
465 acc_update_self (hostaddrs[i], sizes[i]);
466 break;
467
468 default:
469 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
470 break;
471 }
472 }
473
474 acc_dev->openacc.async_set_async_func (acc_async_sync);
475 }
476
477 void
GOACC_wait(int async,int num_waits,...)478 GOACC_wait (int async, int num_waits, ...)
479 {
480 if (num_waits)
481 {
482 va_list ap;
483
484 va_start (ap, num_waits);
485 goacc_wait (async, num_waits, &ap);
486 va_end (ap);
487 }
488 else if (async == acc_async_sync)
489 acc_wait_all ();
490 else if (async == acc_async_noval)
491 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
492 }
493
/* Report the number of threads.  This implementation always answers 1.  */

int
GOACC_get_num_threads (void)
{
  enum { reported_thread_count = 1 };

  return reported_thread_count;
}
499
/* Report the calling thread's number.  This implementation always
   answers 0.  */

int
GOACC_get_thread_num (void)
{
  enum { reported_thread_index = 0 };

  return reported_thread_index;
}
505
506 void
GOACC_declare(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)507 GOACC_declare (int device, size_t mapnum,
508 void **hostaddrs, size_t *sizes, unsigned short *kinds)
509 {
510 int i;
511
512 for (i = 0; i < mapnum; i++)
513 {
514 unsigned char kind = kinds[i] & 0xff;
515
516 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
517 continue;
518
519 switch (kind)
520 {
521 case GOMP_MAP_FORCE_ALLOC:
522 case GOMP_MAP_FORCE_FROM:
523 case GOMP_MAP_FORCE_TO:
524 case GOMP_MAP_POINTER:
525 case GOMP_MAP_DELETE:
526 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
527 &kinds[i], 0, 0);
528 break;
529
530 case GOMP_MAP_FORCE_DEVICEPTR:
531 break;
532
533 case GOMP_MAP_ALLOC:
534 if (!acc_is_present (hostaddrs[i], sizes[i]))
535 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
536 &kinds[i], 0, 0);
537 break;
538
539 case GOMP_MAP_TO:
540 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
541 &kinds[i], 0, 0);
542
543 break;
544
545 case GOMP_MAP_FROM:
546 kinds[i] = GOMP_MAP_FORCE_FROM;
547 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
548 &kinds[i], 0, 0);
549 break;
550
551 case GOMP_MAP_FORCE_PRESENT:
552 if (!acc_is_present (hostaddrs[i], sizes[i]))
553 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
554 (unsigned long) sizes[i]);
555 break;
556
557 default:
558 assert (0);
559 break;
560 }
561 }
562 }
563