1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2 
3    Contributed by Mentor Embedded.
4 
5    This file is part of the GNU Offloading and Multi Processing Library
6    (libgomp).
7 
8    Libgomp is free software; you can redistribute it and/or modify it
9    under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3, or (at your option)
11    any later version.
12 
13    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16    more details.
17 
18    Under Section 7 of GPL version 3, you are granted additional
19    permissions described in the GCC Runtime Library Exception, version
20    3.1, as published by the Free Software Foundation.
21 
22    You should have received a copy of the GNU General Public License and
23    a copy of the GCC Runtime Library Exception along with this program;
24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25    <http://www.gnu.org/licenses/>.  */
26 
27 /* This file handles OpenACC constructs.  */
28 
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h>  /* For PRIu64.  */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
40 
41 static int
42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
43 {
44   if (pos + 1 >= mapnum)
45     return 0;
46 
47   unsigned char kind = kinds[pos+1] & 0xff;
48 
49   return kind == GOMP_MAP_TO_PSET;
50 }
51 
52 static void goacc_wait (int async, int num_waits, va_list *ap);
53 
54 
55 /* Launch a possibly offloaded function on DEVICE.  FN is the host fn
56    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
57    blocks to be copied to/from the device.  Varadic arguments are
58    keyed optional parameters terminated with a zero.  */
59 
60 void
61 GOACC_parallel_keyed (int device, void (*fn) (void *),
62 		      size_t mapnum, void **hostaddrs, size_t *sizes,
63 		      unsigned short *kinds, ...)
64 {
65   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
66   va_list ap;
67   struct goacc_thread *thr;
68   struct gomp_device_descr *acc_dev;
69   struct target_mem_desc *tgt;
70   void **devaddrs;
71   unsigned int i;
72   struct splay_tree_key_s k;
73   splay_tree_key tgt_fn_key;
74   void (*tgt_fn);
75   int async = GOMP_ASYNC_SYNC;
76   unsigned dims[GOMP_DIM_MAX];
77   unsigned tag;
78 
79 #ifdef HAVE_INTTYPES_H
80   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
81 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
82 #else
83   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
85 #endif
86   goacc_lazy_initialize ();
87 
88   thr = goacc_thread ();
89   acc_dev = thr->dev;
90 
91   /* Host fallback if "if" clause is false or if the current device is set to
92      the host.  */
93   if (host_fallback)
94     {
95       goacc_save_and_set_bind (acc_device_host);
96       fn (hostaddrs);
97       goacc_restore_bind ();
98       return;
99     }
100   else if (acc_device_type (acc_dev->type) == acc_device_host)
101     {
102       fn (hostaddrs);
103       return;
104     }
105 
106   /* Default: let the runtime choose.  */
107   for (i = 0; i != GOMP_DIM_MAX; i++)
108     dims[i] = 0;
109 
110   va_start (ap, kinds);
111   /* TODO: This will need amending when device_type is implemented.  */
112   while ((tag = va_arg (ap, unsigned)) != 0)
113     {
114       if (GOMP_LAUNCH_DEVICE (tag))
115 	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
116 		    GOMP_LAUNCH_DEVICE (tag));
117 
118       switch (GOMP_LAUNCH_CODE (tag))
119 	{
120 	case GOMP_LAUNCH_DIM:
121 	  {
122 	    unsigned mask = GOMP_LAUNCH_OP (tag);
123 
124 	    for (i = 0; i != GOMP_DIM_MAX; i++)
125 	      if (mask & GOMP_DIM_MASK (i))
126 		dims[i] = va_arg (ap, unsigned);
127 	  }
128 	  break;
129 
130 	case GOMP_LAUNCH_ASYNC:
131 	  {
132 	    /* Small constant values are encoded in the operand.  */
133 	    async = GOMP_LAUNCH_OP (tag);
134 
135 	    if (async == GOMP_LAUNCH_OP_MAX)
136 	      async = va_arg (ap, unsigned);
137 	    break;
138 	  }
139 
140 	case GOMP_LAUNCH_WAIT:
141 	  {
142 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
143 
144 	    if (num_waits)
145 	      goacc_wait (async, num_waits, &ap);
146 	    break;
147 	  }
148 
149 	default:
150 	  gomp_fatal ("unrecognized offload code '%d',"
151 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
152 	}
153     }
154   va_end (ap);
155 
156   acc_dev->openacc.async_set_async_func (async);
157 
158   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
159     {
160       k.host_start = (uintptr_t) fn;
161       k.host_end = k.host_start + 1;
162       gomp_mutex_lock (&acc_dev->lock);
163       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
164       gomp_mutex_unlock (&acc_dev->lock);
165 
166       if (tgt_fn_key == NULL)
167 	gomp_fatal ("target function wasn't mapped");
168 
169       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
170     }
171   else
172     tgt_fn = (void (*)) fn;
173 
174   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
175 		       GOMP_MAP_VARS_OPENACC);
176 
177   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
178   for (i = 0; i < mapnum; i++)
179     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
180 			    + tgt->list[i].key->tgt_offset);
181 
182   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
183 			      async, dims, tgt);
184 
185   /* If running synchronously, unmap immediately.  */
186   if (async < acc_async_noval)
187     gomp_unmap_vars (tgt, true);
188   else
189     tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
190 
191   acc_dev->openacc.async_set_async_func (acc_async_sync);
192 }
193 
194 /* Legacy entry point, only provide host execution.  */
195 
196 void
197 GOACC_parallel (int device, void (*fn) (void *),
198 		size_t mapnum, void **hostaddrs, size_t *sizes,
199 		unsigned short *kinds,
200 		int num_gangs, int num_workers, int vector_length,
201 		int async, int num_waits, ...)
202 {
203   goacc_save_and_set_bind (acc_device_host);
204   fn (hostaddrs);
205   goacc_restore_bind ();
206 }
207 
208 void
209 GOACC_data_start (int device, size_t mapnum,
210 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
211 {
212   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
213   struct target_mem_desc *tgt;
214 
215 #ifdef HAVE_INTTYPES_H
216   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
217 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
218 #else
219   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
220 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
221 #endif
222 
223   goacc_lazy_initialize ();
224 
225   struct goacc_thread *thr = goacc_thread ();
226   struct gomp_device_descr *acc_dev = thr->dev;
227 
228   /* Host fallback or 'do nothing'.  */
229   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
230       || host_fallback)
231     {
232       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
233 			   GOMP_MAP_VARS_OPENACC);
234       tgt->prev = thr->mapped_data;
235       thr->mapped_data = tgt;
236 
237       return;
238     }
239 
240   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
241   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
242 		       GOMP_MAP_VARS_OPENACC);
243   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
244   tgt->prev = thr->mapped_data;
245   thr->mapped_data = tgt;
246 }
247 
248 void
249 GOACC_data_end (void)
250 {
251   struct goacc_thread *thr = goacc_thread ();
252   struct target_mem_desc *tgt = thr->mapped_data;
253 
254   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
255   thr->mapped_data = tgt->prev;
256   gomp_unmap_vars (tgt, true);
257   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
258 }
259 
260 void
261 GOACC_enter_exit_data (int device, size_t mapnum,
262 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
263 		       int async, int num_waits, ...)
264 {
265   struct goacc_thread *thr;
266   struct gomp_device_descr *acc_dev;
267   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
268   bool data_enter = false;
269   size_t i;
270 
271   goacc_lazy_initialize ();
272 
273   thr = goacc_thread ();
274   acc_dev = thr->dev;
275 
276   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
277       || host_fallback)
278     return;
279 
280   if (num_waits)
281     {
282       va_list ap;
283 
284       va_start (ap, num_waits);
285       goacc_wait (async, num_waits, &ap);
286       va_end (ap);
287     }
288 
289   acc_dev->openacc.async_set_async_func (async);
290 
291   /* Determine if this is an "acc enter data".  */
292   for (i = 0; i < mapnum; ++i)
293     {
294       unsigned char kind = kinds[i] & 0xff;
295 
296       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
297 	continue;
298 
299       if (kind == GOMP_MAP_FORCE_ALLOC
300 	  || kind == GOMP_MAP_FORCE_PRESENT
301 	  || kind == GOMP_MAP_FORCE_TO)
302 	{
303 	  data_enter = true;
304 	  break;
305 	}
306 
307       if (kind == GOMP_MAP_DELETE
308 	  || kind == GOMP_MAP_FORCE_FROM)
309 	break;
310 
311       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
312 		      kind);
313     }
314 
315   if (data_enter)
316     {
317       for (i = 0; i < mapnum; i++)
318 	{
319 	  unsigned char kind = kinds[i] & 0xff;
320 
321 	  /* Scan for PSETs.  */
322 	  int psets = find_pset (i, mapnum, kinds);
323 
324 	  if (!psets)
325 	    {
326 	      switch (kind)
327 		{
328 		case GOMP_MAP_POINTER:
329 		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
330 					&kinds[i]);
331 		  break;
332 		case GOMP_MAP_FORCE_ALLOC:
333 		  acc_create (hostaddrs[i], sizes[i]);
334 		  break;
335 		case GOMP_MAP_FORCE_PRESENT:
336 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
337 		  break;
338 		case GOMP_MAP_FORCE_TO:
339 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
340 		  break;
341 		default:
342 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
343 			      kind);
344 		  break;
345 		}
346 	    }
347 	  else
348 	    {
349 	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
350 	      /* Increment 'i' by two because OpenACC requires fortran
351 		 arrays to be contiguous, so each PSET is associated with
352 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
353 		 one MAP_POINTER.  */
354 	      i += 2;
355 	    }
356 	}
357     }
358   else
359     for (i = 0; i < mapnum; ++i)
360       {
361 	unsigned char kind = kinds[i] & 0xff;
362 
363 	int psets = find_pset (i, mapnum, kinds);
364 
365 	if (!psets)
366 	  {
367 	    switch (kind)
368 	      {
369 	      case GOMP_MAP_POINTER:
370 		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
371 					 == GOMP_MAP_FORCE_FROM,
372 					 async, 1);
373 		break;
374 	      case GOMP_MAP_DELETE:
375 		acc_delete (hostaddrs[i], sizes[i]);
376 		break;
377 	      case GOMP_MAP_FORCE_FROM:
378 		acc_copyout (hostaddrs[i], sizes[i]);
379 		break;
380 	      default:
381 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
382 			    kind);
383 		break;
384 	      }
385 	  }
386 	else
387 	  {
388 	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
389 				     == GOMP_MAP_FORCE_FROM, async, 3);
390 	    /* See the above comment.  */
391 	    i += 2;
392 	  }
393       }
394 
395   acc_dev->openacc.async_set_async_func (acc_async_sync);
396 }
397 
398 static void
399 goacc_wait (int async, int num_waits, va_list *ap)
400 {
401   struct goacc_thread *thr = goacc_thread ();
402   struct gomp_device_descr *acc_dev = thr->dev;
403 
404   while (num_waits--)
405     {
406       int qid = va_arg (*ap, int);
407 
408       if (acc_async_test (qid))
409 	continue;
410 
411       if (async == acc_async_sync)
412 	acc_wait (qid);
413       else if (qid == async)
414 	;/* If we're waiting on the same asynchronous queue as we're
415 	    launching on, the queue itself will order work as
416 	    required, so there's no need to wait explicitly.  */
417       else
418 	acc_dev->openacc.async_wait_async_func (qid, async);
419     }
420 }
421 
422 void
423 GOACC_update (int device, size_t mapnum,
424 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
425 	      int async, int num_waits, ...)
426 {
427   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
428   size_t i;
429 
430   goacc_lazy_initialize ();
431 
432   struct goacc_thread *thr = goacc_thread ();
433   struct gomp_device_descr *acc_dev = thr->dev;
434 
435   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
436       || host_fallback)
437     return;
438 
439   if (num_waits)
440     {
441       va_list ap;
442 
443       va_start (ap, num_waits);
444       goacc_wait (async, num_waits, &ap);
445       va_end (ap);
446     }
447 
448   acc_dev->openacc.async_set_async_func (async);
449 
450   for (i = 0; i < mapnum; ++i)
451     {
452       unsigned char kind = kinds[i] & 0xff;
453 
454       switch (kind)
455 	{
456 	case GOMP_MAP_POINTER:
457 	case GOMP_MAP_TO_PSET:
458 	  break;
459 
460 	case GOMP_MAP_FORCE_TO:
461 	  acc_update_device (hostaddrs[i], sizes[i]);
462 	  break;
463 
464 	case GOMP_MAP_FORCE_FROM:
465 	  acc_update_self (hostaddrs[i], sizes[i]);
466 	  break;
467 
468 	default:
469 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
470 	  break;
471 	}
472     }
473 
474   acc_dev->openacc.async_set_async_func (acc_async_sync);
475 }
476 
477 void
478 GOACC_wait (int async, int num_waits, ...)
479 {
480   if (num_waits)
481     {
482       va_list ap;
483 
484       va_start (ap, num_waits);
485       goacc_wait (async, num_waits, &ap);
486       va_end (ap);
487     }
488   else if (async == acc_async_sync)
489     acc_wait_all ();
490   else if (async == acc_async_noval)
491     goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
492 }
493 
/* Return the number of threads; this implementation always reports
   one.  */

int
GOACC_get_num_threads (void)
{
  enum { num_threads = 1 };

  return num_threads;
}
499 
/* Return the number of the calling thread; this implementation always
   reports zero.  */

int
GOACC_get_thread_num (void)
{
  enum { thread_num = 0 };

  return thread_num;
}
505 
506 void
507 GOACC_declare (int device, size_t mapnum,
508 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
509 {
510   int i;
511 
512   for (i = 0; i < mapnum; i++)
513     {
514       unsigned char kind = kinds[i] & 0xff;
515 
516       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
517 	continue;
518 
519       switch (kind)
520 	{
521 	  case GOMP_MAP_FORCE_ALLOC:
522 	  case GOMP_MAP_FORCE_FROM:
523 	  case GOMP_MAP_FORCE_TO:
524 	  case GOMP_MAP_POINTER:
525 	  case GOMP_MAP_DELETE:
526 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
527 				   &kinds[i], 0, 0);
528 	    break;
529 
530 	  case GOMP_MAP_FORCE_DEVICEPTR:
531 	    break;
532 
533 	  case GOMP_MAP_ALLOC:
534 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
535 	      GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
536 				     &kinds[i], 0, 0);
537 	    break;
538 
539 	  case GOMP_MAP_TO:
540 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
541 				   &kinds[i], 0, 0);
542 
543 	    break;
544 
545 	  case GOMP_MAP_FROM:
546 	    kinds[i] = GOMP_MAP_FORCE_FROM;
547 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
548 				   &kinds[i], 0, 0);
549 	    break;
550 
551 	  case GOMP_MAP_FORCE_PRESENT:
552 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
553 	      gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
554 			  (unsigned long) sizes[i]);
555 	    break;
556 
557 	  default:
558 	    assert (0);
559 	    break;
560 	}
561     }
562 }
563