1 /*
2     Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.
3 
4     Redistribution and use in source and binary forms, with or without
5     modification, are permitted provided that the following conditions
6     are met:
7 
8       * Redistributions of source code must retain the above copyright
9         notice, this list of conditions and the following disclaimer.
10       * Redistributions in binary form must reproduce the above copyright
11         notice, this list of conditions and the following disclaimer in the
12         documentation and/or other materials provided with the distribution.
13       * Neither the name of Intel Corporation nor the names of its
14         contributors may be used to endorse or promote products derived
15         from this software without specific prior written permission.
16 
17     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 /*
32  * Include file for Offload API.
33  */
34 
35 #ifndef OFFLOAD_H_INCLUDED
36 #define OFFLOAD_H_INCLUDED
37 
38 #ifdef __cplusplus
39 #if defined(LINUX) || defined(FREEBSD)
40 #include <bits/functexcept.h>
41 #endif
42 #endif
43 
44 #include <stddef.h>
45 #include <omp.h>
46 
47 #ifdef TARGET_WINNT
48 // <stdint.h> is incompatible on Windows.
49 typedef unsigned long long int  uint64_t;
50 typedef   signed long long int   int64_t;
51 #else
52 #include <stdint.h>
53 #endif  // TARGET_WINNT
54 
55 #ifdef __cplusplus
56 extern "C" {
57 #endif
58 
/* Decoration for declarations aimed at the MIC target; expands to
 * __declspec(target(mic)). */
#define TARGET_ATTRIBUTE __declspec(target(mic))

/*
 *  Kinds of target an offload call can be addressed to.  The enumerator
 *  values are spelled out; they are the same values the implicit numbering
 *  produces.
 */
typedef enum TARGET_TYPE {
    TARGET_NONE = 0,    /* Undefined target */
    TARGET_HOST = 1,    /* Host used as target */
    TARGET_MIC  = 2     /* MIC target */
} TARGET_TYPE;

/* Target type used by default. */
#define DEFAULT_TARGET_TYPE TARGET_MIC

/* Target number used by default. */
#define DEFAULT_TARGET_NUMBER 0
79 
/*
 *  Result codes reported in _Offload_status::result.  Values are written
 *  out explicitly; they match the implicit enumeration order.
 */
typedef enum {
    OFFLOAD_SUCCESS       = 0,
    OFFLOAD_DISABLED      = 1,      /* offload is disabled */
    OFFLOAD_UNAVAILABLE   = 2,      /* card is not available */
    OFFLOAD_OUT_OF_MEMORY = 3,      /* not enough memory on device */
    OFFLOAD_PROCESS_DIED  = 4,      /* target process has died */
    OFFLOAD_ERROR         = 5       /* unspecified error */
} _Offload_result;

/*
 *  Status record of an offload.
 */
typedef struct {
    _Offload_result result;         /* one of the codes above */
    int             device_number;  /* device number */
    size_t          data_sent;      /* number of bytes sent to the target */
    size_t          data_received;  /* number of bytes received by host */
} _Offload_status;

/* Stream handle type. */
typedef int64_t _Offload_stream;

/* Resets only the result field of x to OFFLOAD_DISABLED; the remaining
 * fields of x are left untouched. */
#define OFFLOAD_STATUS_INIT(x) ((x).result = OFFLOAD_DISABLED)

/* Aggregate initializer: result OFFLOAD_DISABLED, device_number -1,
 * both byte counters 0. */
#define OFFLOAD_STATUS_INITIALIZER { OFFLOAD_DISABLED, -1, 0, 0 }
106 
/*
 * Offload runtime interfaces: device queries.
 * All three take no arguments and return an int.
 * NOTE(review): the exact meaning of each return value (count, logical
 * index, physical index) is inferred from the names only; confirm against
 * the runtime implementation.
 */
extern int _Offload_number_of_devices(void);
extern int _Offload_get_device_number(void);
extern int _Offload_get_physical_device_number(void);
112 
113 /* Offload stream runtime interfaces */
114 
115 extern _Offload_stream _Offload_stream_create(
116     int device,           // MIC device number
117     int number_of_cpus    // Cores allocated to the stream
118 );
119 
120 extern int _Offload_stream_destroy(
121     int device,             // MIC device number
122     _Offload_stream stream  // stream handle
123 );
124 
125 extern int _Offload_stream_delete(
126     _Offload_stream handle  // stream handle
127 );
128 
129 extern int _Offload_stream_completed(
130     int device,             // MIC device number
131     _Offload_stream handle  // stream handle
132 );
133 
134 extern int _Offload_device_streams_completed(
135     int device             // MIC device number
136 );
137 
138 extern int _Offload_stream_is_empty(
139     _Offload_stream handle  // stream handle
140 );
141 
/*
 * _Offload_shared_malloc/free are only supported when offload is enabled;
 * otherwise they are defined in terms of malloc and free (and of the
 * platform's aligned allocation calls for the aligned variants).
 *
 * The fallback macros expand to a single expression with no trailing
 * semicolon, so they can be used exactly like the functions declared for
 * the __INTEL_OFFLOAD case -- including as the body of an unbraced
 * if/else branch, where a semicolon baked into the macro would terminate
 * the statement early and orphan the else.
 */
#ifdef __INTEL_OFFLOAD
extern void* _Offload_shared_malloc(size_t size);
extern void  _Offload_shared_free(void *ptr);
extern void* _Offload_shared_aligned_malloc(size_t size, size_t align);
extern void  _Offload_shared_aligned_free(void *ptr);
#else
#include <malloc.h>
#define _Offload_shared_malloc(size)                 malloc(size)
#define _Offload_shared_free(ptr)                    free(ptr)
#if defined(_WIN32)
#define _Offload_shared_aligned_malloc(size, align)  _aligned_malloc(size, align)
#define _Offload_shared_aligned_free(ptr)            _aligned_free(ptr)
#else
/* memalign takes the alignment first, the size second. */
#define _Offload_shared_aligned_malloc(size, align)  memalign(align, size)
#define _Offload_shared_aligned_free(ptr)            free(ptr)
#endif
#endif
163 
164 
/*
 *  index   int argument
 *  signal  pointer argument
 *  NOTE(review): parameter meanings are not documented in this header;
 *  confirm against the runtime before relying on them.
 */
extern int _Offload_signaled(int index, void *signal);

/* Takes a single int value; no return value. */
extern void _Offload_report(int val);

/*
 *  Takes a target number and a CPU address as inputs; every other
 *  parameter is a pointer through which a result can be handed back.
 *  NOTE(review): which outputs are filled in, and what the int return
 *  value means, is not visible here; confirm against the implementation.
 */
extern int _Offload_find_associated_mic_memory(
    int          target,
    const void  *cpu_addr,
    void       **cpu_base_addr,
    uint64_t    *buf_length,
    void       **mic_addr,
    uint64_t    *mic_buf_start_offset,
    int         *is_static
);
176 
177 /* OpenMP API */
178 
179 extern void omp_set_default_device(int num) __GOMP_NOTHROW;
180 extern int  omp_get_default_device(void) __GOMP_NOTHROW;
181 extern int  omp_get_num_devices(void) __GOMP_NOTHROW;
182 
183 // OpenMP 4.5 APIs
184 
185 /*! \fn omp_get_initial_device
186     \brief Return the device id of the initial device.
187     \return Returns the device id of the initial device.
188 */
189 extern int omp_get_initial_device(
190     void
191 ) __GOMP_NOTHROW;
192 
193 /*! \fn omp_target_alloc
194     \brief Allocate memory in the device data environment.
195     \param size        Number of bytes to allocate.
196     \param device_num  The device number on which to allocate.
197     \return            Returns a pointer to the allocated memory.
198 */
199 extern void* omp_target_alloc(
200     size_t size,
201     int    device_num
202 ) __GOMP_NOTHROW;
203 
204 /*! \fn omp_target_free
205     \brief Free memory in the device data environment.
206     \param device_ptr  Address of allocated device memory.
207     \param device_num  The device number on which to free.
208 */
209 extern void omp_target_free(
210     void *device_ptr,
211     int   device_num
212 ) __GOMP_NOTHROW;
213 
214 /*! \fn omp_target_is_present
215     \brief Test whether a host pointer has corresponding storage on a device.
216     \param device_ptr  Address of allocated device memory.
217     \param device_num  The device number on which to test..
218     \return            true if storage is found, false otherwise.
219 */
220 extern int omp_target_is_present(
221     void *ptr,
222     int device_num
223 ) __GOMP_NOTHROW;
224 
225 /*! \fn omp_target_memcpy
226     \brief Copy memory between host/device pointers.
227     \param dst         Address of destination memory.
228     \param src         Address of source memory.
229     \param length      Number of bytes to copy.
230     \param dst_offset  Destination offset in bytes.
231     \param src_offset  Source offset in bytes.
232     \param dst_device  Destination device number.
233     \param src_device  Source device number.
234     \return            0 on success, 1 otherwise.
235 */
236 extern int omp_target_memcpy(
237     void   *dst,
238     void   *src,
239     size_t  length,
240     size_t  dst_offset,
241     size_t  src_offset,
242     int     dst_device,
243     int     src_device
244 ) __GOMP_NOTHROW;
245 
246 /*! \fn omp_target_memcpy_rect
247     \brief Copy a rectangular subsection from
248     \brief one multi-dimensional array to another.
249     \param dst           Address of destination array.
250     \param src           Address of source array.
251     \param element_size  Number of bytes in each array element.
252     \param num_dims      Number of dimensions.
253     \param volume        Array of element counts to copy in each dimension.
254     \param dst_offsets   Destination offsets array.
255     \param src_offsets   Source offsets array.
256     \param dst_dims      Destination array dimensions array.
257     \param src_dims      Source array dimensions array.
258     \param dst_device    Destination device number.
259     \param src_device    Source device number.
260     \return              0 on success, 1 otherwise.
261 */
262 extern int omp_target_memcpy_rect(
263     void         *dst,
264     void         *src,
265     size_t        element_size,
266     int           num_dims,
267     const size_t *volume,
268     const size_t *dst_offsets,
269     const size_t *src_offsets,
270     const size_t *dst_dimensions,
271     const size_t *src_dimensions,
272     int           dst_device,
273     int           src_device
274 ) __GOMP_NOTHROW;
275 
276 /*! \fn omp_target_associate_ptr
277     \brief Map a device pointer to a host pointer.
278     \param host_ptr       The host pointer.
279     \param device_ptr     The device pointer.
280     \param size           Number of bytes to map.
281     \param device_offset  Offset on device of mapped memory.
282     \param device_num     Device number.
283     \return               0 on success, 1 otherwise.
284 */
285 extern int omp_target_associate_ptr(
286     void   *host_ptr,
287     void   *device_ptr,
288     size_t  size,
289     size_t  device_offset,
290     int     device_num
291 ) __GOMP_NOTHROW;
292 
293 /*! \fn omp_target_disassociate_ptr
294     \brief Remove a host pointer to device pointer association.
295     \param ptr         The host pointer to disassociate.
296     \param device_num  Device number.
297     \return            0 on success, 1 otherwise.
298 */
299 extern int omp_target_disassociate_ptr(
300     void   *host_ptr,
301     int     device_num
302 ) __GOMP_NOTHROW;
303 
304 // End of OpenMP 4.5 APIs
305 
306 /* OpenMP API wrappers */
307 
308 /* Set num_threads on target */
309 extern void omp_set_num_threads_target(
310     TARGET_TYPE target_type,
311     int target_number,
312     int num_threads
313 );
314 
315 /* Get max_threads from target */
316 extern int omp_get_max_threads_target(
317     TARGET_TYPE target_type,
318     int target_number
319 );
320 
321 /* Get num_procs from target */
322 extern int omp_get_num_procs_target(
323     TARGET_TYPE target_type,
324     int target_number
325 );
326 
327 /* Set dynamic on target */
328 extern void omp_set_dynamic_target(
329     TARGET_TYPE target_type,
330     int target_number,
331     int num_threads
332 );
333 
334 /* Get dynamic from target */
335 extern int omp_get_dynamic_target(
336     TARGET_TYPE target_type,
337     int target_number
338 );
339 
340 /* Set nested on target */
341 extern void omp_set_nested_target(
342     TARGET_TYPE target_type,
343     int target_number,
344     int nested
345 );
346 
347 /* Get nested from target */
348 extern int omp_get_nested_target(
349     TARGET_TYPE target_type,
350     int target_number
351 );
352 
353 extern void omp_set_num_threads_target(
354     TARGET_TYPE target_type,
355     int target_number,
356     int num_threads
357 );
358 
359 extern int omp_get_max_threads_target(
360     TARGET_TYPE target_type,
361     int target_number
362 );
363 
364 extern int omp_get_num_procs_target(
365     TARGET_TYPE target_type,
366     int target_number
367 );
368 
369 extern void omp_set_dynamic_target(
370     TARGET_TYPE target_type,
371     int target_number,
372     int num_threads
373 );
374 
375 extern int omp_get_dynamic_target(
376     TARGET_TYPE target_type,
377     int target_number
378 );
379 
380 extern void omp_set_nested_target(
381     TARGET_TYPE target_type,
382     int target_number,
383     int num_threads
384 );
385 
386 extern int omp_get_nested_target(
387     TARGET_TYPE target_type,
388     int target_number
389 );
390 
391 extern void omp_set_schedule_target(
392     TARGET_TYPE target_type,
393     int target_number,
394     omp_sched_t kind,
395     int modifier
396 );
397 
398 extern void omp_get_schedule_target(
399     TARGET_TYPE target_type,
400     int target_number,
401     omp_sched_t *kind,
402     int *modifier
403 );
404 
405 /* lock API functions */
406 
407 typedef struct {
408     omp_lock_t lock;
409 } omp_lock_target_t;
410 
411 extern void omp_init_lock_target(
412     TARGET_TYPE target_type,
413     int target_number,
414     omp_lock_target_t *lock
415 );
416 
417 extern void omp_destroy_lock_target(
418     TARGET_TYPE target_type,
419     int target_number,
420     omp_lock_target_t *lock
421 );
422 
423 extern void omp_set_lock_target(
424     TARGET_TYPE target_type,
425     int target_number,
426     omp_lock_target_t *lock
427 );
428 
429 extern void omp_unset_lock_target(
430     TARGET_TYPE target_type,
431     int target_number,
432     omp_lock_target_t *lock
433 );
434 
435 extern int omp_test_lock_target(
436     TARGET_TYPE target_type,
437     int target_number,
438     omp_lock_target_t *lock
439 );
440 
441 /* nested lock API functions */
442 
443 typedef struct {
444     omp_nest_lock_t lock;
445 } omp_nest_lock_target_t;
446 
447 extern void omp_init_nest_lock_target(
448     TARGET_TYPE target_type,
449     int target_number,
450     omp_nest_lock_target_t *lock
451 );
452 
453 extern void omp_destroy_nest_lock_target(
454     TARGET_TYPE target_type,
455     int target_number,
456     omp_nest_lock_target_t *lock
457 );
458 
459 extern void omp_set_nest_lock_target(
460     TARGET_TYPE target_type,
461     int target_number,
462     omp_nest_lock_target_t *lock
463 );
464 
465 extern void omp_unset_nest_lock_target(
466     TARGET_TYPE target_type,
467     int target_number,
468     omp_nest_lock_target_t *lock
469 );
470 
471 extern int omp_test_nest_lock_target(
472     TARGET_TYPE target_type,
473     int target_number,
474     omp_nest_lock_target_t *lock
475 );
476 
477 #ifdef __cplusplus
478 } /* extern "C" */
479 
480 /* Namespace for the shared_allocator. */
481 namespace __offload {
482   /* This follows the specification for std::allocator. */
483   /* Forward declaration of the class template. */
484   template <typename T>
485   class shared_allocator;
486 
487   /* Specialization for shared_allocator<void>. */
488   template <>
489   class shared_allocator<void> {
490   public:
491     typedef void       *pointer;
492     typedef const void *const_pointer;
493     typedef void        value_type;
494     template <class U> struct rebind { typedef shared_allocator<U> other; };
495   };
496 
497   /* Definition of shared_allocator<T>. */
498   template <class T>
499   class shared_allocator {
500   public:
501     typedef size_t     size_type;
502     typedef ptrdiff_t  difference_type;
503     typedef T         *pointer;
504     typedef const T   *const_pointer;
505     typedef T         &reference;
506     typedef const T   &const_reference;
507     typedef T          value_type;
508     template <class U> struct rebind { typedef shared_allocator<U> other; };
throw()509     shared_allocator() throw() { }
throw()510     shared_allocator(const shared_allocator&) throw() { }
shared_allocator(const shared_allocator<U> &)511     template <class U> shared_allocator(const shared_allocator<U>&) throw() { }
throw()512     ~shared_allocator() throw() { }
address(reference x)513     pointer address(reference x) const { return &x; }
address(const_reference x)514     const_pointer address(const_reference x) const { return &x; }
515     pointer allocate(
516       size_type, shared_allocator<void>::const_pointer hint = 0);
517     void deallocate(pointer p, size_type n);
max_size()518     size_type max_size() const throw() {
519       return size_type(-1)/sizeof(T);
520     } /* max_size */
construct(pointer p,const T & arg)521     void construct(pointer p, const T& arg) {
522       ::new (p) T(arg);
523     } /* construct */
destroy(pointer p)524     void destroy(pointer p) {
525       p->~T();
526     } /* destroy */
527   };
528 
529   /* Definition for allocate. */
530   template <class T>
531   typename shared_allocator<T>::pointer
allocate(shared_allocator<T>::size_type s,shared_allocator<void>::const_pointer)532   shared_allocator<T>::allocate(shared_allocator<T>::size_type s,
533                                 shared_allocator<void>::const_pointer) {
534     /* Allocate from shared memory. */
535     void *ptr = _Offload_shared_malloc(s*sizeof(T));
536 #if (defined(_WIN32) || defined(_WIN64))   /* Windows */
537         if (ptr == 0) throw std::bad_alloc();
538 #else
539         if (ptr == 0) std::__throw_bad_alloc();
540 #endif
541     return static_cast<pointer>(ptr);
542   } /* allocate */
543 
544   template <class T>
deallocate(pointer p,shared_allocator<T>::size_type)545   void shared_allocator<T>::deallocate(pointer p,
546                                        shared_allocator<T>::size_type) {
547     /* Free the shared memory. */
548     _Offload_shared_free(p);
549   } /* deallocate */
550 
551   template <typename _T1, typename _T2>
552   inline bool operator==(const shared_allocator<_T1> &,
553                          const shared_allocator<_T2> &) throw() {
554     return true;
555   }  /* operator== */
556 
557   template <typename _T1, typename _T2>
558   inline bool operator!=(const shared_allocator<_T1> &,
559                          const shared_allocator<_T2> &) throw() {
560     return false;
561   }  /* operator!= */
562 }  /* __offload */
563 #endif /* __cplusplus */
564 
565 #endif /* OFFLOAD_H_INCLUDED */
566