1 /*
2     Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.
3 
4     Redistribution and use in source and binary forms, with or without
5     modification, are permitted provided that the following conditions
6     are met:
7 
8       * Redistributions of source code must retain the above copyright
9         notice, this list of conditions and the following disclaimer.
10       * Redistributions in binary form must reproduce the above copyright
11         notice, this list of conditions and the following disclaimer in the
12         documentation and/or other materials provided with the distribution.
13       * Neither the name of Intel Corporation nor the names of its
14         contributors may be used to endorse or promote products derived
15         from this software without specific prior written permission.
16 
17     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 /*! \file
32     \brief The parts of the runtime library common to host and target
33 */
34 
35 #ifndef OFFLOAD_COMMON_H_INCLUDED
36 #define OFFLOAD_COMMON_H_INCLUDED
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <memory.h>
42 
43 #include "offload.h"
44 #include "offload_table.h"
45 #include "offload_trace.h"
46 #include "offload_timer.h"
47 #include "offload_util.h"
48 #include "cean_util.h"
49 #include "dv_util.h"
50 #include "liboffload_error_codes.h"
51 
52 #include <stdarg.h>
53 
// Redirect getenv() to the secure variant when the build configuration
// reports that one is available.
// Both feature macros are tested with defined(), so a macro that is defined
// without a value (e.g. -DHAVE___SECURE_GETENV) selects its branch instead
// of producing an empty "#elif" expression.
#if defined(HAVE_SECURE_GETENV)
  #define getenv(x)   secure_getenv(x)
#elif defined(HAVE___SECURE_GETENV)
  #define getenv(x)   __secure_getenv(x)
#endif
60 
61 // Offload Library versioning
62 DLL_LOCAL extern int offload_version;
63 DLL_LOCAL extern int offload_version_count;
64 
65 // The debug routines
66 
67 // Host console and file logging
68 DLL_LOCAL extern int console_enabled;
69 DLL_LOCAL extern int offload_report_level;
70 
71 
72 DLL_LOCAL extern const char *prefix;
73 DLL_LOCAL extern int offload_number;
74 #if !HOST_LIBRARY
75 DLL_LOCAL extern int mic_index;
76 #define OFFLOAD_DO_TRACE (offload_report_level == 3)
77 #else
78 #define OFFLOAD_DO_TRACE (offload_report_enabled && (offload_report_level == 3))
79 #endif
80 
#if HOST_LIBRARY

// Host-side offload report and timer output routines.
DLL_LOCAL void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
DLL_LOCAL void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
DLL_LOCAL void offload_report_free_data(OffloadHostTimerData * timer_data);
DLL_LOCAL void Offload_Timer_Print(void);

// Atomically add one to offload_number, using the interlocked intrinsic on
// Windows targets and the __sync builtin everywhere else.
#ifdef TARGET_WINNT
#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
        _InterlockedIncrement(reinterpret_cast<long*>(&offload_number))
#else
#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
        __sync_add_and_fetch(&offload_number, 1)
#endif

// Host trace lines start with the prefix string only.
// Both macros already end in ';', so a call site may omit its own.
#define OFFLOAD_DEBUG_PRINT_TAG_PREFIX() \
        printf("%s:  ", prefix);

#define OFFLOAD_DEBUG_PRINT_PREFIX() \
        printf("%s:  ", prefix);

#else // !HOST_LIBRARY

// Target trace lines start with the prefix string followed by mic_index.
// The macro already ends in ';', so a call site may omit its own.
#define OFFLOAD_DEBUG_PRINT_PREFIX() \
        printf("%s%d:  ", prefix, mic_index);

#endif // HOST_LIBRARY
104 
// Print a prefixed, printf-style trace message on stdout when the console
// trace level (console_enabled) is at least trace_level, then flush all
// output streams.
//   trace_level - minimum console level at which the message is shown; it is
//                 parenthesised so that compound expressions such as
//                 "cond ? 1 : 5" compare as a whole
//   ...         - printf format string followed by its arguments
// The macro expands to a bare "if" statement (no do/while wrapper) because
// existing call sites use it both with and without a trailing ';'. Do not use
// it as the unbraced body of an if/else.
#define OFFLOAD_TRACE(trace_level, ...)  \
    if (console_enabled >= (trace_level)) { \
        OFFLOAD_DEBUG_PRINT_PREFIX(); \
        printf(__VA_ARGS__); \
        fflush(NULL); \
    }
111 
#if OFFLOAD_DEBUG > 0

// Debug build: the trace and report macros are live.

// Console trace at the given level (see OFFLOAD_TRACE).
#define OFFLOAD_DEBUG_TRACE(level, ...) \
    OFFLOAD_TRACE(level, __VA_ARGS__)

// Emit an offload report line for the given stage when OFFLOAD_DO_TRACE is
// true. The level argument is accepted but not used by the expansion.
#define OFFLOAD_REPORT(level, offload_number, stage, ...) \
    if (OFFLOAD_DO_TRACE) { \
        offload_stage_print(stage, offload_number, __VA_ARGS__); \
        fflush(NULL); \
    }

// Emit a report line when OFFLOAD_DO_TRACE is true; otherwise fall back to
// an ordinary console trace at the given level.
#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...) \
    if (OFFLOAD_DO_TRACE) { \
        offload_stage_print(stage, offload_number, __VA_ARGS__); \
        fflush(NULL); \
    } \
    if (!OFFLOAD_DO_TRACE) { \
        OFFLOAD_TRACE(level, __VA_ARGS__) \
    }

// Dump a byte range through __dump_bytes.
#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \
    __dump_bytes(level, a, b)

DLL_LOCAL extern void __dump_bytes(
    int level,
    const void *data,
    int len
);

#else

// Non-debug build: every debug macro expands to nothing, so call sites
// compile unchanged and their arguments are never evaluated.
// OFFLOAD_DEBUG_TRACE, OFFLOAD_REPORT and OFFLOAD_DEBUG_TRACE_1 previously had
// no definition in this branch, which made their call sites fail to build.
// NOTE(review): assumes no other header supplies non-debug definitions of
// these three macros - confirm.
#define OFFLOAD_DEBUG_LOG(level, ...)
#define OFFLOAD_DEBUG_TRACE(level, ...)
#define OFFLOAD_REPORT(level, offload_number, stage, ...)
#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...)
#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b)

#endif
147 
// Runtime interface

// Form a runtime symbol name: OFFLOAD_PREFIX(x) becomes __offload_x.
#define OFFLOAD_PREFIX(a) __offload_##a

// Allocation entry points: OFFLOAD_MALLOC names __offload_malloc, and
// OFFLOAD_FREE forwards its argument to _mm_free.
#define OFFLOAD_MALLOC            OFFLOAD_PREFIX(malloc)
#define OFFLOAD_FREE(a)           _mm_free(a)

// Forward declaration of the allocator named by OFFLOAD_MALLOC.

extern void *OFFLOAD_MALLOC(size_t size, size_t align);
158 
159 // The Marshaller
160 
//! Flags describing an offload
union OffloadFlags {
    //! All flag bits viewed as a single 32-bit word
    uint32_t flags;

    //! The individual flag bits
    struct {
        //! Fortran traceback requested
        uint32_t fortran_traceback : 1;
        //! OpenMP asynchronous offload
        uint32_t omp_async         : 1;
    } bits;
};
171 
//! Indicator for the type of entry on an offload item list.
/*! Values are spelled out because VarDesc stores each type in a 4-bit
    field: anything above c_extended_type (15) has to be carried in a
    VarDescExtendedType instead.                                        */
enum OffloadItemType {
    c_data              =  1,  //!< Plain data
    c_data_ptr          =  2,  //!< Pointer data
    c_func_ptr          =  3,  //!< Function pointer
    c_void_ptr          =  4,  //!< void*
    c_string_ptr        =  5,  //!< C string
    c_dv                =  6,  //!< Dope vector variable
    c_dv_data           =  7,  //!< Dope-vector data
    c_dv_data_slice     =  8,  //!< Dope-vector data's slice
    c_dv_ptr            =  9,  //!< Dope-vector variable pointer
    c_dv_ptr_data       = 10,  //!< Dope-vector pointer data
    c_dv_ptr_data_slice = 11,  //!< Dope-vector pointer data's slice
    c_cean_var          = 12,  //!< CEAN variable
    c_cean_var_ptr      = 13,  //!< Pointer to CEAN variable
    c_data_ptr_array    = 14,  //!< Pointer to data pointer array
    c_extended_type     = 15,  //!< Is used to extend OffloadItemType
                               //!< Actual OffloadItemType is in the
                               //!< structure VarDescExtendedType
    c_func_ptr_array    = 16,  //!< Pointer to function pointer array
    c_void_ptr_array    = 17,  //!< Pointer to void* pointer array
    c_string_ptr_array  = 18,  //!< Pointer to char* pointer array
    c_data_ptr_ptr      = 19,  //!< Pointer to pointer to data (struct member)
    c_func_ptr_ptr      = 20,  //!< Pointer to pointer to function (struct member)
    c_void_ptr_ptr      = 21,  //!< Pointer to pointer to void* (struct member)
    c_string_ptr_ptr    = 22,  //!< Pointer to pointer to string (struct member)
    c_cean_var_ptr_ptr  = 23   //!< Pointer to pointer to cean var (struct member)
};
200 
// Classification helpers for OffloadItemType values.
// Each macro may evaluate its argument more than once.

//! True for the pointer-to-pointer (struct member) item types
#define TYPE_IS_PTR_TO_PTR(t) \
    ((t) == c_string_ptr_ptr || \
     (t) == c_data_ptr_ptr || \
     (t) == c_func_ptr_ptr || \
     (t) == c_void_ptr_ptr || \
     (t) == c_cean_var_ptr_ptr)

//! True for string, data, CEAN and dope-vector pointers, and for every
//! pointer-to-pointer type
#define VAR_TYPE_IS_PTR(t) \
    ((t) == c_string_ptr || \
     (t) == c_data_ptr || \
     (t) == c_cean_var_ptr || \
     (t) == c_dv_ptr || \
     TYPE_IS_PTR_TO_PTR(t))

//! True for plain data, void*, CEAN variables and dope vectors
#define VAR_TYPE_IS_SCALAR(t) \
    ((t) == c_data || \
     (t) == c_void_ptr || \
     (t) == c_cean_var || \
     (t) == c_dv)

//! True for dope-vector data and dope-vector pointer data
#define VAR_TYPE_IS_DV_DATA(t) \
    ((t) == c_dv_data || \
     (t) == c_dv_ptr_data)

//! True for slices of dope-vector data and of dope-vector pointer data
#define VAR_TYPE_IS_DV_DATA_SLICE(t) \
    ((t) == c_dv_data_slice || \
     (t) == c_dv_ptr_data_slice)
223 
//! Specifies the direction in which an offloaded variable is copied.
enum OffloadParameterType {
    c_parameter_unknown = -1,  //!< Unknown clause
    c_parameter_nocopy  =  0,  //!< Variable listed in "nocopy" clause
    c_parameter_in      =  1,  //!< Variable listed in "in" clause
    c_parameter_out     =  2,  //!< Variable listed in "out" clause
    c_parameter_inout   =  3   //!< Variable listed in "inout" clause
};

232 
233 
//! Flags describing an offloaded variable
/*! The anonymous struct gives each flag its own one-bit field; "bits"
    overlays the same storage as a single 32-bit word.                  */
union varDescFlags {
    struct {
        uint32_t is_static : 1;          //!< source variable has persistent storage
        uint32_t is_static_dstn : 1;     //!< destination variable has persistent storage
        uint32_t has_length : 1;         //!< has length for c_dv && c_dv_ptr
        uint32_t is_stack_buf : 1;       //!< persisted local scalar is in stack buffer
        uint32_t targetptr : 1;          //!< "targetptr" modifier used
        uint32_t preallocated : 1;       //!< "preallocated" modifier used
        uint32_t is_pointer : 1;         //!< pointer to a pointer array

        uint32_t sink_addr : 1;          //!< buffer address is sent in data
        uint32_t alloc_disp : 1;         //!< alloc displacement is sent in data
        uint32_t is_noncont_src : 1;     //!< source data is noncontiguous
        uint32_t is_noncont_dst : 1;     //!< destination data is noncontiguous

        uint32_t always_copy : 1;        //!< "OpenMP always" modifier used
        uint32_t always_delete : 1;      //!< "OpenMP delete" modifier used
        uint32_t is_non_cont_struct : 1; //!< structured data is noncontiguous
        uint32_t pin : 1;                //!< CPU memory pinning/unpinning operation
        uint32_t is_device_ptr : 1;      //!< Pointer to device memory
        uint32_t use_device_ptr : 1;     //!< Hostpointer with associated device pointer
    };
    uint32_t bits;                       //!< every flag viewed as one word
};
276 
277 //! An Offload Variable descriptor
278 struct VarDesc {
279     //! OffloadItemTypes of source and destination
280     union {
281         struct {
282             uint8_t dst : 4; //!< OffloadItemType of destination
283             uint8_t src : 4; //!< OffloadItemType of source
284         };
285         uint8_t bits;
286     } type;
287 
288     //! OffloadParameterType that describes direction of data transfer
289     union {
290         struct {
291             uint8_t in  : 1; //!< Set if IN or INOUT
292             uint8_t out : 1; //!< Set if OUT or INOUT
293         };
294         uint8_t bits;
295     } direction;
296 
297     uint8_t alloc_if;        //!< alloc_if modifier value
298     uint8_t free_if;         //!< free_if modifier value
299     uint32_t align;          //!< MIC alignment requested for pointer data
300     //! Not used by compiler; set to 0
301     /*! Used by runtime as offset to data from start of MIC buffer */
302     uint32_t mic_offset;
303     //! Flags describing this variable
304     varDescFlags flags;
305     //! Not used by compiler; set to 0
306     /*! Used by runtime as offset to base from data stored in a buffer */
307     int64_t offset;
308     //! Element byte-size of data to be transferred
309     /*! For dope-vector, the size of the dope-vector      */
310     int64_t size;
311     union {
312         //! Set to 0 for array expressions and dope-vectors
313         /*! Set to 1 for scalars                          */
314         /*! Set to value of length modifier for pointers  */
315         int64_t count;
316         //! Displacement not used by compiler
317         int64_t disp;
318     };
319 
320     //! This field not used by OpenMP 4.0
321     /*! The alloc section expression in #pragma offload   */
322     union {
323        void *alloc;
324        int64_t ptr_arr_offset;
325     };
326 
327     //! This field not used by OpenMP 4.0
328     /*! The into section expression in #pragma offload    */
329     /*! For c_data_ptr_array this is the into ptr array   */
330     void *into;
331 
332     //! For an ordinary variable, address of the variable
333     /*! For c_cean_var (C/C++ array expression),
334         pointer to arr_desc, which is an array descriptor. */
335     /*! For c_data_ptr_array (array of data pointers),
336         pointer to ptr_array_descriptor,
337         which is a descriptor for pointer array transfers. */
338     void *ptr;
339 };
340 
//! Companion to a variable descriptor that carries variable names;
//! used when -g is enabled
struct VarDesc2 {
    //! Source name
    const char *sname;
    //! Destination name (when "into" is used)
    const char *dname;
};
346 
/*! Descriptor for pointer array transfers: when the OffloadItemType is
    c_data_ptr_array, the ptr field of the main descriptor points to this
    struct.                                                              */
/*! The type in the main descriptor (VarDesc1) does not say which kind of
    pointer the array holds; it can be c_data_ptr, c_func_ptr, c_void_ptr,
    or c_string_ptr. The actual pointer type is therefore kept in the
    flags word of VarDesc3 (array_fields).                               */
/*! When flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
    is 0, alignment/alloc_if/free_if are the ones given in VarDesc1.     */
/*! When flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
    is 1, align_array/alloc_if_array/free_if_array specify the set of
    alignment/alloc_if/free_if values.                                   */
/*! For the remaining fields, if neither the scalar flag nor the array
    flag is set, the modifier was not specified. When a flag is set it
    says which modifier was given and whether it was a scalar or an
    array expression.                                                    */
struct VarDesc3
{
    //! Pointer to arr_desc of array of pointers
    void *ptr_array;

    // Each field below is a scalar value or a pointer to arr_desc.
    void *align_array;
    void *alloc_if_array;
    void *free_if_array;
    void *extent_start;
    void *extent_elements;
    void *into_start;
    void *into_elements;
    void *alloc_start;
    void *alloc_elements;

    /*! Flags giving the pointer type and, for each field, whether it is
        a scalar value or an array expression.
        First 6 bits: pointer array element type
        (c_data_ptr, c_func_ptr, c_void_ptr, c_string_ptr).
        After that, one bit each, in this order:
          - align_array is an array
          - alloc_if_array is an array
          - free_if_array is an array
          - extent_start is a scalar expression
          - extent_start is an array expression
          - extent_elements is a scalar expression
          - extent_elements is an array expression
          - into_start is a scalar expression
          - into_start is an array expression
          - into_elements is a scalar expression
          - into_elements is an array expression
          - alloc_start is a scalar expression
          - alloc_start is an array expression
          - alloc_elements is a scalar expression
          - alloc_elements is an array expression                        */
    uint32_t array_fields;
};
// Flag indices for the flags word of VarDesc3 (array_fields).
// They follow the order in which the VarDesc3 comment lists the single-bit
// flags, starting right after the 6-bit pointer element type.
// NOTE(review): reading them as bit positions is inferred from that
// ordering - confirm against the code that tests array_fields.

// Modifier given as an array
const int flag_align_is_array            = 6;
const int flag_alloc_if_is_array         = 7;
const int flag_free_if_is_array          = 8;

// Extent start / elements: scalar or array expression
const int flag_extent_start_is_scalar    = 9;
const int flag_extent_start_is_array     = 10;
const int flag_extent_elements_is_scalar = 11;
const int flag_extent_elements_is_array  = 12;

// Into start / elements: scalar or array expression
const int flag_into_start_is_scalar      = 13;
const int flag_into_start_is_array       = 14;
const int flag_into_elements_is_scalar   = 15;
const int flag_into_elements_is_array    = 16;

// Alloc start / elements: scalar or array expression
const int flag_alloc_start_is_scalar     = 17;
const int flag_alloc_start_is_array      = 18;
const int flag_alloc_elements_is_scalar  = 19;
const int flag_alloc_elements_is_array   = 20;
410 
//! Extended Variable Descriptor.
//! VarDesc stores each OffloadItemType in a 4-bit field, and the set of
//! types has outgrown that limit. Any type greater than 15 therefore has
//! its type set in VarDesc to c_extended_type, and this structure is used
//! to represent it.
typedef struct VarDescExtendedType {

    //! The OffloadItemType that overflowed the VarDesc type field
    uint32_t extended_type;

    //! For extended_type: address of the variable.
    //! Future types can point to other descriptors.
    void *ptr;
} VarDescExtendedType;
425 
// The Marshaller
//
// Keeps track of a caller-supplied buffer (start, current position, size)
// and of a count of user data transferred. The send/receive members are
// only declared here; their definitions live outside this header.
class Marshaller
{
private:
    char *buffer_start;     // start address of buffer
    char *buffer_ptr;       // current pointer within buffer
    long long buffer_size;  // physical size of data sent (including flags)
    long long tfr_size;     // user data sent/received

public:
    // Create a marshaller with no buffer attached and zeroed counters.
    Marshaller()
        : buffer_start(0),
          buffer_ptr(0),
          buffer_size(0),
          tfr_size(0)
    {}

    // Count of user data sent/received.
    long long get_tfr_size() const { return tfr_size; }

    // Pointer to the start of the buffer.
    char *get_buffer_start() const { return buffer_start; }

    // Current size of data in buffer.
    long long get_buffer_size() const { return buffer_size; }

    // Attach buffer d of size s; the current position is reset to d.
    void init_buffer(char *d, long long s)
    {
        buffer_start = d;
        buffer_ptr = d;
        buffer_size = s;
    }

    // Send data
    void send_data(const void *data, int64_t length);

    // Receive data
    void receive_data(void *data, int64_t length);

    // Send function pointer
    void send_func_ptr(const void* data);

    // Receive function pointer
    void receive_func_ptr(const void** data);
};
500 
501 // End of the Marshaller
502 
// Descriptor of the offloaded function.
// The host sends it to the target to say which function to run; it also
// sets console and file tracing levels.
struct FunctionDescriptor
{
    long long in_datalen;    // input data size
    long long out_datalen;   // output data size

    // Console trace request: 1 produces only function name and data
    // sent/received, values > 1 produce copious trace information.
    uint8_t console_enabled;

    // Target-side timing control: values > 0 enable timing on sink.
    uint8_t timer_enabled;

    int offload_report_level;
    int offload_number;

    int vars_num;            // number of variable descriptors

    // Inout data offset if data is passed as misc/return data;
    // otherwise it should be zero.
    int data_offset;

    // The name of the offloaded function (flexible array member)
    char data[];
};
536 
// OFFLOAD: pointer to an OffloadDescriptor.
// Only the struct tag is named here; the struct itself is not defined in
// this header.
typedef struct OffloadDescriptor *OFFLOAD;
540 
// Affinity choices used when setting the affinity of a stream
enum affinity_type {
    affinity_compact = 0,
    affinity_scatter = 1
};
// Affinity request for a stream.
struct affinity_spec {
    // 16 x 64-bit mask words.
    // NOTE(review): presumably one bit per sink-side CPU - confirm.
    uint64_t sink_mask[16];

    // Kept as a plain int; presumably holds an affinity_type enumerator
    // (affinity_compact / affinity_scatter) - confirm against callers.
    int affinity_type;

    int num_cores;
    int num_threads;
};
552 
553 #endif // OFFLOAD_COMMON_H_INCLUDED
554