1 /*
2     Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.
3 
4     Redistribution and use in source and binary forms, with or without
5     modification, are permitted provided that the following conditions
6     are met:
7 
8       * Redistributions of source code must retain the above copyright
9         notice, this list of conditions and the following disclaimer.
10       * Redistributions in binary form must reproduce the above copyright
11         notice, this list of conditions and the following disclaimer in the
12         documentation and/or other materials provided with the distribution.
13       * Neither the name of Intel Corporation nor the names of its
14         contributors may be used to endorse or promote products derived
15         from this software without specific prior written permission.
16 
17     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 #include "compiler_if_host.h"
32 
33 #include <malloc.h>
34 #ifndef TARGET_WINNT
35 #include <alloca.h>
36 #endif // TARGET_WINNT
37 
// Host-side global call counter.
// It is used when P2OPT_offload_do_data_persistence == 2 to identify the
// offload constructs contained in one procedure.
// A call to OFFLOAD_CALL_COUNT, which increments this counter, is inserted at
// the entry of every HOST routine that contains offload constructs.
static int offload_call_count = 0;
44 
OFFLOAD_TARGET_ACQUIRE(TARGET_TYPE target_type,int target_number,int is_optional,_Offload_status * status,const char * file,uint64_t line)45 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
46     TARGET_TYPE      target_type,
47     int              target_number,
48     int              is_optional,
49     _Offload_status* status,
50     const char*      file,
51     uint64_t         line
52 )
53 {
54     bool retval;
55     OFFLOAD ofld;
56 
57     // initialize status
58     if (status != 0) {
59         status->result = OFFLOAD_UNAVAILABLE;
60         status->device_number = -1;
61         status->data_sent = 0;
62         status->data_received = 0;
63     }
64 
65     // make sure libray is initialized
66     retval = __offload_init_library();
67 
68     // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
69     OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
70 
71     OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
72 
73     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
74 
75     // initialize all devices is init_type is on_offload_all
76     if (retval && __offload_init_type == c_init_on_offload_all) {
77         for (int i = 0; i < mic_engines_total; i++) {
78              mic_engines[i].init();
79         }
80     }
81     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
82 
83     OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
84 
85     if (target_type == TARGET_HOST) {
86         // Host always available
87         retval = true;
88     }
89     else if (target_type == TARGET_MIC) {
90         if (target_number >= -1) {
91             if (retval) {
92                 if (target_number >= 0) {
93                     // User provided the device number
94                     target_number = target_number % mic_engines_total;
95                 }
96                 else {
97                     // use device 0
98                     target_number = 0;
99                 }
100 
101                 // reserve device in ORSL
102                 if (is_optional) {
103                     if (!ORSL::try_reserve(target_number)) {
104                         target_number = -1;
105                     }
106                 }
107                 else {
108                     if (!ORSL::reserve(target_number)) {
109                         target_number = -1;
110                     }
111                 }
112 
113                 // initialize device
114                 if (target_number >= 0 &&
115                     __offload_init_type == c_init_on_offload) {
116                     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
117                     mic_engines[target_number].init();
118                     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
119                 }
120             }
121             else {
122                 // fallback to CPU
123                 target_number = -1;
124             }
125 
126             if (target_number < 0 || !retval) {
127                 if (!is_optional && status == 0) {
128                     LIBOFFLOAD_ERROR(c_device_is_not_available);
129                     exit(1);
130                 }
131 
132                 retval = false;
133             }
134         }
135         else {
136             LIBOFFLOAD_ERROR(c_invalid_device_number);
137             exit(1);
138         }
139     }
140 
141     if (retval) {
142         ofld = new OffloadDescriptor(target_number, status,
143                                      !is_optional, false, timer_data);
144         OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
145         Offload_Report_Prolog(timer_data);
146         OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
147                               "Starting offload: target_type = %d, "
148                               "number = %d, is_optional = %d\n",
149                               target_type, target_number, is_optional);
150 
151         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
152     }
153     else {
154         ofld = NULL;
155 
156         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
157         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
158         offload_report_free_data(timer_data);
159     }
160 
161     return ofld;
162 }
163 
164 // This routine is called for OpenMP4.5 offload calls
165 // OpenMP 4.5 offload is always optional.
OFFLOAD_TARGET_ACQUIRE1(const int * device_num,const char * file,uint64_t line)166 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
167     const int*  device_num,
168     const char* file,
169     uint64_t    line
170 )
171 {
172     int target_number;
173 
174     // make sure libray is initialized and at least one device is available
175     if (!__offload_init_library()) {
176        OFFLOAD_DEBUG_TRACE(2, "No device available, fall back to host\n");
177        return NULL;
178     }
179 
180     // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
181 
182     OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
183 
184     OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
185 
186     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
187 
188     if (__offload_init_type == c_init_on_offload_all) {
189         for (int i = 0; i < mic_engines_total; i++) {
190              mic_engines[i].init();
191         }
192     }
193 
194     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
195 
196     OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
197 
198     // use default device number if it is not provided
199     if (device_num != 0) {
200         target_number = *device_num;
201     }
202     else {
203         target_number = __omp_device_num;
204     }
205 
206     // device number should be a non-negative integer value
207     if (target_number < 0) {
208         LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
209         exit(1);
210     }
211 
212     // should we do this for OpenMP?
213     target_number %= mic_engines_total;
214 
215     // reserve device in ORSL
216     if (!ORSL::reserve(target_number)) {
217         LIBOFFLOAD_ERROR(c_device_is_not_available);
218         exit(1);
219     }
220 
221     // initialize device(s)
222     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
223 
224     if (__offload_init_type == c_init_on_offload) {
225         mic_engines[target_number].init();
226     }
227 
228     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
229 
230     OFFLOAD ofld =
231         new OffloadDescriptor(target_number, 0, true, true, timer_data);
232 
233     OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
234 
235     Offload_Report_Prolog(timer_data);
236 
237     OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
238                           "Starting OpenMP offload, device = %d\n",
239                           target_number);
240 
241     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
242 
243     return ofld;
244 }
245 
OFFLOAD_TARGET_ACQUIRE2(TARGET_TYPE target_type,int target_number,int is_optional,_Offload_status * status,const char * file,uint64_t line,const void ** stream)246 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE2(
247     TARGET_TYPE      target_type,
248     int              target_number,
249     int              is_optional,
250     _Offload_status* status,
251     const char*      file,
252     uint64_t         line,
253     const void**     stream
254 )
255 {
256     bool retval;
257     OFFLOAD ofld;
258 
259     // initialize status
260     if (status != 0) {
261         status->result = OFFLOAD_UNAVAILABLE;
262         status->device_number = -1;
263         status->data_sent = 0;
264         status->data_received = 0;
265     }
266 
267     // make sure libray is initialized
268     retval = __offload_init_library();
269     // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
270     OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
271 
272     OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
273 
274     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
275 
276     // initalize all devices if init_type is on_offload_all
277     if (retval && __offload_init_type == c_init_on_offload_all) {
278         for (int i = 0; i < mic_engines_total; i++) {
279              mic_engines[i].init();
280         }
281     }
282     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
283 
284     OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
285 
286     if (target_type == TARGET_HOST) {
287         // Host always available
288         retval = true;
289     }
290     else if (target_type == TARGET_MIC) {
291         _Offload_stream handle = *(reinterpret_cast<_Offload_stream*>(stream));
292         Stream * stream = handle ? Stream::find_stream(handle, false) : NULL;
293         if (target_number >= -1) {
294             if (retval) {
295                 // device number is defined by stream
296                 if (stream) {
297                     target_number = stream->get_device();
298                     target_number = target_number % mic_engines_total;
299                 }
300 
301                 // reserve device in ORSL
302                 if (target_number != -1) {
303                     if (is_optional) {
304                         if (!ORSL::try_reserve(target_number)) {
305                             target_number = -1;
306                         }
307                     }
308                     else {
309                         if (!ORSL::reserve(target_number)) {
310                             target_number = -1;
311                         }
312                     }
313                 }
314 
315                 // initialize device
316                 if (target_number >= 0 &&
317                     __offload_init_type == c_init_on_offload) {
318                     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
319                     mic_engines[target_number].init();
320                     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
321                 }
322             }
323             else {
324                 // fallback to CPU
325                 target_number = -1;
326             }
327             if (!(target_number == -1 && handle == 0)) {
328                 if (target_number < 0 || !retval) {
329                     if (!is_optional && status == 0) {
330                         LIBOFFLOAD_ERROR(c_device_is_not_available);
331                         exit(1);
332                     }
333 
334                     retval = false;
335                 }
336             }
337         }
338         else {
339             LIBOFFLOAD_ERROR(c_invalid_device_number);
340             exit(1);
341         }
342     }
343 
344     if (retval) {
345         ofld = new OffloadDescriptor(target_number, status,
346                                      !is_optional, false, timer_data);
347         OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
348         Offload_Report_Prolog(timer_data);
349         OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
350                               "Starting offload: target_type = %d, "
351                               "number = %d, is_optional = %d\n",
352                               target_type, target_number, is_optional);
353 
354         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
355     }
356     else {
357         ofld = NULL;
358 
359         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
360         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
361         offload_report_free_data(timer_data);
362     }
363 
364     return ofld;
365 }
366 
offload_offload_wrap(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal,int entry_id,const void * stack_addr,OffloadFlags offload_flags)367 static int offload_offload_wrap(
368     OFFLOAD ofld,
369     const char *name,
370     int is_empty,
371     int num_vars,
372     VarDesc *vars,
373     VarDesc2 *vars2,
374     int num_waits,
375     const void **waits,
376     const void **signal,
377     int entry_id,
378     const void *stack_addr,
379     OffloadFlags offload_flags
380 )
381 {
382     if (signal) {
383        ofld->set_signal(*signal);
384     }
385 
386     bool ret = ofld->offload(name, is_empty, vars, vars2, num_vars,
387                              waits, num_waits, signal, entry_id,
388                              stack_addr, offload_flags);
389     if (!ret || (signal == 0 && ofld->get_stream() == 0 &&
390                  !offload_flags.bits.omp_async)) {
391         delete ofld;
392     }
393     return ret;
394 }
395 
OFFLOAD_OFFLOAD1(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal)396 extern "C" int OFFLOAD_OFFLOAD1(
397     OFFLOAD ofld,
398     const char *name,
399     int is_empty,
400     int num_vars,
401     VarDesc *vars,
402     VarDesc2 *vars2,
403     int num_waits,
404     const void **waits,
405     const void **signal
406 )
407 {
408     return offload_offload_wrap(ofld, name, is_empty,
409                             num_vars, vars, vars2,
410                             num_waits, waits,
411                             signal, 0, NULL, {0});
412 }
413 
OFFLOAD_OFFLOAD2(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal,int entry_id,const void * stack_addr)414 extern "C" int OFFLOAD_OFFLOAD2(
415     OFFLOAD ofld,
416     const char *name,
417     int is_empty,
418     int num_vars,
419     VarDesc *vars,
420     VarDesc2 *vars2,
421     int num_waits,
422     const void** waits,
423     const void** signal,
424     int entry_id,
425     const void *stack_addr
426 )
427 {
428     return offload_offload_wrap(ofld, name, is_empty,
429                             num_vars, vars, vars2,
430                             num_waits, waits,
431                             signal, entry_id, stack_addr, {0});
432 }
433 
OFFLOAD_OFFLOAD3(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal,int entry_id,const void * stack_addr,OffloadFlags offload_flags,const void ** stream)434 extern "C" int OFFLOAD_OFFLOAD3(
435     OFFLOAD ofld,
436     const char *name,
437     int is_empty,
438     int num_vars,
439     VarDesc *vars,
440     VarDesc2 *vars2,
441     int num_waits,
442     const void** waits,
443     const void** signal,
444     int entry_id,
445     const void *stack_addr,
446     OffloadFlags offload_flags,
447     const void** stream
448 )
449 {
450     // 1. if the source is compiled with -traceback then stream is 0
451     // 2. if offload has a stream clause then stream is address of stream value
452     if (stream) {
453         ofld->set_stream(*(reinterpret_cast<_Offload_stream *>(stream)));
454     }
455 
456     return offload_offload_wrap(ofld, name, is_empty,
457                             num_vars, vars, vars2,
458                             num_waits, waits,
459                             signal, entry_id, stack_addr, offload_flags);
460 }
461 
OFFLOAD_OFFLOAD(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void * signal,int entry_id,const void * stack_addr)462 extern "C" int OFFLOAD_OFFLOAD(
463     OFFLOAD ofld,
464     const char *name,
465     int is_empty,
466     int num_vars,
467     VarDesc *vars,
468     VarDesc2 *vars2,
469     int num_waits,
470     const void **waits,
471     const void *signal,
472     int entry_id,
473     const void *stack_addr
474 )
475 {
476     // signal is passed by reference now
477     const void **signal_new = (signal != 0) ? &signal : 0;
478     const void **waits_new = 0;
479     int num_waits_new = 0;
480 
481     // remove NULL values from the list of signals to wait for
482     if (num_waits > 0) {
483         waits_new = (const void**) alloca(sizeof(void*) * num_waits);
484         for (int i = 0; i < num_waits; i++) {
485             if (waits[i] != 0) {
486                 waits_new[num_waits_new++] = waits[i];
487             }
488         }
489     }
490 
491     return OFFLOAD_OFFLOAD1(ofld, name, is_empty,
492                             num_vars, vars, vars2,
493                             num_waits_new, waits_new,
494                             signal_new);
495 }
496 
OFFLOAD_CALL_COUNT()497 extern "C" int OFFLOAD_CALL_COUNT()
498 {
499     offload_call_count++;
500     return offload_call_count;
501 }
502