1 /*
2     Copyright (c) 2014-2015 Intel Corporation.  All Rights Reserved.
3 
4     Redistribution and use in source and binary forms, with or without
5     modification, are permitted provided that the following conditions
6     are met:
7 
8       * Redistributions of source code must retain the above copyright
9         notice, this list of conditions and the following disclaimer.
10       * Redistributions in binary form must reproduce the above copyright
11         notice, this list of conditions and the following disclaimer in the
12         documentation and/or other materials provided with the distribution.
13       * Neither the name of Intel Corporation nor the names of its
14         contributors may be used to endorse or promote products derived
15         from this software without specific prior written permission.
16 
17     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 #include "compiler_if_host.h"
32 
33 #include <malloc.h>
34 #ifndef TARGET_WINNT
35 #include <alloca.h>
36 #endif // TARGET_WINNT
37 
// Global counter on the host.
// This variable is used if P2OPT_offload_do_data_persistence == 2.
// It is used to identify the offload constructs contained in one procedure.
// An increment of OFFLOAD_CALL_COUNT is inserted at the entries of HOST
// routines that contain offload constructs.
// Within this file it is only modified by OFFLOAD_CALL_COUNT(), which
// increments it and returns the new value.
static int offload_call_count = 0;
44 
OFFLOAD_TARGET_ACQUIRE(TARGET_TYPE target_type,int target_number,int is_optional,_Offload_status * status,const char * file,uint64_t line)45 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
46     TARGET_TYPE      target_type,
47     int              target_number,
48     int              is_optional,
49     _Offload_status* status,
50     const char*      file,
51     uint64_t         line
52 )
53 {
54     bool retval;
55     OFFLOAD ofld;
56 
57     // initialize status
58     if (status != 0) {
59         status->result = OFFLOAD_UNAVAILABLE;
60         status->device_number = -1;
61         status->data_sent = 0;
62         status->data_received = 0;
63     }
64 
65     // make sure libray is initialized
66     retval = __offload_init_library();
67 
68     // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
69     OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
70 
71     OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
72 
73     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
74 
75     // initialize all devices is init_type is on_offload_all
76     if (retval && __offload_init_type == c_init_on_offload_all) {
77         for (int i = 0; i < mic_engines_total; i++) {
78              mic_engines[i].init();
79         }
80     }
81     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
82 
83     OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
84 
85     if (target_type == TARGET_HOST) {
86         // Host always available
87         retval = true;
88     }
89     else if (target_type == TARGET_MIC) {
90         if (target_number >= -1) {
91             if (retval) {
92                 if (target_number >= 0) {
93                     // User provided the device number
94                     target_number = target_number % mic_engines_total;
95                 }
96                 else {
97                     // use device 0
98                     target_number = 0;
99                 }
100 
101                 // reserve device in ORSL
102                 if (is_optional) {
103                     if (!ORSL::try_reserve(target_number)) {
104                         target_number = -1;
105                     }
106                 }
107                 else {
108                     if (!ORSL::reserve(target_number)) {
109                         target_number = -1;
110                     }
111                 }
112 
113                 // initialize device
114                 if (target_number >= 0 &&
115                     __offload_init_type == c_init_on_offload) {
116                     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
117                     mic_engines[target_number].init();
118                     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
119                 }
120             }
121             else {
122                 // fallback to CPU
123                 target_number = -1;
124             }
125 
126             if (target_number < 0 || !retval) {
127                 if (!is_optional && status == 0) {
128                     LIBOFFLOAD_ERROR(c_device_is_not_available);
129                     exit(1);
130                 }
131 
132                 retval = false;
133             }
134         }
135         else {
136             LIBOFFLOAD_ERROR(c_invalid_device_number);
137             exit(1);
138         }
139     }
140 
141     if (retval) {
142         ofld = new OffloadDescriptor(target_number, status,
143                                      !is_optional, false, timer_data);
144         OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
145         Offload_Report_Prolog(timer_data);
146         OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
147                               "Starting offload: target_type = %d, "
148                               "number = %d, is_optional = %d\n",
149                               target_type, target_number, is_optional);
150 
151         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
152     }
153     else {
154         ofld = NULL;
155 
156         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
157         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
158         offload_report_free_data(timer_data);
159     }
160 
161     return ofld;
162 }
163 
OFFLOAD_TARGET_ACQUIRE1(const int * device_num,const char * file,uint64_t line)164 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
165     const int*  device_num,
166     const char* file,
167     uint64_t    line
168 )
169 {
170     int target_number;
171 
172     // make sure libray is initialized and at least one device is available
173     if (!__offload_init_library()) {
174         LIBOFFLOAD_ERROR(c_device_is_not_available);
175         exit(1);
176     }
177 
178     // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
179 
180     OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
181 
182     OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
183 
184     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
185 
186     if (__offload_init_type == c_init_on_offload_all) {
187         for (int i = 0; i < mic_engines_total; i++) {
188              mic_engines[i].init();
189         }
190     }
191 
192     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
193 
194     OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
195 
196     // use default device number if it is not provided
197     if (device_num != 0) {
198         target_number = *device_num;
199     }
200     else {
201         target_number = __omp_device_num;
202     }
203 
204     // device number should be a non-negative integer value
205     if (target_number < 0) {
206         LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
207         exit(1);
208     }
209 
210     // should we do this for OpenMP?
211     target_number %= mic_engines_total;
212 
213     // reserve device in ORSL
214     if (!ORSL::reserve(target_number)) {
215         LIBOFFLOAD_ERROR(c_device_is_not_available);
216         exit(1);
217     }
218 
219     // initialize device(s)
220     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
221 
222     if (__offload_init_type == c_init_on_offload) {
223         mic_engines[target_number].init();
224     }
225 
226     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
227 
228     OFFLOAD ofld =
229         new OffloadDescriptor(target_number, 0, true, true, timer_data);
230 
231     OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
232 
233     Offload_Report_Prolog(timer_data);
234 
235     OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
236                           "Starting OpenMP offload, device = %d\n",
237                           target_number);
238 
239     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
240 
241     return ofld;
242 }
243 
OFFLOAD_TARGET_ACQUIRE2(TARGET_TYPE target_type,int target_number,int is_optional,_Offload_status * status,const char * file,uint64_t line,const void ** stream)244 extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE2(
245     TARGET_TYPE      target_type,
246     int              target_number,
247     int              is_optional,
248     _Offload_status* status,
249     const char*      file,
250     uint64_t         line,
251     const void**     stream
252 )
253 {
254     bool retval;
255     OFFLOAD ofld;
256 
257     // initialize status
258     if (status != 0) {
259         status->result = OFFLOAD_UNAVAILABLE;
260         status->device_number = -1;
261         status->data_sent = 0;
262         status->data_received = 0;
263     }
264 
265     // make sure libray is initialized
266     retval = __offload_init_library();
267     // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
268     OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
269 
270     OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
271 
272     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
273 
274     // initalize all devices if init_type is on_offload_all
275     if (retval && __offload_init_type == c_init_on_offload_all) {
276         for (int i = 0; i < mic_engines_total; i++) {
277              mic_engines[i].init();
278         }
279     }
280     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
281 
282     OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
283 
284     if (target_type == TARGET_HOST) {
285         // Host always available
286         retval = true;
287     }
288     else if (target_type == TARGET_MIC) {
289         _Offload_stream handle = *(reinterpret_cast<_Offload_stream*>(stream));
290         Stream * stream = handle ? Stream::find_stream(handle, false) : NULL;
291         if (target_number >= -1) {
292             if (retval) {
293                 // device number is defined by stream
294                 if (stream) {
295                     target_number = stream->get_device();
296                     target_number = target_number % mic_engines_total;
297                 }
298 
299                 // reserve device in ORSL
300                 if (target_number != -1) {
301                     if (is_optional) {
302                         if (!ORSL::try_reserve(target_number)) {
303                             target_number = -1;
304                         }
305                     }
306                     else {
307                         if (!ORSL::reserve(target_number)) {
308                             target_number = -1;
309                         }
310                     }
311                 }
312 
313                 // initialize device
314                 if (target_number >= 0 &&
315                     __offload_init_type == c_init_on_offload) {
316                     OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
317                     mic_engines[target_number].init();
318                     OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
319                 }
320             }
321             else {
322                 // fallback to CPU
323                 target_number = -1;
324             }
325             if (!(target_number == -1 && handle == 0)) {
326                 if (target_number < 0 || !retval) {
327                     if (!is_optional && status == 0) {
328                         LIBOFFLOAD_ERROR(c_device_is_not_available);
329                         exit(1);
330                     }
331 
332                     retval = false;
333                 }
334             }
335         }
336         else {
337             LIBOFFLOAD_ERROR(c_invalid_device_number);
338             exit(1);
339         }
340     }
341 
342     if (retval) {
343         ofld = new OffloadDescriptor(target_number, status,
344                                      !is_optional, false, timer_data);
345         OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
346         Offload_Report_Prolog(timer_data);
347         OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
348                               "Starting offload: target_type = %d, "
349                               "number = %d, is_optional = %d\n",
350                               target_type, target_number, is_optional);
351 
352         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
353     }
354     else {
355         ofld = NULL;
356 
357         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
358         OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
359         offload_report_free_data(timer_data);
360     }
361 
362     return ofld;
363 }
364 
offload_offload_wrap(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal,int entry_id,const void * stack_addr,OffloadFlags offload_flags)365 static int offload_offload_wrap(
366     OFFLOAD ofld,
367     const char *name,
368     int is_empty,
369     int num_vars,
370     VarDesc *vars,
371     VarDesc2 *vars2,
372     int num_waits,
373     const void **waits,
374     const void **signal,
375     int entry_id,
376     const void *stack_addr,
377     OffloadFlags offload_flags
378 )
379 {
380     bool ret = ofld->offload(name, is_empty, vars, vars2, num_vars,
381                              waits, num_waits, signal, entry_id,
382                              stack_addr, offload_flags);
383     if (!ret || (signal == 0 && ofld->get_stream() == 0 &&
384                  !offload_flags.bits.omp_async)) {
385         delete ofld;
386     }
387     return ret;
388 }
389 
OFFLOAD_OFFLOAD1(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal)390 extern "C" int OFFLOAD_OFFLOAD1(
391     OFFLOAD ofld,
392     const char *name,
393     int is_empty,
394     int num_vars,
395     VarDesc *vars,
396     VarDesc2 *vars2,
397     int num_waits,
398     const void **waits,
399     const void **signal
400 )
401 {
402     return offload_offload_wrap(ofld, name, is_empty,
403                             num_vars, vars, vars2,
404                             num_waits, waits,
405                             signal, 0, NULL, {0});
406 }
407 
OFFLOAD_OFFLOAD2(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal,int entry_id,const void * stack_addr)408 extern "C" int OFFLOAD_OFFLOAD2(
409     OFFLOAD ofld,
410     const char *name,
411     int is_empty,
412     int num_vars,
413     VarDesc *vars,
414     VarDesc2 *vars2,
415     int num_waits,
416     const void** waits,
417     const void** signal,
418     int entry_id,
419     const void *stack_addr
420 )
421 {
422     return offload_offload_wrap(ofld, name, is_empty,
423                             num_vars, vars, vars2,
424                             num_waits, waits,
425                             signal, entry_id, stack_addr, {0});
426 }
427 
OFFLOAD_OFFLOAD3(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void ** signal,int entry_id,const void * stack_addr,OffloadFlags offload_flags,const void ** stream)428 extern "C" int OFFLOAD_OFFLOAD3(
429     OFFLOAD ofld,
430     const char *name,
431     int is_empty,
432     int num_vars,
433     VarDesc *vars,
434     VarDesc2 *vars2,
435     int num_waits,
436     const void** waits,
437     const void** signal,
438     int entry_id,
439     const void *stack_addr,
440     OffloadFlags offload_flags,
441     const void** stream
442 )
443 {
444     // 1. if the source is compiled with -traceback then stream is 0
445     // 2. if offload has a stream clause then stream is address of stream value
446     if (stream) {
447         ofld->set_stream(*(reinterpret_cast<_Offload_stream *>(stream)));
448     }
449 
450     return offload_offload_wrap(ofld, name, is_empty,
451                             num_vars, vars, vars2,
452                             num_waits, waits,
453                             signal, entry_id, stack_addr, offload_flags);
454 }
455 
OFFLOAD_OFFLOAD(OFFLOAD ofld,const char * name,int is_empty,int num_vars,VarDesc * vars,VarDesc2 * vars2,int num_waits,const void ** waits,const void * signal,int entry_id,const void * stack_addr)456 extern "C" int OFFLOAD_OFFLOAD(
457     OFFLOAD ofld,
458     const char *name,
459     int is_empty,
460     int num_vars,
461     VarDesc *vars,
462     VarDesc2 *vars2,
463     int num_waits,
464     const void **waits,
465     const void *signal,
466     int entry_id,
467     const void *stack_addr
468 )
469 {
470     // signal is passed by reference now
471     const void **signal_new = (signal != 0) ? &signal : 0;
472     const void **waits_new = 0;
473     int num_waits_new = 0;
474 
475     // remove NULL values from the list of signals to wait for
476     if (num_waits > 0) {
477         waits_new = (const void**) alloca(sizeof(void*) * num_waits);
478         for (int i = 0; i < num_waits; i++) {
479             if (waits[i] != 0) {
480                 waits_new[num_waits_new++] = waits[i];
481             }
482         }
483     }
484 
485     return OFFLOAD_OFFLOAD1(ofld, name, is_empty,
486                             num_vars, vars, vars2,
487                             num_waits_new, waits_new,
488                             signal_new);
489 }
490 
OFFLOAD_CALL_COUNT()491 extern "C" int OFFLOAD_CALL_COUNT()
492 {
493     offload_call_count++;
494     return offload_call_count;
495 }
496