1 /*
2     Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.
3 
4     Redistribution and use in source and binary forms, with or without
5     modification, are permitted provided that the following conditions
6     are met:
7 
8       * Redistributions of source code must retain the above copyright
9         notice, this list of conditions and the following disclaimer.
10       * Redistributions in binary form must reproduce the above copyright
11         notice, this list of conditions and the following disclaimer in the
12         documentation and/or other materials provided with the distribution.
13       * Neither the name of Intel Corporation nor the names of its
14         contributors may be used to endorse or promote products derived
15         from this software without specific prior written permission.
16 
17     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 #include "offload_target.h"
32 #include <stdlib.h>
33 #include <unistd.h>
34 #ifdef SEP_SUPPORT
35 #include <fcntl.h>
36 #include <sys/ioctl.h>
37 #endif // SEP_SUPPORT
38 #include <omp.h>
39 #include <map>
40 
// typedef offload_func_with_parms.
// Pointer to a function that represents an offloaded entry point.
// The single void* parameter is a temporary fix for passing parameters that
// live on the stack; in this file the entry is invoked with the address of
// the OffloadDescriptor built by OffloadDescriptor::offload().
typedef void (*offload_func_with_parms)(void *);
45 
// Target console and file logging
// prefix               - message prefix string, set in __offload_target_init()
//                        from report_get_message_str(c_report_mic).
// console_enabled      - console trace level; refreshed from the
//                        FunctionDescriptor on every offload() call.
// offload_report_level - report level; refreshed from the FunctionDescriptor
//                        on every offload() call.
const char *prefix;
int console_enabled = 0;
int offload_report_level = 0;
50 
// Trace information

// Printable names for a variable descriptor's direction; indexed by
// m_vars[i].direction.bits in merge_var_descs().
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};

// Printable names for variable descriptor types; indexed by the type_src /
// type_dst values stored in m_vars_extra[] (see merge_var_descs()).
// NOTE(review): the order presumably mirrors the variable-type enumeration
// declared in the project headers -- confirm when either one changes.
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_extended_type",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array",
    "c_data_ptr_ptr",
    "c_func_ptr_ptr",
    "c_void_ptr_ptr",
    "c_string_ptr_ptr",
    "c_cean_var_ptr_ptr",
};
84 
// Device identification returned by the user-visible API at the end of this
// file. Both start at -1; this file does not show where they are assigned.
int mic_index = -1;
int mic_engines_total = -1;
// Cycle frequency, filled in by __offload_target_init() from
// COIPerfGetCycleFrequency().
uint64_t mic_frequency = 0;
// Number of the most recent offload; updated at the start of offload().
int offload_number = 0;
// Per-buffer reference bookkeeping, keyed by buffer address. Entries are
// created and updated by add_ref_count() and BufReleaseRef() while holding
// add_ref_lock.
static std::map<void*, RefInfo*> ref_data;
static mutex_t add_ref_lock;
91 
92 #ifdef SEP_SUPPORT
// SEP sampling control. __offload_target_init() reads the two environment
// variables named below: SEP_MONITOR enables pausing/resuming of sampling
// around offloads, SEP_DEVICE overrides the default device node path.
static const char*  sep_monitor_env = "SEP_MONITOR";
static bool         sep_monitor = false;
static const char*  sep_device_env = "SEP_DEVICE";
static const char*  sep_device =  "/dev/sep3.8/c";
// Number of offloads currently executing; sampling is resumed on the 0 -> 1
// transition and paused on the transition back to 0 (see offload()).
static int          sep_counter = 0;

// ioctl request codes issued on the SEP device by VTPauseSampling() and
// VTResumeSampling().
#define SEP_API_IOC_MAGIC   99
#define SEP_IOCTL_PAUSE     _IO (SEP_API_IOC_MAGIC, 31)
#define SEP_IOCTL_RESUME    _IO (SEP_API_IOC_MAGIC, 32)
102 
add_ref_count(void * buf,bool created)103 static void add_ref_count(void * buf, bool created)
104 {
105     mutex_locker_t locker(add_ref_lock);
106     RefInfo * info = ref_data[buf];
107 
108     if (info) {
109         info->count++;
110     }
111     else {
112         info = new RefInfo((int)created,(long)1);
113     }
114     info->is_added |= created;
115     ref_data[buf] = info;
116 }
117 
BufReleaseRef(void * buf)118 static void BufReleaseRef(void * buf)
119 {
120     mutex_locker_t locker(add_ref_lock);
121     RefInfo * info = ref_data[buf];
122 
123     if (info) {
124         --info->count;
125         if (info->count == 0 && info->is_added) {
126             OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
127                              ((RefInfo *) ref_data[buf])->count);
128             BufferReleaseRef(buf);
129             info->is_added = 0;
130         }
131     }
132 }
133 
VTPauseSampling(void)134 static int VTPauseSampling(void)
135 {
136     int ret = -1;
137     int handle = open(sep_device, O_RDWR);
138     if (handle > 0) {
139         ret = ioctl(handle, SEP_IOCTL_PAUSE);
140         close(handle);
141     }
142     return ret;
143 }
144 
VTResumeSampling(void)145 static int VTResumeSampling(void)
146 {
147     int ret = -1;
148     int handle = open(sep_device, O_RDWR);
149     if (handle > 0) {
150         ret = ioctl(handle, SEP_IOCTL_RESUME);
151         close(handle);
152     }
153     return ret;
154 }
155 #endif // SEP_SUPPORT
156 
offload(uint32_t buffer_count,void ** buffers,void * misc_data,uint16_t misc_data_len,void * return_data,uint16_t return_data_len)157 void OffloadDescriptor::offload(
158     uint32_t  buffer_count,
159     void**    buffers,
160     void*     misc_data,
161     uint16_t  misc_data_len,
162     void*     return_data,
163     uint16_t  return_data_len
164 )
165 {
166     FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
167     const char *name = func->data;
168     OffloadDescriptor ofld;
169     char *in_data = 0;
170     char *out_data = 0;
171     char *timer_data = 0;
172 
173     console_enabled = func->console_enabled;
174     timer_enabled = func->timer_enabled;
175     offload_report_level = func->offload_report_level;
176     offload_number = func->offload_number;
177     ofld.set_offload_number(func->offload_number);
178 
179 #ifdef SEP_SUPPORT
180     if (sep_monitor) {
181         if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
182             OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
183             VTResumeSampling();
184         }
185     }
186 #endif // SEP_SUPPORT
187 
188     OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
189                           c_offload_start_target_func,
190                           "Offload \"%s\" started\n", name);
191 
192     // initialize timer data
193     OFFLOAD_TIMER_INIT();
194 
195     OFFLOAD_TIMER_START(c_offload_target_total_time);
196 
197     OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
198 
199     // get input/output buffer addresses
200     if (func->in_datalen > 0 || func->out_datalen > 0) {
201         if (func->data_offset != 0) {
202             in_data = (char*) misc_data + func->data_offset;
203             out_data = (char*) return_data;
204         }
205         else {
206             char *inout_buf = (char*) buffers[--buffer_count];
207             in_data = inout_buf;
208             out_data = inout_buf;
209         }
210     }
211 
212     // assign variable descriptors
213     ofld.m_vars_total = func->vars_num;
214     if (ofld.m_vars_total > 0) {
215         uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
216 
217         ofld.m_vars = (VarDesc*) malloc(var_data_len);
218         if (ofld.m_vars == NULL)
219           LIBOFFLOAD_ERROR(c_malloc);
220         memcpy(ofld.m_vars, in_data, var_data_len);
221 
222         ofld.m_vars_extra =
223             (VarExtra*) malloc(ofld.m_vars_total * sizeof(VarExtra));
224         if (ofld.m_vars == NULL)
225           LIBOFFLOAD_ERROR(c_malloc);
226 
227         in_data += var_data_len;
228         func->in_datalen -= var_data_len;
229     }
230 
231     // timer data
232     if (func->timer_enabled) {
233         uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
234 
235         timer_data = out_data;
236         out_data += timer_data_len;
237         func->out_datalen -= timer_data_len;
238     }
239 
240     // init Marshallers
241     ofld.m_in.init_buffer(in_data, func->in_datalen);
242     ofld.m_out.init_buffer(out_data, func->out_datalen);
243 
244     // copy buffers to offload descriptor
245     std::copy(buffers, buffers + buffer_count,
246               std::back_inserter(ofld.m_buffers));
247 
248     OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
249 
250     // find offload entry address
251     OFFLOAD_TIMER_START(c_offload_target_func_lookup);
252 
253     offload_func_with_parms entry = (offload_func_with_parms)
254         __offload_entries.find_addr(name);
255 
256     if (entry == NULL) {
257 #if OFFLOAD_DEBUG > 0
258         if (console_enabled > 2) {
259             __offload_entries.dump();
260         }
261 #endif
262         LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
263         exit(1);
264     }
265 
266     OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
267 
268     OFFLOAD_TIMER_START(c_offload_target_func_time);
269 
270     // execute offload entry
271     entry(&ofld);
272 
273     OFFLOAD_TIMER_STOP(c_offload_target_func_time);
274 
275     OFFLOAD_TIMER_STOP(c_offload_target_total_time);
276 
277     // copy timer data to the buffer
278     OFFLOAD_TIMER_TARGET_DATA(timer_data);
279 
280     OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
281 
282 #ifdef SEP_SUPPORT
283     if (sep_monitor) {
284         if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
285             OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
286             VTPauseSampling();
287         }
288     }
289 #endif // SEP_SUPPORT
290 }
291 
merge_var_descs(VarDesc * vars,VarDesc2 * vars2,int vars_total)292 void OffloadDescriptor::merge_var_descs(
293     VarDesc *vars,
294     VarDesc2 *vars2,
295     int vars_total
296 )
297 {
298     // number of variable descriptors received from host and generated
299     // locally should match
300     if (m_vars_total < vars_total) {
301         LIBOFFLOAD_ERROR(c_merge_var_descs1);
302         exit(1);
303     }
304 
305     for (int i = 0; i < m_vars_total; i++) {
306         // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src
307 
308         if (i < vars_total) {
309             // variable type must match
310             if (m_vars[i].type.bits != vars[i].type.bits) {
311                 OFFLOAD_TRACE(2,
312                     "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n",
313                     i, m_vars[i].type.bits, i, vars[i].type.bits);
314                 LIBOFFLOAD_ERROR(c_merge_var_descs2);
315                 exit(1);
316             }
317 
318             if (m_vars[i].type.src == c_extended_type) {
319                 VarDescExtendedType *etype =
320                     reinterpret_cast<VarDescExtendedType*>(vars[i].ptr);
321                 m_vars_extra[i].type_src = etype->extended_type;
322                 m_vars[i].ptr            = etype->ptr;
323             }
324             else {
325                 m_vars_extra[i].type_src = m_vars[i].type.src;
326                 if (!(m_vars[i].flags.use_device_ptr &&
327                       m_vars[i].type.src == c_dv)) {
328                     m_vars[i].ptr = vars[i].ptr;
329                 }
330             }
331             // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst
332             if (i < vars_total && m_vars[i].type.dst == c_extended_type) {
333                 VarDescExtendedType *etype =
334                     reinterpret_cast<VarDescExtendedType*>(vars[i].into);
335                 m_vars_extra[i].type_dst = etype->extended_type;
336                 m_vars[i].into           = etype->ptr;
337             }
338             else {
339                 m_vars_extra[i].type_dst = m_vars[i].type.dst;
340                 m_vars[i].into = vars[i].into;
341             }
342 
343             const char *var_sname = "";
344             if (vars2 != NULL) {
345                 if (vars2[i].sname != NULL) {
346                     var_sname = vars2[i].sname;
347                 }
348             }
349             OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
350                 "   VarDesc %d, var=%s, %s, %s\n",
351                 i, var_sname,
352                 vardesc_direction_as_string[m_vars[i].direction.bits],
353                 vardesc_type_as_string[m_vars_extra[i].type_src]);
354             if (vars2 != NULL && vars2[i].dname != NULL) {
355                 OFFLOAD_TRACE(2, "              into=%s, %s\n", vars2[i].dname,
356                     vardesc_type_as_string[m_vars_extra[i].type_dst]);
357             }
358         }
359         else {
360             m_vars_extra[i].type_src = m_vars[i].type.src;
361             m_vars_extra[i].type_dst = m_vars[i].type.dst;
362         }
363 
364         OFFLOAD_TRACE(2,
365             "              type_src=%d, type_dstn=%d, direction=%d, "
366             "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
367             "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
368             m_vars_extra[i].type_src,
369             m_vars_extra[i].type_dst,
370             m_vars[i].direction.bits,
371             m_vars[i].alloc_if,
372             m_vars[i].free_if,
373             m_vars[i].align,
374             m_vars[i].mic_offset,
375             m_vars[i].flags.bits,
376             m_vars[i].offset,
377             m_vars[i].size,
378             m_vars[i].count,
379             m_vars[i].ptr,
380             m_vars[i].into);
381     }
382 }
383 
scatter_copyin_data()384 void OffloadDescriptor::scatter_copyin_data()
385 {
386     OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
387 
388     OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
389                         m_in.get_buffer_start(),
390                         m_in.get_buffer_size());
391     OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
392                              m_in.get_buffer_size());
393 
394     // receive data
395     for (int i = 0; i < m_vars_total; i++) {
396         bool src_is_for_mic = (m_vars[i].direction.out ||
397                                m_vars[i].into == NULL);
398         void** ptr_addr = src_is_for_mic ?
399                           static_cast<void**>(m_vars[i].ptr) :
400                           static_cast<void**>(m_vars[i].into);
401         int type = src_is_for_mic ? m_vars_extra[i].type_src :
402                                     m_vars_extra[i].type_dst;
403         bool is_static = src_is_for_mic ?
404                          m_vars[i].flags.is_static :
405                          m_vars[i].flags.is_static_dstn;
406         void *ptr = NULL;
407 
408         if (m_vars[i].flags.alloc_disp) {
409             int64_t offset = 0;
410             m_in.receive_data(&offset, sizeof(offset));
411         }
412         if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
413             VAR_TYPE_IS_DV_DATA(type)) {
414             ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
415                   reinterpret_cast<ArrDesc*>(ptr_addr) :
416                   *reinterpret_cast<ArrDesc**>(ptr_addr);
417             ptr_addr = reinterpret_cast<void**>(&dvp->Base);
418         }
419         // Set pointer values
420         switch (type) {
421             case c_data_ptr_array:
422                 {
423                     int j = m_vars[i].ptr_arr_offset;
424                     int max_el = j + m_vars[i].count;
425                     char *dst_arr_ptr = (src_is_for_mic)?
426                         *(reinterpret_cast<char**>(m_vars[i].ptr)) :
427                         reinterpret_cast<char*>(m_vars[i].into);
428 
429                     // if is_pointer is 1 it means that pointer array itself
430                     // is defined either via pointer or as class member.
431                     // i.e. arr_ptr[0:5] or this->ARR[0:5]
432                     if (m_vars[i].flags.is_pointer) {
433                         int64_t offset = 0;
434                         m_in.receive_data(&offset, sizeof(offset));
435                         dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
436                     }
437                     for (; j < max_el; j++) {
438                         if (src_is_for_mic) {
439                             m_vars[j].ptr =
440                                 dst_arr_ptr + m_vars[j].ptr_arr_offset;
441                         }
442                         else {
443                             m_vars[j].into =
444                                 dst_arr_ptr + m_vars[j].ptr_arr_offset;
445                         }
446                     }
447                 }
448                 break;
449             case c_data:
450             case c_void_ptr:
451             case c_void_ptr_ptr:
452             case c_cean_var:
453             case c_dv:
454                 break;
455 
456             case c_string_ptr:
457             case c_data_ptr:
458             case c_string_ptr_ptr:
459             case c_data_ptr_ptr:
460             case c_cean_var_ptr:
461             case c_cean_var_ptr_ptr:
462             case c_dv_ptr:
463                 // Don't need ptr_addr value for variables from stack buffer.
464                 // Stack buffer address is set at var_desc with #0.
465                 if (i != 0 && m_vars[i].flags.is_stack_buf) {
466                     break;
467                 }
468                 if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
469                     TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
470                     int64_t offset;
471 
472                     m_in.receive_data(&offset, sizeof(offset));
473                     ptr_addr = reinterpret_cast<void**>(
474                                  reinterpret_cast<char*>(*ptr_addr) + offset);
475 
476                 }
477 
478                 if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
479                     void *buf = NULL;
480                     if (m_vars[i].flags.sink_addr) {
481                         m_in.receive_data(&buf, sizeof(buf));
482                     }
483                     else {
484                         buf = m_buffers.front();
485                         m_buffers.pop_front();
486                     }
487                     if (buf) {
488                         if (!is_static) {
489                             if (!m_vars[i].flags.sink_addr) {
490                                 // increment buffer reference
491                                 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
492                                 BufferAddRef(buf);
493                                 OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
494                                 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
495                             }
496                             add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
497                             OFFLOAD_TRACE(1, "    AddRef count = %d\n",
498                                               ((RefInfo *) ref_data[buf])->count);
499                         }
500                         ptr = static_cast<char*>(buf) +
501                                   m_vars[i].mic_offset +
502                                   (m_vars[i].flags.is_stack_buf ?
503                                    0 : m_vars[i].offset);
504 
505                     }
506                     *ptr_addr = ptr;
507                 }
508                 else if (m_vars[i].flags.sink_addr) {
509                     void *buf;
510                     m_in.receive_data(&buf, sizeof(buf));
511                     void *ptr = static_cast<char*>(buf) +
512                                     m_vars[i].mic_offset +
513                                     (m_vars[i].flags.is_stack_buf ?
514                                      0 : m_vars[i].offset);
515                     *ptr_addr = ptr;
516                 }
517                 break;
518 
519             case c_func_ptr:
520             case c_func_ptr_ptr:
521                 break;
522 
523             case c_dv_data:
524             case c_dv_ptr_data:
525             case c_dv_data_slice:
526             case c_dv_ptr_data_slice:
527                 if (m_vars[i].alloc_if) {
528                     void *buf;
529                     if (m_vars[i].flags.sink_addr) {
530                         m_in.receive_data(&buf, sizeof(buf));
531                     }
532                     else {
533                         buf = m_buffers.front();
534                         m_buffers.pop_front();
535                     }
536                     if (buf) {
537                         if (!is_static) {
538                             if (!m_vars[i].flags.sink_addr) {
539                                 // increment buffer reference
540                                 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
541                                 BufferAddRef(buf);
542                                 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
543                             }
544                             add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
545                         }
546                         ptr = static_cast<char*>(buf) +
547                             m_vars[i].mic_offset + m_vars[i].offset;
548                     }
549                     *ptr_addr = ptr;
550                 }
551                 else if (m_vars[i].flags.sink_addr) {
552                     void *buf;
553                     m_in.receive_data(&buf, sizeof(buf));
554                     ptr = static_cast<char*>(buf) +
555                           m_vars[i].mic_offset + m_vars[i].offset;
556                     *ptr_addr = ptr;
557                 }
558                 break;
559 
560             default:
561                 LIBOFFLOAD_ERROR(c_unknown_var_type, type);
562                 abort();
563         }
564         // Release obsolete buffers for stack of persistent objects.
565         // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
566         // stack buffer pointer.
567         if (i == 0 &&
568             m_vars[i].flags.is_stack_buf &&
569             !m_vars[i].direction.bits &&
570             m_vars[i].alloc_if &&
571             m_vars[i].size != 0) {
572                 for (int j=0; j < m_vars[i].size; j++) {
573                     void *buf;
574                     m_in.receive_data(&buf, sizeof(buf));
575                     OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
576                     BufferReleaseRef(buf);
577                     ref_data.erase(buf);
578                 }
579         }
580         // Do copyin
581         switch (m_vars_extra[i].type_dst) {
582             case c_data_ptr_array:
583                 break;
584             case c_data:
585             case c_void_ptr:
586             case c_void_ptr_ptr:
587             case c_cean_var:
588                 if (m_vars[i].direction.in &&
589                     !m_vars[i].flags.is_static_dstn) {
590                     int64_t size;
591                     int64_t disp;
592                     char* ptr = m_vars[i].into ?
593                                  static_cast<char*>(m_vars[i].into) :
594                                  static_cast<char*>(m_vars[i].ptr);
595                     if (m_vars_extra[i].type_dst == c_cean_var) {
596                         m_in.receive_data((&size), sizeof(int64_t));
597                         m_in.receive_data((&disp), sizeof(int64_t));
598                     }
599                     else {
600                         size = m_vars[i].size;
601                         disp = 0;
602                     }
603                     m_in.receive_data(ptr + disp, size);
604                 }
605                 break;
606 
607             case c_dv:
608                 if (m_vars[i].direction.bits ||
609                     m_vars[i].alloc_if ||
610                     m_vars[i].free_if) {
611                     char* ptr = m_vars[i].into ?
612                                  static_cast<char*>(m_vars[i].into) :
613                                  static_cast<char*>(m_vars[i].ptr);
614                     m_in.receive_data(ptr + sizeof(uint64_t),
615                                       m_vars[i].size - sizeof(uint64_t));
616                 }
617                 break;
618 
619             case c_string_ptr:
620             case c_data_ptr:
621             case c_string_ptr_ptr:
622             case c_data_ptr_ptr:
623             case c_cean_var_ptr:
624             case c_cean_var_ptr_ptr:
625             case c_dv_ptr:
626             case c_dv_data:
627             case c_dv_ptr_data:
628             case c_dv_data_slice:
629             case c_dv_ptr_data_slice:
630                 break;
631 
632             case c_func_ptr:
633             case c_func_ptr_ptr:
634                 if (m_vars[i].direction.in) {
635                     m_in.receive_func_ptr((const void**) m_vars[i].ptr);
636                 }
637                 break;
638 
639             default:
640                 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
641                 abort();
642         }
643     }
644 
645     OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
646                   m_in.get_tfr_size());
647 
648     OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
649 
650     OFFLOAD_TIMER_START(c_offload_target_compute);
651 }
652 
gather_copyout_data()653 void OffloadDescriptor::gather_copyout_data()
654 {
655     OFFLOAD_TIMER_STOP(c_offload_target_compute);
656 
657     OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
658 
659     for (int i = 0; i < m_vars_total; i++) {
660         bool src_is_for_mic = (m_vars[i].direction.out ||
661                                m_vars[i].into == NULL);
662         if (m_vars[i].flags.is_stack_buf) {
663             continue;
664         }
665         switch (m_vars_extra[i].type_src) {
666             case c_data_ptr_array:
667                 break;
668             case c_data:
669             case c_void_ptr:
670             case c_void_ptr_ptr:
671             case c_cean_var:
672                 if (m_vars[i].direction.out &&
673                     !m_vars[i].flags.is_static) {
674                     m_out.send_data(
675                         static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
676                         m_vars[i].size);
677                 }
678                 break;
679 
680             case c_dv:
681                 break;
682 
683             case c_string_ptr:
684             case c_data_ptr:
685             case c_string_ptr_ptr:
686             case c_data_ptr_ptr:
687             case c_cean_var_ptr:
688             case c_cean_var_ptr_ptr:
689             case c_dv_ptr:
690                 if (m_vars[i].free_if &&
691                     src_is_for_mic &&
692                     !m_vars[i].flags.preallocated &&
693                     !m_vars[i].flags.is_static) {
694                     void *buf = *static_cast<char**>(m_vars[i].ptr) -
695                                     m_vars[i].mic_offset -
696                                     (m_vars[i].flags.is_stack_buf?
697                                      0 : m_vars[i].offset);
698                     if (buf == NULL) {
699                         break;
700                     }
701                     // decrement buffer reference count
702                     OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
703                     BufReleaseRef(buf);
704                     OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
705                 }
706                 if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
707                     m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
708                 }
709                 break;
710 
711             case c_func_ptr:
712             case c_func_ptr_ptr:
713                 if (m_vars[i].direction.out) {
714                     m_out.send_func_ptr(*((void**) m_vars[i].ptr));
715                 }
716                 break;
717 
718             case c_dv_data:
719             case c_dv_ptr_data:
720             case c_dv_data_slice:
721             case c_dv_ptr_data_slice:
722                 if (src_is_for_mic &&
723                     m_vars[i].free_if &&
724                     !m_vars[i].flags.is_static) {
725                     ArrDesc *dvp = (m_vars_extra[i].type_src == c_dv_data ||
726                                m_vars_extra[i].type_src == c_dv_data_slice) ?
727                                static_cast<ArrDesc*>(m_vars[i].ptr) :
728                                *static_cast<ArrDesc**>(m_vars[i].ptr);
729 
730                     void *buf = reinterpret_cast<char*>(dvp->Base) -
731                                 m_vars[i].mic_offset -
732                                 m_vars[i].offset;
733 
734                     if (buf == NULL) {
735                         break;
736                     }
737 
738                     // decrement buffer reference count
739                     OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
740                     BufReleaseRef(buf);
741                     OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
742                 }
743                 break;
744 
745             default:
746                 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
747                 abort();
748         }
749 
750         if (m_vars[i].into) {
751             switch (m_vars_extra[i].type_dst) {
752                 case c_data_ptr_array:
753                     break;
754                 case c_data:
755                 case c_void_ptr:
756                 case c_void_ptr_ptr:
757                 case c_cean_var:
758                 case c_dv:
759                     break;
760 
761                 case c_string_ptr:
762                 case c_data_ptr:
763                 case c_string_ptr_ptr:
764                 case c_data_ptr_ptr:
765                 case c_cean_var_ptr:
766                 case c_cean_var_ptr_ptr:
767                 case c_dv_ptr:
768                     if (m_vars[i].direction.in &&
769                         m_vars[i].free_if &&
770                         !m_vars[i].flags.is_static_dstn) {
771                         void *buf = *static_cast<char**>(m_vars[i].into) -
772                                     m_vars[i].mic_offset -
773                                     (m_vars[i].flags.is_stack_buf?
774                                      0 : m_vars[i].offset);
775 
776                         if (buf == NULL) {
777                             break;
778                         }
779                         // decrement buffer reference count
780                         OFFLOAD_TIMER_START(
781                             c_offload_target_release_buffer_refs);
782                         BufReleaseRef(buf);
783                         OFFLOAD_TIMER_STOP(
784                             c_offload_target_release_buffer_refs);
785                     }
786                     break;
787 
788                 case c_func_ptr:
789                 case c_func_ptr_ptr:
790                     break;
791 
792                 case c_dv_data:
793                 case c_dv_ptr_data:
794                 case c_dv_data_slice:
795                 case c_dv_ptr_data_slice:
796                     if (m_vars[i].free_if &&
797                         m_vars[i].direction.in &&
798                         !m_vars[i].flags.is_static_dstn) {
799                         ArrDesc *dvp =
800                             (m_vars_extra[i].type_dst == c_dv_data_slice ||
801                              m_vars_extra[i].type_dst == c_dv_data) ?
802                             static_cast<ArrDesc*>(m_vars[i].into) :
803                             *static_cast<ArrDesc**>(m_vars[i].into);
804                         void *buf = reinterpret_cast<char*>(dvp->Base) -
805                               m_vars[i].mic_offset -
806                               m_vars[i].offset;
807 
808                         if (buf == NULL) {
809                             break;
810                         }
811                         // decrement buffer reference count
812                         OFFLOAD_TIMER_START(
813                             c_offload_target_release_buffer_refs);
814                         BufReleaseRef(buf);
815                         OFFLOAD_TIMER_STOP(
816                             c_offload_target_release_buffer_refs);
817                     }
818                     break;
819 
820                 default:
821                     LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
822                     abort();
823             }
824         }
825     }
826 
827     OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
828                         m_out.get_buffer_start(),
829                         m_out.get_buffer_size());
830 
831     OFFLOAD_DEBUG_DUMP_BYTES(2,
832                              m_out.get_buffer_start(),
833                              m_out.get_buffer_size());
834 
835     OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
836                   "Total copyout data sent to host: [%lld] bytes\n",
837                   m_out.get_tfr_size());
838 
839     OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
840 }
841 
__offload_target_init(void)842 void __offload_target_init(void)
843 {
844 #ifdef SEP_SUPPORT
845     const char* env_var = getenv(sep_monitor_env);
846     if (env_var != 0 && *env_var != '\0') {
847         sep_monitor = atoi(env_var);
848     }
849     env_var = getenv(sep_device_env);
850     if (env_var != 0 && *env_var != '\0') {
851         sep_device = env_var;
852     }
853 #endif // SEP_SUPPORT
854 
855     prefix = report_get_message_str(c_report_mic);
856 
857     // init frequency
858     mic_frequency = COIPerfGetCycleFrequency();
859 }
860 
861 // User-visible offload API
862 
_Offload_number_of_devices(void)863 int _Offload_number_of_devices(void)
864 {
865     return mic_engines_total;
866 }
867 
_Offload_get_device_number(void)868 int _Offload_get_device_number(void)
869 {
870     return mic_index;
871 }
872 
_Offload_get_physical_device_number(void)873 int _Offload_get_physical_device_number(void)
874 {
875     uint32_t index;
876     EngineGetIndex(&index);
877     return index;
878 }
879