1 /*
2     Copyright (c) 2014-2015 Intel Corporation.  All Rights Reserved.
3 
4     Redistribution and use in source and binary forms, with or without
5     modification, are permitted provided that the following conditions
6     are met:
7 
8       * Redistributions of source code must retain the above copyright
9         notice, this list of conditions and the following disclaimer.
10       * Redistributions in binary form must reproduce the above copyright
11         notice, this list of conditions and the following disclaimer in the
12         documentation and/or other materials provided with the distribution.
13       * Neither the name of Intel Corporation nor the names of its
14         contributors may be used to endorse or promote products derived
15         from this software without specific prior written permission.
16 
17     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 #include "offload_target.h"
32 #include <stdlib.h>
33 #include <unistd.h>
34 #ifdef SEP_SUPPORT
35 #include <fcntl.h>
36 #include <sys/ioctl.h>
37 #endif // SEP_SUPPORT
38 #include <omp.h>
39 #include <map>
40 
// typedef offload_func_with_parms.
// Pointer to function that represents an offloaded entry point.
// The parameters are a temporary fix for parameters on the stack.
// OffloadDescriptor::offload() looks an entry up by name and calls it
// with the address of its local OffloadDescriptor.
typedef void (*offload_func_with_parms)(void *);

// Target console and file logging
// Message prefix; assigned in __offload_target_init().
const char *prefix;
// Both values below are overwritten from the incoming FunctionDescriptor
// at the start of every OffloadDescriptor::offload() call.
int console_enabled = 0;
int offload_report_level = 0;
50 
// Trace information

// Printable names for a variable descriptor's direction, indexed by
// VarDesc::direction.bits (see merge_var_descs()).  The table is fully
// const so entries cannot be reassigned by accident.
static const char* const vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};

// Printable names for a variable descriptor's type, indexed by
// VarDesc::type.src / VarDesc::type.dst (see merge_var_descs()).
static const char* const vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array"
};
75 
// Value returned by _Offload_get_device_number(); -1 until assigned
// (the assignment is not in the visible part of this file).
int mic_index = -1;
// Value returned by _Offload_number_of_devices(); -1 until assigned
// (the assignment is not in the visible part of this file).
int mic_engines_total = -1;
// Set from COIPerfGetCycleFrequency() in __offload_target_init().
uint64_t mic_frequency = 0;
// Number of the most recent offload; copied from the FunctionDescriptor
// at the start of OffloadDescriptor::offload().
int offload_number = 0;
// Per-buffer reference bookkeeping, keyed by buffer address.
// add_ref_count() and BufReleaseRef() update it while holding add_ref_lock.
static std::map<void*, RefInfo*> ref_data;
static mutex_t add_ref_lock;
82 
83 #ifdef SEP_SUPPORT
// Name of the environment variable read by __offload_target_init() to
// decide whether sampling is paused/resumed around offloads.
static const char*  sep_monitor_env = "SEP_MONITOR";
static bool         sep_monitor = false;
// Name of the environment variable that can override the device path below.
static const char*  sep_device_env = "SEP_DEVICE";
// Device node opened by VTPauseSampling() / VTResumeSampling().
static const char*  sep_device =  "/dev/sep3.8/c";
// Incremented on entry to and decremented on exit from
// OffloadDescriptor::offload(); sampling is resumed on the 0 -> 1
// transition and paused again when the counter drops back to 0.
static int          sep_counter = 0;

// ioctl request codes issued on the sep device
#define SEP_API_IOC_MAGIC   99
#define SEP_IOCTL_PAUSE     _IO (SEP_API_IOC_MAGIC, 31)
#define SEP_IOCTL_RESUME    _IO (SEP_API_IOC_MAGIC, 32)
93 
add_ref_count(void * buf,bool created)94 static void add_ref_count(void * buf, bool created)
95 {
96     mutex_locker_t locker(add_ref_lock);
97     RefInfo * info = ref_data[buf];
98 
99     if (info) {
100         info->count++;
101     }
102     else {
103         info = new RefInfo((int)created,(long)1);
104     }
105     info->is_added |= created;
106     ref_data[buf] = info;
107 }
108 
BufReleaseRef(void * buf)109 static void BufReleaseRef(void * buf)
110 {
111     mutex_locker_t locker(add_ref_lock);
112     RefInfo * info = ref_data[buf];
113 
114     if (info) {
115         --info->count;
116         if (info->count == 0 && info->is_added) {
117             OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
118                                               ((RefInfo *) ref_data[buf])->count);
119             BufferReleaseRef(buf);
120             info->is_added = 0;
121         }
122     }
123 }
124 
VTPauseSampling(void)125 static int VTPauseSampling(void)
126 {
127     int ret = -1;
128     int handle = open(sep_device, O_RDWR);
129     if (handle > 0) {
130         ret = ioctl(handle, SEP_IOCTL_PAUSE);
131         close(handle);
132     }
133     return ret;
134 }
135 
VTResumeSampling(void)136 static int VTResumeSampling(void)
137 {
138     int ret = -1;
139     int handle = open(sep_device, O_RDWR);
140     if (handle > 0) {
141         ret = ioctl(handle, SEP_IOCTL_RESUME);
142         close(handle);
143     }
144     return ret;
145 }
146 #endif // SEP_SUPPORT
147 
offload(uint32_t buffer_count,void ** buffers,void * misc_data,uint16_t misc_data_len,void * return_data,uint16_t return_data_len)148 void OffloadDescriptor::offload(
149     uint32_t  buffer_count,
150     void**    buffers,
151     void*     misc_data,
152     uint16_t  misc_data_len,
153     void*     return_data,
154     uint16_t  return_data_len
155 )
156 {
157     FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
158     const char *name = func->data;
159     OffloadDescriptor ofld;
160     char *in_data = 0;
161     char *out_data = 0;
162     char *timer_data = 0;
163 
164     console_enabled = func->console_enabled;
165     timer_enabled = func->timer_enabled;
166     offload_report_level = func->offload_report_level;
167     offload_number = func->offload_number;
168     ofld.set_offload_number(func->offload_number);
169 
170 #ifdef SEP_SUPPORT
171     if (sep_monitor) {
172         if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
173             OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
174             VTResumeSampling();
175         }
176     }
177 #endif // SEP_SUPPORT
178 
179     OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
180                           c_offload_start_target_func,
181                           "Offload \"%s\" started\n", name);
182 
183     // initialize timer data
184     OFFLOAD_TIMER_INIT();
185 
186     OFFLOAD_TIMER_START(c_offload_target_total_time);
187 
188     OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
189 
190     // get input/output buffer addresses
191     if (func->in_datalen > 0 || func->out_datalen > 0) {
192         if (func->data_offset != 0) {
193             in_data = (char*) misc_data + func->data_offset;
194             out_data = (char*) return_data;
195         }
196         else {
197             char *inout_buf = (char*) buffers[--buffer_count];
198             in_data = inout_buf;
199             out_data = inout_buf;
200         }
201     }
202 
203     // assign variable descriptors
204     ofld.m_vars_total = func->vars_num;
205     if (ofld.m_vars_total > 0) {
206         uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
207 
208         ofld.m_vars = (VarDesc*) malloc(var_data_len);
209         if (ofld.m_vars == NULL)
210           LIBOFFLOAD_ERROR(c_malloc);
211         memcpy(ofld.m_vars, in_data, var_data_len);
212 
213         in_data += var_data_len;
214         func->in_datalen -= var_data_len;
215     }
216 
217     // timer data
218     if (func->timer_enabled) {
219         uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
220 
221         timer_data = out_data;
222         out_data += timer_data_len;
223         func->out_datalen -= timer_data_len;
224     }
225 
226     // init Marshallers
227     ofld.m_in.init_buffer(in_data, func->in_datalen);
228     ofld.m_out.init_buffer(out_data, func->out_datalen);
229 
230     // copy buffers to offload descriptor
231     std::copy(buffers, buffers + buffer_count,
232               std::back_inserter(ofld.m_buffers));
233 
234     OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
235 
236     // find offload entry address
237     OFFLOAD_TIMER_START(c_offload_target_func_lookup);
238 
239     offload_func_with_parms entry = (offload_func_with_parms)
240         __offload_entries.find_addr(name);
241 
242     if (entry == NULL) {
243 #if OFFLOAD_DEBUG > 0
244         if (console_enabled > 2) {
245             __offload_entries.dump();
246         }
247 #endif
248         LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
249         exit(1);
250     }
251 
252     OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
253 
254     OFFLOAD_TIMER_START(c_offload_target_func_time);
255 
256     // execute offload entry
257     entry(&ofld);
258 
259     OFFLOAD_TIMER_STOP(c_offload_target_func_time);
260 
261     OFFLOAD_TIMER_STOP(c_offload_target_total_time);
262 
263     // copy timer data to the buffer
264     OFFLOAD_TIMER_TARGET_DATA(timer_data);
265 
266     OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
267 
268 #ifdef SEP_SUPPORT
269     if (sep_monitor) {
270         if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
271             OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
272             VTPauseSampling();
273         }
274     }
275 #endif // SEP_SUPPORT
276 }
277 
merge_var_descs(VarDesc * vars,VarDesc2 * vars2,int vars_total)278 void OffloadDescriptor::merge_var_descs(
279     VarDesc *vars,
280     VarDesc2 *vars2,
281     int vars_total
282 )
283 {
284     // number of variable descriptors received from host and generated
285     // locally should match
286     if (m_vars_total < vars_total) {
287         LIBOFFLOAD_ERROR(c_merge_var_descs1);
288         exit(1);
289     }
290 
291     for (int i = 0; i < m_vars_total; i++) {
292         if (i < vars_total) {
293             // variable type must match
294             if (m_vars[i].type.bits != vars[i].type.bits) {
295                 LIBOFFLOAD_ERROR(c_merge_var_descs2);
296                 exit(1);
297             }
298 
299             m_vars[i].ptr = vars[i].ptr;
300             m_vars[i].into = vars[i].into;
301 
302             const char *var_sname = "";
303             if (vars2 != NULL) {
304                 if (vars2[i].sname != NULL) {
305                     var_sname = vars2[i].sname;
306                 }
307             }
308             OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
309                 "   VarDesc %d, var=%s, %s, %s\n",
310                 i, var_sname,
311                 vardesc_direction_as_string[m_vars[i].direction.bits],
312                 vardesc_type_as_string[m_vars[i].type.src]);
313             if (vars2 != NULL && vars2[i].dname != NULL) {
314                 OFFLOAD_TRACE(2, "              into=%s, %s\n", vars2[i].dname,
315                     vardesc_type_as_string[m_vars[i].type.dst]);
316             }
317         }
318         OFFLOAD_TRACE(2,
319             "              type_src=%d, type_dstn=%d, direction=%d, "
320             "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
321             "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
322             m_vars[i].type.src,
323             m_vars[i].type.dst,
324             m_vars[i].direction.bits,
325             m_vars[i].alloc_if,
326             m_vars[i].free_if,
327             m_vars[i].align,
328             m_vars[i].mic_offset,
329             m_vars[i].flags.bits,
330             m_vars[i].offset,
331             m_vars[i].size,
332             m_vars[i].count,
333             m_vars[i].ptr,
334             m_vars[i].into);
335     }
336 }
337 
scatter_copyin_data()338 void OffloadDescriptor::scatter_copyin_data()
339 {
340     OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
341 
342     OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
343                         m_in.get_buffer_start(),
344                         m_in.get_buffer_size());
345     OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
346                              m_in.get_buffer_size());
347 
348     // receive data
349     for (int i = 0; i < m_vars_total; i++) {
350         bool src_is_for_mic = (m_vars[i].direction.out ||
351                                m_vars[i].into == NULL);
352         void** ptr_addr = src_is_for_mic ?
353                           static_cast<void**>(m_vars[i].ptr) :
354                           static_cast<void**>(m_vars[i].into);
355         int type = src_is_for_mic ? m_vars[i].type.src :
356                                     m_vars[i].type.dst;
357         bool is_static = src_is_for_mic ?
358                          m_vars[i].flags.is_static :
359                          m_vars[i].flags.is_static_dstn;
360         void *ptr = NULL;
361 
362         if (m_vars[i].flags.alloc_disp) {
363             int64_t offset = 0;
364             m_in.receive_data(&offset, sizeof(offset));
365         }
366         if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
367             VAR_TYPE_IS_DV_DATA(type)) {
368             ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
369                   reinterpret_cast<ArrDesc*>(ptr_addr) :
370                   *reinterpret_cast<ArrDesc**>(ptr_addr);
371             ptr_addr = reinterpret_cast<void**>(&dvp->Base);
372         }
373         // Set pointer values
374         switch (type) {
375             case c_data_ptr_array:
376                 {
377                     int j = m_vars[i].ptr_arr_offset;
378                     int max_el = j + m_vars[i].count;
379                     char *dst_arr_ptr = (src_is_for_mic)?
380                         *(reinterpret_cast<char**>(m_vars[i].ptr)) :
381                         reinterpret_cast<char*>(m_vars[i].into);
382 
383                     if (m_vars[i].flags.is_pointer) {
384                         dst_arr_ptr = *((char**)dst_arr_ptr);
385                     }
386                     for (; j < max_el; j++) {
387                         if (src_is_for_mic) {
388                             m_vars[j].ptr =
389                                 dst_arr_ptr + m_vars[j].ptr_arr_offset;
390                         }
391                         else {
392                             m_vars[j].into =
393                                 dst_arr_ptr + m_vars[j].ptr_arr_offset;
394                         }
395                     }
396                 }
397                 break;
398             case c_data:
399             case c_void_ptr:
400             case c_cean_var:
401             case c_dv:
402                 break;
403 
404             case c_string_ptr:
405             case c_data_ptr:
406             case c_cean_var_ptr:
407             case c_dv_ptr:
408                 if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
409                     void *buf = NULL;
410                     if (m_vars[i].flags.sink_addr) {
411                         m_in.receive_data(&buf, sizeof(buf));
412                     }
413                     else {
414                         buf = m_buffers.front();
415                         m_buffers.pop_front();
416                     }
417                     if (buf) {
418                         if (!is_static) {
419                             if (!m_vars[i].flags.sink_addr) {
420                                 // increment buffer reference
421                                 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
422                                 BufferAddRef(buf);
423                                 OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
424                                 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
425                             }
426                             add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
427                             OFFLOAD_TRACE(1, "    AddRef count = %d\n",
428                                               ((RefInfo *) ref_data[buf])->count);
429                         }
430                         ptr = static_cast<char*>(buf) +
431                                   m_vars[i].mic_offset +
432                                   (m_vars[i].flags.is_stack_buf ?
433                                    0 : m_vars[i].offset);
434                     }
435                     *ptr_addr = ptr;
436                 }
437                 else if (m_vars[i].flags.sink_addr) {
438                     void *buf;
439                     m_in.receive_data(&buf, sizeof(buf));
440                     void *ptr = static_cast<char*>(buf) +
441                                     m_vars[i].mic_offset +
442                                     (m_vars[i].flags.is_stack_buf ?
443                                      0 : m_vars[i].offset);
444                     *ptr_addr = ptr;
445                 }
446                 break;
447 
448             case c_func_ptr:
449                 break;
450 
451             case c_dv_data:
452             case c_dv_ptr_data:
453             case c_dv_data_slice:
454             case c_dv_ptr_data_slice:
455                 if (m_vars[i].alloc_if) {
456                     void *buf;
457                     if (m_vars[i].flags.sink_addr) {
458                         m_in.receive_data(&buf, sizeof(buf));
459                     }
460                     else {
461                         buf = m_buffers.front();
462                         m_buffers.pop_front();
463                     }
464                     if (buf) {
465                         if (!is_static) {
466                             if (!m_vars[i].flags.sink_addr) {
467                                 // increment buffer reference
468                                 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
469                                 BufferAddRef(buf);
470                                 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
471                             }
472                             add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
473                         }
474                         ptr = static_cast<char*>(buf) +
475                             m_vars[i].mic_offset + m_vars[i].offset;
476                     }
477                     *ptr_addr = ptr;
478                 }
479                 else if (m_vars[i].flags.sink_addr) {
480                     void *buf;
481                     m_in.receive_data(&buf, sizeof(buf));
482                     ptr = static_cast<char*>(buf) +
483                           m_vars[i].mic_offset + m_vars[i].offset;
484                     *ptr_addr = ptr;
485                 }
486                 break;
487 
488             default:
489                 LIBOFFLOAD_ERROR(c_unknown_var_type, type);
490                 abort();
491         }
492         // Release obsolete buffers for stack of persistent objects
493         if (type = c_data_ptr &&
494             m_vars[i].flags.is_stack_buf &&
495             !m_vars[i].direction.bits &&
496             m_vars[i].alloc_if &&
497             m_vars[i].size != 0) {
498                 for (int j=0; j < m_vars[i].size; j++) {
499                     void *buf;
500                     m_in.receive_data(&buf, sizeof(buf));
501                     BufferReleaseRef(buf);
502                     ref_data.erase(buf);
503                 }
504         }
505         // Do copyin
506         switch (m_vars[i].type.dst) {
507             case c_data_ptr_array:
508                 break;
509             case c_data:
510             case c_void_ptr:
511             case c_cean_var:
512                 if (m_vars[i].direction.in &&
513                     !m_vars[i].flags.is_static_dstn) {
514                     int64_t size;
515                     int64_t disp;
516                     char* ptr = m_vars[i].into ?
517                                  static_cast<char*>(m_vars[i].into) :
518                                  static_cast<char*>(m_vars[i].ptr);
519                     if (m_vars[i].type.dst == c_cean_var) {
520                         m_in.receive_data((&size), sizeof(int64_t));
521                         m_in.receive_data((&disp), sizeof(int64_t));
522                     }
523                     else {
524                         size = m_vars[i].size;
525                         disp = 0;
526                     }
527                     m_in.receive_data(ptr + disp, size);
528                 }
529                 break;
530 
531             case c_dv:
532                 if (m_vars[i].direction.bits ||
533                     m_vars[i].alloc_if ||
534                     m_vars[i].free_if) {
535                     char* ptr = m_vars[i].into ?
536                                  static_cast<char*>(m_vars[i].into) :
537                                  static_cast<char*>(m_vars[i].ptr);
538                     m_in.receive_data(ptr + sizeof(uint64_t),
539                                       m_vars[i].size - sizeof(uint64_t));
540                 }
541                 break;
542 
543             case c_string_ptr:
544             case c_data_ptr:
545             case c_cean_var_ptr:
546             case c_dv_ptr:
547             case c_dv_data:
548             case c_dv_ptr_data:
549             case c_dv_data_slice:
550             case c_dv_ptr_data_slice:
551                 break;
552 
553             case c_func_ptr:
554                 if (m_vars[i].direction.in) {
555                     m_in.receive_func_ptr((const void**) m_vars[i].ptr);
556                 }
557                 break;
558 
559             default:
560                 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
561                 abort();
562         }
563     }
564 
565     OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
566                   m_in.get_tfr_size());
567 
568     OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
569 
570     OFFLOAD_TIMER_START(c_offload_target_compute);
571 }
572 
gather_copyout_data()573 void OffloadDescriptor::gather_copyout_data()
574 {
575     OFFLOAD_TIMER_STOP(c_offload_target_compute);
576 
577     OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
578 
579     for (int i = 0; i < m_vars_total; i++) {
580         bool src_is_for_mic = (m_vars[i].direction.out ||
581                                m_vars[i].into == NULL);
582 
583         switch (m_vars[i].type.src) {
584             case c_data_ptr_array:
585                 break;
586             case c_data:
587             case c_void_ptr:
588             case c_cean_var:
589                 if (m_vars[i].direction.out &&
590                     !m_vars[i].flags.is_static) {
591                     m_out.send_data(
592                         static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
593                         m_vars[i].size);
594                 }
595                 break;
596 
597             case c_dv:
598                 break;
599 
600             case c_string_ptr:
601             case c_data_ptr:
602             case c_cean_var_ptr:
603             case c_dv_ptr:
604                 if (m_vars[i].free_if &&
605                     src_is_for_mic &&
606                     !m_vars[i].flags.preallocated &&
607                     !m_vars[i].flags.is_static) {
608                     void *buf = *static_cast<char**>(m_vars[i].ptr) -
609                                     m_vars[i].mic_offset -
610                                     (m_vars[i].flags.is_stack_buf?
611                                      0 : m_vars[i].offset);
612                     if (buf == NULL) {
613                         break;
614                     }
615                     // decrement buffer reference count
616                     OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
617                     BufReleaseRef(buf);
618                     OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
619                 }
620                 if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
621                     m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
622                 }
623                 break;
624 
625             case c_func_ptr:
626                 if (m_vars[i].direction.out) {
627                     m_out.send_func_ptr(*((void**) m_vars[i].ptr));
628                 }
629                 break;
630 
631             case c_dv_data:
632             case c_dv_ptr_data:
633             case c_dv_data_slice:
634             case c_dv_ptr_data_slice:
635                 if (src_is_for_mic &&
636                     m_vars[i].free_if &&
637                     !m_vars[i].flags.is_static) {
638                     ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
639                                     m_vars[i].type.src == c_dv_data_slice) ?
640                         static_cast<ArrDesc*>(m_vars[i].ptr) :
641                         *static_cast<ArrDesc**>(m_vars[i].ptr);
642 
643                     void *buf = reinterpret_cast<char*>(dvp->Base) -
644                                 m_vars[i].mic_offset -
645                                 m_vars[i].offset;
646 
647                     if (buf == NULL) {
648                         break;
649                     }
650 
651                     // decrement buffer reference count
652                     OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
653                     BufReleaseRef(buf);
654                     OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
655                 }
656                 break;
657 
658             default:
659                 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
660                 abort();
661         }
662 
663         if (m_vars[i].into) {
664             switch (m_vars[i].type.dst) {
665                 case c_data_ptr_array:
666                     break;
667                 case c_data:
668                 case c_void_ptr:
669                 case c_cean_var:
670                 case c_dv:
671                     break;
672 
673                 case c_string_ptr:
674                 case c_data_ptr:
675                 case c_cean_var_ptr:
676                 case c_dv_ptr:
677                     if (m_vars[i].direction.in &&
678                         m_vars[i].free_if &&
679                         !m_vars[i].flags.is_static_dstn) {
680                         void *buf = *static_cast<char**>(m_vars[i].into) -
681                                     m_vars[i].mic_offset -
682                                     (m_vars[i].flags.is_stack_buf?
683                                      0 : m_vars[i].offset);
684 
685                         if (buf == NULL) {
686                             break;
687                         }
688                         // decrement buffer reference count
689                         OFFLOAD_TIMER_START(
690                             c_offload_target_release_buffer_refs);
691                         BufReleaseRef(buf);
692                         OFFLOAD_TIMER_STOP(
693                             c_offload_target_release_buffer_refs);
694                     }
695                     break;
696 
697                 case c_func_ptr:
698                     break;
699 
700                 case c_dv_data:
701                 case c_dv_ptr_data:
702                 case c_dv_data_slice:
703                 case c_dv_ptr_data_slice:
704                     if (m_vars[i].free_if &&
705                         m_vars[i].direction.in &&
706                         !m_vars[i].flags.is_static_dstn) {
707                         ArrDesc *dvp =
708                             (m_vars[i].type.dst == c_dv_data_slice ||
709                              m_vars[i].type.dst == c_dv_data) ?
710                             static_cast<ArrDesc*>(m_vars[i].into) :
711                             *static_cast<ArrDesc**>(m_vars[i].into);
712                         void *buf = reinterpret_cast<char*>(dvp->Base) -
713                               m_vars[i].mic_offset -
714                               m_vars[i].offset;
715 
716                         if (buf == NULL) {
717                             break;
718                         }
719                         // decrement buffer reference count
720                         OFFLOAD_TIMER_START(
721                             c_offload_target_release_buffer_refs);
722                         BufReleaseRef(buf);
723                         OFFLOAD_TIMER_STOP(
724                             c_offload_target_release_buffer_refs);
725                     }
726                     break;
727 
728                 default:
729                     LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
730                     abort();
731             }
732         }
733     }
734 
735     OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
736                         m_out.get_buffer_start(),
737                         m_out.get_buffer_size());
738 
739     OFFLOAD_DEBUG_DUMP_BYTES(2,
740                              m_out.get_buffer_start(),
741                              m_out.get_buffer_size());
742 
743     OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
744                   "Total copyout data sent to host: [%lld] bytes\n",
745                   m_out.get_tfr_size());
746 
747     OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
748 }
749 
__offload_target_init(void)750 void __offload_target_init(void)
751 {
752 #ifdef SEP_SUPPORT
753     const char* env_var = getenv(sep_monitor_env);
754     if (env_var != 0 && *env_var != '\0') {
755         sep_monitor = atoi(env_var);
756     }
757     env_var = getenv(sep_device_env);
758     if (env_var != 0 && *env_var != '\0') {
759         sep_device = env_var;
760     }
761 #endif // SEP_SUPPORT
762 
763     prefix = report_get_message_str(c_report_mic);
764 
765     // init frequency
766     mic_frequency = COIPerfGetCycleFrequency();
767 }
768 
769 // User-visible offload API
770 
_Offload_number_of_devices(void)771 int _Offload_number_of_devices(void)
772 {
773     return mic_engines_total;
774 }
775 
_Offload_get_device_number(void)776 int _Offload_get_device_number(void)
777 {
778     return mic_index;
779 }
780 
_Offload_get_physical_device_number(void)781 int _Offload_get_physical_device_number(void)
782 {
783     uint32_t index;
784     EngineGetIndex(&index);
785     return index;
786 }
787