1 /* Plugin for offload execution on Intel MIC devices.
2 
3    Copyright (C) 2014-2016 Free Software Foundation, Inc.
4 
5    Contributed by Ilya Verbin <ilya.verbin@intel.com>.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 /* Target side part of a libgomp plugin.  */
30 
31 #include <stdint.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "compiler_if_target.h"
35 
36 
/* Debug tracing.

   When DEBUG is defined, TRACE prints "TARGET:", the current file and
   function name, and then a printf-style message followed by a newline to
   stderr.  Otherwise it expands to an empty statement and its arguments are
   not evaluated.

   Both variants are variadic and wrapped in do { } while (0), so that
   "TRACE (...);" is exactly one statement and can be used as the unbraced
   body of an if/else.  */
#ifdef DEBUG
#define TRACE(...)                                                  \
do                                                                  \
  {                                                                 \
    fprintf (stderr, "TARGET:\t%s:%s ", __FILE__, __FUNCTION__);    \
    fprintf (stderr, __VA_ARGS__);                                  \
    fprintf (stderr, "\n");                                         \
  }                                                                 \
while (0)
#else
#define TRACE(...) do { } while (0)
#endif
47 
48 
49 static VarDesc vd_host2tgt = {
50   { 1, 1 },		      /* dst, src			      */
51   { 1, 0 },		      /* in, out			      */
52   1,			      /* alloc_if			      */
53   1,			      /* free_if			      */
54   4,			      /* align				      */
55   0,			      /* mic_offset			      */
56   { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
57 				 is_stack_buf, sink_addr, alloc_disp,
58 				 is_noncont_src, is_noncont_dst	      */
59   0,			      /* offset				      */
60   0,			      /* size				      */
61   1,			      /* count				      */
62   0,			      /* alloc				      */
63   0,			      /* into				      */
64   0			      /* ptr				      */
65 };
66 
67 static VarDesc vd_tgt2host = {
68   { 1, 1 },		      /* dst, src			      */
69   { 0, 1 },		      /* in, out			      */
70   1,			      /* alloc_if			      */
71   1,			      /* free_if			      */
72   4,			      /* align				      */
73   0,			      /* mic_offset			      */
74   { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
75 				 is_stack_buf, sink_addr, alloc_disp,
76 				 is_noncont_src, is_noncont_dst	      */
77   0,			      /* offset				      */
78   0,			      /* size				      */
79   1,			      /* count				      */
80   0,			      /* alloc				      */
81   0,			      /* into				      */
82   0			      /* ptr				      */
83 };
84 
/* Descriptor of the most recently registered shared library.  It is stored
   by target_register_lib and read by __offload_target_table_p[12].  */
static void *last_loaded_library = NULL;

/* Address and size of the variable that is being transferred.  They are
   recorded by __offload_target_host2tgt_p1 / __offload_target_tgt2host_p1
   and consumed by the corresponding _p2 function.
   NOTE(review): the size arrives as a size_t but is kept in an int, so a
   value above INT_MAX would not be preserved -- confirm this cannot occur.  */
static void *last_var_ptr = NULL;
static int last_var_size = 0;
92 
93 
94 /* Override the corresponding functions from libgomp.  */
95 extern "C" int
omp_is_initial_device(void)96 omp_is_initial_device (void) __GOMP_NOTHROW
97 {
98   return 0;
99 }
100 
101 extern "C" int32_t
omp_is_initial_device_(void)102 omp_is_initial_device_ (void)
103 {
104   return omp_is_initial_device ();
105 }
106 
107 
108 /* Dummy function needed for the initialization of target process during the
109    first call to __offload_offload1.  */
110 static void
__offload_target_init_proc(OFFLOAD ofldt)111 __offload_target_init_proc (OFFLOAD ofldt)
112 {
113   TRACE ("");
114 }
115 
116 /* Collect addresses of the offload functions and of the global variables from
117    the library descriptor and send them to host.
118    Part 1: Send num_funcs and num_vars to host.  */
119 static void
__offload_target_table_p1(OFFLOAD ofldt)120 __offload_target_table_p1 (OFFLOAD ofldt)
121 {
122   void ***lib_descr = (void ***) last_loaded_library;
123 
124   if (lib_descr == NULL)
125     {
126       TRACE ("");
127       fprintf (stderr, "Error! No shared libraries loaded on target.\n");
128       return;
129     }
130 
131   void **func_table_begin = lib_descr[0];
132   void **func_table_end   = lib_descr[1];
133   void **var_table_begin  = lib_descr[2];
134   void **var_table_end    = lib_descr[3];
135 
136   /* The func table contains only addresses, the var table contains addresses
137      and corresponding sizes.  */
138   int num_funcs = func_table_end - func_table_begin;
139   int num_vars = (var_table_end - var_table_begin) / 2;
140   TRACE ("(num_funcs = %d, num_vars = %d)", num_funcs, num_vars);
141 
142   VarDesc vd[2] = { vd_tgt2host, vd_tgt2host };
143   vd[0].ptr = &num_funcs;
144   vd[0].size = sizeof (num_funcs);
145   vd[1].ptr = &num_vars;
146   vd[1].size = sizeof (num_vars);
147 
148   __offload_target_enter (ofldt, 2, vd, NULL);
149   __offload_target_leave (ofldt);
150 }
151 
152 /* Part 2: Send the table with addresses to host.  */
153 static void
__offload_target_table_p2(OFFLOAD ofldt)154 __offload_target_table_p2 (OFFLOAD ofldt)
155 {
156   void ***lib_descr = (void ***) last_loaded_library;
157   void **func_table_begin = lib_descr[0];
158   void **func_table_end   = lib_descr[1];
159   void **var_table_begin  = lib_descr[2];
160   void **var_table_end    = lib_descr[3];
161 
162   int num_funcs = func_table_end - func_table_begin;
163   int num_vars = (var_table_end - var_table_begin) / 2;
164   int table_size = (num_funcs + 2 * num_vars) * sizeof (void *);
165   void **table = (void **) malloc (table_size);
166   TRACE ("(table_size = %d)", table_size);
167 
168   VarDesc vd = vd_tgt2host;
169   vd.ptr = table;
170   vd.size = table_size;
171 
172   __offload_target_enter (ofldt, 1, &vd, NULL);
173 
174   void **p;
175   int i = 0;
176   for (p = func_table_begin; p < func_table_end; p++, i++)
177     table[i] = *p;
178 
179   for (p = var_table_begin; p < var_table_end; p++, i++)
180     table[i] = *p;
181 
182   __offload_target_leave (ofldt);
183   free (table);
184 }
185 
186 /* Allocate size bytes and send a pointer to the allocated memory to host.  */
187 static void
__offload_target_alloc(OFFLOAD ofldt)188 __offload_target_alloc (OFFLOAD ofldt)
189 {
190   size_t size = 0;
191   void *ptr = NULL;
192 
193   VarDesc vd[2] = { vd_host2tgt, vd_tgt2host };
194   vd[0].ptr = &size;
195   vd[0].size = sizeof (size);
196   vd[1].ptr = &ptr;
197   vd[1].size = sizeof (void *);
198 
199   __offload_target_enter (ofldt, 2, vd, NULL);
200   ptr = malloc (size);
201   TRACE ("(size = %d): ptr = %p", size, ptr);
202   __offload_target_leave (ofldt);
203 }
204 
205 /* Free the memory space pointed to by ptr.  */
206 static void
__offload_target_free(OFFLOAD ofldt)207 __offload_target_free (OFFLOAD ofldt)
208 {
209   void *ptr = 0;
210 
211   VarDesc vd = vd_host2tgt;
212   vd.ptr = &ptr;
213   vd.size = sizeof (void *);
214 
215   __offload_target_enter (ofldt, 1, &vd, NULL);
216   TRACE ("(ptr = %p)", ptr);
217   free (ptr);
218   __offload_target_leave (ofldt);
219 }
220 
221 /* Receive var_size bytes from host and store to var_ptr.
222    Part 1: Receive var_ptr and var_size from host.  */
223 static void
__offload_target_host2tgt_p1(OFFLOAD ofldt)224 __offload_target_host2tgt_p1 (OFFLOAD ofldt)
225 {
226   void *var_ptr = NULL;
227   size_t var_size = 0;
228 
229   VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
230   vd[0].ptr = &var_ptr;
231   vd[0].size = sizeof (void *);
232   vd[1].ptr = &var_size;
233   vd[1].size = sizeof (var_size);
234 
235   __offload_target_enter (ofldt, 2, vd, NULL);
236   TRACE ("(var_ptr = %p, var_size = %d)", var_ptr, var_size);
237   last_var_ptr = var_ptr;
238   last_var_size = var_size;
239   __offload_target_leave (ofldt);
240 }
241 
242 /* Part 2: Receive the data from host.  */
243 static void
__offload_target_host2tgt_p2(OFFLOAD ofldt)244 __offload_target_host2tgt_p2 (OFFLOAD ofldt)
245 {
246   TRACE ("(last_var_ptr = %p, last_var_size = %d)",
247 	 last_var_ptr, last_var_size);
248 
249   VarDesc vd = vd_host2tgt;
250   vd.ptr = last_var_ptr;
251   vd.size = last_var_size;
252 
253   __offload_target_enter (ofldt, 1, &vd, NULL);
254   __offload_target_leave (ofldt);
255 }
256 
257 /* Send var_size bytes from var_ptr to host.
258    Part 1: Receive var_ptr and var_size from host.  */
259 static void
__offload_target_tgt2host_p1(OFFLOAD ofldt)260 __offload_target_tgt2host_p1 (OFFLOAD ofldt)
261 {
262   void *var_ptr = NULL;
263   size_t var_size = 0;
264 
265   VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
266   vd[0].ptr = &var_ptr;
267   vd[0].size = sizeof (void *);
268   vd[1].ptr = &var_size;
269   vd[1].size = sizeof (var_size);
270 
271   __offload_target_enter (ofldt, 2, vd, NULL);
272   TRACE ("(var_ptr = %p, var_size = %d)", var_ptr, var_size);
273   last_var_ptr = var_ptr;
274   last_var_size = var_size;
275   __offload_target_leave (ofldt);
276 }
277 
278 /* Part 2: Send the data to host.  */
279 static void
__offload_target_tgt2host_p2(OFFLOAD ofldt)280 __offload_target_tgt2host_p2 (OFFLOAD ofldt)
281 {
282   TRACE ("(last_var_ptr = %p, last_var_size = %d)",
283 	 last_var_ptr, last_var_size);
284 
285   VarDesc vd = vd_tgt2host;
286   vd.ptr = last_var_ptr;
287   vd.size = last_var_size;
288 
289   __offload_target_enter (ofldt, 1, &vd, NULL);
290   __offload_target_leave (ofldt);
291 }
292 
293 /* Copy SIZE bytes from SRC_PTR to DST_PTR.  */
294 static void
__offload_target_tgt2tgt(OFFLOAD ofldt)295 __offload_target_tgt2tgt (OFFLOAD ofldt)
296 {
297   void *src_ptr = NULL;
298   void *dst_ptr = NULL;
299   size_t size = 0;
300 
301   VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
302   vd[0].ptr = &dst_ptr;
303   vd[0].size = sizeof (void *);
304   vd[1].ptr = &src_ptr;
305   vd[1].size = sizeof (void *);
306   vd[2].ptr = &size;
307   vd[2].size = sizeof (size);
308 
309   __offload_target_enter (ofldt, 3, vd, NULL);
310   TRACE ("(dst_ptr = %p, src_ptr = %p, size = %d)", dst_ptr, src_ptr, size);
311   memcpy (dst_ptr, src_ptr, size);
312   __offload_target_leave (ofldt);
313 }
314 
315 /* Call offload function by the address fn_ptr and pass vars_ptr to it.  */
316 static void
__offload_target_run(OFFLOAD ofldt)317 __offload_target_run (OFFLOAD ofldt)
318 {
319   void *fn_ptr;
320   void *vars_ptr;
321 
322   VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
323   vd[0].ptr = &fn_ptr;
324   vd[0].size = sizeof (void *);
325   vd[1].ptr = &vars_ptr;
326   vd[1].size = sizeof (void *);
327 
328   __offload_target_enter (ofldt, 2, vd, NULL);
329   TRACE ("(fn_ptr = %p, vars_ptr = %p)", fn_ptr, vars_ptr);
330   void (*fn)(void *) = (void (*)(void *)) fn_ptr;
331   fn (vars_ptr);
332   __offload_target_leave (ofldt);
333 }
334 
335 
336 /* This should be called from every library with offloading.  */
337 extern "C" void
target_register_lib(const void * target_table)338 target_register_lib (const void *target_table)
339 {
340   TRACE ("(target_table = %p { %p, %p, %p, %p })", target_table,
341 	 ((void **) target_table)[0], ((void **) target_table)[1],
342 	 ((void **) target_table)[2], ((void **) target_table)[3]);
343 
344   last_loaded_library = (void *) target_table;
345 }
346 
347 /* Use __offload_target_main from liboffload.  */
348 int
main(int argc,char ** argv)349 main (int argc, char **argv)
350 {
351   __offload_target_main ();
352   return 0;
353 }
354 
355 
/* Register offload_target_main's functions in the liboffload.  */

/* One registration record: a symbol name and the address of the function
   registered under that name.  */
struct Entry {
  /* Name under which the function is registered.  */
  const char *name;
  /* Address of the function, stored as an untyped pointer.  */
  void *func;
};
362 
363 #define REGISTER(f)				      \
364 extern "C" const Entry __offload_target_##f##_$entry  \
365 __attribute__ ((section(".OffloadEntryTable."))) = {  \
366   "__offload_target_"#f,			      \
367   (void *) __offload_target_##f			      \
368 }
369 REGISTER (init_proc);
370 REGISTER (table_p1);
371 REGISTER (table_p2);
372 REGISTER (alloc);
373 REGISTER (free);
374 REGISTER (host2tgt_p1);
375 REGISTER (host2tgt_p2);
376 REGISTER (tgt2host_p1);
377 REGISTER (tgt2host_p2);
378 REGISTER (tgt2tgt);
379 REGISTER (run);
380 #undef REGISTER
381