1 /* Plugin for offload execution on Intel MIC devices.
2 
3    Copyright (C) 2014-2016 Free Software Foundation, Inc.
4 
5    Contributed by Ilya Verbin <ilya.verbin@intel.com>.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 /* Host side part of a libgomp plugin.  */
30 
31 #include <stdint.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <utility>
36 #include <vector>
37 #include <map>
38 #include "libgomp-plugin.h"
39 #include "compiler_if_host.h"
40 #include "main_target_image.h"
41 #include "gomp-constants.h"
42 
/* Name of the environment variable that init () defaults to "0" when the
   user has not set it, in order to disable active wait.  */
#define OFFLOAD_ACTIVE_WAIT_ENV	"OFFLOAD_ACTIVE_WAIT"
44 
/* TRACE (fmt, ...) emits a printf-style debug message on stderr, prefixed
   with "HOST:", the source file name and the enclosing function, when DEBUG
   is defined.  Without DEBUG it expands to an empty statement and its
   arguments are not evaluated.

   Both variants are function-like and wrapped in do { } while (0), so that
   "TRACE (...);" is exactly one statement and remains correct inside an
   unbraced if/else.  */
#ifdef DEBUG
#define TRACE(...)						    \
do								    \
  {								    \
    fprintf (stderr, "HOST:\t%s:%s ", __FILE__, __FUNCTION__);	    \
    fprintf (stderr, __VA_ARGS__);				    \
    fprintf (stderr, "\n");					    \
  }								    \
while (0)
#else
#define TRACE(...) do { } while (0)
#endif
55 
56 
/* Start/end addresses of functions and global variables on a device.
   offload_image () appends one entry per function first, then one entry
   per variable.  */
typedef std::vector<addr_pair> AddrVect;

/* Addresses for one image and all devices; the element index is the device
   number (offload_image () pushes one AddrVect per device, in order).  */
typedef std::vector<AddrVect> DevAddrVect;

/* Addresses for all images and all devices, keyed by the image pointer
   obtained from libgomp.  */
typedef std::map<const void *, DevAddrVect> ImgDevAddrMap;
65 
/* Image descriptor needed by __offload_[un]register_image.  It is allocated
   with malloc in offload_image () as a header followed by a copy of the
   image bytes in the flexible DATA member.  */
struct TargetImageDesc {
  /* Number of bytes stored in DATA.  */
  int64_t size;
  /* Name generated in offload_image () as "lib%010d.so".
     10 characters is enough for max int value; the sizeof of the string
     literal also accounts for the terminating NUL.  */
  char name[sizeof ("lib0000000000.so")];
  /* Copy of the target image contents.  */
  char data[];
};

/* Image descriptors, indexed by a pointer obtained from libgomp.  */
typedef std::map<const void *, TargetImageDesc *> ImgDescMap;
76 
77 
/* Total number of available devices, as reported by
   _Offload_number_of_devices () in init ().  */
static int num_devices;

/* Total number of shared libraries with offloading to Intel MIC.  It is
   incremented in offload_image () and used there to generate a distinct
   name for every image descriptor.  */
static int num_images;

/* Two dimensional array: one key is a pointer to image,
   second key is number of device.  Contains a vector of pointer pairs.
   Allocated in init (), filled by offload_image () and pruned by
   GOMP_OFFLOAD_unload_image ().  */
static ImgDevAddrMap *address_table;

/* Descriptors of all images, registered in liboffloadmic.  The mapped
   descriptors are malloc'ed in offload_image () and freed in
   GOMP_OFFLOAD_unload_image ().  */
static ImgDescMap *image_descriptors;

/* Thread-safe registration of the main image: passed to pthread_once in
   GOMP_OFFLOAD_init_device () together with register_main_image.  */
static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
93 
/* Template variable descriptor with the "in" direction bit set and "out"
   cleared.  Functions in this file copy it and then fill in PTR and SIZE
   for every value that is passed to an offloaded target function.  */
static VarDesc vd_host2tgt = {
  { 1, 1 },		      /* dst, src			      */
  { 1, 0 },		      /* in, out			      */
  1,			      /* alloc_if			      */
  1,			      /* free_if			      */
  4,			      /* align				      */
  0,			      /* mic_offset			      */
  { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
				 is_stack_buf, sink_addr, alloc_disp,
				 is_noncont_src, is_noncont_dst	      */
  0,			      /* offset				      */
  0,			      /* size				      */
  1,			      /* count				      */
  0,			      /* alloc				      */
  0,			      /* into				      */
  0			      /* ptr				      */
};
111 
/* Template variable descriptor with the "out" direction bit set and "in"
   cleared.  Functions in this file copy it and then fill in PTR and SIZE
   for every value that is received from an offloaded target function.  */
static VarDesc vd_tgt2host = {
  { 1, 1 },		      /* dst, src			      */
  { 0, 1 },		      /* in, out			      */
  1,			      /* alloc_if			      */
  1,			      /* free_if			      */
  4,			      /* align				      */
  0,			      /* mic_offset			      */
  { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
				 is_stack_buf, sink_addr, alloc_disp,
				 is_noncont_src, is_noncont_dst	      */
  0,			      /* offset				      */
  0,			      /* size				      */
  1,			      /* count				      */
  0,			      /* alloc				      */
  0,			      /* into				      */
  0			      /* ptr				      */
};
129 
130 
131 __attribute__((constructor))
132 static void
init(void)133 init (void)
134 {
135   const char *active_wait = getenv (OFFLOAD_ACTIVE_WAIT_ENV);
136 
137   /* Disable active wait by default to avoid useless CPU usage.  */
138   if (!active_wait)
139     setenv (OFFLOAD_ACTIVE_WAIT_ENV, "0", 0);
140 
141   address_table = new ImgDevAddrMap;
142   image_descriptors = new ImgDescMap;
143   num_devices = _Offload_number_of_devices ();
144 }
145 
146 extern "C" const char *
GOMP_OFFLOAD_get_name(void)147 GOMP_OFFLOAD_get_name (void)
148 {
149   const char *res = "intelmic";
150   TRACE ("(): return %s", res);
151   return res;
152 }
153 
154 extern "C" unsigned int
GOMP_OFFLOAD_get_caps(void)155 GOMP_OFFLOAD_get_caps (void)
156 {
157   unsigned int res = GOMP_OFFLOAD_CAP_OPENMP_400;
158   TRACE ("(): return %x", res);
159   return res;
160 }
161 
162 extern "C" int
GOMP_OFFLOAD_get_type(void)163 GOMP_OFFLOAD_get_type (void)
164 {
165   enum offload_target_type res = OFFLOAD_TARGET_TYPE_INTEL_MIC;
166   TRACE ("(): return %d", res);
167   return res;
168 }
169 
170 extern "C" int
GOMP_OFFLOAD_get_num_devices(void)171 GOMP_OFFLOAD_get_num_devices (void)
172 {
173   TRACE ("(): return %d", num_devices);
174   return num_devices;
175 }
176 
177 static bool
offload(const char * file,uint64_t line,int device,const char * name,int num_vars,VarDesc * vars,const void ** async_data)178 offload (const char *file, uint64_t line, int device, const char *name,
179 	 int num_vars, VarDesc *vars, const void **async_data)
180 {
181   OFFLOAD ofld = __offload_target_acquire1 (&device, file, line);
182   if (ofld)
183     {
184       if (async_data == NULL)
185 	return __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0,
186 				   NULL, NULL);
187       else
188 	{
189 	  OffloadFlags flags;
190 	  flags.flags = 0;
191 	  flags.bits.omp_async = 1;
192 	  return __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0,
193 				     NULL, async_data, 0, NULL, flags, NULL);
194 	}
195     }
196   else
197     {
198       GOMP_PLUGIN_error ("%s:%d: Offload target acquire failed\n", file, line);
199       return false;
200     }
201 }
202 
203 static void
register_main_image()204 register_main_image ()
205 {
206   /* Do not check the return value, because old versions of liboffloadmic did
207      not have return values.  */
208   __offload_register_image (&main_target_image);
209 
210   /* liboffloadmic will call GOMP_PLUGIN_target_task_completion when
211      asynchronous task on target is completed.  */
212   __offload_register_task_callback (GOMP_PLUGIN_target_task_completion);
213 }
214 
215 /* liboffloadmic loads and runs offload_target_main on all available devices
216    during a first call to offload ().  */
217 extern "C" bool
GOMP_OFFLOAD_init_device(int device)218 GOMP_OFFLOAD_init_device (int device)
219 {
220   TRACE ("(device = %d)", device);
221   pthread_once (&main_image_is_registered, register_main_image);
222   return offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
223 		  NULL, NULL);
224 }
225 
226 extern "C" bool
GOMP_OFFLOAD_fini_device(int device)227 GOMP_OFFLOAD_fini_device (int device)
228 {
229   TRACE ("(device = %d)", device);
230 
231   /* liboffloadmic will finalize target processes on all available devices.  */
232   __offload_unregister_image (&main_target_image);
233   return true;
234 }
235 
236 static bool
get_target_table(int device,int & num_funcs,int & num_vars,void ** & table)237 get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
238 {
239   VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host };
240   vd1[0].ptr = &num_funcs;
241   vd1[0].size = sizeof (num_funcs);
242   vd1[1].ptr = &num_vars;
243   vd1[1].size = sizeof (num_vars);
244 
245   if (!offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
246 		vd1, NULL))
247     return false;
248 
249   int table_size = num_funcs + 2 * num_vars;
250   if (table_size > 0)
251     {
252       table = new void * [table_size];
253 
254       VarDesc vd2;
255       vd2 = vd_tgt2host;
256       vd2.ptr = table;
257       vd2.size = table_size * sizeof (void *);
258 
259       return offload (__FILE__, __LINE__, device, "__offload_target_table_p2",
260 		      1, &vd2, NULL);
261     }
262   return true;
263 }
264 
265 /* Offload TARGET_IMAGE to all available devices and fill address_table with
266    corresponding target addresses.  */
267 
268 static bool
offload_image(const void * target_image)269 offload_image (const void *target_image)
270 {
271   void *image_start = ((void **) target_image)[0];
272   void *image_end   = ((void **) target_image)[1];
273 
274   TRACE ("(target_image = %p { %p, %p })",
275 	 target_image, image_start, image_end);
276 
277   int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
278   TargetImageDesc *image = (TargetImageDesc *) malloc (offsetof (TargetImageDesc, data)
279 						       + image_size);
280   if (!image)
281     {
282       GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
283       return false;
284     }
285 
286   image->size = image_size;
287   sprintf (image->name, "lib%010d.so", num_images++);
288   memcpy (image->data, image_start, image->size);
289 
290   TRACE ("() __offload_register_image %s { %p, %d }",
291 	 image->name, image_start, image->size);
292   /* Do not check the return value, because old versions of liboffloadmic did
293      not have return values.  */
294   __offload_register_image (image);
295 
296   /* Receive tables for target_image from all devices.  */
297   DevAddrVect dev_table;
298   bool ret = true;
299   for (int dev = 0; dev < num_devices; dev++)
300     {
301       int num_funcs = 0;
302       int num_vars = 0;
303       void **table = NULL;
304 
305       ret &= get_target_table (dev, num_funcs, num_vars, table);
306 
307       AddrVect curr_dev_table;
308 
309       for (int i = 0; i < num_funcs; i++)
310 	{
311 	  addr_pair tgt_addr;
312 	  tgt_addr.start = (uintptr_t) table[i];
313 	  tgt_addr.end = tgt_addr.start + 1;
314 	  TRACE ("() func %d:\t0x%llx..0x%llx", i,
315 		 tgt_addr.start, tgt_addr.end);
316 	  curr_dev_table.push_back (tgt_addr);
317 	}
318 
319       for (int i = 0; i < num_vars; i++)
320 	{
321 	  addr_pair tgt_addr;
322 	  tgt_addr.start = (uintptr_t) table[num_funcs+i*2];
323 	  tgt_addr.end = tgt_addr.start + (uintptr_t) table[num_funcs+i*2+1];
324 	  TRACE ("() var %d:\t0x%llx..0x%llx", i, tgt_addr.start, tgt_addr.end);
325 	  curr_dev_table.push_back (tgt_addr);
326 	}
327 
328       dev_table.push_back (curr_dev_table);
329       delete [] table;
330     }
331 
332   address_table->insert (std::make_pair (target_image, dev_table));
333   image_descriptors->insert (std::make_pair (target_image, image));
334   return ret;
335 }
336 
337 /* Return the libgomp version number we're compatible with.  There is
338    no requirement for cross-version compatibility.  */
339 
340 extern "C" unsigned
GOMP_OFFLOAD_version(void)341 GOMP_OFFLOAD_version (void)
342 {
343   return GOMP_VERSION;
344 }
345 
346 extern "C" int
GOMP_OFFLOAD_load_image(int device,const unsigned version,const void * target_image,addr_pair ** result)347 GOMP_OFFLOAD_load_image (int device, const unsigned version,
348 			 const void *target_image, addr_pair **result)
349 {
350   TRACE ("(device = %d, target_image = %p)", device, target_image);
351 
352   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
353     {
354       GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
355 			 " (expected %u, received %u)",
356 			 GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
357       return -1;
358     }
359 
360   /* If target_image is already present in address_table, then there is no need
361      to offload it.  */
362   if (address_table->count (target_image) == 0)
363     {
364       /* If fail, return -1 as error code.  */
365       if (!offload_image (target_image))
366 	return -1;
367     }
368 
369   AddrVect *curr_dev_table = &(*address_table)[target_image][device];
370   int table_size = curr_dev_table->size ();
371   addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
372   if (table == NULL)
373     {
374       GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
375       return -1;
376     }
377 
378   std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
379   *result = table;
380   return table_size;
381 }
382 
383 extern "C" bool
GOMP_OFFLOAD_unload_image(int device,unsigned version,const void * target_image)384 GOMP_OFFLOAD_unload_image (int device, unsigned version,
385 			   const void *target_image)
386 {
387   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
388     {
389       GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
390 			 " (expected %u, received %u)",
391 			 GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
392       return false;
393     }
394 
395   TRACE ("(device = %d, target_image = %p)", device, target_image);
396 
397   /* liboffloadmic unloads the image from all available devices.  */
398   if (image_descriptors->count (target_image) > 0)
399     {
400       TargetImageDesc *image_desc = (*image_descriptors)[target_image];
401       __offload_unregister_image (image_desc);
402       free (image_desc);
403 
404       address_table->erase (target_image);
405       image_descriptors->erase (target_image);
406     }
407   return true;
408 }
409 
410 extern "C" void *
GOMP_OFFLOAD_alloc(int device,size_t size)411 GOMP_OFFLOAD_alloc (int device, size_t size)
412 {
413   TRACE ("(device = %d, size = %d)", device, size);
414 
415   void *tgt_ptr;
416   VarDesc vd[2] = { vd_host2tgt, vd_tgt2host };
417   vd[0].ptr = &size;
418   vd[0].size = sizeof (size);
419   vd[1].ptr = &tgt_ptr;
420   vd[1].size = sizeof (void *);
421 
422   if (!offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2,
423 		vd, NULL))
424     return NULL;
425 
426   return tgt_ptr;
427 }
428 
429 extern "C" bool
GOMP_OFFLOAD_free(int device,void * tgt_ptr)430 GOMP_OFFLOAD_free (int device, void *tgt_ptr)
431 {
432   TRACE ("(device = %d, tgt_ptr = %p)", device, tgt_ptr);
433 
434   VarDesc vd = vd_host2tgt;
435   vd.ptr = &tgt_ptr;
436   vd.size = sizeof (void *);
437 
438   return offload (__FILE__, __LINE__, device, "__offload_target_free", 1,
439 		  &vd, NULL);
440 }
441 
442 extern "C" bool
GOMP_OFFLOAD_host2dev(int device,void * tgt_ptr,const void * host_ptr,size_t size)443 GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
444 		       size_t size)
445 {
446   TRACE ("(device = %d, tgt_ptr = %p, host_ptr = %p, size = %d)",
447 	 device, tgt_ptr, host_ptr, size);
448   if (!size)
449     return true;
450 
451   VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
452   vd1[0].ptr = &tgt_ptr;
453   vd1[0].size = sizeof (void *);
454   vd1[1].ptr = &size;
455   vd1[1].size = sizeof (size);
456 
457   if (!offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
458 		vd1, NULL))
459     return false;
460 
461   VarDesc vd2 = vd_host2tgt;
462   vd2.ptr = (void *) host_ptr;
463   vd2.size = size;
464 
465   return offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
466 		  &vd2, NULL);
467 }
468 
469 extern "C" bool
GOMP_OFFLOAD_dev2host(int device,void * host_ptr,const void * tgt_ptr,size_t size)470 GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
471 		       size_t size)
472 {
473   TRACE ("(device = %d, host_ptr = %p, tgt_ptr = %p, size = %d)",
474 	 device, host_ptr, tgt_ptr, size);
475   if (!size)
476     return true;
477 
478   VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
479   vd1[0].ptr = &tgt_ptr;
480   vd1[0].size = sizeof (void *);
481   vd1[1].ptr = &size;
482   vd1[1].size = sizeof (size);
483 
484   if (!offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
485 		vd1, NULL))
486     return false;
487 
488   VarDesc vd2 = vd_tgt2host;
489   vd2.ptr = (void *) host_ptr;
490   vd2.size = size;
491 
492   return offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
493 		  &vd2, NULL);
494 }
495 
496 extern "C" bool
GOMP_OFFLOAD_dev2dev(int device,void * dst_ptr,const void * src_ptr,size_t size)497 GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
498 		      size_t size)
499 {
500   TRACE ("(device = %d, dst_ptr = %p, src_ptr = %p, size = %d)",
501 	 device, dst_ptr, src_ptr, size);
502   if (!size)
503     return true;
504 
505   VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
506   vd[0].ptr = &dst_ptr;
507   vd[0].size = sizeof (void *);
508   vd[1].ptr = &src_ptr;
509   vd[1].size = sizeof (void *);
510   vd[2].ptr = &size;
511   vd[2].size = sizeof (size);
512 
513   return offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3,
514 		  vd, NULL);
515 }
516 
517 extern "C" void
GOMP_OFFLOAD_async_run(int device,void * tgt_fn,void * tgt_vars,void **,void * async_data)518 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
519 			void **, void *async_data)
520 {
521   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
522 	 tgt_fn, tgt_vars, async_data);
523 
524   VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
525   vd[0].ptr = &tgt_fn;
526   vd[0].size = sizeof (void *);
527   vd[1].ptr = &tgt_vars;
528   vd[1].size = sizeof (void *);
529 
530   offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd,
531 	   (const void **) async_data);
532 }
533 
534 extern "C" void
GOMP_OFFLOAD_run(int device,void * tgt_fn,void * tgt_vars,void **)535 GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
536 {
537   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);
538 
539   GOMP_OFFLOAD_async_run (device, tgt_fn, tgt_vars, NULL, NULL);
540 }
541