1 //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // RTL for generic 64-bit machine
10 //
11 //===----------------------------------------------------------------------===//
12 
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
#include <ffi.h>
#include <gelf.h>
#include <link.h>
#include <list>
#include <string>
#include <unistd.h>
#include <vector>
24 
25 #include "Debug.h"
26 #include "omptargetplugin.h"
27 
28 #ifndef TARGET_NAME
29 #define TARGET_NAME Generic ELF - 64bit
30 #endif
31 #define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL"
32 
33 #ifndef TARGET_ELF_ID
34 #define TARGET_ELF_ID 0
35 #endif
36 
37 #include "../../common/elf_common.c"
38 
39 #define NUMBER_OF_DEVICES 4
40 #define OFFLOADSECTIONNAME "omp_offloading_entries"
41 
42 /// Array of Dynamic libraries loaded for this target.
43 struct DynLibTy {
44   char *FileName;
45   void *Handle;
46 };
47 
48 /// Keep entries table per device.
49 struct FuncOrGblEntryTy {
50   __tgt_target_table Table;
51 };
52 
53 /// Class containing all the device information.
54 class RTLDeviceInfoTy {
55   std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
56 
57 public:
58   std::list<DynLibTy> DynLibs;
59 
60   // Record entry point associated with device.
createOffloadTable(int32_t device_id,__tgt_offload_entry * begin,__tgt_offload_entry * end)61   void createOffloadTable(int32_t device_id, __tgt_offload_entry *begin,
62                           __tgt_offload_entry *end) {
63     assert(device_id < (int32_t)FuncGblEntries.size() &&
64            "Unexpected device id!");
65     FuncGblEntries[device_id].emplace_back();
66     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
67 
68     E.Table.EntriesBegin = begin;
69     E.Table.EntriesEnd = end;
70   }
71 
72   // Return true if the entry is associated with device.
findOffloadEntry(int32_t device_id,void * addr)73   bool findOffloadEntry(int32_t device_id, void *addr) {
74     assert(device_id < (int32_t)FuncGblEntries.size() &&
75            "Unexpected device id!");
76     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
77 
78     for (__tgt_offload_entry *i = E.Table.EntriesBegin, *e = E.Table.EntriesEnd;
79          i < e; ++i) {
80       if (i->addr == addr)
81         return true;
82     }
83 
84     return false;
85   }
86 
87   // Return the pointer to the target entries table.
getOffloadEntriesTable(int32_t device_id)88   __tgt_target_table *getOffloadEntriesTable(int32_t device_id) {
89     assert(device_id < (int32_t)FuncGblEntries.size() &&
90            "Unexpected device id!");
91     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
92 
93     return &E.Table;
94   }
95 
RTLDeviceInfoTy(int32_t num_devices)96   RTLDeviceInfoTy(int32_t num_devices) {
97 
98     FuncGblEntries.resize(num_devices);
99   }
100 
~RTLDeviceInfoTy()101   ~RTLDeviceInfoTy() {
102     // Close dynamic libraries
103     for (auto &lib : DynLibs) {
104       if (lib.Handle) {
105         dlclose(lib.Handle);
106         remove(lib.FileName);
107       }
108     }
109   }
110 };
111 
112 static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
113 
114 #ifdef __cplusplus
115 extern "C" {
116 #endif
117 
__tgt_rtl_is_valid_binary(__tgt_device_image * image)118 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
119 // If we don't have a valid ELF ID we can just fail.
120 #if TARGET_ELF_ID < 1
121   return 0;
122 #else
123   return elf_check_machine(image, TARGET_ELF_ID);
124 #endif
125 }
126 
__tgt_rtl_number_of_devices()127 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
128 
__tgt_rtl_init_device(int32_t device_id)129 int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; }
130 
__tgt_rtl_load_binary(int32_t device_id,__tgt_device_image * image)131 __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
132                                           __tgt_device_image *image) {
133 
134   DP("Dev %d: load binary from " DPxMOD " image\n", device_id,
135      DPxPTR(image->ImageStart));
136 
137   assert(device_id >= 0 && device_id < NUMBER_OF_DEVICES && "bad dev id");
138 
139   size_t ImageSize = (size_t)image->ImageEnd - (size_t)image->ImageStart;
140   size_t NumEntries = (size_t)(image->EntriesEnd - image->EntriesBegin);
141   DP("Expecting to have %zd entries defined.\n", NumEntries);
142 
143   // Is the library version incompatible with the header file?
144   if (elf_version(EV_CURRENT) == EV_NONE) {
145     DP("Incompatible ELF library!\n");
146     return NULL;
147   }
148 
149   // Obtain elf handler
150   Elf *e = elf_memory((char *)image->ImageStart, ImageSize);
151   if (!e) {
152     DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
153     return NULL;
154   }
155 
156   if (elf_kind(e) != ELF_K_ELF) {
157     DP("Invalid Elf kind!\n");
158     elf_end(e);
159     return NULL;
160   }
161 
162   // Find the entries section offset
163   Elf_Scn *section = 0;
164   Elf64_Off entries_offset = 0;
165 
166   size_t shstrndx;
167 
168   if (elf_getshdrstrndx(e, &shstrndx)) {
169     DP("Unable to get ELF strings index!\n");
170     elf_end(e);
171     return NULL;
172   }
173 
174   while ((section = elf_nextscn(e, section))) {
175     GElf_Shdr hdr;
176     gelf_getshdr(section, &hdr);
177 
178     if (!strcmp(elf_strptr(e, shstrndx, hdr.sh_name), OFFLOADSECTIONNAME)) {
179       entries_offset = hdr.sh_addr;
180       break;
181     }
182   }
183 
184   if (!entries_offset) {
185     DP("Entries Section Offset Not Found\n");
186     elf_end(e);
187     return NULL;
188   }
189 
190   DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(entries_offset));
191 
192   // load dynamic library and get the entry points. We use the dl library
193   // to do the loading of the library, but we could do it directly to avoid the
194   // dump to the temporary file.
195   //
196   // 1) Create tmp file with the library contents.
197   // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
198   char tmp_name[] = "/tmp/tmpfile_XXXXXX";
199   int tmp_fd = mkstemp(tmp_name);
200 
201   if (tmp_fd == -1) {
202     elf_end(e);
203     return NULL;
204   }
205 
206   FILE *ftmp = fdopen(tmp_fd, "wb");
207 
208   if (!ftmp) {
209     elf_end(e);
210     return NULL;
211   }
212 
213   fwrite(image->ImageStart, ImageSize, 1, ftmp);
214   fclose(ftmp);
215 
216   DynLibTy Lib = {tmp_name, dlopen(tmp_name, RTLD_LAZY)};
217 
218   if (!Lib.Handle) {
219     DP("Target library loading error: %s\n", dlerror());
220     elf_end(e);
221     return NULL;
222   }
223 
224   DeviceInfo.DynLibs.push_back(Lib);
225 
226   struct link_map *libInfo = (struct link_map *)Lib.Handle;
227 
228   // The place where the entries info is loaded is the library base address
229   // plus the offset determined from the ELF file.
230   Elf64_Addr entries_addr = libInfo->l_addr + entries_offset;
231 
232   DP("Pointer to first entry to be loaded is (" DPxMOD ").\n",
233       DPxPTR(entries_addr));
234 
235   // Table of pointers to all the entries in the target.
236   __tgt_offload_entry *entries_table = (__tgt_offload_entry *)entries_addr;
237 
238   __tgt_offload_entry *entries_begin = &entries_table[0];
239   __tgt_offload_entry *entries_end = entries_begin + NumEntries;
240 
241   if (!entries_begin) {
242     DP("Can't obtain entries begin\n");
243     elf_end(e);
244     return NULL;
245   }
246 
247   DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n",
248       DPxPTR(entries_begin), DPxPTR(entries_end));
249   DeviceInfo.createOffloadTable(device_id, entries_begin, entries_end);
250 
251   elf_end(e);
252 
253   return DeviceInfo.getOffloadEntriesTable(device_id);
254 }
255 
/// Allocate \p size bytes of "device" memory. The device is the host itself,
/// so the memory comes from malloc(); \p device_id and \p hst_ptr are unused.
///
/// \returns the allocation, or null if \p size is negative or malloc() fails.
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
  (void)device_id;
  (void)hst_ptr;

  // A negative size would otherwise be converted to an enormous size_t
  // request; treat it as an invalid request instead.
  if (size < 0)
    return nullptr;

  return malloc(static_cast<size_t>(size));
}
260 
__tgt_rtl_data_submit(int32_t device_id,void * tgt_ptr,void * hst_ptr,int64_t size)261 int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
262                               int64_t size) {
263   memcpy(tgt_ptr, hst_ptr, size);
264   return OFFLOAD_SUCCESS;
265 }
266 
__tgt_rtl_data_retrieve(int32_t device_id,void * hst_ptr,void * tgt_ptr,int64_t size)267 int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
268                                 int64_t size) {
269   memcpy(hst_ptr, tgt_ptr, size);
270   return OFFLOAD_SUCCESS;
271 }
272 
__tgt_rtl_data_delete(int32_t device_id,void * tgt_ptr)273 int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
274   free(tgt_ptr);
275   return OFFLOAD_SUCCESS;
276 }
277 
__tgt_rtl_run_target_team_region(int32_t device_id,void * tgt_entry_ptr,void ** tgt_args,ptrdiff_t * tgt_offsets,int32_t arg_num,int32_t team_num,int32_t thread_limit,uint64_t loop_tripcount)278 int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
279                                          void **tgt_args,
280                                          ptrdiff_t *tgt_offsets,
281                                          int32_t arg_num, int32_t team_num,
282                                          int32_t thread_limit,
283                                          uint64_t loop_tripcount /*not used*/) {
284   // ignore team num and thread limit.
285 
286   // Use libffi to launch execution.
287   ffi_cif cif;
288 
289   // All args are references.
290   std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer);
291   std::vector<void *> args(arg_num);
292   std::vector<void *> ptrs(arg_num);
293 
294   for (int32_t i = 0; i < arg_num; ++i) {
295     ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
296     args[i] = &ptrs[i];
297   }
298 
299   ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num,
300                                    &ffi_type_void, &args_types[0]);
301 
302   assert(status == FFI_OK && "Unable to prepare target launch!");
303 
304   if (status != FFI_OK)
305     return OFFLOAD_FAIL;
306 
307   DP("Running entry point at " DPxMOD "...\n", DPxPTR(tgt_entry_ptr));
308 
309   void (*entry)(void);
310   *((void**) &entry) = tgt_entry_ptr;
311   ffi_call(&cif, entry, NULL, &args[0]);
312   return OFFLOAD_SUCCESS;
313 }
314 
__tgt_rtl_run_target_region(int32_t device_id,void * tgt_entry_ptr,void ** tgt_args,ptrdiff_t * tgt_offsets,int32_t arg_num)315 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
316                                     void **tgt_args, ptrdiff_t *tgt_offsets,
317                                     int32_t arg_num) {
318   // use one team and one thread.
319   return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
320                                           tgt_offsets, arg_num, 1, 1, 0);
321 }
322 
323 #ifdef __cplusplus
324 }
325 #endif
326