1 //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // RTL for generic 64-bit machine
10 //
11 //===----------------------------------------------------------------------===//
12 
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
#include <ffi.h>
#include <gelf.h>
#include <link.h>
#include <list>
#include <string>
#include <unistd.h>
#include <vector>
24 
25 #include "Debug.h"
26 #include "omptargetplugin.h"
27 
// Human-readable target name spliced into the debug prefix below. The #ifndef
// guard lets the build override it from the command line.
#ifndef TARGET_NAME
#define TARGET_NAME Generic ELF - 64bit
#endif
// Prefix for this plugin's debug output. GETNAME is not defined in this file
// (presumably provided by Debug.h and stringifies its argument -- confirm).
#define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL"

// ELF machine id this plugin accepts. With the default of 0,
// __tgt_rtl_is_valid_binary rejects every image (it returns 0 whenever the id
// is < 1).
#ifndef TARGET_ELF_ID
#define TARGET_ELF_ID 0
#endif

// NOTE(review): included after the macros above, presumably because the header
// relies on them -- confirm before reordering.
#include "elf_common.h"

// Fixed number of devices this plugin reports and sizes its per-device tables
// for.
#define NUMBER_OF_DEVICES 4
// Name of the ELF section searched for when locating the offload entries table
// in a device image.
#define OFFLOADSECTIONNAME "omp_offloading_entries"
41 
42 /// Array of Dynamic libraries loaded for this target.
43 struct DynLibTy {
44   char *FileName;
45   void *Handle;
46 };
47 
48 /// Keep entries table per device.
49 struct FuncOrGblEntryTy {
50   __tgt_target_table Table;
51 };
52 
53 /// Class containing all the device information.
54 class RTLDeviceInfoTy {
55   std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
56 
57 public:
58   std::list<DynLibTy> DynLibs;
59 
60   // Record entry point associated with device.
61   void createOffloadTable(int32_t device_id, __tgt_offload_entry *begin,
62                           __tgt_offload_entry *end) {
63     assert(device_id < (int32_t)FuncGblEntries.size() &&
64            "Unexpected device id!");
65     FuncGblEntries[device_id].emplace_back();
66     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
67 
68     E.Table.EntriesBegin = begin;
69     E.Table.EntriesEnd = end;
70   }
71 
72   // Return true if the entry is associated with device.
73   bool findOffloadEntry(int32_t device_id, void *addr) {
74     assert(device_id < (int32_t)FuncGblEntries.size() &&
75            "Unexpected device id!");
76     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
77 
78     for (__tgt_offload_entry *i = E.Table.EntriesBegin, *e = E.Table.EntriesEnd;
79          i < e; ++i) {
80       if (i->addr == addr)
81         return true;
82     }
83 
84     return false;
85   }
86 
87   // Return the pointer to the target entries table.
88   __tgt_target_table *getOffloadEntriesTable(int32_t device_id) {
89     assert(device_id < (int32_t)FuncGblEntries.size() &&
90            "Unexpected device id!");
91     FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
92 
93     return &E.Table;
94   }
95 
96   RTLDeviceInfoTy(int32_t num_devices) { FuncGblEntries.resize(num_devices); }
97 
98   ~RTLDeviceInfoTy() {
99     // Close dynamic libraries
100     for (auto &lib : DynLibs) {
101       if (lib.Handle) {
102         dlclose(lib.Handle);
103         remove(lib.FileName);
104       }
105     }
106   }
107 };
108 
109 static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
110 
111 #ifdef __cplusplus
112 extern "C" {
113 #endif
114 
115 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
116 // If we don't have a valid ELF ID we can just fail.
117 #if TARGET_ELF_ID < 1
118   return 0;
119 #else
120   return elf_check_machine(image, TARGET_ELF_ID);
121 #endif
122 }
123 
124 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
125 
126 int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; }
127 
128 __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
129                                           __tgt_device_image *image) {
130 
131   DP("Dev %d: load binary from " DPxMOD " image\n", device_id,
132      DPxPTR(image->ImageStart));
133 
134   assert(device_id >= 0 && device_id < NUMBER_OF_DEVICES && "bad dev id");
135 
136   size_t ImageSize = (size_t)image->ImageEnd - (size_t)image->ImageStart;
137   size_t NumEntries = (size_t)(image->EntriesEnd - image->EntriesBegin);
138   DP("Expecting to have %zd entries defined.\n", NumEntries);
139 
140   // Is the library version incompatible with the header file?
141   if (elf_version(EV_CURRENT) == EV_NONE) {
142     DP("Incompatible ELF library!\n");
143     return NULL;
144   }
145 
146   // Obtain elf handler
147   Elf *e = elf_memory((char *)image->ImageStart, ImageSize);
148   if (!e) {
149     DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
150     return NULL;
151   }
152 
153   if (elf_kind(e) != ELF_K_ELF) {
154     DP("Invalid Elf kind!\n");
155     elf_end(e);
156     return NULL;
157   }
158 
159   // Find the entries section offset
160   Elf_Scn *section = 0;
161   Elf64_Off entries_offset = 0;
162 
163   size_t shstrndx;
164 
165   if (elf_getshdrstrndx(e, &shstrndx)) {
166     DP("Unable to get ELF strings index!\n");
167     elf_end(e);
168     return NULL;
169   }
170 
171   while ((section = elf_nextscn(e, section))) {
172     GElf_Shdr hdr;
173     gelf_getshdr(section, &hdr);
174 
175     if (!strcmp(elf_strptr(e, shstrndx, hdr.sh_name), OFFLOADSECTIONNAME)) {
176       entries_offset = hdr.sh_addr;
177       break;
178     }
179   }
180 
181   if (!entries_offset) {
182     DP("Entries Section Offset Not Found\n");
183     elf_end(e);
184     return NULL;
185   }
186 
187   DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(entries_offset));
188 
189   // load dynamic library and get the entry points. We use the dl library
190   // to do the loading of the library, but we could do it directly to avoid the
191   // dump to the temporary file.
192   //
193   // 1) Create tmp file with the library contents.
194   // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
195   char tmp_name[] = "/tmp/tmpfile_XXXXXX";
196   int tmp_fd = mkstemp(tmp_name);
197 
198   if (tmp_fd == -1) {
199     elf_end(e);
200     return NULL;
201   }
202 
203   FILE *ftmp = fdopen(tmp_fd, "wb");
204 
205   if (!ftmp) {
206     elf_end(e);
207     return NULL;
208   }
209 
210   fwrite(image->ImageStart, ImageSize, 1, ftmp);
211   fclose(ftmp);
212 
213   DynLibTy Lib = {tmp_name, dlopen(tmp_name, RTLD_LAZY)};
214 
215   if (!Lib.Handle) {
216     DP("Target library loading error: %s\n", dlerror());
217     elf_end(e);
218     return NULL;
219   }
220 
221   DeviceInfo.DynLibs.push_back(Lib);
222 
223   struct link_map *libInfo = (struct link_map *)Lib.Handle;
224 
225   // The place where the entries info is loaded is the library base address
226   // plus the offset determined from the ELF file.
227   Elf64_Addr entries_addr = libInfo->l_addr + entries_offset;
228 
229   DP("Pointer to first entry to be loaded is (" DPxMOD ").\n",
230      DPxPTR(entries_addr));
231 
232   // Table of pointers to all the entries in the target.
233   __tgt_offload_entry *entries_table = (__tgt_offload_entry *)entries_addr;
234 
235   __tgt_offload_entry *entries_begin = &entries_table[0];
236   __tgt_offload_entry *entries_end = entries_begin + NumEntries;
237 
238   if (!entries_begin) {
239     DP("Can't obtain entries begin\n");
240     elf_end(e);
241     return NULL;
242   }
243 
244   DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n",
245      DPxPTR(entries_begin), DPxPTR(entries_end));
246   DeviceInfo.createOffloadTable(device_id, entries_begin, entries_end);
247 
248   elf_end(e);
249 
250   return DeviceInfo.getOffloadEntriesTable(device_id);
251 }
252 
253 // Sample implementation of explicit memory allocator. For this plugin all kinds
254 // are equivalent to each other.
255 void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr,
256                            int32_t kind) {
257   void *ptr = NULL;
258 
259   switch (kind) {
260   case TARGET_ALLOC_DEVICE:
261   case TARGET_ALLOC_HOST:
262   case TARGET_ALLOC_SHARED:
263   case TARGET_ALLOC_DEFAULT:
264     ptr = malloc(size);
265     break;
266   default:
267     REPORT("Invalid target data allocation kind");
268   }
269 
270   return ptr;
271 }
272 
273 int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
274                               int64_t size) {
275   memcpy(tgt_ptr, hst_ptr, size);
276   return OFFLOAD_SUCCESS;
277 }
278 
279 int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
280                                 int64_t size) {
281   memcpy(hst_ptr, tgt_ptr, size);
282   return OFFLOAD_SUCCESS;
283 }
284 
285 int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
286   free(tgt_ptr);
287   return OFFLOAD_SUCCESS;
288 }
289 
290 int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
291                                          void **tgt_args,
292                                          ptrdiff_t *tgt_offsets,
293                                          int32_t arg_num, int32_t team_num,
294                                          int32_t thread_limit,
295                                          uint64_t loop_tripcount /*not used*/) {
296   // ignore team num and thread limit.
297 
298   // Use libffi to launch execution.
299   ffi_cif cif;
300 
301   // All args are references.
302   std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer);
303   std::vector<void *> args(arg_num);
304   std::vector<void *> ptrs(arg_num);
305 
306   for (int32_t i = 0; i < arg_num; ++i) {
307     ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
308     args[i] = &ptrs[i];
309   }
310 
311   ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num,
312                                    &ffi_type_void, &args_types[0]);
313 
314   assert(status == FFI_OK && "Unable to prepare target launch!");
315 
316   if (status != FFI_OK)
317     return OFFLOAD_FAIL;
318 
319   DP("Running entry point at " DPxMOD "...\n", DPxPTR(tgt_entry_ptr));
320 
321   void (*entry)(void);
322   *((void **)&entry) = tgt_entry_ptr;
323   ffi_call(&cif, entry, NULL, &args[0]);
324   return OFFLOAD_SUCCESS;
325 }
326 
327 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
328                                     void **tgt_args, ptrdiff_t *tgt_offsets,
329                                     int32_t arg_num) {
330   // use one team and one thread.
331   return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
332                                           tgt_offsets, arg_num, 1, 1, 0);
333 }
334 
335 #ifdef __cplusplus
336 }
337 #endif
338