1 //===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // RTL for generic 64-bit machine 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include <cassert> 14 #include <cstdio> 15 #include <cstdlib> 16 #include <cstring> 17 #include <dlfcn.h> 18 #include <ffi.h> 19 #include <gelf.h> 20 #include <link.h> 21 #include <list> 22 #include <string> 23 #include <vector> 24 25 #include "Debug.h" 26 #include "omptargetplugin.h" 27 28 #ifndef TARGET_NAME 29 #define TARGET_NAME Generic ELF - 64bit 30 #endif 31 #define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL" 32 33 #ifndef TARGET_ELF_ID 34 #define TARGET_ELF_ID 0 35 #endif 36 37 #include "elf_common.h" 38 39 #define NUMBER_OF_DEVICES 4 40 #define OFFLOADSECTIONNAME "omp_offloading_entries" 41 42 /// Array of Dynamic libraries loaded for this target. 43 struct DynLibTy { 44 char *FileName; 45 void *Handle; 46 }; 47 48 /// Keep entries table per device. 49 struct FuncOrGblEntryTy { 50 __tgt_target_table Table; 51 }; 52 53 /// Class containing all the device information. 54 class RTLDeviceInfoTy { 55 std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries; 56 57 public: 58 std::list<DynLibTy> DynLibs; 59 60 // Record entry point associated with device. 61 void createOffloadTable(int32_t device_id, __tgt_offload_entry *begin, 62 __tgt_offload_entry *end) { 63 assert(device_id < (int32_t)FuncGblEntries.size() && 64 "Unexpected device id!"); 65 FuncGblEntries[device_id].emplace_back(); 66 FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); 67 68 E.Table.EntriesBegin = begin; 69 E.Table.EntriesEnd = end; 70 } 71 72 // Return true if the entry is associated with device. 73 bool findOffloadEntry(int32_t device_id, void *addr) { 74 assert(device_id < (int32_t)FuncGblEntries.size() && 75 "Unexpected device id!"); 76 FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); 77 78 for (__tgt_offload_entry *i = E.Table.EntriesBegin, *e = E.Table.EntriesEnd; 79 i < e; ++i) { 80 if (i->addr == addr) 81 return true; 82 } 83 84 return false; 85 } 86 87 // Return the pointer to the target entries table. 88 __tgt_target_table *getOffloadEntriesTable(int32_t device_id) { 89 assert(device_id < (int32_t)FuncGblEntries.size() && 90 "Unexpected device id!"); 91 FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); 92 93 return &E.Table; 94 } 95 96 RTLDeviceInfoTy(int32_t num_devices) { FuncGblEntries.resize(num_devices); } 97 98 ~RTLDeviceInfoTy() { 99 // Close dynamic libraries 100 for (auto &lib : DynLibs) { 101 if (lib.Handle) { 102 dlclose(lib.Handle); 103 remove(lib.FileName); 104 } 105 } 106 } 107 }; 108 109 static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES); 110 111 #ifdef __cplusplus 112 extern "C" { 113 #endif 114 115 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { 116 // If we don't have a valid ELF ID we can just fail. 117 #if TARGET_ELF_ID < 1 118 return 0; 119 #else 120 return elf_check_machine(image, TARGET_ELF_ID); 121 #endif 122 } 123 124 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; } 125 126 int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; } 127 128 __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, 129 __tgt_device_image *image) { 130 131 DP("Dev %d: load binary from " DPxMOD " image\n", device_id, 132 DPxPTR(image->ImageStart)); 133 134 assert(device_id >= 0 && device_id < NUMBER_OF_DEVICES && "bad dev id"); 135 136 size_t ImageSize = (size_t)image->ImageEnd - (size_t)image->ImageStart; 137 size_t NumEntries = (size_t)(image->EntriesEnd - image->EntriesBegin); 138 DP("Expecting to have %zd entries defined.\n", NumEntries); 139 140 // Is the library version incompatible with the header file? 141 if (elf_version(EV_CURRENT) == EV_NONE) { 142 DP("Incompatible ELF library!\n"); 143 return NULL; 144 } 145 146 // Obtain elf handler 147 Elf *e = elf_memory((char *)image->ImageStart, ImageSize); 148 if (!e) { 149 DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1)); 150 return NULL; 151 } 152 153 if (elf_kind(e) != ELF_K_ELF) { 154 DP("Invalid Elf kind!\n"); 155 elf_end(e); 156 return NULL; 157 } 158 159 // Find the entries section offset 160 Elf_Scn *section = 0; 161 Elf64_Off entries_offset = 0; 162 163 size_t shstrndx; 164 165 if (elf_getshdrstrndx(e, &shstrndx)) { 166 DP("Unable to get ELF strings index!\n"); 167 elf_end(e); 168 return NULL; 169 } 170 171 while ((section = elf_nextscn(e, section))) { 172 GElf_Shdr hdr; 173 gelf_getshdr(section, &hdr); 174 175 if (!strcmp(elf_strptr(e, shstrndx, hdr.sh_name), OFFLOADSECTIONNAME)) { 176 entries_offset = hdr.sh_addr; 177 break; 178 } 179 } 180 181 if (!entries_offset) { 182 DP("Entries Section Offset Not Found\n"); 183 elf_end(e); 184 return NULL; 185 } 186 187 DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(entries_offset)); 188 189 // load dynamic library and get the entry points. We use the dl library 190 // to do the loading of the library, but we could do it directly to avoid the 191 // dump to the temporary file. 192 // 193 // 1) Create tmp file with the library contents. 194 // 2) Use dlopen to load the file and dlsym to retrieve the symbols. 195 char tmp_name[] = "/tmp/tmpfile_XXXXXX"; 196 int tmp_fd = mkstemp(tmp_name); 197 198 if (tmp_fd == -1) { 199 elf_end(e); 200 return NULL; 201 } 202 203 FILE *ftmp = fdopen(tmp_fd, "wb"); 204 205 if (!ftmp) { 206 elf_end(e); 207 return NULL; 208 } 209 210 fwrite(image->ImageStart, ImageSize, 1, ftmp); 211 fclose(ftmp); 212 213 DynLibTy Lib = {tmp_name, dlopen(tmp_name, RTLD_LAZY)}; 214 215 if (!Lib.Handle) { 216 DP("Target library loading error: %s\n", dlerror()); 217 elf_end(e); 218 return NULL; 219 } 220 221 DeviceInfo.DynLibs.push_back(Lib); 222 223 struct link_map *libInfo = (struct link_map *)Lib.Handle; 224 225 // The place where the entries info is loaded is the library base address 226 // plus the offset determined from the ELF file. 227 Elf64_Addr entries_addr = libInfo->l_addr + entries_offset; 228 229 DP("Pointer to first entry to be loaded is (" DPxMOD ").\n", 230 DPxPTR(entries_addr)); 231 232 // Table of pointers to all the entries in the target. 233 __tgt_offload_entry *entries_table = (__tgt_offload_entry *)entries_addr; 234 235 __tgt_offload_entry *entries_begin = &entries_table[0]; 236 __tgt_offload_entry *entries_end = entries_begin + NumEntries; 237 238 if (!entries_begin) { 239 DP("Can't obtain entries begin\n"); 240 elf_end(e); 241 return NULL; 242 } 243 244 DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n", 245 DPxPTR(entries_begin), DPxPTR(entries_end)); 246 DeviceInfo.createOffloadTable(device_id, entries_begin, entries_end); 247 248 elf_end(e); 249 250 return DeviceInfo.getOffloadEntriesTable(device_id); 251 } 252 253 // Sample implementation of explicit memory allocator. For this plugin all kinds 254 // are equivalent to each other. 255 void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr, 256 int32_t kind) { 257 void *ptr = NULL; 258 259 switch (kind) { 260 case TARGET_ALLOC_DEVICE: 261 case TARGET_ALLOC_HOST: 262 case TARGET_ALLOC_SHARED: 263 case TARGET_ALLOC_DEFAULT: 264 ptr = malloc(size); 265 break; 266 default: 267 REPORT("Invalid target data allocation kind"); 268 } 269 270 return ptr; 271 } 272 273 int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr, 274 int64_t size) { 275 memcpy(tgt_ptr, hst_ptr, size); 276 return OFFLOAD_SUCCESS; 277 } 278 279 int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr, 280 int64_t size) { 281 memcpy(hst_ptr, tgt_ptr, size); 282 return OFFLOAD_SUCCESS; 283 } 284 285 int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) { 286 free(tgt_ptr); 287 return OFFLOAD_SUCCESS; 288 } 289 290 int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr, 291 void **tgt_args, 292 ptrdiff_t *tgt_offsets, 293 int32_t arg_num, int32_t team_num, 294 int32_t thread_limit, 295 uint64_t loop_tripcount /*not used*/) { 296 // ignore team num and thread limit. 297 298 // Use libffi to launch execution. 299 ffi_cif cif; 300 301 // All args are references. 302 std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer); 303 std::vector<void *> args(arg_num); 304 std::vector<void *> ptrs(arg_num); 305 306 for (int32_t i = 0; i < arg_num; ++i) { 307 ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]); 308 args[i] = &ptrs[i]; 309 } 310 311 ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num, 312 &ffi_type_void, &args_types[0]); 313 314 assert(status == FFI_OK && "Unable to prepare target launch!"); 315 316 if (status != FFI_OK) 317 return OFFLOAD_FAIL; 318 319 DP("Running entry point at " DPxMOD "...\n", DPxPTR(tgt_entry_ptr)); 320 321 void (*entry)(void); 322 *((void **)&entry) = tgt_entry_ptr; 323 ffi_call(&cif, entry, NULL, &args[0]); 324 return OFFLOAD_SUCCESS; 325 } 326 327 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr, 328 void **tgt_args, ptrdiff_t *tgt_offsets, 329 int32_t arg_num) { 330 // use one team and one thread. 331 return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args, 332 tgt_offsets, arg_num, 1, 1, 0); 333 } 334 335 #ifdef __cplusplus 336 } 337 #endif 338