//===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // RTL for generic 64-bit machine // //===----------------------------------------------------------------------===// #include #include #include #include #include #include #include #include #include #include #include #include "Debug.h" #include "omptargetplugin.h" #ifndef TARGET_NAME #define TARGET_NAME Generic ELF - 64bit #endif #define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL" #ifndef TARGET_ELF_ID #define TARGET_ELF_ID 0 #endif #include "elf_common.h" #define NUMBER_OF_DEVICES 4 #define OFFLOADSECTIONNAME "omp_offloading_entries" /// Array of Dynamic libraries loaded for this target. struct DynLibTy { char *FileName; void *Handle; }; /// Keep entries table per device. struct FuncOrGblEntryTy { __tgt_target_table Table; }; /// Class containing all the device information. class RTLDeviceInfoTy { std::vector> FuncGblEntries; public: std::list DynLibs; // Record entry point associated with device. void createOffloadTable(int32_t device_id, __tgt_offload_entry *begin, __tgt_offload_entry *end) { assert(device_id < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); FuncGblEntries[device_id].emplace_back(); FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); E.Table.EntriesBegin = begin; E.Table.EntriesEnd = end; } // Return true if the entry is associated with device. bool findOffloadEntry(int32_t device_id, void *addr) { assert(device_id < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); for (__tgt_offload_entry *i = E.Table.EntriesBegin, *e = E.Table.EntriesEnd; i < e; ++i) { if (i->addr == addr) return true; } return false; } // Return the pointer to the target entries table. __tgt_target_table *getOffloadEntriesTable(int32_t device_id) { assert(device_id < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); return &E.Table; } RTLDeviceInfoTy(int32_t num_devices) { FuncGblEntries.resize(num_devices); } ~RTLDeviceInfoTy() { // Close dynamic libraries for (auto &lib : DynLibs) { if (lib.Handle) { dlclose(lib.Handle); remove(lib.FileName); } } } }; static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES); #ifdef __cplusplus extern "C" { #endif int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { // If we don't have a valid ELF ID we can just fail. #if TARGET_ELF_ID < 1 return 0; #else return elf_check_machine(image, TARGET_ELF_ID); #endif } int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; } int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; } __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, __tgt_device_image *image) { DP("Dev %d: load binary from " DPxMOD " image\n", device_id, DPxPTR(image->ImageStart)); assert(device_id >= 0 && device_id < NUMBER_OF_DEVICES && "bad dev id"); size_t ImageSize = (size_t)image->ImageEnd - (size_t)image->ImageStart; size_t NumEntries = (size_t)(image->EntriesEnd - image->EntriesBegin); DP("Expecting to have %zd entries defined.\n", NumEntries); // Is the library version incompatible with the header file? if (elf_version(EV_CURRENT) == EV_NONE) { DP("Incompatible ELF library!\n"); return NULL; } // Obtain elf handler Elf *e = elf_memory((char *)image->ImageStart, ImageSize); if (!e) { DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1)); return NULL; } if (elf_kind(e) != ELF_K_ELF) { DP("Invalid Elf kind!\n"); elf_end(e); return NULL; } // Find the entries section offset Elf_Scn *section = 0; Elf64_Off entries_offset = 0; size_t shstrndx; if (elf_getshdrstrndx(e, &shstrndx)) { DP("Unable to get ELF strings index!\n"); elf_end(e); return NULL; } while ((section = elf_nextscn(e, section))) { GElf_Shdr hdr; gelf_getshdr(section, &hdr); if (!strcmp(elf_strptr(e, shstrndx, hdr.sh_name), OFFLOADSECTIONNAME)) { entries_offset = hdr.sh_addr; break; } } if (!entries_offset) { DP("Entries Section Offset Not Found\n"); elf_end(e); return NULL; } DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(entries_offset)); // load dynamic library and get the entry points. We use the dl library // to do the loading of the library, but we could do it directly to avoid the // dump to the temporary file. // // 1) Create tmp file with the library contents. // 2) Use dlopen to load the file and dlsym to retrieve the symbols. char tmp_name[] = "/tmp/tmpfile_XXXXXX"; int tmp_fd = mkstemp(tmp_name); if (tmp_fd == -1) { elf_end(e); return NULL; } FILE *ftmp = fdopen(tmp_fd, "wb"); if (!ftmp) { elf_end(e); return NULL; } fwrite(image->ImageStart, ImageSize, 1, ftmp); fclose(ftmp); DynLibTy Lib = {tmp_name, dlopen(tmp_name, RTLD_LAZY)}; if (!Lib.Handle) { DP("Target library loading error: %s\n", dlerror()); elf_end(e); return NULL; } DeviceInfo.DynLibs.push_back(Lib); struct link_map *libInfo = (struct link_map *)Lib.Handle; // The place where the entries info is loaded is the library base address // plus the offset determined from the ELF file. Elf64_Addr entries_addr = libInfo->l_addr + entries_offset; DP("Pointer to first entry to be loaded is (" DPxMOD ").\n", DPxPTR(entries_addr)); // Table of pointers to all the entries in the target. __tgt_offload_entry *entries_table = (__tgt_offload_entry *)entries_addr; __tgt_offload_entry *entries_begin = &entries_table[0]; __tgt_offload_entry *entries_end = entries_begin + NumEntries; if (!entries_begin) { DP("Can't obtain entries begin\n"); elf_end(e); return NULL; } DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n", DPxPTR(entries_begin), DPxPTR(entries_end)); DeviceInfo.createOffloadTable(device_id, entries_begin, entries_end); elf_end(e); return DeviceInfo.getOffloadEntriesTable(device_id); } // Sample implementation of explicit memory allocator. For this plugin all kinds // are equivalent to each other. void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr, int32_t kind) { void *ptr = NULL; switch (kind) { case TARGET_ALLOC_DEVICE: case TARGET_ALLOC_HOST: case TARGET_ALLOC_SHARED: case TARGET_ALLOC_DEFAULT: ptr = malloc(size); break; default: REPORT("Invalid target data allocation kind"); } return ptr; } int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr, int64_t size) { memcpy(tgt_ptr, hst_ptr, size); return OFFLOAD_SUCCESS; } int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr, int64_t size) { memcpy(hst_ptr, tgt_ptr, size); return OFFLOAD_SUCCESS; } int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) { free(tgt_ptr); return OFFLOAD_SUCCESS; } int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr, void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num, int32_t thread_limit, uint64_t loop_tripcount /*not used*/) { // ignore team num and thread limit. // Use libffi to launch execution. ffi_cif cif; // All args are references. std::vector args_types(arg_num, &ffi_type_pointer); std::vector args(arg_num); std::vector ptrs(arg_num); for (int32_t i = 0; i < arg_num; ++i) { ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]); args[i] = &ptrs[i]; } ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num, &ffi_type_void, &args_types[0]); assert(status == FFI_OK && "Unable to prepare target launch!"); if (status != FFI_OK) return OFFLOAD_FAIL; DP("Running entry point at " DPxMOD "...\n", DPxPTR(tgt_entry_ptr)); void (*entry)(void); *((void **)&entry) = tgt_entry_ptr; ffi_call(&cif, entry, NULL, &args[0]); return OFFLOAD_SUCCESS; } int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr, void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num) { // use one team and one thread. return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, 1, 1, 0); } #ifdef __cplusplus } #endif