//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------ C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Implements a subset of the CUDA API by forwarding to the CUDA library, which
// is loaded at runtime via dlopen. The dlopen/dlsym calls are performed as part
// of the call to cuInit.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "cuda.h"
15 #include "Debug.h"
16 #include "dlwrap.h"
17
18 #include <string>
19 #include <unordered_map>
20
21 #include <dlfcn.h>
22
// List of CUDA entry points that this file forwards to the dynamically loaded
// library. Each name registered here is looked up with dlsym() by
// checkForCUDA() below, via dlwrap::symbol(I) / dlwrap::pointer(I).
// NOTE(review): the second macro argument appears to be the number of
// parameters the wrapped function takes - confirm against dlwrap.h.

// cuInit is implemented by hand in this file (it triggers the dlopen), so it
// is registered as "internal"; the hand-written cuInit calls dlwrap_cuInit.
DLWRAP_INTERNAL(cuInit, 1);

DLWRAP(cuCtxGetDevice, 1);
DLWRAP(cuDeviceGet, 2);
DLWRAP(cuDeviceGetAttribute, 3);
DLWRAP(cuDeviceGetCount, 1);
DLWRAP(cuFuncGetAttribute, 3);

// Device info
DLWRAP(cuDeviceGetName, 3);
DLWRAP(cuDeviceTotalMem, 2);
DLWRAP(cuDriverGetVersion, 1);

DLWRAP(cuGetErrorString, 2);
DLWRAP(cuLaunchKernel, 11);

// Memory allocation
DLWRAP(cuMemAlloc, 2);
DLWRAP(cuMemAllocHost, 2);
DLWRAP(cuMemAllocManaged, 3);

// Memory copies
DLWRAP(cuMemcpyDtoDAsync, 4);
DLWRAP(cuMemcpyDtoH, 3);
DLWRAP(cuMemcpyDtoHAsync, 4);
DLWRAP(cuMemcpyHtoD, 3);
DLWRAP(cuMemcpyHtoDAsync, 4);

// Memory release and module symbol lookup
DLWRAP(cuMemFree, 1);
DLWRAP(cuMemFreeHost, 1);
DLWRAP(cuModuleGetFunction, 3);
DLWRAP(cuModuleGetGlobal, 4);

// Modules, streams and contexts
DLWRAP(cuModuleUnload, 1);
DLWRAP(cuStreamCreate, 2);
DLWRAP(cuStreamDestroy, 1);
DLWRAP(cuStreamSynchronize, 1);
DLWRAP(cuCtxSetCurrent, 1);
DLWRAP(cuDevicePrimaryCtxRelease, 1);
DLWRAP(cuDevicePrimaryCtxGetState, 3);
DLWRAP(cuDevicePrimaryCtxSetFlags, 2);
DLWRAP(cuDevicePrimaryCtxRetain, 2);
DLWRAP(cuModuleLoadDataEx, 5);

// Peer-to-peer access
DLWRAP(cuDeviceCanAccessPeer, 3);
DLWRAP(cuCtxEnablePeerAccess, 2);
DLWRAP(cuMemcpyPeerAsync, 6);

// Context limits
DLWRAP(cuCtxGetLimit, 2);
DLWRAP(cuCtxSetLimit, 2);

// NOTE(review): presumably must follow the last DLWRAP* line so that
// dlwrap::size()/symbol()/pointer() cover every entry above - confirm against
// dlwrap.h.
DLWRAP_FINALIZE();
73
// Name (or path) of the CUDA library handed to dlopen() in checkForCUDA().
// Can be overridden by defining DYNAMIC_CUDA_PATH before this point, e.g. on
// the compiler command line.
#ifndef DYNAMIC_CUDA_PATH
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// NOTE(review): DEBUG_PREFIX is presumably consumed by the DP() macro from
// Debug.h to tag the debug messages emitted below - confirm against Debug.h.
#define TARGET_NAME CUDA
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
80
checkForCUDA()81 static bool checkForCUDA() {
82 // return true if dlopen succeeded and all functions found
83
84 // Prefer _v2 versions of functions if found in the library
85 std::unordered_map<std::string, const char *> TryFirst = {
86 {"cuMemAlloc", "cuMemAlloc_v2"},
87 {"cuMemFree", "cuMemFree_v2"},
88 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
89 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
90 {"cuStreamDestroy", "cuStreamDestroy_v2"},
91 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
92 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
93 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
94 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
95 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
96 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
97 };
98
99 const char *CudaLib = DYNAMIC_CUDA_PATH;
100 void *DynlibHandle = dlopen(CudaLib, RTLD_NOW);
101 if (!DynlibHandle) {
102 DP("Unable to load library '%s': %s!\n", CudaLib, dlerror());
103 return false;
104 }
105
106 for (size_t I = 0; I < dlwrap::size(); I++) {
107 const char *Sym = dlwrap::symbol(I);
108
109 auto It = TryFirst.find(Sym);
110 if (It != TryFirst.end()) {
111 const char *First = It->second;
112 void *P = dlsym(DynlibHandle, First);
113 if (P) {
114 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
115 *dlwrap::pointer(I) = P;
116 continue;
117 }
118 }
119
120 void *P = dlsym(DynlibHandle, Sym);
121 if (P == nullptr) {
122 DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
123 return false;
124 }
125 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
126
127 *dlwrap::pointer(I) = P;
128 }
129
130 return true;
131 }
132
cuInit(unsigned X)133 CUresult cuInit(unsigned X) {
134 // Note: Called exactly once from cuda rtl.cpp in a global constructor so
135 // does not need to handle being called repeatedly or concurrently
136 if (!checkForCUDA()) {
137 return CUDA_ERROR_INVALID_HANDLE;
138 }
139 return dlwrap_cuInit(X);
140 }
141