1 /*
2 * Copyright 2011-2014 Blender Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17 #ifdef _MSC_VER
18 # if _MSC_VER < 1900
19 # define snprintf _snprintf
20 # endif
21 # define popen _popen
22 # define pclose _pclose
23 # define _CRT_SECURE_NO_WARNINGS
24 #endif
25
26 #include <cuew.h>
27 #include <assert.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <sys/stat.h>
31
32 #ifdef _WIN32
33 # define WIN32_LEAN_AND_MEAN
34 # define VC_EXTRALEAN
35 # include <windows.h>
36
37 /* Utility macros. */
38
39 typedef HMODULE DynamicLibrary;
40
41 # define dynamic_library_open(path) LoadLibraryA(path)
42 # define dynamic_library_close(lib) FreeLibrary(lib)
43 # define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
44 #else
45 # include <dlfcn.h>
46
47 typedef void* DynamicLibrary;
48
49 # define dynamic_library_open(path) dlopen(path, RTLD_NOW)
50 # define dynamic_library_close(lib) dlclose(lib)
51 # define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
52 #endif
53
/* Symbol lookup helpers.
 *
 * Each helper resolves the symbol called `name` in `lib` and stores it in the
 * global function pointer that has the same identifier, casting through the
 * matching `t<name>` function type.
 *
 * The bodies are wrapped in do { } while (0) and carry no trailing semicolon,
 * so that `MACRO(name);` is exactly one statement. This keeps the macros safe
 * inside unbraced if/else bodies. */

/* Lookup that additionally asserts the symbol was found. */
#define _LIBRARY_FIND_CHECKED(lib, name) \
  do { \
    name = (t##name *)dynamic_library_find(lib, #name); \
    assert(name); \
  } while (0)

/* Lookup that stores whatever the lookup returned, without checking it. */
#define _LIBRARY_FIND(lib, name) \
  do { \
    name = (t##name *)dynamic_library_find(lib, #name); \
  } while (0)

/* Lookups bound to the CUDA driver library handle. */
#define CUDA_LIBRARY_FIND_CHECKED(name) \
  _LIBRARY_FIND_CHECKED(cuda_lib, name)
#define CUDA_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name)

/* Lookups bound to the NVRTC library handle. */
#define NVRTC_LIBRARY_FIND_CHECKED(name) \
  _LIBRARY_FIND_CHECKED(nvrtc_lib, name)
#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)
68
69 static DynamicLibrary cuda_lib;
70 static DynamicLibrary nvrtc_lib;
71
/* Function pointer definitions; assigned by cuewCudaInit() and cuewNvrtcInit(). */
73 tcuGetErrorString *cuGetErrorString;
74 tcuGetErrorName *cuGetErrorName;
75 tcuInit *cuInit;
76 tcuDriverGetVersion *cuDriverGetVersion;
77 tcuDeviceGet *cuDeviceGet;
78 tcuDeviceGetCount *cuDeviceGetCount;
79 tcuDeviceGetName *cuDeviceGetName;
80 tcuDeviceGetUuid *cuDeviceGetUuid;
81 tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
82 tcuDeviceGetAttribute *cuDeviceGetAttribute;
83 tcuDeviceGetProperties *cuDeviceGetProperties;
84 tcuDeviceComputeCapability *cuDeviceComputeCapability;
85 tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
86 tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
87 tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
88 tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
89 tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;
90 tcuCtxCreate_v2 *cuCtxCreate_v2;
91 tcuCtxDestroy_v2 *cuCtxDestroy_v2;
92 tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
93 tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
94 tcuCtxSetCurrent *cuCtxSetCurrent;
95 tcuCtxGetCurrent *cuCtxGetCurrent;
96 tcuCtxGetDevice *cuCtxGetDevice;
97 tcuCtxGetFlags *cuCtxGetFlags;
98 tcuCtxSynchronize *cuCtxSynchronize;
99 tcuCtxSetLimit *cuCtxSetLimit;
100 tcuCtxGetLimit *cuCtxGetLimit;
101 tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
102 tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
103 tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
104 tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
105 tcuCtxGetApiVersion *cuCtxGetApiVersion;
106 tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
107 tcuCtxAttach *cuCtxAttach;
108 tcuCtxDetach *cuCtxDetach;
109 tcuModuleLoad *cuModuleLoad;
110 tcuModuleLoadData *cuModuleLoadData;
111 tcuModuleLoadDataEx *cuModuleLoadDataEx;
112 tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
113 tcuModuleUnload *cuModuleUnload;
114 tcuModuleGetFunction *cuModuleGetFunction;
115 tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
116 tcuModuleGetTexRef *cuModuleGetTexRef;
117 tcuModuleGetSurfRef *cuModuleGetSurfRef;
118 tcuLinkCreate_v2 *cuLinkCreate_v2;
119 tcuLinkAddData_v2 *cuLinkAddData_v2;
120 tcuLinkAddFile_v2 *cuLinkAddFile_v2;
121 tcuLinkComplete *cuLinkComplete;
122 tcuLinkDestroy *cuLinkDestroy;
123 tcuMemGetInfo_v2 *cuMemGetInfo_v2;
124 tcuMemAlloc_v2 *cuMemAlloc_v2;
125 tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
126 tcuMemFree_v2 *cuMemFree_v2;
127 tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
128 tcuMemAllocHost_v2 *cuMemAllocHost_v2;
129 tcuMemFreeHost *cuMemFreeHost;
130 tcuMemHostAlloc *cuMemHostAlloc;
131 tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
132 tcuMemHostGetFlags *cuMemHostGetFlags;
133 tcuMemAllocManaged *cuMemAllocManaged;
134 tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
135 tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
136 tcuIpcGetEventHandle *cuIpcGetEventHandle;
137 tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
138 tcuIpcGetMemHandle *cuIpcGetMemHandle;
139 tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
140 tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
141 tcuMemHostRegister_v2 *cuMemHostRegister_v2;
142 tcuMemHostUnregister *cuMemHostUnregister;
143 tcuMemcpy *cuMemcpy;
144 tcuMemcpyPeer *cuMemcpyPeer;
145 tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
146 tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
147 tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
148 tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
149 tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
150 tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
151 tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
152 tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
153 tcuMemcpy2D_v2 *cuMemcpy2D_v2;
154 tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
155 tcuMemcpy3D_v2 *cuMemcpy3D_v2;
156 tcuMemcpy3DPeer *cuMemcpy3DPeer;
157 tcuMemcpyAsync *cuMemcpyAsync;
158 tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
159 tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
160 tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
161 tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
162 tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
163 tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
164 tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
165 tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
166 tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
167 tcuMemsetD8_v2 *cuMemsetD8_v2;
168 tcuMemsetD16_v2 *cuMemsetD16_v2;
169 tcuMemsetD32_v2 *cuMemsetD32_v2;
170 tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
171 tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
172 tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
173 tcuMemsetD8Async *cuMemsetD8Async;
174 tcuMemsetD16Async *cuMemsetD16Async;
175 tcuMemsetD32Async *cuMemsetD32Async;
176 tcuMemsetD2D8Async *cuMemsetD2D8Async;
177 tcuMemsetD2D16Async *cuMemsetD2D16Async;
178 tcuMemsetD2D32Async *cuMemsetD2D32Async;
179 tcuArrayCreate_v2 *cuArrayCreate_v2;
180 tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
181 tcuArrayDestroy *cuArrayDestroy;
182 tcuArray3DCreate_v2 *cuArray3DCreate_v2;
183 tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
184 tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
185 tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
186 tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
187 tcuPointerGetAttribute *cuPointerGetAttribute;
188 tcuMemPrefetchAsync *cuMemPrefetchAsync;
189 tcuMemAdvise *cuMemAdvise;
190 tcuMemRangeGetAttribute *cuMemRangeGetAttribute;
191 tcuMemRangeGetAttributes *cuMemRangeGetAttributes;
192 tcuPointerSetAttribute *cuPointerSetAttribute;
193 tcuPointerGetAttributes *cuPointerGetAttributes;
194 tcuStreamCreate *cuStreamCreate;
195 tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
196 tcuStreamGetPriority *cuStreamGetPriority;
197 tcuStreamGetFlags *cuStreamGetFlags;
198 tcuStreamGetCtx *cuStreamGetCtx;
199 tcuStreamWaitEvent *cuStreamWaitEvent;
200 tcuStreamAddCallback *cuStreamAddCallback;
201 tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
202 tcuStreamQuery *cuStreamQuery;
203 tcuStreamSynchronize *cuStreamSynchronize;
204 tcuStreamDestroy_v2 *cuStreamDestroy_v2;
205 tcuEventCreate *cuEventCreate;
206 tcuEventRecord *cuEventRecord;
207 tcuEventQuery *cuEventQuery;
208 tcuEventSynchronize *cuEventSynchronize;
209 tcuEventDestroy_v2 *cuEventDestroy_v2;
210 tcuEventElapsedTime *cuEventElapsedTime;
211 tcuStreamWaitValue32 *cuStreamWaitValue32;
212 tcuStreamWaitValue64 *cuStreamWaitValue64;
213 tcuStreamWriteValue32 *cuStreamWriteValue32;
214 tcuStreamWriteValue64 *cuStreamWriteValue64;
215 tcuStreamBatchMemOp *cuStreamBatchMemOp;
216 tcuFuncGetAttribute *cuFuncGetAttribute;
217 tcuFuncSetAttribute *cuFuncSetAttribute;
218 tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
219 tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
220 tcuLaunchKernel *cuLaunchKernel;
221 tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
222 tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
223 tcuFuncSetBlockShape *cuFuncSetBlockShape;
224 tcuFuncSetSharedSize *cuFuncSetSharedSize;
225 tcuParamSetSize *cuParamSetSize;
226 tcuParamSeti *cuParamSeti;
227 tcuParamSetf *cuParamSetf;
228 tcuParamSetv *cuParamSetv;
229 tcuLaunch *cuLaunch;
230 tcuLaunchGrid *cuLaunchGrid;
231 tcuLaunchGridAsync *cuLaunchGridAsync;
232 tcuParamSetTexRef *cuParamSetTexRef;
233 tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
234 tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
235 tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
236 tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
237 tcuTexRefSetArray *cuTexRefSetArray;
238 tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
239 tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
240 tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
241 tcuTexRefSetFormat *cuTexRefSetFormat;
242 tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
243 tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
244 tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
245 tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
246 tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
247 tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
248 tcuTexRefSetBorderColor *cuTexRefSetBorderColor;
249 tcuTexRefSetFlags *cuTexRefSetFlags;
250 tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
251 tcuTexRefGetArray *cuTexRefGetArray;
252 tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
253 tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
254 tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
255 tcuTexRefGetFormat *cuTexRefGetFormat;
256 tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
257 tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
258 tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
259 tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
260 tcuTexRefGetBorderColor *cuTexRefGetBorderColor;
261 tcuTexRefGetFlags *cuTexRefGetFlags;
262 tcuTexRefCreate *cuTexRefCreate;
263 tcuTexRefDestroy *cuTexRefDestroy;
264 tcuSurfRefSetArray *cuSurfRefSetArray;
265 tcuSurfRefGetArray *cuSurfRefGetArray;
266 tcuTexObjectCreate *cuTexObjectCreate;
267 tcuTexObjectDestroy *cuTexObjectDestroy;
268 tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
269 tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
270 tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
271 tcuSurfObjectCreate *cuSurfObjectCreate;
272 tcuSurfObjectDestroy *cuSurfObjectDestroy;
273 tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
274 tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
275 tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
276 tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
277 tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
278 tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
279 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
280 tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
281 tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
282 tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
283 tcuGraphicsMapResources *cuGraphicsMapResources;
284 tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
285 tcuGetExportTable *cuGetExportTable;
286
287 tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
288 tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
289 tcuGLGetDevices_v2 *cuGLGetDevices_v2;
290 tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
291 tcuGLInit *cuGLInit;
292 tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
293 tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
294 tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
295 tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
296 tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
297 tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
298 tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
299
300 tnvrtcGetErrorString *nvrtcGetErrorString;
301 tnvrtcVersion *nvrtcVersion;
302 tnvrtcCreateProgram *nvrtcCreateProgram;
303 tnvrtcDestroyProgram *nvrtcDestroyProgram;
304 tnvrtcCompileProgram *nvrtcCompileProgram;
305 tnvrtcGetPTXSize *nvrtcGetPTXSize;
306 tnvrtcGetPTX *nvrtcGetPTX;
307 tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
308 tnvrtcGetProgramLog *nvrtcGetProgramLog;
309 tnvrtcAddNameExpression *nvrtcAddNameExpression;
310 tnvrtcGetLoweredName *nvrtcGetLoweredName;
311
312
dynamic_library_open_find(const char ** paths)313 static DynamicLibrary dynamic_library_open_find(const char **paths) {
314 int i = 0;
315 while (paths[i] != NULL) {
316 DynamicLibrary lib = dynamic_library_open(paths[i]);
317 if (lib != NULL) {
318 return lib;
319 }
320 ++i;
321 }
322 return NULL;
323 }
324
325 /* Implementation function. */
cuewCudaExit(void)326 static void cuewCudaExit(void) {
327 if (cuda_lib != NULL) {
328 /* Ignore errors. */
329 dynamic_library_close(cuda_lib);
330 cuda_lib = NULL;
331 }
332 }
333
cuewCudaInit(void)334 static int cuewCudaInit(void) {
335 /* Library paths. */
336 #ifdef _WIN32
337 /* Expected in c:/windows/system or similar, no path needed. */
338 const char *cuda_paths[] = {"nvcuda.dll", NULL};
339 #elif defined(__APPLE__)
340 /* Default installation path. */
341 const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
342 #else
343 const char *cuda_paths[] = {"libcuda.so", "libcuda.so.1", NULL};
344 #endif
345 static int initialized = 0;
346 static int result = 0;
347 int error, driver_version;
348
349 if (initialized) {
350 return result;
351 }
352
353 initialized = 1;
354
355 error = atexit(cuewCudaExit);
356 if (error) {
357 result = CUEW_ERROR_ATEXIT_FAILED;
358 return result;
359 }
360
361 /* Load library. */
362 cuda_lib = dynamic_library_open_find(cuda_paths);
363
364 if (cuda_lib == NULL) {
365 result = CUEW_ERROR_OPEN_FAILED;
366 return result;
367 }
368
369 /* Detect driver version. */
370 driver_version = 1000;
371
372 CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
373 if (cuDriverGetVersion) {
374 cuDriverGetVersion(&driver_version);
375 }
376
377 /* We require version 4.0. */
378 if (driver_version < 4000) {
379 result = CUEW_ERROR_OPEN_FAILED;
380 return result;
381 }
382 /* Fetch all function pointers. */
383 CUDA_LIBRARY_FIND(cuGetErrorString);
384 CUDA_LIBRARY_FIND(cuGetErrorName);
385 CUDA_LIBRARY_FIND(cuInit);
386 CUDA_LIBRARY_FIND(cuDriverGetVersion);
387 CUDA_LIBRARY_FIND(cuDeviceGet);
388 CUDA_LIBRARY_FIND(cuDeviceGetCount);
389 CUDA_LIBRARY_FIND(cuDeviceGetName);
390 CUDA_LIBRARY_FIND(cuDeviceGetUuid);
391 CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
392 CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
393 CUDA_LIBRARY_FIND(cuDeviceGetProperties);
394 CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
395 CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRetain);
396 CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRelease);
397 CUDA_LIBRARY_FIND(cuDevicePrimaryCtxSetFlags);
398 CUDA_LIBRARY_FIND(cuDevicePrimaryCtxGetState);
399 CUDA_LIBRARY_FIND(cuDevicePrimaryCtxReset);
400 CUDA_LIBRARY_FIND(cuCtxCreate_v2);
401 CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
402 CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
403 CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
404 CUDA_LIBRARY_FIND(cuCtxSetCurrent);
405 CUDA_LIBRARY_FIND(cuCtxGetCurrent);
406 CUDA_LIBRARY_FIND(cuCtxGetDevice);
407 CUDA_LIBRARY_FIND(cuCtxGetFlags);
408 CUDA_LIBRARY_FIND(cuCtxSynchronize);
409 CUDA_LIBRARY_FIND(cuCtxSetLimit);
410 CUDA_LIBRARY_FIND(cuCtxGetLimit);
411 CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
412 CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
413 CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
414 CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
415 CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
416 CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
417 CUDA_LIBRARY_FIND(cuCtxAttach);
418 CUDA_LIBRARY_FIND(cuCtxDetach);
419 CUDA_LIBRARY_FIND(cuModuleLoad);
420 CUDA_LIBRARY_FIND(cuModuleLoadData);
421 CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
422 CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
423 CUDA_LIBRARY_FIND(cuModuleUnload);
424 CUDA_LIBRARY_FIND(cuModuleGetFunction);
425 CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
426 CUDA_LIBRARY_FIND(cuModuleGetTexRef);
427 CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
428 CUDA_LIBRARY_FIND(cuLinkCreate_v2);
429 CUDA_LIBRARY_FIND(cuLinkAddData_v2);
430 CUDA_LIBRARY_FIND(cuLinkAddFile_v2);
431 CUDA_LIBRARY_FIND(cuLinkComplete);
432 CUDA_LIBRARY_FIND(cuLinkDestroy);
433 CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
434 CUDA_LIBRARY_FIND(cuMemAlloc_v2);
435 CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
436 CUDA_LIBRARY_FIND(cuMemFree_v2);
437 CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
438 CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
439 CUDA_LIBRARY_FIND(cuMemFreeHost);
440 CUDA_LIBRARY_FIND(cuMemHostAlloc);
441 CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
442 CUDA_LIBRARY_FIND(cuMemHostGetFlags);
443 CUDA_LIBRARY_FIND(cuMemAllocManaged);
444 CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
445 CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
446 CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
447 CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
448 CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
449 CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
450 CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
451 CUDA_LIBRARY_FIND(cuMemHostRegister_v2);
452 CUDA_LIBRARY_FIND(cuMemHostUnregister);
453 CUDA_LIBRARY_FIND(cuMemcpy);
454 CUDA_LIBRARY_FIND(cuMemcpyPeer);
455 CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
456 CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
457 CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
458 CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
459 CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
460 CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
461 CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
462 CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
463 CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
464 CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
465 CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
466 CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
467 CUDA_LIBRARY_FIND(cuMemcpyAsync);
468 CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
469 CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
470 CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
471 CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
472 CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
473 CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
474 CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
475 CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
476 CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
477 CUDA_LIBRARY_FIND(cuMemsetD8_v2);
478 CUDA_LIBRARY_FIND(cuMemsetD16_v2);
479 CUDA_LIBRARY_FIND(cuMemsetD32_v2);
480 CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
481 CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
482 CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
483 CUDA_LIBRARY_FIND(cuMemsetD8Async);
484 CUDA_LIBRARY_FIND(cuMemsetD16Async);
485 CUDA_LIBRARY_FIND(cuMemsetD32Async);
486 CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
487 CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
488 CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
489 CUDA_LIBRARY_FIND(cuArrayCreate_v2);
490 CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
491 CUDA_LIBRARY_FIND(cuArrayDestroy);
492 CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
493 CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
494 CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
495 CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
496 CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
497 CUDA_LIBRARY_FIND(cuPointerGetAttribute);
498 CUDA_LIBRARY_FIND(cuMemPrefetchAsync);
499 CUDA_LIBRARY_FIND(cuMemAdvise);
500 CUDA_LIBRARY_FIND(cuMemRangeGetAttribute);
501 CUDA_LIBRARY_FIND(cuMemRangeGetAttributes);
502 CUDA_LIBRARY_FIND(cuPointerSetAttribute);
503 CUDA_LIBRARY_FIND(cuPointerGetAttributes);
504 CUDA_LIBRARY_FIND(cuStreamCreate);
505 CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
506 CUDA_LIBRARY_FIND(cuStreamGetPriority);
507 CUDA_LIBRARY_FIND(cuStreamGetFlags);
508 CUDA_LIBRARY_FIND(cuStreamGetCtx);
509 CUDA_LIBRARY_FIND(cuStreamWaitEvent);
510 CUDA_LIBRARY_FIND(cuStreamAddCallback);
511 CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
512 CUDA_LIBRARY_FIND(cuStreamQuery);
513 CUDA_LIBRARY_FIND(cuStreamSynchronize);
514 CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
515 CUDA_LIBRARY_FIND(cuEventCreate);
516 CUDA_LIBRARY_FIND(cuEventRecord);
517 CUDA_LIBRARY_FIND(cuEventQuery);
518 CUDA_LIBRARY_FIND(cuEventSynchronize);
519 CUDA_LIBRARY_FIND(cuEventDestroy_v2);
520 CUDA_LIBRARY_FIND(cuEventElapsedTime);
521 CUDA_LIBRARY_FIND(cuStreamWaitValue32);
522 CUDA_LIBRARY_FIND(cuStreamWaitValue64);
523 CUDA_LIBRARY_FIND(cuStreamWriteValue32);
524 CUDA_LIBRARY_FIND(cuStreamWriteValue64);
525 CUDA_LIBRARY_FIND(cuStreamBatchMemOp);
526 CUDA_LIBRARY_FIND(cuFuncGetAttribute);
527 CUDA_LIBRARY_FIND(cuFuncSetAttribute);
528 CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
529 CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
530 CUDA_LIBRARY_FIND(cuLaunchKernel);
531 CUDA_LIBRARY_FIND(cuLaunchCooperativeKernel);
532 CUDA_LIBRARY_FIND(cuLaunchCooperativeKernelMultiDevice);
533 CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
534 CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
535 CUDA_LIBRARY_FIND(cuParamSetSize);
536 CUDA_LIBRARY_FIND(cuParamSeti);
537 CUDA_LIBRARY_FIND(cuParamSetf);
538 CUDA_LIBRARY_FIND(cuParamSetv);
539 CUDA_LIBRARY_FIND(cuLaunch);
540 CUDA_LIBRARY_FIND(cuLaunchGrid);
541 CUDA_LIBRARY_FIND(cuLaunchGridAsync);
542 CUDA_LIBRARY_FIND(cuParamSetTexRef);
543 CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessor);
544 CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags);
545 CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSize);
546 CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSizeWithFlags);
547 CUDA_LIBRARY_FIND(cuTexRefSetArray);
548 CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
549 CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
550 CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
551 CUDA_LIBRARY_FIND(cuTexRefSetFormat);
552 CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
553 CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
554 CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
555 CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
556 CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
557 CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
558 CUDA_LIBRARY_FIND(cuTexRefSetBorderColor);
559 CUDA_LIBRARY_FIND(cuTexRefSetFlags);
560 CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
561 CUDA_LIBRARY_FIND(cuTexRefGetArray);
562 CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
563 CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
564 CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
565 CUDA_LIBRARY_FIND(cuTexRefGetFormat);
566 CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
567 CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
568 CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
569 CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
570 CUDA_LIBRARY_FIND(cuTexRefGetBorderColor);
571 CUDA_LIBRARY_FIND(cuTexRefGetFlags);
572 CUDA_LIBRARY_FIND(cuTexRefCreate);
573 CUDA_LIBRARY_FIND(cuTexRefDestroy);
574 CUDA_LIBRARY_FIND(cuSurfRefSetArray);
575 CUDA_LIBRARY_FIND(cuSurfRefGetArray);
576 CUDA_LIBRARY_FIND(cuTexObjectCreate);
577 CUDA_LIBRARY_FIND(cuTexObjectDestroy);
578 CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
579 CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
580 CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
581 CUDA_LIBRARY_FIND(cuSurfObjectCreate);
582 CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
583 CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
584 CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
585 CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
586 CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
587 CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
588 CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
589 CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
590 CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
591 CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
592 CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags_v2);
593 CUDA_LIBRARY_FIND(cuGraphicsMapResources);
594 CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
595 CUDA_LIBRARY_FIND(cuGetExportTable);
596
597 CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
598 CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
599 CUDA_LIBRARY_FIND(cuGLGetDevices_v2);
600 CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
601 CUDA_LIBRARY_FIND(cuGLInit);
602 CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
603 CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
604 CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
605 CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
606 CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
607 CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
608 CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
609
610 result = CUEW_SUCCESS;
611 return result;
612 }
613
cuewExitNvrtc(void)614 static void cuewExitNvrtc(void) {
615 if (nvrtc_lib != NULL) {
616 /* Ignore errors. */
617 dynamic_library_close(nvrtc_lib);
618 nvrtc_lib = NULL;
619 }
620 }
621
cuewNvrtcInit(void)622 static int cuewNvrtcInit(void) {
623 /* Library paths. */
624 #ifdef _WIN32
625 /* Expected in c:/windows/system or similar, no path needed. */
626 const char *nvrtc_paths[] = {"nvrtc64_101_0.dll",
627 "nvrtc64_100_0.dll",
628 "nvrtc64_91.dll",
629 "nvrtc64_90.dll",
630 "nvrtc64_80.dll",
631 NULL};
632 #elif defined(__APPLE__)
633 /* Default installation path. */
634 const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
635 #else
636 const char *nvrtc_paths[] = {"libnvrtc.so",
637 # if defined(__x86_64__) || defined(_M_X64)
638 "/usr/local/cuda/lib64/libnvrtc.so",
639 #else
640 "/usr/local/cuda/lib/libnvrtc.so",
641 #endif
642 NULL};
643 #endif
644 static int initialized = 0;
645 static int result = 0;
646 int error;
647
648 if (initialized) {
649 return result;
650 }
651
652 initialized = 1;
653
654 error = atexit(cuewExitNvrtc);
655 if (error) {
656 result = CUEW_ERROR_ATEXIT_FAILED;
657 return result;
658 }
659
660 /* Load library. */
661 nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
662
663 if (nvrtc_lib == NULL) {
664 result = CUEW_ERROR_OPEN_FAILED;
665 return result;
666 }
667
668 NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
669 NVRTC_LIBRARY_FIND(nvrtcVersion);
670 NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
671 NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
672 NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
673 NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
674 NVRTC_LIBRARY_FIND(nvrtcGetPTX);
675 NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
676 NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
677 NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
678 NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);
679
680 result = CUEW_SUCCESS;
681 return result;
682 }
683
684
/* Initialize the libraries selected by `flags`.
 *
 * `flags` is a bit mask: CUEW_INIT_CUDA requests the CUDA driver library and
 * CUEW_INIT_NVRTC requests NVRTC. CUDA is handled first; the first failing
 * initialization stops the sequence and its error code is returned.
 * Returns CUEW_SUCCESS when everything requested succeeded (or when nothing
 * was requested). */
int cuewInit(cuuint32_t flags) {
  if ((flags & CUEW_INIT_CUDA) != 0) {
    const int cuda_status = cuewCudaInit();
    if (cuda_status != CUEW_SUCCESS) {
      return cuda_status;
    }
  }

  if ((flags & CUEW_INIT_NVRTC) != 0) {
    const int nvrtc_status = cuewNvrtcInit();
    if (nvrtc_status != CUEW_SUCCESS) {
      return nvrtc_status;
    }
  }

  return CUEW_SUCCESS;
}
704
705
/* Translate a CUDA driver result code into a short English description.
 *
 * Returns a pointer to a string literal; codes that are not in the table
 * yield "Unknown CUDA error value". */
const char *cuewErrorString(CUresult result) {
  static const struct {
    CUresult code;
    const char *text;
  } messages[] = {
      {CUDA_SUCCESS, "No errors"},
      {CUDA_ERROR_INVALID_VALUE, "Invalid value"},
      {CUDA_ERROR_OUT_OF_MEMORY, "Out of memory"},
      {CUDA_ERROR_NOT_INITIALIZED, "Driver not initialized"},
      {CUDA_ERROR_DEINITIALIZED, "Driver deinitialized"},
      {CUDA_ERROR_PROFILER_DISABLED, "Profiler disabled"},
      {CUDA_ERROR_PROFILER_NOT_INITIALIZED, "Profiler not initialized"},
      {CUDA_ERROR_PROFILER_ALREADY_STARTED, "Profiler already started"},
      {CUDA_ERROR_PROFILER_ALREADY_STOPPED, "Profiler already stopped"},
      {CUDA_ERROR_NO_DEVICE, "No CUDA-capable device available"},
      {CUDA_ERROR_INVALID_DEVICE, "Invalid device"},
      {CUDA_ERROR_INVALID_IMAGE, "Invalid kernel image"},
      {CUDA_ERROR_INVALID_CONTEXT, "Invalid context"},
      {CUDA_ERROR_CONTEXT_ALREADY_CURRENT, "Context already current"},
      {CUDA_ERROR_MAP_FAILED, "Map failed"},
      {CUDA_ERROR_UNMAP_FAILED, "Unmap failed"},
      {CUDA_ERROR_ARRAY_IS_MAPPED, "Array is mapped"},
      {CUDA_ERROR_ALREADY_MAPPED, "Already mapped"},
      {CUDA_ERROR_NO_BINARY_FOR_GPU, "No binary for GPU"},
      {CUDA_ERROR_ALREADY_ACQUIRED, "Already acquired"},
      {CUDA_ERROR_NOT_MAPPED, "Not mapped"},
      {CUDA_ERROR_NOT_MAPPED_AS_ARRAY, "Mapped resource not available for access as an array"},
      {CUDA_ERROR_NOT_MAPPED_AS_POINTER, "Mapped resource not available for access as a pointer"},
      {CUDA_ERROR_ECC_UNCORRECTABLE, "Uncorrectable ECC error detected"},
      {CUDA_ERROR_UNSUPPORTED_LIMIT, "CUlimit not supported by device"},
      {CUDA_ERROR_CONTEXT_ALREADY_IN_USE, "Context already in use"},
      {CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, "Peer access unsupported"},
      {CUDA_ERROR_INVALID_PTX, "Invalid ptx"},
      {CUDA_ERROR_INVALID_GRAPHICS_CONTEXT, "Invalid graphics context"},
      {CUDA_ERROR_NVLINK_UNCORRECTABLE, "Nvlink uncorrectable"},
      {CUDA_ERROR_JIT_COMPILER_NOT_FOUND, "Jit compiler not found"},
      {CUDA_ERROR_INVALID_SOURCE, "Invalid source"},
      {CUDA_ERROR_FILE_NOT_FOUND, "File not found"},
      {CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, "Link to a shared object failed to resolve"},
      {CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, "Shared object initialization failed"},
      {CUDA_ERROR_OPERATING_SYSTEM, "Operating system"},
      {CUDA_ERROR_INVALID_HANDLE, "Invalid handle"},
      {CUDA_ERROR_NOT_FOUND, "Not found"},
      {CUDA_ERROR_NOT_READY, "CUDA not ready"},
      {CUDA_ERROR_ILLEGAL_ADDRESS, "Illegal address"},
      {CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, "Launch exceeded resources"},
      {CUDA_ERROR_LAUNCH_TIMEOUT, "Launch exceeded timeout"},
      {CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, "Launch with incompatible texturing"},
      {CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, "Peer access already enabled"},
      {CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, "Peer access not enabled"},
      {CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, "Primary context active"},
      {CUDA_ERROR_CONTEXT_IS_DESTROYED, "Context is destroyed"},
      {CUDA_ERROR_ASSERT, "Assert"},
      {CUDA_ERROR_TOO_MANY_PEERS, "Too many peers"},
      {CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, "Host memory already registered"},
      {CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, "Host memory not registered"},
      {CUDA_ERROR_HARDWARE_STACK_ERROR, "Hardware stack error"},
      {CUDA_ERROR_ILLEGAL_INSTRUCTION, "Illegal instruction"},
      {CUDA_ERROR_MISALIGNED_ADDRESS, "Misaligned address"},
      {CUDA_ERROR_INVALID_ADDRESS_SPACE, "Invalid address space"},
      {CUDA_ERROR_INVALID_PC, "Invalid pc"},
      {CUDA_ERROR_LAUNCH_FAILED, "Launch failed"},
      {CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, "Cooperative launch too large"},
      {CUDA_ERROR_NOT_PERMITTED, "Not permitted"},
      {CUDA_ERROR_NOT_SUPPORTED, "Not supported"},
      {CUDA_ERROR_UNKNOWN, "Unknown error"},
  };
  const unsigned int count = sizeof(messages) / sizeof(messages[0]);
  unsigned int index;

  for (index = 0; index < count; ++index) {
    if (messages[index].code == result) {
      return messages[index].text;
    }
  }

  return "Unknown CUDA error value";
}
772
/* Join two path components with the platform directory separator.
 *
 * Writes "<path1><separator><path2>" into `result`, where the separator is
 * '\\' on Windows and '/' elsewhere. `maxlen` is the capacity of `result` in
 * bytes, including the terminating NUL; output that does not fit is
 * truncated. `result` is always NUL-terminated when `maxlen` is positive and
 * is left untouched when `maxlen` is zero or negative. */
static void path_join(const char *path1,
                      const char *path2,
                      int maxlen,
                      char *result) {
#if defined(WIN32) || defined(_WIN32)
  const char separator = '\\';
#else
  const char separator = '/';
#endif
  int n;

  /* Without any capacity there is not even room for the terminator. */
  if (maxlen <= 0) {
    return;
  }

  n = snprintf(result, (size_t)maxlen, "%s%c%s", path1, separator, path2);
  if (n >= 0 && n < maxlen) {
    result[n] = '\0';
  }
  else {
    /* Error (any negative value) or truncation: older MSVC _snprintf does
     * not terminate in that case, so terminate at the end of the buffer. */
    result[maxlen - 1] = '\0';
  }
}
790
/* Report whether `path` can be stat()'ed.
 *
 * Returns 1 when stat() succeeds on `path` and 0 when it fails. */
static int path_exists(const char *path) {
  struct stat info;
  return stat(path, &info) == 0;
}
798
/* Locate the nvcc compiler.
 *
 * Search order:
 *   1. the directory named by the CUDA_BIN_PATH environment variable,
 *   2. a built-in list of default installation directories,
 *   3. the command search path, probed by running "where nvcc" (Windows)
 *      or "which nvcc" (elsewhere).
 *
 * Returns the full path for cases 1 and 2, the literal "nvcc" for case 3,
 * and NULL when nothing was found. A returned full path lives in a static
 * buffer that is overwritten by the next call. */
const char *cuewCompilerPath(void) {
#ifdef _WIN32
  const char *defaultpaths[] = {
      "C:/CUDA/bin",
      "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin",
      NULL};
  const char *executable = "nvcc.exe";
#else
  const char *defaultpaths[] = {
      "/Developer/NVIDIA/CUDA-5.0/bin",
      "/usr/local/cuda-5.0/bin",
      "/usr/local/cuda/bin",
      "/Developer/NVIDIA/CUDA-6.0/bin",
      "/usr/local/cuda-6.0/bin",
      "/Developer/NVIDIA/CUDA-5.5/bin",
      "/usr/local/cuda-5.5/bin",
      NULL};
  const char *executable = "nvcc";
#endif
  static char nvcc[65536];
  const char *env_dir = getenv("CUDA_BIN_PATH");
  const char **dir;
  FILE *probe;

  /* 1. Explicit override from the environment. */
  if (env_dir != NULL) {
    path_join(env_dir, executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc)) {
      return nvcc;
    }
  }

  /* 2. Well-known installation directories. */
  for (dir = defaultpaths; *dir != NULL; ++dir) {
    path_join(*dir, executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc)) {
      return nvcc;
    }
  }

  /* 3. Ask the shell whether nvcc is reachable through the search path. */
#ifdef _WIN32
  probe = popen("where nvcc", "r");
#else
  probe = popen("which nvcc", "r");
#endif
  if (probe != NULL) {
    char answer[4096] = {0};
    size_t count = fread(answer, 1, sizeof(answer) - 1, probe);
    answer[count] = '\0';
    pclose(probe);
    /* Any output at all is taken to mean that nvcc was found. */
    if (answer[0] != '\0') {
      return "nvcc";
    }
  }

  return NULL;
}
857
cuewNvrtcVersion(void)858 int cuewNvrtcVersion(void) {
859 int major, minor;
860 if (nvrtcVersion) {
861 nvrtcVersion(&major, &minor);
862 return 10 * major + minor;
863 }
864 return 0;
865 }
866
/* Determine the version of the nvcc compiler found by cuewCompilerPath().
 *
 * Runs `"<nvcc>" --version`, collects its standard output and parses the
 * number that follows "Cuda compilation tools, release ".
 *
 * Returns 10 * major + minor, or 0 when no compiler was found, the command
 * line does not fit the command buffer, the compiler could not be started,
 * or its output could not be parsed. Failures other than "no compiler found"
 * are reported on stderr. */
int cuewCompilerVersion(void) {
  const char *path = cuewCompilerPath();
  const char *marker = "Cuda compilation tools, release ";
  FILE *pipe;
  int major, minor;
  int written;
  char *versionstr;
  char buf[128];
  char output[65536] = "\0";
  char command[65536] = "\0";

  if (path == NULL) {
    return 0;
  }

  /* Build the quoted command line with a single bounded write, and refuse to
   * run a truncated command rather than writing past the buffer. */
  written = snprintf(command, sizeof(command), "\"%s\" --version", path);
  if (written < 0 || (size_t)written >= sizeof(command)) {
    fprintf(stderr, "CUDA: compiler path is too long to retrieve version\n");
    return 0;
  }

  /* get --version output */
  pipe = popen(command, "r");
  if (!pipe) {
    fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
    return 0;
  }

  /* Loop on the result of fgets() itself: it returns NULL on end-of-file and
   * on read errors alike, so the loop cannot spin on a failed stream. Output
   * beyond the capacity of `output` is read and discarded. */
  while (fgets(buf, sizeof(buf), pipe) != NULL) {
    strncat(output, buf, sizeof(output) - strlen(output) - 1);
  }

  pclose(pipe);

  /* parse version number */
  versionstr = strstr(output, marker);
  if (versionstr == NULL) {
    fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
    return 0;
  }
  versionstr += strlen(marker);

  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
    fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
    return 0;
  }

  return 10 * major + minor;
}
914