1
2 #include <iostream>
3
4 #include <cublas_v2.h>
5 #include <cuda_runtime.h>
6
7 #if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)
8
9 # include <pthread.h>
// Reference a pthread symbol so that the program only links when the
// pthread library is on the link line. Returns the calling thread's id
// converted to int.
static int verify_linking_to_pthread()
{
  const pthread_t self = pthread_self();
  return static_cast<int>(self);
}
14 #endif
15
// This test only makes sense for versions of CUDA that ship
// static libraries that have separable compilation device symbols.
18 #if __CUDACC_VER_MAJOR__ <= 9
// Kernel that creates a cuBLAS handle in device code, issues one SGEMM on
// n x n operands (leading dimension n for A, B and C) and destroys the
// handle again.
//
// The kernel does not use thread or block indices, so every thread that
// executes it performs the whole create / SGEMM / destroy sequence.
// If the handle cannot be created the kernel returns without doing
// anything else. No status is reported back to the host.
__global__ void deviceCublasSgemm(int n, float alpha, float beta,
                                  const float* d_A, const float* d_B,
                                  float* d_C)
{
  cublasHandle_t handle;
  if (cublasCreate(&handle) == CUBLAS_STATUS_SUCCESS) {
    // Call function defined in the cublas_device system static library.
    // This way we can verify that we properly pass system libraries to the
    // device link line. The returned status is intentionally discarded.
    (void)cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha, d_A,
                      n, d_B, n, &beta, d_C, n);

    cublasDestroy(handle);
  }
}
38 #endif
39
// Select the first CUDA device whose compute capability is at least 3.5.
//
// Devices are visited in index order. A device that meets the capability
// requirement but cannot be selected is reported and skipped.
//
// Returns 0 once a device has been selected with cudaSetDevice(), and 1 if
// the device count or a device's properties cannot be queried, or if no
// suitable device could be selected.
int choose_cuda_device()
{
  int deviceCount = 0;
  if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) {
    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
              << std::endl;
    return 1;
  }

  for (int device = 0; device < deviceCount; ++device) {
    cudaDeviceProp properties;
    if (cudaGetDeviceProperties(&properties, device) != cudaSuccess) {
      std::cerr << "Could not retrieve properties from CUDA device " << device
                << std::endl;
      return 1;
    }

    // Only devices with compute capability 3.5 or newer are candidates.
    const bool isCandidate = properties.major > 3 ||
      (properties.major == 3 && properties.minor >= 5);
    if (!isCandidate) {
      continue;
    }

    if (cudaSetDevice(device) == cudaSuccess) {
      return 0;
    }
    // Selection failed; keep looking at the remaining devices.
    std::cout << "Could not select CUDA device " << device << std::endl;
  }

  std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
            << std::endl;
  return 1;
}
72
// Entry point: select a usable CUDA device and, for CUDA toolkits up to
// version 9, launch deviceCublasSgemm once with a single thread.
//
// The command-line arguments are not used.
//
// The exit status is always 0. When no device can be selected the kernel is
// not launched at all. When the launch or the kernel execution reports an
// error, the error is written to std::cerr but is not turned into a
// failing exit status, matching the handling of a missing device.
int main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  if (choose_cuda_device() != 0) {
    return 0;
  }

#if __CUDACC_VER_MAJOR__ <= 9
  // initial values that will make sure that the cublasSgemm won't actually
  // do any work
  int n = 0;
  float alpha = 1.0f;
  float beta = 1.0f;
  float* d_A = nullptr;
  float* d_B = nullptr;
  float* d_C = nullptr;
  deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);

  // A kernel launch does not return an error code itself: configuration
  // errors are picked up with cudaGetLastError(), and errors raised while
  // the kernel runs are reported by the next synchronizing call.
  cudaError_t err = cudaGetLastError();
  if (err == cudaSuccess) {
    err = cudaDeviceSynchronize();
  }
  if (err != cudaSuccess) {
    std::cerr << "deviceCublasSgemm kernel failed: "
              << cudaGetErrorString(err) << std::endl;
  }
#endif

  return 0;
}
94