1 
2 #include <iostream>
3 
4 #include <cublas_v2.h>
5 #include <cuda_runtime.h>
6 
7 #if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)
8 
9 #  include <pthread.h>
// Calls pthread_self() so that this translation unit references a pthread
// symbol, and returns the calling thread's id converted to int.
static int verify_linking_to_pthread()
{
  const pthread_t self = pthread_self();
  return static_cast<int>(self);
}
14 #endif
15 
// This test only makes sense for versions of CUDA that ship
// static libraries that have separable compilation device symbols.
18 #if __CUDACC_VER_MAJOR__ <= 9
// Kernel that creates a cuBLAS handle in device code, issues a single
// cublasSgemm call through it, and destroys the handle again.
//
// n is forwarded as every matrix dimension and leading dimension; alpha and
// beta are forwarded by address as the scaling factors; d_A, d_B and d_C are
// forwarded as the A, B and C matrix arguments. Every thread that executes
// the kernel performs these steps itself. When the handle cannot be created
// the kernel does nothing further. The cublasSgemm status is not reported
// back to the caller.
__global__ void deviceCublasSgemm(int n, float alpha, float beta,
                                  const float* d_A, const float* d_B,
                                  float* d_C)
{
  cublasHandle_t handle;

  if (cublasCreate(&handle) == CUBLAS_STATUS_SUCCESS) {
    // Call function defined in the cublas_device system static library.
    // This way we can verify that we properly pass system libraries to the
    // device link line
    const cublasStatus_t gemmStatus =
      cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha, d_A, n,
                  d_B, n, &beta, d_C, n);
    (void)gemmStatus; // intentionally unused; only the symbol reference matters

    cublasDestroy(handle);
  }
}
38 #endif
39 
// Makes the first CUDA device with compute capability >= 3.5 the current
// device.
//
// Returns 0 once cudaSetDevice() succeeds for such a device. Returns 1 when
// the device count or a device's properties cannot be queried, or when no
// device with sufficient compute capability could be selected.
int choose_cuda_device()
{
  int deviceCount = 0;
  if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) {
    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
              << std::endl;
    return 1;
  }

  for (int device = 0; device < deviceCount; ++device) {
    cudaDeviceProp props;
    if (cudaGetDeviceProperties(&props, device) != cudaSuccess) {
      std::cerr << "Could not retrieve properties from CUDA device " << device
                << std::endl;
      return 1;
    }

    // Skip devices below compute capability 3.5.
    const bool supported =
      props.major > 3 || (props.major == 3 && props.minor >= 5);
    if (!supported) {
      continue;
    }

    if (cudaSetDevice(device) == cudaSuccess) {
      return 0;
    }

    // Selection failed: report it and keep trying the remaining devices.
    std::cout << "Could not select CUDA device " << device << std::endl;
  }

  std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
            << std::endl;
  return 1;
}
72 
// Entry point of the device-link test.
//
// Selects a usable CUDA device and, when compiled with CUDA <= 9, launches
// deviceCublasSgemm once with a single thread, then waits for it to finish.
// argc and argv are unused.
//
// Always returns 0. An unusable device is treated as "nothing to run", and
// kernel launch/execution errors are reported on stderr as diagnostics only,
// keeping the same lenient exit-code policy.
int main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  // No suitable device is not a failure of this test.
  if (choose_cuda_device() != 0) {
    return 0;
  }

#if __CUDACC_VER_MAJOR__ <= 9
  // initial values that will make sure that the cublasSgemm won't actually
  // do any work
  int n = 0;
  float alpha = 1.0f;
  float beta = 1.0f;
  float* d_A = nullptr;
  float* d_B = nullptr;
  float* d_C = nullptr;
  deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);

  // A kernel launch returns no status of its own; launch-configuration
  // errors have to be fetched explicitly.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    std::cerr << "Failed to launch deviceCublasSgemm: "
              << cudaGetErrorString(err) << std::endl;
  } else {
    // The launch is asynchronous: wait for the kernel so that it actually
    // runs before the process exits and execution errors become visible.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
      std::cerr << "deviceCublasSgemm failed during execution: "
                << cudaGetErrorString(err) << std::endl;
    }
  }
#endif

  return 0;
}
94