1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3
4 This file is part of JDFTx.
5
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19
20 #ifdef GPU_ENABLED
21 #include <core/GpuUtil.h>
22 #include <core/MPIUtil.h>
23 #include <core/Util.h>
24 #include <pthread.h>
25 #include <utility>
26 #include <cstdlib>
27 #include <algorithm>
28
29 cudaDeviceProp cudaDevProps; //cached properties of currently running device (filled by gpuInit via cudaGetDeviceProperties)
30 cublasHandle_t cublasHandle; //cuBLAS handle, created in gpuInit
31 #ifdef CUSOLVER_ENABLED
32 cusolverDnHandle_t cusolverHandle; //cuSOLVER dense handle, created in gpuInit (only compiled when CUSOLVER_ENABLED is defined)
33 #endif
34
35 pthread_key_t gpuOwnerKey; //thread-local storage to identify thread that owns gpu: gpuInit stores 1 under this key on its calling thread, isGpuMine reads it back
36 //NOTE: At the time of writing, c++0x threads were implemented in g++, but thread-local storage was not.
37 //The pthreads mechanism is used here instead, assuming that pthreads underlie the c++0x threads.
38 //This may not be true on Windows or for non-gcc compilers!
39
gpuInit(FILE * fpLog,const MPIUtil * mpiHostGpu,double * nGPUs)40 bool gpuInit(FILE* fpLog, const MPIUtil* mpiHostGpu, double* nGPUs)
41 { //Thread local storage to identify GPU owner thread
42 pthread_key_create(&gpuOwnerKey, 0);
43 pthread_setspecific(gpuOwnerKey, (const void*)1); //this will show up as 1 only on current thread
44
45 //Find compatible GPUs and select the one with maximum memory
46 int nDevices, selectedDevice=-1; unsigned long maxGlobalMem=0;
47 std::vector<int> compatibleDevices;
48 cudaGetDeviceCount(&nDevices);
49 for(int device=0; device<nDevices; device++)
50 { cudaDeviceProp prop;
51 cudaGetDeviceProperties(&prop, device);
52 std::pair<int,int> computeCap(prop.major, prop.minor);
53 if(computeCap != std::make_pair(9999,9999) // not the emulation device
54 && computeCap >= std::make_pair(1,3) //compute capability >= 1.3 for double precision
55 && !prop.integrated) //reject on-board devices
56 {
57 fprintf(fpLog, "gpuInit: Found compatible cuda device %d '%s'\n", device, prop.name);
58 compatibleDevices.push_back(device);
59 if(prop.totalGlobalMem > maxGlobalMem)
60 { maxGlobalMem = prop.totalGlobalMem;
61 selectedDevice = device;
62 }
63 }
64 }
65 if(selectedDevice < 0)
66 { fprintf(fpLog, "gpuInit: No compatible devices (>=1.3 compute capability, not on-board) found\n");
67 return false;
68 }
69 if(nGPUs) *nGPUs = 1.;
70
71 //Divide GPUs between processes, if requested:
72 if(mpiHostGpu && mpiHostGpu->nProcesses()>1) //only if more than one process per node
73 { selectedDevice = mpiHostGpu->iProcess() % int(compatibleDevices.size()); //round-robin selection of GPU
74 if(nGPUs) *nGPUs = std::min(1., compatibleDevices.size()*1./mpiHostGpu->nProcesses());
75 }
76
77 //Print selected devices:
78 fprintf(fpLog, "gpuInit: Selected device %d\n", selectedDevice);
79 cudaSetDevice(selectedDevice);
80 cudaGetDeviceProperties(&cudaDevProps, selectedDevice);
81 cublasCreate(&cublasHandle);
82 #ifdef CUSOLVER_ENABLED
83 cusolverDnCreate(&cusolverHandle);
84 #endif
85 return true;
86 }
87
isGpuMine()88 bool isGpuMine()
89 { return bool(pthread_getspecific(gpuOwnerKey));
90 }
91
gpuErrorCheck()92 void gpuErrorCheck()
93 { //cudaDeviceSynchronize(); //NOTE: Uncomment this when trying to debug GPU kernel launches
94 cudaError_t err = cudaGetLastError();
95 if(err != cudaSuccess)
96 { fprintf(stderr, "CUDA Error: %s\n", cudaGetErrorString(err));
97 stackTraceExit(1);
98 }
99 }
100
101 #endif //GPU_ENABLED
102