1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #ifdef GPU_ENABLED
21 #include <core/GpuUtil.h>
22 #include <core/MPIUtil.h>
23 #include <core/Util.h>
24 #include <pthread.h>
25 #include <utility>
26 #include <cstdlib>
27 #include <algorithm>
28 
//Cached properties of the device selected in gpuInit (filled there by cudaGetDeviceProperties)
cudaDeviceProp cudaDevProps;
//cuBLAS handle, created in gpuInit
cublasHandle_t cublasHandle;
#ifdef CUSOLVER_ENABLED
//cuSOLVER dense handle, created in gpuInit (only present when CUSOLVER_ENABLED is defined)
cusolverDnHandle_t cusolverHandle;
#endif

//Thread-local storage key to identify the thread that owns the gpu:
//gpuInit stores a non-null value under this key on its calling thread, and isGpuMine tests for it
pthread_key_t gpuOwnerKey;
//NOTE: At the time of writing, c++0x threads implemented in g++, but not thread-local storage
//Using pthreads mechanism here, assuming that pthreads underlie the c++0x threads
//This may not be true on Windows or for non-gcc compilers!
39 
gpuInit(FILE * fpLog,const MPIUtil * mpiHostGpu,double * nGPUs)40 bool gpuInit(FILE* fpLog, const MPIUtil* mpiHostGpu, double* nGPUs)
41 {	//Thread local storage to identify GPU owner thread
42 	pthread_key_create(&gpuOwnerKey, 0);
43 	pthread_setspecific(gpuOwnerKey, (const void*)1); //this will show up as 1 only on current thread
44 
45 	//Find compatible GPUs and select the one with maximum memory
46 	int nDevices, selectedDevice=-1; unsigned long maxGlobalMem=0;
47 	std::vector<int> compatibleDevices;
48 	cudaGetDeviceCount(&nDevices);
49 	for(int device=0; device<nDevices; device++)
50 	{	cudaDeviceProp prop;
51 		cudaGetDeviceProperties(&prop, device);
52 		std::pair<int,int> computeCap(prop.major, prop.minor);
53 		if(computeCap != std::make_pair(9999,9999) // not the emulation device
54 			&& computeCap >= std::make_pair(1,3) //compute capability >= 1.3 for double precision
55 			&& !prop.integrated) //reject on-board devices
56 		{
57 			fprintf(fpLog, "gpuInit: Found compatible cuda device %d '%s'\n", device, prop.name);
58 			compatibleDevices.push_back(device);
59 			if(prop.totalGlobalMem > maxGlobalMem)
60 			{	maxGlobalMem = prop.totalGlobalMem;
61 				selectedDevice = device;
62 			}
63 		}
64 	}
65 	if(selectedDevice < 0)
66 	{	fprintf(fpLog, "gpuInit: No compatible devices (>=1.3 compute capability, not on-board) found\n");
67 		return false;
68 	}
69 	if(nGPUs) *nGPUs = 1.;
70 
71 	//Divide GPUs between processes, if requested:
72 	if(mpiHostGpu && mpiHostGpu->nProcesses()>1) //only if more than one process per node
73 	{	selectedDevice = mpiHostGpu->iProcess() % int(compatibleDevices.size()); //round-robin selection of GPU
74 		if(nGPUs) *nGPUs = std::min(1., compatibleDevices.size()*1./mpiHostGpu->nProcesses());
75 	}
76 
77 	//Print selected devices:
78 	fprintf(fpLog, "gpuInit: Selected device %d\n", selectedDevice);
79 	cudaSetDevice(selectedDevice);
80 	cudaGetDeviceProperties(&cudaDevProps, selectedDevice);
81 	cublasCreate(&cublasHandle);
82 	#ifdef CUSOLVER_ENABLED
83 	cusolverDnCreate(&cusolverHandle);
84 	#endif
85 	return true;
86 }
87 
isGpuMine()88 bool isGpuMine()
89 {	return bool(pthread_getspecific(gpuOwnerKey));
90 }
91 
gpuErrorCheck()92 void gpuErrorCheck()
93 {	//cudaDeviceSynchronize(); //NOTE: Uncomment this when trying to debug GPU kernel launches
94 	cudaError_t err = cudaGetLastError();
95 	if(err != cudaSuccess)
96 	{	fprintf(stderr, "CUDA Error: %s\n", cudaGetErrorString(err));
97 		stackTraceExit(1);
98 	}
99 }
100 
101 #endif //GPU_ENABLED
102