1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
8 // Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
9 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //
11 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
12 //////////////////////////////////////////////////////////////////////////////////////
13
14
#include "config.h"
#include "gpu_misc.h"

#include <cstdio>
17
18 namespace gpu
19 {
20 cudaStream_t kernelStream;
21 cudaStream_t memoryStream;
22
23 cudaEvent_t syncEvent;
24
25 cudaEvent_t gradientSyncDiracEvent;
26 cudaEvent_t gradientSyncOneBodyEvent;
27 cudaEvent_t gradientSyncTwoBodyEvent;
28 cudaEvent_t ratioSyncDiracEvent;
29 cudaEvent_t ratioSyncOneBodyEvent;
30 cudaEvent_t ratioSyncTwoBodyEvent;
31 cublasHandle_t cublasHandle;
32
33 size_t MaxGPUSpineSizeMB;
34 int rank;
35 int relative_rank; // relative rank number on the node the rank is on, counting starts at zero
36 int device_group_size; // size of the lists below
37 bool cudamps; // is set to true if Cuda MPS service is running
38 std::vector<int> device_group_numbers; // on node list of GPU device numbers with respect to relative rank number
39 std::vector<int>
40 device_rank_numbers; // on node list of MPI rank numbers (absolute) with respect to relative rank number
41
initCUDAStreams()42 void initCUDAStreams()
43 {
44 cudaStreamCreate(&kernelStream);
45 cudaStreamCreate(&memoryStream);
46 }
47
initCUDAEvents()48 void initCUDAEvents()
49 {
50 cudaEventCreateWithFlags(&syncEvent, cudaEventDisableTiming);
51 cudaEventCreateWithFlags(&gradientSyncDiracEvent, cudaEventDisableTiming);
52 cudaEventCreateWithFlags(&gradientSyncOneBodyEvent, cudaEventDisableTiming);
53 cudaEventCreateWithFlags(&gradientSyncTwoBodyEvent, cudaEventDisableTiming);
54 cudaEventCreateWithFlags(&ratioSyncDiracEvent, cudaEventDisableTiming);
55 cudaEventCreateWithFlags(&ratioSyncOneBodyEvent, cudaEventDisableTiming);
56 cudaEventCreateWithFlags(&ratioSyncTwoBodyEvent, cudaEventDisableTiming);
57 }
58
initCublas()59 void initCublas() { cublasCreate(&cublasHandle); }
60
finalizeCUDAStreams()61 void finalizeCUDAStreams()
62 {
63 cudaStreamDestroy(kernelStream);
64 cudaStreamDestroy(memoryStream);
65 }
66
finalizeCUDAEvents()67 void finalizeCUDAEvents()
68 {
69 cudaEventDestroy(syncEvent);
70 cudaEventDestroy(gradientSyncDiracEvent);
71 cudaEventDestroy(gradientSyncOneBodyEvent);
72 cudaEventDestroy(gradientSyncTwoBodyEvent);
73 cudaEventDestroy(ratioSyncDiracEvent);
74 cudaEventDestroy(ratioSyncOneBodyEvent);
75 cudaEventDestroy(ratioSyncTwoBodyEvent);
76 }
77
finalizeCublas()78 void finalizeCublas() { cublasDestroy(cublasHandle); }
79
synchronize()80 void synchronize() { cudaDeviceSynchronize(); }
81
streamsSynchronize()82 void streamsSynchronize() { cudaEventRecord(syncEvent, 0); }
83
84 } // namespace gpu
85