1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
8 //                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
9 //                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //
11 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
12 //////////////////////////////////////////////////////////////////////////////////////
13 
14 
#include "config.h"
#include "gpu_misc.h"

#include <cstdio>
#include <cstdlib>
17 
18 namespace gpu
19 {
20 cudaStream_t kernelStream;
21 cudaStream_t memoryStream;
22 
23 cudaEvent_t syncEvent;
24 
25 cudaEvent_t gradientSyncDiracEvent;
26 cudaEvent_t gradientSyncOneBodyEvent;
27 cudaEvent_t gradientSyncTwoBodyEvent;
28 cudaEvent_t ratioSyncDiracEvent;
29 cudaEvent_t ratioSyncOneBodyEvent;
30 cudaEvent_t ratioSyncTwoBodyEvent;
31 cublasHandle_t cublasHandle;
32 
33 size_t MaxGPUSpineSizeMB;
34 int rank;
35 int relative_rank;                     // relative rank number on the node the rank is on, counting starts at zero
36 int device_group_size;                 // size of the lists below
37 bool cudamps;                          // is set to true if Cuda MPS service is running
38 std::vector<int> device_group_numbers; // on node list of GPU device numbers with respect to relative rank number
39 std::vector<int>
40     device_rank_numbers; // on node list of MPI rank numbers (absolute) with respect to relative rank number
41 
initCUDAStreams()42 void initCUDAStreams()
43 {
44   cudaStreamCreate(&kernelStream);
45   cudaStreamCreate(&memoryStream);
46 }
47 
initCUDAEvents()48 void initCUDAEvents()
49 {
50   cudaEventCreateWithFlags(&syncEvent, cudaEventDisableTiming);
51   cudaEventCreateWithFlags(&gradientSyncDiracEvent, cudaEventDisableTiming);
52   cudaEventCreateWithFlags(&gradientSyncOneBodyEvent, cudaEventDisableTiming);
53   cudaEventCreateWithFlags(&gradientSyncTwoBodyEvent, cudaEventDisableTiming);
54   cudaEventCreateWithFlags(&ratioSyncDiracEvent, cudaEventDisableTiming);
55   cudaEventCreateWithFlags(&ratioSyncOneBodyEvent, cudaEventDisableTiming);
56   cudaEventCreateWithFlags(&ratioSyncTwoBodyEvent, cudaEventDisableTiming);
57 }
58 
initCublas()59 void initCublas() { cublasCreate(&cublasHandle); }
60 
finalizeCUDAStreams()61 void finalizeCUDAStreams()
62 {
63   cudaStreamDestroy(kernelStream);
64   cudaStreamDestroy(memoryStream);
65 }
66 
finalizeCUDAEvents()67 void finalizeCUDAEvents()
68 {
69   cudaEventDestroy(syncEvent);
70   cudaEventDestroy(gradientSyncDiracEvent);
71   cudaEventDestroy(gradientSyncOneBodyEvent);
72   cudaEventDestroy(gradientSyncTwoBodyEvent);
73   cudaEventDestroy(ratioSyncDiracEvent);
74   cudaEventDestroy(ratioSyncOneBodyEvent);
75   cudaEventDestroy(ratioSyncTwoBodyEvent);
76 }
77 
finalizeCublas()78 void finalizeCublas() { cublasDestroy(cublasHandle); }
79 
synchronize()80 void synchronize() { cudaDeviceSynchronize(); }
81 
streamsSynchronize()82 void streamsSynchronize() { cudaEventRecord(syncEvent, 0); }
83 
84 } // namespace gpu
85