1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "yaksi.h"
7 #include "yaksuri_cudai.h"
8 #include <assert.h>
9 #include <string.h>
10 #include <cuda.h>
11 #include <cuda_runtime_api.h>
12
cuda_host_malloc(uintptr_t size)13 static void *cuda_host_malloc(uintptr_t size)
14 {
15 void *ptr = NULL;
16
17 cudaError_t cerr = cudaMallocHost(&ptr, size);
18 YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
19
20 return ptr;
21 }
22
cuda_gpu_malloc(uintptr_t size,int device)23 static void *cuda_gpu_malloc(uintptr_t size, int device)
24 {
25 void *ptr = NULL;
26 cudaError_t cerr;
27
28 int cur_device;
29 cerr = cudaGetDevice(&cur_device);
30 YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
31
32 if (cur_device != device) {
33 cerr = cudaSetDevice(device);
34 YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
35 }
36
37 cerr = cudaMalloc(&ptr, size);
38 YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
39
40 if (cur_device != device) {
41 cerr = cudaSetDevice(cur_device);
42 YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
43 }
44
45 return ptr;
46 }
47
cuda_host_free(void * ptr)48 static void cuda_host_free(void *ptr)
49 {
50 cudaError_t cerr = cudaFreeHost(ptr);
51 YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
52 }
53
cuda_gpu_free(void * ptr)54 static void cuda_gpu_free(void *ptr)
55 {
56 cudaError_t cerr = cudaFree(ptr);
57 YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
58 }
59
/* backend-wide state: device count, per-device streams, and the
 * pairwise p2p connectivity table (populated in yaksuri_cuda_init_hook,
 * released in finalize_hook) */
yaksuri_cudai_global_s yaksuri_cudai_global;
61
/* finalize_hook: backend teardown.  Destroys the per-device streams and
 * frees the p2p connectivity table created in yaksuri_cuda_init_hook.
 * Returns YAKSA_SUCCESS, or whatever error code the
 * YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP macro sets in 'rc' on a CUDA
 * failure.
 *
 * NOTE(review): on the fn_fail path the top-level 'stream' and 'p2p'
 * arrays are not freed (the jump lands after the free() calls), so a
 * CUDA error here leaks them — confirm whether that is acceptable at
 * shutdown.  Also leaves the last iterated device active rather than
 * restoring the caller's device — confirm callers do not rely on the
 * active device after finalize. */
static int finalize_hook(void)
{
    int rc = YAKSA_SUCCESS;
    cudaError_t cerr;

    /* one stream and one p2p row exist per visible device */
    for (int i = 0; i < yaksuri_cudai_global.ndevices; i++) {
        cerr = cudaSetDevice(i);
        YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

        cerr = cudaStreamDestroy(yaksuri_cudai_global.stream[i]);
        YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

        free(yaksuri_cudai_global.p2p[i]);
    }
    free(yaksuri_cudai_global.stream);
    free(yaksuri_cudai_global.p2p);

  fn_exit:
    return rc;
  fn_fail:
    goto fn_exit;
}
84
get_num_devices(int * ndevices)85 static int get_num_devices(int *ndevices)
86 {
87 *ndevices = yaksuri_cudai_global.ndevices;
88
89 return YAKSA_SUCCESS;
90 }
91
check_p2p_comm(int sdev,int ddev,bool * is_enabled)92 static int check_p2p_comm(int sdev, int ddev, bool * is_enabled)
93 {
94 #if CUDA_P2P == CUDA_P2P_ENABLED
95 *is_enabled = yaksuri_cudai_global.p2p[sdev][ddev];
96 #elif CUDA_P2P == CUDA_P2P_CLIQUES
97 if ((sdev + ddev) % 2)
98 *is_enabled = 0;
99 else
100 *is_enabled = yaksuri_cudai_global.p2p[sdev][ddev];
101 #else
102 *is_enabled = 0;
103 #endif
104
105 return YAKSA_SUCCESS;
106 }
107
/* yaksuri_cuda_init_hook: initialize the CUDA backend and return the
 * driver hook table through 'hooks'.
 *
 * Steps: count the visible devices; if CUDA_VISIBLE_DEVICES is unset
 * and any device is in an exclusive compute mode, disable GPU support
 * (*hooks = NULL) and return success; otherwise create one non-blocking
 * stream per device, probe and enable pairwise peer access (recording
 * the result in the p2p table), restore the caller's active device, and
 * populate the hook function table.
 *
 * Returns YAKSA_SUCCESS, or the error code set by
 * YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP on a CUDA failure. */
int yaksuri_cuda_init_hook(yaksur_gpudriver_hooks_s ** hooks)
{
    int rc = YAKSA_SUCCESS;
    cudaError_t cerr;

    cerr = cudaGetDeviceCount(&yaksuri_cudai_global.ndevices);
    YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

    if (getenv("CUDA_VISIBLE_DEVICES") == NULL) {
        /* user did not do any filtering for us; if any of the devices
         * is in exclusive mode, disable GPU support to avoid
         * incorrect device sharing */
        bool excl = false;
        for (int i = 0; i < yaksuri_cudai_global.ndevices; i++) {
            struct cudaDeviceProp prop;

            cerr = cudaGetDeviceProperties(&prop, i);
            YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

            if (prop.computeMode != cudaComputeModeDefault) {
                excl = true;
                break;
            }
        }

        if (excl == true) {
            fprintf(stderr, "[yaksa] ====> Disabling CUDA support <====\n");
            fprintf(stderr,
                    "[yaksa] CUDA is setup in exclusive compute mode, but CUDA_VISIBLE_DEVICES is not set\n");
            fprintf(stderr,
                    "[yaksa] You can silence this warning by setting CUDA_VISIBLE_DEVICES\n");
            fflush(stderr);
            *hooks = NULL;
            goto fn_exit;
        }
    }

    yaksuri_cudai_global.stream = (cudaStream_t *)
        malloc(yaksuri_cudai_global.ndevices * sizeof(cudaStream_t));

    yaksuri_cudai_global.p2p = (bool **) malloc(yaksuri_cudai_global.ndevices * sizeof(bool *));
    for (int i = 0; i < yaksuri_cudai_global.ndevices; i++) {
        yaksuri_cudai_global.p2p[i] = (bool *) malloc(yaksuri_cudai_global.ndevices * sizeof(bool));
    }

    /* remember the caller's active device so we can restore it below */
    int cur_device;
    cerr = cudaGetDevice(&cur_device);
    YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

    for (int i = 0; i < yaksuri_cudai_global.ndevices; i++) {
        cerr = cudaSetDevice(i);
        YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

        cerr = cudaStreamCreateWithFlags(&yaksuri_cudai_global.stream[i], cudaStreamNonBlocking);
        YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

        for (int j = 0; j < yaksuri_cudai_global.ndevices; j++) {
            if (i == j) {
                /* a device can always access itself */
                yaksuri_cudai_global.p2p[i][j] = 1;
            } else {
                int val;
                cerr = cudaDeviceCanAccessPeer(&val, i, j);
                YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

                if (val) {
                    cerr = cudaDeviceEnablePeerAccess(j, 0);
                    if (cerr == cudaErrorPeerAccessAlreadyEnabled) {
                        /* benign: peer access i -> j was already on.
                         * CUDA errors are sticky until read, so clear
                         * it here; otherwise a later error query could
                         * misreport this as a real failure. */
                        (void) cudaGetLastError();
                    } else {
                        YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);
                    }
                    yaksuri_cudai_global.p2p[i][j] = 1;
                } else {
                    yaksuri_cudai_global.p2p[i][j] = 0;
                }
            }
        }
    }

    /* restore the caller's active device */
    cerr = cudaSetDevice(cur_device);
    YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail);

    *hooks = (yaksur_gpudriver_hooks_s *) malloc(sizeof(yaksur_gpudriver_hooks_s));
    (*hooks)->get_num_devices = get_num_devices;
    (*hooks)->check_p2p_comm = check_p2p_comm;
    (*hooks)->finalize = finalize_hook;
    (*hooks)->get_iov_pack_threshold = yaksuri_cudai_get_iov_pack_threshold;
    (*hooks)->get_iov_unpack_threshold = yaksuri_cudai_get_iov_unpack_threshold;
    (*hooks)->ipack = yaksuri_cudai_ipack;
    (*hooks)->iunpack = yaksuri_cudai_iunpack;
    (*hooks)->pup_is_supported = yaksuri_cudai_pup_is_supported;
    (*hooks)->host_malloc = cuda_host_malloc;
    (*hooks)->host_free = cuda_host_free;
    (*hooks)->gpu_malloc = cuda_gpu_malloc;
    (*hooks)->gpu_free = cuda_gpu_free;
    (*hooks)->get_ptr_attr = yaksuri_cudai_get_ptr_attr;
    (*hooks)->event_record = yaksuri_cudai_event_record;
    (*hooks)->event_query = yaksuri_cudai_event_query;
    (*hooks)->add_dependency = yaksuri_cudai_add_dependency;
    (*hooks)->type_create = yaksuri_cudai_type_create_hook;
    (*hooks)->type_free = yaksuri_cudai_type_free_hook;
    (*hooks)->info_create = yaksuri_cudai_info_create_hook;
    (*hooks)->info_free = yaksuri_cudai_info_free_hook;
    (*hooks)->info_keyval_append = yaksuri_cudai_info_keyval_append;

  fn_exit:
    return rc;
  fn_fail:
    goto fn_exit;
}
216