/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_rm_mem.h"
#include "uvm_gpu.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_linux.h"
#include "nv_uvm_interface.h"

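// Query whether rm_mem is mapped in the UVM internal VA space of the given
// GPU.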
bool uvm_rm_mem_mapped_on_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    return uvm_processor_mask_test(&rm_mem->mapped_on, gpu->id);
}

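// Query whether rm_mem is also mapped in the given GPU's proxy VA space. A
// valid proxy VA implies a mapping in the UVM internal VA space, and proxy
// mappings only exist on parent GPUs that need a proxy channel pool
// (presumably configurations where UVM submits work indirectly through RM's
// paging channels, e.g. certain virtualization modes).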
bool uvm_rm_mem_mapped_on_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    if (rm_mem->proxy_vas == NULL)
        return false;

    if (rm_mem->proxy_vas[uvm_id_value(gpu->id)] == 0)
        return false;

    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
    UVM_ASSERT(uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent));

    return true;
}

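// Query whether rm_mem is mapped on the CPU.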
bool uvm_rm_mem_mapped_on_cpu(uvm_rm_mem_t *rm_mem)
{
    return uvm_processor_mask_test(&rm_mem->mapped_on, UVM_ID_CPU);
}

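// Internal helpers that keep the vas/proxy_vas arrays and the mapped_on mask
// in sync. Note that the proxy setter/clearer do not touch mapped_on: the mask
// tracks mappings in the UVM internal VA spaces only, and rm_mem_clear_gpu_va()
// asserts that any proxy mapping has already been torn down.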
static void rm_mem_set_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
    rm_mem->vas[uvm_id_value(gpu->id)] = va;
    uvm_processor_mask_set(&rm_mem->mapped_on, gpu->id);
}

static void rm_mem_set_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
    rm_mem->proxy_vas[uvm_id_value(gpu->id)] = va;
}

static void rm_mem_set_cpu_va(uvm_rm_mem_t *rm_mem, void *va)
{
    rm_mem->vas[UVM_ID_CPU_VALUE] = (uintptr_t)va;
    uvm_processor_mask_set(&rm_mem->mapped_on, UVM_ID_CPU);
}

static void rm_mem_clear_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

    uvm_processor_mask_clear(&rm_mem->mapped_on, gpu->id);
    rm_mem->vas[uvm_id_value(gpu->id)] = 0;
}

static void rm_mem_clear_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    rm_mem->proxy_vas[uvm_id_value(gpu->id)] = 0;
}

static void rm_mem_clear_cpu_va(uvm_rm_mem_t *rm_mem)
{
    uvm_processor_mask_clear(&rm_mem->mapped_on, UVM_ID_CPU);
    rm_mem->vas[UVM_ID_CPU_VALUE] = 0;
}

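// Look up the VA of rm_mem in the given GPU's UVM internal VA space; the
// mapping must already exist.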
NvU64 uvm_rm_mem_get_gpu_uvm_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT_MSG(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu), "GPU %s\n", uvm_gpu_name(gpu));

    return rm_mem->vas[uvm_id_value(gpu->id)];
}

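// Look up the VA of rm_mem in the given GPU's proxy VA space; the proxy
// mapping must already exist.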
NvU64 uvm_rm_mem_get_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

    return rm_mem->proxy_vas[uvm_id_value(gpu->id)];
}

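// Build a uvm_gpu_address_t for rm_mem in either the UVM internal or the proxy
// VA space. When Confidential Computing is enabled, sysmem allocations are
// unprotected, and the returned address is flagged accordingly.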
uvm_gpu_address_t uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    uvm_gpu_address_t gpu_va = {0};

    gpu_va.aperture = UVM_APERTURE_MAX;
    gpu_va.is_virtual = true;

    if (g_uvm_global.conf_computing_enabled && (rm_mem->type == UVM_RM_MEM_TYPE_SYS))
        gpu_va.is_unprotected = true;

    if (is_proxy_va_space)
        gpu_va.address = uvm_rm_mem_get_gpu_proxy_va(rm_mem, gpu);
    else
        gpu_va.address = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);

    return gpu_va;
}

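// Look up the CPU pointer for rm_mem; the CPU mapping must already exist.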
void *uvm_rm_mem_get_cpu_va(uvm_rm_mem_t *rm_mem)
{
    UVM_ASSERT(uvm_rm_mem_mapped_on_cpu(rm_mem));

    return (void *)(uintptr_t)rm_mem->vas[UVM_ID_CPU_VALUE];
}

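// Create a mapping of rm_mem in the given GPU's proxy VA space by asking RM to
// map the owner GPU's allocation into the target GPU's paging channels.
// Returns NV_OK without doing anything if the GPU needs no proxy channel pool
// or if the proxy mapping already exists. The proxy_vas array is allocated
// lazily on first use, sized to match the vas array.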
static NV_STATUS rm_mem_map_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;
    NvU64 proxy_va;

    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));

    if (!uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent))
        return NV_OK;

    if (uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu))
        return NV_OK;

    if (rm_mem->proxy_vas == NULL) {
        NvU64 *proxy_vas = uvm_kvmalloc_zero(sizeof(rm_mem->vas));
        if (proxy_vas == NULL)
            return NV_ERR_NO_MEMORY;

        rm_mem->proxy_vas = proxy_vas;
    }

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    status = uvm_rm_locked_call(nvUvmInterfacePagingChannelsMap(gpu_owner->rm_address_space,
                                                                gpu_owner_va,
                                                                uvm_gpu_device_handle(gpu),
                                                                &proxy_va));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfacePagingChannelsMap() failed: %s, src GPU %s, dst GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu_owner),
                      uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_gpu_proxy_va(rm_mem, gpu, proxy_va);

    return NV_OK;
}

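// Tear down the proxy mapping on the given GPU, if one exists.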
static void rm_mem_unmap_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;

    if (!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu))
        return;

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    uvm_rm_locked_call_void(nvUvmInterfacePagingChannelsUnmap(gpu_owner->rm_address_space,
                                                              gpu_owner_va,
                                                              uvm_gpu_device_handle(gpu)));

    rm_mem_clear_gpu_proxy_va(rm_mem, gpu);
}

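// Allocate sysmem or vidmem through RM, record the resulting mapping in the
// owning GPU's UVM internal VA space, and add a proxy mapping when required.
// With Confidential Computing enabled, only sysmem is allocated unprotected
// and vidmem remains protected; without it, all allocations are unprotected.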
NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu,
                           uvm_rm_mem_type_t type,
                           NvLength size,
                           NvU64 gpu_alignment,
                           uvm_rm_mem_t **rm_mem_out)
{
    NV_STATUS status = NV_OK;
    uvm_rm_mem_t *rm_mem;
    UvmGpuAllocInfo alloc_info = { 0 };
    NvU64 gpu_va;

    UVM_ASSERT(gpu);
    UVM_ASSERT((type == UVM_RM_MEM_TYPE_SYS) || (type == UVM_RM_MEM_TYPE_GPU));
    UVM_ASSERT(size != 0);

    rm_mem = uvm_kvmalloc_zero(sizeof(*rm_mem));
    if (rm_mem == NULL)
        return NV_ERR_NO_MEMORY;

    if (!g_uvm_global.conf_computing_enabled || type == UVM_RM_MEM_TYPE_SYS)
        alloc_info.bUnprotected = NV_TRUE;

    alloc_info.alignment = gpu_alignment;

    if (type == UVM_RM_MEM_TYPE_SYS)
        status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocSys(gpu->rm_address_space, size, &gpu_va, &alloc_info));
    else
        status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocFB(gpu->rm_address_space, size, &gpu_va, &alloc_info));

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceMemoryAlloc%s() failed: %s, GPU %s\n",
                      type == UVM_RM_MEM_TYPE_SYS ? "Sys" : "FB",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        goto error;
    }

    rm_mem->gpu_owner = gpu;
    rm_mem->type = type;
    rm_mem->size = size;
    rm_mem_set_gpu_va(rm_mem, gpu, gpu_va);

    status = rm_mem_map_gpu_proxy(rm_mem, gpu);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;
    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}

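// Map rm_mem on the CPU; no-op if already mapped. With Confidential Computing
// enabled, CPU mappings are restricted to (unprotected) sysmem allocations.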
NV_STATUS uvm_rm_mem_map_cpu(uvm_rm_mem_t *rm_mem)
{
    NV_STATUS status;
    uvm_gpu_t *gpu;
    NvU64 gpu_va;
    void *cpu_va;

    UVM_ASSERT(rm_mem);

    if (uvm_rm_mem_mapped_on_cpu(rm_mem))
        return NV_OK;

    if (g_uvm_global.conf_computing_enabled)
        UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);

    gpu = rm_mem->gpu_owner;
    gpu_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);

    status = uvm_rm_locked_call(nvUvmInterfaceMemoryCpuMap(gpu->rm_address_space,
                                                           gpu_va,
                                                           rm_mem->size,
                                                           &cpu_va,
                                                           UVM_PAGE_SIZE_DEFAULT));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceMemoryCpuMap() failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_cpu_va(rm_mem, cpu_va);

    return NV_OK;
}

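// Remove the CPU mapping, if one exists.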
void uvm_rm_mem_unmap_cpu(uvm_rm_mem_t *rm_mem)
{
    UVM_ASSERT(rm_mem);

    if (!uvm_rm_mem_mapped_on_cpu(rm_mem))
        return;

    uvm_rm_locked_call_void(nvUvmInterfaceMemoryCpuUnMap(rm_mem->gpu_owner->rm_address_space,
                                                         uvm_rm_mem_get_cpu_va(rm_mem)));

    rm_mem_clear_cpu_va(rm_mem);
}

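// Map rm_mem on another GPU by duplicating the owner's allocation into that
// GPU's address space; no-op if already mapped. Only sysmem allocations are
// supported, since vidmem would require peer mappings.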
NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_alignment)
{
    NV_STATUS status;
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;
    NvU64 gpu_va;

    UVM_ASSERT(rm_mem);
    UVM_ASSERT(gpu);

    if (uvm_rm_mem_mapped_on_gpu(rm_mem, gpu))
        return NV_OK;

    // Peer mappings are not supported yet
    UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    status = uvm_rm_locked_call(nvUvmInterfaceDupAllocation(gpu_owner->rm_address_space,
                                                            gpu_owner_va,
                                                            gpu->rm_address_space,
                                                            gpu_alignment,
                                                            &gpu_va));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceDupAllocation() failed: %s, src GPU %s, dest GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu_owner),
                      uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_gpu_va(rm_mem, gpu, gpu_va);

    // Map to proxy VA space, if applicable
    return rm_mem_map_gpu_proxy(rm_mem, gpu);
}

// This internal unmap variant allows the GPU owner to be unmapped, unlike
// uvm_rm_mem_unmap_gpu
static void rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    NvU64 va;

    if (!uvm_rm_mem_mapped_on_gpu(rm_mem, gpu))
        return;

    // Remove mappings in proxy address space, if any
    rm_mem_unmap_gpu_proxy(rm_mem, gpu);

    va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);
    uvm_rm_locked_call_void(nvUvmInterfaceMemoryFree(gpu->rm_address_space, va));
    rm_mem_clear_gpu_va(rm_mem, gpu);
}

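// Unmap rm_mem from a GPU other than its owner. Unmap requests for the owner
// are ignored, because the owner's mapping lives as long as the allocation
// itself; it is only removed by uvm_rm_mem_free().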
void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(rm_mem);
    UVM_ASSERT(gpu);

    // The GPU owner mapping remains valid until the memory is freed.
    if (gpu == rm_mem->gpu_owner)
        return;

    rm_mem_unmap_gpu(rm_mem, gpu);
}

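// Free rm_mem: remove the CPU mapping and all GPU mappings, unmapping the
// owner last since that also frees the underlying RM allocation. NULL and
// partially-constructed objects (RM allocation failed, so gpu_owner is unset)
// are handled gracefully.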
void uvm_rm_mem_free(uvm_rm_mem_t *rm_mem)
{
    uvm_gpu_id_t gpu_id;
    uvm_gpu_t *gpu_owner;

    if (rm_mem == NULL)
        return;

    // If the GPU owner is not set, allocation of backing storage by RM failed
    gpu_owner = rm_mem->gpu_owner;
    if (gpu_owner == NULL) {
        uvm_kvfree(rm_mem);
        return;
    }

    uvm_rm_mem_unmap_cpu(rm_mem);

    // Don't use for_each_gpu_in_mask() as the owning GPU might be being
    // destroyed and already removed from the global GPU array, causing the
    // iteration to stop prematurely.
    for_each_gpu_id_in_mask(gpu_id, &rm_mem->mapped_on) {
        if (!uvm_id_equal(gpu_id, gpu_owner->id))
            uvm_rm_mem_unmap_gpu(rm_mem, uvm_gpu_get(gpu_id));
    }

    rm_mem_unmap_gpu(rm_mem, gpu_owner);

    UVM_ASSERT_MSG(uvm_processor_mask_empty(&rm_mem->mapped_on),
                   "Left-over %u mappings in rm_mem\n",
                   uvm_processor_mask_get_count(&rm_mem->mapped_on));

    uvm_kvfree(rm_mem->proxy_vas);
    uvm_kvfree(rm_mem);
}

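// Convenience wrapper: allocate and immediately map on the CPU.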
NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu,
                                       uvm_rm_mem_type_t type,
                                       NvLength size,
                                       NvU64 gpu_alignment,
                                       uvm_rm_mem_t **rm_mem_out)
{
    uvm_rm_mem_t *rm_mem;
    NV_STATUS status;

    status = uvm_rm_mem_alloc(gpu, type, size, gpu_alignment, &rm_mem);
    if (status != NV_OK)
        return status;

    status = uvm_rm_mem_map_cpu(rm_mem);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;

    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}

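// Map rm_mem on all registered GPUs; mapping the owner again is a no-op.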
NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem, NvU64 gpu_alignment)
{
    uvm_gpu_t *gpu;

    UVM_ASSERT(rm_mem);

    for_each_gpu(gpu) {
        NV_STATUS status = uvm_rm_mem_map_gpu(rm_mem, gpu, gpu_alignment);
        if (status != NV_OK)
            return status;
    }
    return NV_OK;
}

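// Convenience wrapper: allocate, map on the CPU, and map on all registered
// GPUs.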
NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu,
                                       uvm_rm_mem_type_t type,
                                       NvLength size,
                                       NvU64 gpu_alignment,
                                       uvm_rm_mem_t **rm_mem_out)
{
    uvm_rm_mem_t *rm_mem;
    NV_STATUS status;

    UVM_ASSERT(gpu);

    status = uvm_rm_mem_alloc_and_map_cpu(gpu, type, size, gpu_alignment, &rm_mem);
    if (status != NV_OK)
        return status;

    status = uvm_rm_mem_map_all_gpus(rm_mem, gpu_alignment);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;

    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}