/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_mem.h"
#include "uvm_hal_types.h"
#include "uvm_mmu.h"
#include "uvm_processors.h"
#include "uvm_va_space.h"
#include "uvm_gpu.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_push.h"
#include "uvm_range_allocator.h"
#include "uvm_hal.h"
#include "uvm_linux.h"

static uvm_range_allocator_t g_free_ranges;
static bool g_mem_initialized;

NV_STATUS uvm_mem_global_init(void)
{
    NV_STATUS status = uvm_range_allocator_init(UVM_MEM_VA_SIZE, &g_free_ranges);
    if (status != NV_OK)
        return status;

    g_mem_initialized = true;

    return NV_OK;
}

void uvm_mem_global_exit(void)
{
    if (!g_mem_initialized)
        return;

    uvm_range_allocator_deinit(&g_free_ranges);
}

static bool vidmem_can_be_mapped(uvm_mem_t *vidmem, bool is_user_space)
{
    UVM_ASSERT(uvm_mem_is_vidmem(vidmem));

    // Mapping a vidmem allocation on a user VA space is currently unsupported,
    // because there is no use case.
    if (is_user_space)
        return false;

    return true;
}

static bool mem_can_be_mapped_on_cpu(uvm_mem_t *mem, bool is_user_space)
{
    if (uvm_mem_is_sysmem(mem))
        return true;

    if (!vidmem_can_be_mapped(mem, is_user_space))
        return false;

    return mem->backing_gpu->mem_info.numa.enabled && PAGE_ALIGNED(mem->chunk_size);
}

static bool mem_can_be_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    return mem_can_be_mapped_on_cpu(mem, false);
}

static bool mem_can_be_mapped_on_cpu_user(uvm_mem_t *mem)
{
    return mem_can_be_mapped_on_cpu(mem, true);
}

static bool sysmem_can_be_mapped_on_gpu(uvm_mem_t *sysmem)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));

    // In Confidential Computing, only unprotected memory can be mapped on the
    // GPU
    if (g_uvm_global.conf_computing_enabled)
        return uvm_mem_is_sysmem_dma(sysmem);

    return true;
}

static bool mem_can_be_mapped_on_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu, bool is_user_space)
{
    if (uvm_mem_is_sysmem(mem))
        return sysmem_can_be_mapped_on_gpu(mem);

    if (!vidmem_can_be_mapped(mem, is_user_space))
        return false;

    return uvm_mem_is_local_vidmem(mem, gpu);
}

static bool mem_can_be_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    return mem_can_be_mapped_on_gpu(mem, gpu, false);
}

static bool mem_can_be_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    return mem_can_be_mapped_on_gpu(mem, gpu, true);
}

bool uvm_mem_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (mem->user == NULL)
        return false;

    return uvm_processor_mask_test(&mem->user->mapped_on, gpu->id);
}

bool uvm_mem_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    return uvm_processor_mask_test(&mem->kernel.mapped_on, gpu->id);
}

bool uvm_mem_mapped_on_cpu_user(uvm_mem_t *mem)
{
    if (mem->user == NULL)
        return false;

    return uvm_processor_mask_test(&mem->user->mapped_on, UVM_ID_CPU);
}

bool uvm_mem_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    return uvm_processor_mask_test(&mem->kernel.mapped_on, UVM_ID_CPU);
}

static void mem_set_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem->user != NULL);
    UVM_ASSERT(mem_can_be_mapped_on_gpu_user(mem, gpu));
    UVM_ASSERT(!uvm_mem_mapped_on_gpu_user(mem, gpu));

    uvm_processor_mask_set(&mem->user->mapped_on, gpu->id);
}

static void mem_set_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem_can_be_mapped_on_gpu_kernel(mem, gpu));
    UVM_ASSERT(!uvm_mem_mapped_on_gpu_kernel(mem, gpu));

    uvm_processor_mask_set(&mem->kernel.mapped_on, gpu->id);
}

static void mem_set_mapped_on_cpu_user(uvm_mem_t *mem)
{
    UVM_ASSERT(mem->user != NULL);
    UVM_ASSERT(mem_can_be_mapped_on_cpu_user(mem));
    UVM_ASSERT(!uvm_mem_mapped_on_cpu_user(mem));

    uvm_processor_mask_set(&mem->user->mapped_on, UVM_ID_CPU);
}

static void mem_set_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    UVM_ASSERT(mem_can_be_mapped_on_cpu_kernel(mem));
    UVM_ASSERT(!uvm_mem_mapped_on_cpu_kernel(mem));

    uvm_processor_mask_set(&mem->kernel.mapped_on, UVM_ID_CPU);
}

static void mem_clear_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    uvm_processor_mask_clear(&mem->kernel.mapped_on, gpu->id);
}

static void mem_clear_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem->user != NULL);

    uvm_processor_mask_clear(&mem->user->mapped_on, gpu->id);
}

static void mem_clear_mapped_on_cpu_user(uvm_mem_t *mem)
{
    UVM_ASSERT(mem->user != NULL);

    uvm_processor_mask_clear(&mem->user->mapped_on, UVM_ID_CPU);
}

static void mem_clear_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    uvm_processor_mask_clear(&mem->kernel.mapped_on, UVM_ID_CPU);
}

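// The following helpers track which GPUs currently hold a physical (DMA)
// mapping of a sysmem allocation, recorded in the sysmem.mapped_on_phys mask.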
static bool sysmem_mapped_on_gpu_phys(uvm_mem_t *sysmem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));

    return uvm_processor_mask_test(&sysmem->sysmem.mapped_on_phys, gpu->id);
}

static void sysmem_set_mapped_on_gpu_phys(uvm_mem_t *sysmem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
    UVM_ASSERT(!sysmem_mapped_on_gpu_phys(sysmem, gpu));

    uvm_processor_mask_set(&sysmem->sysmem.mapped_on_phys, gpu->id);
}

static void sysmem_clear_mapped_on_gpu_phys(uvm_mem_t *sysmem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));

    uvm_processor_mask_clear(&sysmem->sysmem.mapped_on_phys, gpu->id);
}

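// Translate the mapping and caching types from the public
// UvmGpuMappingAttributes into the internal uvm_mem_gpu_mapping_attrs_t, and
// look up the target GPU by UUID in the given VA space.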
NV_STATUS uvm_mem_translate_gpu_attributes(const UvmGpuMappingAttributes *attrs,
                                           uvm_va_space_t *va_space,
                                           uvm_gpu_t **gpu_out,
                                           uvm_mem_gpu_mapping_attrs_t *attrs_out)
{
    uvm_gpu_t *gpu;

    switch (attrs->gpuMappingType) {
        case UvmGpuMappingTypeDefault:
            break;
        case UvmGpuMappingTypeReadWriteAtomic:
            attrs_out->protection = UVM_PROT_READ_WRITE_ATOMIC;
            break;
        case UvmGpuMappingTypeReadWrite:
            attrs_out->protection = UVM_PROT_READ_WRITE;
            break;
        case UvmGpuMappingTypeReadOnly:
            attrs_out->protection = UVM_PROT_READ_ONLY;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    switch (attrs->gpuCachingType) {
        case UvmGpuCachingTypeDefault:
            break;
        case UvmGpuCachingTypeForceUncached:
            attrs_out->is_cacheable = false;
            break;
        case UvmGpuCachingTypeForceCached:
            attrs_out->is_cacheable = true;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    gpu = uvm_va_space_get_gpu_by_uuid(va_space, &attrs->gpuUuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    if (gpu_out)
        *gpu_out = gpu;

    return NV_OK;
}

static struct page *uvm_virt_to_page(const void *addr)
{
    if (virt_addr_valid(addr))
        return virt_to_page(addr);

    if (is_vmalloc_addr(addr))
        return vmalloc_to_page(addr);

    return NULL;
}

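// Chunk sizes the GPU can back uvm_mem allocations with: the page sizes
// supported by its MMU, restricted to UVM_CHUNK_SIZES_MASK.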
uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
{
    // Get the mmu mode hal directly as the internal address space tree has not
    // been created yet.
    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
    NvU32 page_sizes = hal->page_sizes();

    return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
}

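// Pick the backing chunk size used when the caller passes
// UVM_PAGE_SIZE_DEFAULT: PAGE_SIZE for sysmem; for vidmem, 4K, the big page
// size, or the largest page size, depending on the allocation size.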
static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
{
    NvU32 biggest_page_size;
    NvU32 chunk_size;

    if (uvm_mem_is_sysmem(mem))
        return PAGE_SIZE;

    biggest_page_size = uvm_mmu_biggest_page_size_up_to(&mem->backing_gpu->address_space_tree, UVM_CHUNK_SIZE_MAX);

    if (mem->size < mem->backing_gpu->big_page.internal_size)
        chunk_size = UVM_PAGE_SIZE_4K;
    else if (mem->size < biggest_page_size)
        chunk_size = mem->backing_gpu->big_page.internal_size;
    else
        chunk_size = biggest_page_size;

    // When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
    // chunk_size to be at least PAGE_SIZE, to allow CPU mappings.
    if (mem->backing_gpu->mem_info.numa.enabled)
        chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);

    return chunk_size;
}

static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
{
    if (uvm_mem_is_vidmem(mem)) {
        // For vidmem allocations the chunk size is picked out of the supported
        // page sizes and can be used directly.
        return mem->chunk_size;
    }

    // For sysmem, check whether the GPU supports mapping it with large pages.
    if (gpu->parent->can_map_sysmem_with_large_pages) {
        // If it's supported, pick the largest page size not bigger than
        // the chunk size.
        return uvm_mmu_biggest_page_size_up_to(gpu_page_tree, mem->chunk_size);
    }

    // Otherwise just use 4K.
    return UVM_PAGE_SIZE_4K;
}

static void mem_free_vidmem_chunks(uvm_mem_t *mem)
{
    size_t i;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    if (!mem->vidmem.chunks)
        return;

    for (i = 0; i < mem->chunks_count; ++i) {
        // On allocation error, PMM guarantees the chunks array is zeroed, so
        // just check for NULL.
        if (mem->vidmem.chunks[i] == NULL)
            break;
        uvm_pmm_gpu_free(&mem->backing_gpu->pmm, mem->vidmem.chunks[i], NULL);
    }

    uvm_kvfree(mem->vidmem.chunks);
    mem->vidmem.chunks = NULL;
}

static void mem_free_sysmem_dma_chunks(uvm_mem_t *mem)
{
    size_t i;
    NvU32 gpu_index;

    UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));
    gpu_index = uvm_id_gpu_index(mem->dma_owner->id);

    if (!mem->sysmem.pages || !mem->sysmem.va)
        goto end;

    for (i = 0; i < mem->chunks_count; ++i) {
        if (!mem->sysmem.va[i])
            break;

        uvm_parent_gpu_dma_free_page(mem->dma_owner->parent,
                                     mem->sysmem.va[i],
                                     mem->sysmem.dma_addrs[gpu_index][i]);
    }

end:
    sysmem_clear_mapped_on_gpu_phys(mem, mem->dma_owner);

    uvm_kvfree(mem->sysmem.dma_addrs[gpu_index]);
    mem->sysmem.dma_addrs[gpu_index] = NULL;

    uvm_kvfree(mem->sysmem.pages);
    mem->sysmem.pages = NULL;

    uvm_kvfree(mem->sysmem.va);
    mem->sysmem.va = NULL;
}

static void mem_free_sysmem_chunks(uvm_mem_t *mem)
{
    size_t i;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));

    if (!mem->sysmem.pages)
        return;

    for (i = 0; i < mem->chunks_count; ++i) {
        if (!mem->sysmem.pages[i])
            break;
        __free_pages(mem->sysmem.pages[i], get_order(mem->chunk_size));
    }

    uvm_kvfree(mem->sysmem.pages);
    mem->sysmem.pages = NULL;
}

static void mem_free_chunks(uvm_mem_t *mem)
{
    if (uvm_mem_is_vidmem(mem))
        mem_free_vidmem_chunks(mem);
    else if (uvm_mem_is_sysmem_dma(mem))
        mem_free_sysmem_dma_chunks(mem);
    else
        mem_free_sysmem_chunks(mem);
}

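// Allocate the per-GPU array that will hold the DMA address of each chunk as
// mapped for the given GPU.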
static NV_STATUS mem_alloc_dma_addrs(uvm_mem_t *mem, const uvm_gpu_t *gpu)
{
    NvU64 *dma_addrs = NULL;
    NvU32 gpu_index = uvm_id_gpu_index(gpu->id);

    dma_addrs = uvm_kvmalloc_zero(sizeof(*dma_addrs) * mem->chunks_count);
    if (!dma_addrs)
        return NV_ERR_NO_MEMORY;

    mem->sysmem.dma_addrs[gpu_index] = dma_addrs;

    return NV_OK;
}

static gfp_t sysmem_allocation_gfp_flags(int order, bool zero)
{
    gfp_t gfp_flags = NV_UVM_GFP_FLAGS;

    if (zero)
        gfp_flags |= __GFP_ZERO;

    // High-order page allocations require the __GFP_COMP flag to work with
    // vm_insert_page.
    if (order > 0)
        gfp_flags |= __GFP_COMP;

    return gfp_flags;
}

// This allocation is a non-protected memory allocation under Confidential
// Computing.
//
// There is a tighter coupling between allocation and mapping because of the
// allocator UVM must use. Hence, this function does the equivalent of
// uvm_mem_map_gpu_phys().
//
// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free().
static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
{
    size_t i;
    NV_STATUS status;
    NvU64 *dma_addrs;

    UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
                   "mem->chunk_size is 0x%x. Only PAGE_SIZE is supported.",
                   mem->chunk_size);
    UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));

    mem->sysmem.pages = uvm_kvmalloc_zero(sizeof(*mem->sysmem.pages) * mem->chunks_count);
    mem->sysmem.va = uvm_kvmalloc_zero(sizeof(*mem->sysmem.va) * mem->chunks_count);
    if (!mem->sysmem.pages || !mem->sysmem.va)
        goto err_no_mem;

    status = mem_alloc_dma_addrs(mem, mem->dma_owner);
    if (status != NV_OK)
        goto error;

    dma_addrs = mem->sysmem.dma_addrs[uvm_id_gpu_index(mem->dma_owner->id)];

    for (i = 0; i < mem->chunks_count; ++i) {
        mem->sysmem.va[i] = uvm_parent_gpu_dma_alloc_page(mem->dma_owner->parent, gfp_flags, &dma_addrs[i]);
        if (!mem->sysmem.va[i])
            goto err_no_mem;

        mem->sysmem.pages[i] = uvm_virt_to_page(mem->sysmem.va[i]);
        if (!mem->sysmem.pages[i])
            goto err_no_mem;
    }

    sysmem_set_mapped_on_gpu_phys(mem, mem->dma_owner);

    return NV_OK;

err_no_mem:
    status = NV_ERR_NO_MEMORY;
error:
    mem_free_sysmem_dma_chunks(mem);
    return status;
}

// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free().
static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
{
    size_t i;
    int order;

    UVM_ASSERT(uvm_mem_is_sysmem(mem) && !uvm_mem_is_sysmem_dma(mem));

    mem->sysmem.pages = uvm_kvmalloc_zero(sizeof(*mem->sysmem.pages) * mem->chunks_count);
    if (!mem->sysmem.pages)
        return NV_ERR_NO_MEMORY;

    order = get_order(mem->chunk_size);
    for (i = 0; i < mem->chunks_count; ++i) {
        mem->sysmem.pages[i] = alloc_pages(gfp_flags, order);
        if (!mem->sysmem.pages[i])
            return NV_ERR_NO_MEMORY;
    }

    return NV_OK;
}

// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free().
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
{
    NV_STATUS status;
    uvm_pmm_gpu_memory_type_t mem_type;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    // TODO: Bug 2446832: A non-zeroing request may not be obeyed because PMM
    // does not support explicit allocation of non-zeroed (or zeroed) chunks.
    //
    // The zeroing case can be implemented even without resolving that bug, by
    // clearing the chunks after PMM allocation. But this functionality has not
    // been implemented, because the only expected use case is memory that gets
    // mapped on user space, and vidmem never is.
    UVM_ASSERT(!zero);

    mem->vidmem.chunks = uvm_kvmalloc_zero(mem->chunks_count * sizeof(*mem->vidmem.chunks));
    if (!mem->vidmem.chunks)
        return NV_ERR_NO_MEMORY;

    // When CC is disabled the behavior is identical to that of PMM, and the
    // protection flag is ignored (squashed by PMM internally).
    if (is_unprotected)
        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
    else
        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;

    status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
                               mem->chunks_count,
                               mem->chunk_size,
                               mem_type,
                               UVM_PMM_ALLOC_FLAGS_NONE,
                               mem->vidmem.chunks,
                               NULL);

    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
                      mem->chunks_count,
                      mem->chunk_size,
                      nvstatusToString(status));
        return status;
    }

    return NV_OK;
}

static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
{
    if (uvm_mem_is_sysmem(mem)) {
        gfp_t gfp_flags;
        uvm_memcg_context_t memcg_context;
        NV_STATUS status;

        UVM_ASSERT(PAGE_ALIGNED(mem->chunk_size));
        gfp_flags = sysmem_allocation_gfp_flags(get_order(mem->chunk_size), zero);
        if (UVM_CGROUP_ACCOUNTING_SUPPORTED() && mm)
            gfp_flags |= NV_UVM_GFP_FLAGS_ACCOUNT;

        uvm_memcg_context_start(&memcg_context, mm);
        if (uvm_mem_is_sysmem_dma(mem))
            status = mem_alloc_sysmem_dma_chunks(mem, gfp_flags);
        else
            status = mem_alloc_sysmem_chunks(mem, gfp_flags);

        uvm_memcg_context_end(&memcg_context);
        return status;
    }

    return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
}

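// Map the allocation into the kernel address space of the CPU and of every GPU
// set in the given processor mask. A NULL mask is a no-op.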
NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
{
    uvm_gpu_t *gpu;
    NV_STATUS status;

    if (!mask)
        return NV_OK;

    if (uvm_processor_mask_test(mask, UVM_ID_CPU)) {
        status = uvm_mem_map_cpu_kernel(mem);
        if (status != NV_OK)
            return status;
    }

    for_each_gpu_in_mask(gpu, mask) {
        status = uvm_mem_map_gpu_kernel(mem, gpu);
        if (status != NV_OK)
            return status;
    }
    return NV_OK;
}

NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_out)
{
    NV_STATUS status;
    NvU64 physical_size;
    uvm_mem_t *mem = NULL;
    bool is_unprotected = false;

    UVM_ASSERT(params->size > 0);

    mem = uvm_kvmalloc_zero(sizeof(*mem));
    if (mem == NULL)
        return NV_ERR_NO_MEMORY;

    mem->backing_gpu = params->backing_gpu;
    mem->dma_owner = params->dma_owner;
    UVM_ASSERT(!mem->dma_owner || !mem->backing_gpu);

    mem->size = params->size;
    mem->chunk_size = params->page_size;
    if (mem->chunk_size == UVM_PAGE_SIZE_DEFAULT)
        mem->chunk_size = mem_pick_chunk_size(mem);

    UVM_ASSERT(mem->chunk_size > 0);

    physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
    mem->chunks_count = physical_size / mem->chunk_size;

    if (params->is_unprotected)
        UVM_ASSERT(uvm_mem_is_vidmem(mem));

    is_unprotected = params->is_unprotected;

    status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
    if (status != NV_OK)
        goto error;

    *mem_out = mem;
    return NV_OK;

error:
    uvm_mem_free(mem);
    return status;
}

static NV_STATUS mem_init_user_mapping(uvm_mem_t *mem, uvm_va_space_t *user_va_space, void *user_addr)
{
    UVM_ASSERT(user_va_space);
    UVM_ASSERT(user_addr);

    // If the user structure exists, the VA space and address should match
    if (mem->user != NULL) {
        UVM_ASSERT(mem->user->va_space == user_va_space);
        UVM_ASSERT(mem->user->addr == user_addr);
        return NV_OK;
    }

    UVM_ASSERT(IS_ALIGNED((NvU64)user_addr, mem->chunk_size));
    UVM_ASSERT(uvm_mem_physical_size(mem) == mem->size);

    mem->user = uvm_kvmalloc_zero(sizeof(*mem->user));
    if (mem->user == NULL)
        return NV_ERR_NO_MEMORY;

    mem->user->va_space = user_va_space;
    mem->user->addr = user_addr;

    return NV_OK;
}

static void mem_deinit_user_mapping(uvm_mem_t *mem)
{
    if (mem->user == NULL)
        return;

    if (!uvm_processor_mask_empty(&mem->user->mapped_on))
        return;

    uvm_kvfree(mem->user);
    mem->user = NULL;
}

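// Kernel (internal) GPU VA of the allocation: the offset reserved in the
// global range allocator, relative to the GPU's uvm_mem VA base.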
static NvU64 reserved_gpu_va(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem->kernel.range_alloc.aligned_start + uvm_mem_physical_size(mem) < gpu->parent->uvm_mem_va_size);

    return gpu->parent->uvm_mem_va_base + mem->kernel.range_alloc.aligned_start;
}

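// Return the struct page backing the given page-aligned byte offset into the
// allocation.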
static struct page *mem_cpu_page(uvm_mem_t *mem, NvU64 offset)
{
    struct page *base_page = mem->sysmem.pages[offset / mem->chunk_size];

    UVM_ASSERT_MSG(PAGE_ALIGNED(offset), "offset 0x%llx\n", offset);

    offset = offset % mem->chunk_size;
    return pfn_to_page(page_to_pfn(base_page) + offset / PAGE_SIZE);
}

static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
{
    struct page **pages = mem->sysmem.pages;
    size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
    pgprot_t prot = PAGE_KERNEL;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));

    // If chunk size is different than PAGE_SIZE then create a temporary array
    // of all the pages to map so that vmap() can be used.
    if (mem->chunk_size != PAGE_SIZE) {
        size_t page_index;
        pages = uvm_kvmalloc(sizeof(*pages) * num_pages);
        if (!pages)
            return NV_ERR_NO_MEMORY;
        for (page_index = 0; page_index < num_pages; ++page_index)
            pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
    }

    if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
        prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);

    mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);

    if (mem->chunk_size != PAGE_SIZE)
        uvm_kvfree(pages);

    if (!mem->kernel.cpu_addr)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}

static NV_STATUS mem_map_cpu_to_vidmem_kernel(uvm_mem_t *mem)
{
    struct page **pages;
    size_t num_chunk_pages = mem->chunk_size / PAGE_SIZE;
    size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
    size_t page_index;
    size_t chunk_index;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    pages = uvm_kvmalloc(sizeof(*pages) * num_pages);
    if (!pages)
        return NV_ERR_NO_MEMORY;

    page_index = 0;

    for (chunk_index = 0; chunk_index < mem->chunks_count; ++chunk_index) {
        uvm_gpu_chunk_t *chunk = mem->vidmem.chunks[chunk_index];
        struct page *page = uvm_gpu_chunk_to_page(&mem->backing_gpu->pmm, chunk);
        size_t chunk_page_index;

        for (chunk_page_index = 0; chunk_page_index < num_chunk_pages; ++chunk_page_index)
            pages[page_index++] = page + chunk_page_index;
    }
    UVM_ASSERT(page_index == num_pages);

    mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);

    uvm_kvfree(pages);

    if (!mem->kernel.cpu_addr)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}

void uvm_mem_unmap_cpu_kernel(uvm_mem_t *mem)
{
    if (!uvm_mem_mapped_on_cpu_kernel(mem))
        return;

    vunmap(mem->kernel.cpu_addr);
    mem->kernel.cpu_addr = NULL;
    mem_clear_mapped_on_cpu_kernel(mem);
}

static NV_STATUS mem_map_cpu_to_sysmem_user(uvm_mem_t *mem, struct vm_area_struct *vma)
{
    NV_STATUS status;
    NvU64 offset;

    UVM_ASSERT(mem->user != NULL);
    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    uvm_assert_mmap_lock_locked(vma->vm_mm);

    // TODO: Bug 1995015: high-order page allocations need to be allocated as
    // compound pages in order to be able to use vm_insert_page on them. This
    // is not currently being exercised because the only allocations using this
    // are semaphore pools (which typically use a single page).
    for (offset = 0; offset < uvm_mem_physical_size(mem); offset += PAGE_SIZE) {
        int ret = vm_insert_page(vma, (unsigned long)mem->user->addr + offset, mem_cpu_page(mem, offset));
        if (ret) {
            UVM_ASSERT_MSG(ret == -ENOMEM, "ret: %d\n", ret);
            status = errno_to_nv_status(ret);
            goto error;
        }
    }

    return NV_OK;

error:
    unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
    return status;
}

void uvm_mem_unmap_cpu_user(uvm_mem_t *mem)
{
    if (!uvm_mem_mapped_on_cpu_user(mem))
        return;

    unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
    mem_clear_mapped_on_cpu_user(mem);
    mem_deinit_user_mapping(mem);
}

NV_STATUS uvm_mem_map_cpu_user(uvm_mem_t *mem, uvm_va_space_t *user_va_space, struct vm_area_struct *vma)
{
    NV_STATUS status;
    void *user_addr;

    UVM_ASSERT(mem);
    UVM_ASSERT(mem_can_be_mapped_on_cpu_user(mem));

    if (uvm_mem_mapped_on_cpu_user(mem))
        return NV_OK;

    UVM_ASSERT((vma->vm_end - vma->vm_start) == mem->size);

    user_addr = (void *) (uintptr_t)vma->vm_start;

    status = mem_init_user_mapping(mem, user_va_space, user_addr);
    if (status != NV_OK)
        return status;

    status = mem_map_cpu_to_sysmem_user(mem, vma);
    if (status != NV_OK)
        goto cleanup;

    mem_set_mapped_on_cpu_user(mem);

    return NV_OK;

cleanup:
    mem_deinit_user_mapping(mem);
    return status;
}

NV_STATUS uvm_mem_map_cpu_kernel(uvm_mem_t *mem)
{
    NV_STATUS status;

    UVM_ASSERT(mem);
    UVM_ASSERT(mem_can_be_mapped_on_cpu_kernel(mem));

    if (uvm_mem_mapped_on_cpu_kernel(mem))
        return NV_OK;

    if (uvm_mem_is_sysmem(mem))
        status = mem_map_cpu_to_sysmem_kernel(mem);
    else
        status = mem_map_cpu_to_vidmem_kernel(mem);

    if (status != NV_OK)
        return status;

    mem_set_mapped_on_cpu_kernel(mem);

    return NV_OK;
}

static void sysmem_unmap_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NvU64 *dma_addrs = mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)];
    NvU32 i;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    UVM_ASSERT(gpu != mem->dma_owner);
    UVM_ASSERT(dma_addrs);

    for (i = 0; i < mem->chunks_count; ++i) {
        if (dma_addrs[i] == 0) {
            // The DMA address can only be 0 when cleaning up after a failed
            // partial map_gpu_sysmem_iommu() operation.
            break;
        }
        uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_addrs[i], mem->chunk_size);
        dma_addrs[i] = 0;
    }

    uvm_kvfree(dma_addrs);
    mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)] = NULL;
}

static NV_STATUS sysmem_map_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;
    size_t i;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    UVM_ASSERT(gpu != mem->dma_owner);

    status = mem_alloc_dma_addrs(mem, gpu);
    if (status != NV_OK)
        return status;

    for (i = 0; i < mem->chunks_count; ++i) {
        status = uvm_parent_gpu_map_cpu_pages(gpu->parent,
                                              mem->sysmem.pages[i],
                                              mem->chunk_size,
                                              &mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)][i]);
        if (status != NV_OK)
            goto error;
    }

    return NV_OK;

error:
    sysmem_unmap_gpu_phys(mem, gpu);
    return status;
}

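// Return the vidmem chunk backing the given offset, and optionally the offset
// within that chunk.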
static uvm_gpu_chunk_t *mem_get_chunk(uvm_mem_t *mem, size_t mem_offset, size_t *offset_in_chunk)
{
    size_t chunk_index = uvm_div_pow2_64(mem_offset, mem->chunk_size);

    if (offset_in_chunk)
        *offset_in_chunk = mem_offset & (mem->chunk_size - 1);

    UVM_ASSERT(uvm_mem_is_vidmem(mem));
    return mem->vidmem.chunks[chunk_index];
}

static uvm_gpu_phys_address_t mem_gpu_physical_vidmem(uvm_mem_t *mem, size_t offset)
{
    size_t chunk_offset;
    uvm_gpu_chunk_t *chunk = mem_get_chunk(mem, offset, &chunk_offset);
    return uvm_gpu_phys_address(UVM_APERTURE_VID, chunk->address + chunk_offset);
}

static uvm_gpu_phys_address_t mem_gpu_physical_sysmem(uvm_mem_t *mem, uvm_gpu_t *gpu, size_t offset)
{
    NvU64 *dma_addrs = mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)];
    NvU64 dma_addr = dma_addrs[offset / mem->chunk_size];

    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    UVM_ASSERT(sysmem_mapped_on_gpu_phys(mem, gpu));

    return uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr + offset % mem->chunk_size);
}

bool uvm_mem_is_physically_contiguous(uvm_mem_t *mem, NvU64 offset, NvU64 size)
{
    UVM_ASSERT(size != 0);
    UVM_ASSERT((offset + size) <= uvm_mem_physical_size(mem));

    return UVM_ALIGN_DOWN(offset, mem->chunk_size) == UVM_ALIGN_DOWN(offset + size - 1, mem->chunk_size);
}

uvm_gpu_phys_address_t uvm_mem_gpu_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size)
{
    UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));

    if (uvm_mem_is_vidmem(mem)) {
        UVM_ASSERT(uvm_mem_is_local_vidmem(mem, gpu));

        return mem_gpu_physical_vidmem(mem, offset);
    }

    return mem_gpu_physical_sysmem(mem, gpu, offset);
}

uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_gpu, NvU64 offset, NvU64 size)
{
    uvm_gpu_address_t copy_addr;
    size_t chunk_offset;
    uvm_gpu_chunk_t *chunk;

    UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));

    if (uvm_mem_is_sysmem(mem) || uvm_mem_is_local_vidmem(mem, accessing_gpu))
        return uvm_gpu_address_copy(accessing_gpu, uvm_mem_gpu_physical(mem, accessing_gpu, offset, size));

    // Peer GPUs may need to use some form of translation (identity mappings,
    // indirect peers) to copy.
    chunk = mem_get_chunk(mem, offset, &chunk_offset);
    copy_addr = uvm_pmm_gpu_peer_copy_address(&mem->backing_gpu->pmm, chunk, accessing_gpu);
    copy_addr.address += chunk_offset;
    return copy_addr;
}

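// Data passed to mem_pte_maker() when writing PTEs through
// uvm_page_table_range_vec_write_ptes().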
typedef struct uvm_mem_pte_maker_data_struct
{
    uvm_mem_t *mem;
    const uvm_mem_gpu_mapping_attrs_t *attrs;
} uvm_mem_pte_maker_data_t;

static NvU64 mem_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset, void *vp_data)
{
    uvm_mem_pte_maker_data_t *data = (uvm_mem_pte_maker_data_t *)vp_data;
    uvm_page_tree_t *tree = range_vec->tree;
    uvm_gpu_t *gpu = tree->gpu;
    uvm_gpu_phys_address_t phys = uvm_mem_gpu_physical(data->mem, gpu, offset, range_vec->page_size);

    return tree->hal->make_pte(phys.aperture,
                               phys.address,
                               data->attrs->protection,
                               data->attrs->is_cacheable ? UVM_MMU_PTE_FLAGS_CACHED : UVM_MMU_PTE_FLAGS_NONE);
}

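// Clear the PTEs of an existing GPU virtual mapping and destroy its page table
// range vector.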
static void mem_unmap_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_table_range_vec_t **range_vec)
{
    uvm_membar_t tlb_membar = uvm_hal_downgrade_membar_type(gpu, uvm_mem_is_local_vidmem(mem, gpu));
    NV_STATUS status = uvm_page_table_range_vec_clear_ptes(*range_vec, tlb_membar);
    if (status != NV_OK)
        UVM_ERR_PRINT("Clearing PTEs failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));

    uvm_page_table_range_vec_destroy(*range_vec);
    *range_vec = NULL;
}

static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
                             uvm_gpu_t *gpu,
                             NvU64 gpu_va,
                             uvm_page_tree_t *tree,
                             const uvm_mem_gpu_mapping_attrs_t *attrs,
                             uvm_page_table_range_vec_t **range_vec)
{
    NV_STATUS status;
    NvU32 page_size;
    uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;

    uvm_mem_pte_maker_data_t pte_maker_data = {
        .mem = mem,
        .attrs = attrs
    };

    page_size = mem_pick_gpu_page_size(mem, gpu, tree);
    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);

    // When the Confidential Computing feature is enabled, DMA allocations are
    // mostly allocated and managed by a per-GPU DMA buffer pool
    // (uvm_conf_computing_dma_buffer_pool_t). Because we would typically
    // already hold the DMA_BUFFER_POOL lock at this time, we cannot hold
    // the block lock. Allocate PTEs without eviction in this context.
    //
    // See uvm_pmm_gpu_alloc()
    if (uvm_mem_is_sysmem_dma(mem))
        pmm_flags = UVM_PMM_ALLOC_FLAGS_NONE;

    status = uvm_page_table_range_vec_create(tree,
                                             gpu_va,
                                             uvm_mem_physical_size(mem),
                                             page_size,
                                             pmm_flags,
                                             range_vec);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init page mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
                      gpu_va,
                      gpu_va + uvm_mem_physical_size(mem),
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return status;
    }

    status = uvm_page_table_range_vec_write_ptes(*range_vec, UVM_MEMBAR_NONE, mem_pte_maker, &pte_maker_data);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to write PTEs for mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
                      gpu_va,
                      gpu_va + uvm_mem_physical_size(mem),
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        goto error;
    }

    return NV_OK;

error:
    mem_unmap_gpu(mem, gpu, range_vec);
    return status;
}

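// Reserve a range for the allocation in the global kernel VA range allocator.
// The same reserved range is reused for the kernel mapping on every GPU.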
static NV_STATUS mem_init_gpu_kernel_range(uvm_mem_t *mem)
{
    if (mem->kernel.range_alloc.node != NULL)
        return NV_OK;

    return uvm_range_allocator_alloc(&g_free_ranges,
                                     uvm_mem_physical_size(mem),
                                     mem->chunk_size,
                                     &mem->kernel.range_alloc);
}

static void mem_deinit_gpu_kernel_range(uvm_mem_t *mem)
{
    if (mem->kernel.range_alloc.node == NULL)
        return;

    // Do not remove the range allocation if there is any GPU where the memory
    // is still mapped on kernel space.
    if (UVM_ID_IS_VALID(uvm_processor_mask_find_first_gpu_id(&mem->kernel.mapped_on)))
        return;

    uvm_range_allocator_free(&g_free_ranges, &mem->kernel.range_alloc);
}

NV_STATUS uvm_mem_map_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;
    NvU64 gpu_va;
    uvm_page_table_range_vec_t **range_vec;
    uvm_mem_gpu_mapping_attrs_t attrs = {
        .protection = UVM_PROT_READ_WRITE_ATOMIC,
        .is_cacheable = uvm_mem_is_vidmem(mem)
    };

    UVM_ASSERT(mem_can_be_mapped_on_gpu_kernel(mem, gpu));

    if (uvm_mem_mapped_on_gpu_kernel(mem, gpu))
        return NV_OK;

    status = uvm_mem_map_gpu_phys(mem, gpu);
    if (status != NV_OK)
        return status;

    status = mem_init_gpu_kernel_range(mem);
    if (status != NV_OK)
        return status;

    gpu_va = reserved_gpu_va(mem, gpu);
    range_vec = &mem->kernel.range_vecs[uvm_id_gpu_index(gpu->id)];

    status = mem_map_gpu(mem, gpu, gpu_va, &gpu->address_space_tree, &attrs, range_vec);
    if (status != NV_OK)
        goto cleanup;

    mem_set_mapped_on_gpu_kernel(mem, gpu);

    return NV_OK;

cleanup:
    mem_deinit_gpu_kernel_range(mem);

    return status;
}

NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
                               uvm_gpu_t *gpu,
                               uvm_va_space_t *user_va_space,
                               void *user_addr,
                               const uvm_mem_gpu_mapping_attrs_t *attrs)
{
    NV_STATUS status;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_page_table_range_vec_t **range_vec;
    NvU64 gpu_va;

    UVM_ASSERT(mem_can_be_mapped_on_gpu_user(mem, gpu));
    uvm_assert_rwsem_locked(&user_va_space->lock);

    if (uvm_mem_mapped_on_gpu_user(mem, gpu))
        return NV_OK;

    gpu_va = (NvU64)user_addr;
    if (!uvm_gpu_can_address(gpu, gpu_va, mem->size))
        return NV_ERR_OUT_OF_RANGE;

    status = uvm_mem_map_gpu_phys(mem, gpu);
    if (status != NV_OK)
        return status;

    status = mem_init_user_mapping(mem, user_va_space, user_addr);
    if (status != NV_OK)
        return status;

    gpu_va_space = uvm_gpu_va_space_get(mem->user->va_space, gpu);
    range_vec = &mem->user->range_vecs[uvm_id_gpu_index(gpu->id)];

    status = mem_map_gpu(mem, gpu, gpu_va, &gpu_va_space->page_tables, attrs, range_vec);
    if (status != NV_OK)
        goto cleanup;

    mem_set_mapped_on_gpu_user(mem, gpu);

    return NV_OK;

cleanup:
    mem_deinit_user_mapping(mem);
    return status;
}

void uvm_mem_unmap_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (!uvm_mem_mapped_on_gpu_user(mem, gpu))
        return;

    mem_unmap_gpu(mem, gpu, &mem->user->range_vecs[uvm_id_gpu_index(gpu->id)]);
    mem_clear_mapped_on_gpu_user(mem, gpu);
    mem_deinit_user_mapping(mem);
}

void uvm_mem_unmap_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (!uvm_mem_mapped_on_gpu_kernel(mem, gpu))
        return;

    mem_unmap_gpu(mem, gpu, &mem->kernel.range_vecs[uvm_id_gpu_index(gpu->id)]);
    mem_clear_mapped_on_gpu_kernel(mem, gpu);
    mem_deinit_gpu_kernel_range(mem);
}

static bool mem_can_be_phys_mapped_on_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (uvm_mem_is_sysmem(mem))
        return sysmem_can_be_mapped_on_gpu(mem);
    else
        return uvm_mem_is_local_vidmem(mem, gpu);
}

NV_STATUS uvm_mem_map_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;

    UVM_ASSERT(mem_can_be_phys_mapped_on_gpu(mem, gpu));

    if (uvm_mem_is_vidmem(mem))
        return NV_OK;

    if (gpu == mem->dma_owner)
        return NV_OK;

    if (sysmem_mapped_on_gpu_phys(mem, gpu))
        return NV_OK;

    status = sysmem_map_gpu_phys(mem, gpu);
    if (status != NV_OK)
        return status;

    sysmem_set_mapped_on_gpu_phys(mem, gpu);
    return NV_OK;
}

void uvm_mem_unmap_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem);
    UVM_ASSERT(gpu);

    if (uvm_mem_is_vidmem(mem))
        return;

    // The mapping for the GPU that owns the allocation is managed by the
    // dma_alloc_coherent API and is torn down when the allocation is freed.
    if (gpu == mem->dma_owner)
        return;

    if (!sysmem_mapped_on_gpu_phys(mem, gpu))
        return;

    uvm_mem_unmap_gpu_user(mem, gpu);
    uvm_mem_unmap_gpu_kernel(mem, gpu);

    sysmem_unmap_gpu_phys(mem, gpu);
    sysmem_clear_mapped_on_gpu_phys(mem, gpu);
}

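// Tear down any remaining CPU and GPU mappings of the allocation, then free
// its backing chunks and the uvm_mem_t itself.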
void uvm_mem_free(uvm_mem_t *mem)
{
    uvm_gpu_t *gpu;

    if (mem == NULL)
        return;

    uvm_mem_unmap_cpu_user(mem);
    uvm_mem_unmap_cpu_kernel(mem);

    if (mem->user != NULL) {
        for_each_gpu_in_mask(gpu, &mem->user->mapped_on) {
            uvm_mem_unmap_gpu_user(mem, gpu);

            // If we unmapped the last device, the user mapping is freed, so
            // exit the loop before the iterator accesses a non-existing mask.
            if (mem->user == NULL)
                break;
        }
    }

    for_each_gpu_in_mask(gpu, &mem->kernel.mapped_on)
        uvm_mem_unmap_gpu_kernel(mem, gpu);

    if (uvm_mem_is_sysmem(mem)) {
        for_each_gpu_in_mask(gpu, &mem->sysmem.mapped_on_phys)
            uvm_mem_unmap_gpu_phys(mem, gpu);
    }

    mem_free_chunks(mem);

    uvm_kvfree(mem);
}

void *uvm_mem_get_cpu_addr_kernel(uvm_mem_t *mem)
{
    UVM_ASSERT(uvm_mem_mapped_on_cpu_kernel(mem));

    return mem->kernel.cpu_addr;
}

NvU64 uvm_mem_get_gpu_va_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_mapped_on_gpu_kernel(mem, gpu));

    return reserved_gpu_va(mem, gpu);
}

uvm_gpu_address_t uvm_mem_gpu_address_virtual_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    uvm_gpu_address_t addr = uvm_gpu_address_virtual(uvm_mem_get_gpu_va_kernel(mem, gpu));

    if (g_uvm_global.conf_computing_enabled && mem->dma_owner)
        addr.is_unprotected = true;

    return addr;
}

uvm_gpu_address_t uvm_mem_gpu_address_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size)
{
    uvm_gpu_address_t addr = uvm_gpu_address_from_phys(uvm_mem_gpu_physical(mem, gpu, offset, size));

    if (g_uvm_global.conf_computing_enabled && mem->dma_owner)
        addr.is_unprotected = true;

    return addr;
}