/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_mem.h"
#include "uvm_hal_types.h"
#include "uvm_mmu.h"
#include "uvm_processors.h"
#include "uvm_va_space.h"
#include "uvm_gpu.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_push.h"
#include "uvm_range_allocator.h"
#include "uvm_hal.h"
#include "uvm_linux.h"

static uvm_range_allocator_t g_free_ranges;
static bool g_mem_initialized;

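// Initialize the global range allocator that hands out the kernel VA offsets
// used by uvm_mem GPU kernel mappings (see mem_init_gpu_kernel_range() and
// reserved_gpu_va()).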
NV_STATUS uvm_mem_global_init(void)
{
    NV_STATUS status = uvm_range_allocator_init(UVM_MEM_VA_SIZE, &g_free_ranges);
    if (status != NV_OK)
        return status;

    g_mem_initialized = true;

    return NV_OK;
}

void uvm_mem_global_exit(void)
{
    if (!g_mem_initialized)
        return;

    uvm_range_allocator_deinit(&g_free_ranges);
}

static bool vidmem_can_be_mapped(uvm_mem_t *vidmem, bool is_user_space)
{
    UVM_ASSERT(uvm_mem_is_vidmem(vidmem));

    // Mapping a vidmem allocation on a user VA space is currently unsupported,
    // because there is no use case.
    if (is_user_space)
        return false;

    return true;
}

static bool mem_can_be_mapped_on_cpu(uvm_mem_t *mem, bool is_user_space)
{
    if (uvm_mem_is_sysmem(mem))
        return true;

    if (!vidmem_can_be_mapped(mem, is_user_space))
        return false;

    return mem->backing_gpu->mem_info.numa.enabled && PAGE_ALIGNED(mem->chunk_size);
}

static bool mem_can_be_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    return mem_can_be_mapped_on_cpu(mem, false);
}

static bool mem_can_be_mapped_on_cpu_user(uvm_mem_t *mem)
{
    return mem_can_be_mapped_on_cpu(mem, true);
}

static bool sysmem_can_be_mapped_on_gpu(uvm_mem_t *sysmem)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));

    // In Confidential Computing, only unprotected memory can be mapped on the
    // GPU
    if (g_uvm_global.conf_computing_enabled)
        return uvm_mem_is_sysmem_dma(sysmem);

    return true;
}

static bool mem_can_be_mapped_on_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu, bool is_user_space)
{
    if (uvm_mem_is_sysmem(mem))
        return sysmem_can_be_mapped_on_gpu(mem);

    if (!vidmem_can_be_mapped(mem, is_user_space))
        return false;

    return uvm_mem_is_local_vidmem(mem, gpu);
}

static bool mem_can_be_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    return mem_can_be_mapped_on_gpu(mem, gpu, false);
}

static bool mem_can_be_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    return mem_can_be_mapped_on_gpu(mem, gpu, true);
}

bool uvm_mem_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (mem->user == NULL)
        return false;

    return uvm_processor_mask_test(&mem->user->mapped_on, gpu->id);
}

bool uvm_mem_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    return uvm_processor_mask_test(&mem->kernel.mapped_on, gpu->id);
}

bool uvm_mem_mapped_on_cpu_user(uvm_mem_t *mem)
{
    if (mem->user == NULL)
        return false;

    return uvm_processor_mask_test(&mem->user->mapped_on, UVM_ID_CPU);
}

bool uvm_mem_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    return uvm_processor_mask_test(&mem->kernel.mapped_on, UVM_ID_CPU);
}

static void mem_set_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem->user != NULL);
    UVM_ASSERT(mem_can_be_mapped_on_gpu_user(mem, gpu));
    UVM_ASSERT(!uvm_mem_mapped_on_gpu_user(mem, gpu));

    uvm_processor_mask_set(&mem->user->mapped_on, gpu->id);
}

static void mem_set_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem_can_be_mapped_on_gpu_kernel(mem, gpu));
    UVM_ASSERT(!uvm_mem_mapped_on_gpu_kernel(mem, gpu));

    uvm_processor_mask_set(&mem->kernel.mapped_on, gpu->id);
}

static void mem_set_mapped_on_cpu_user(uvm_mem_t *mem)
{
    UVM_ASSERT(mem->user != NULL);
    UVM_ASSERT(mem_can_be_mapped_on_cpu_user(mem));
    UVM_ASSERT(!uvm_mem_mapped_on_cpu_user(mem));

    uvm_processor_mask_set(&mem->user->mapped_on, UVM_ID_CPU);
}

static void mem_set_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    UVM_ASSERT(mem_can_be_mapped_on_cpu_kernel(mem));
    UVM_ASSERT(!uvm_mem_mapped_on_cpu_kernel(mem));

    uvm_processor_mask_set(&mem->kernel.mapped_on, UVM_ID_CPU);
}

static void mem_clear_mapped_on_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    uvm_processor_mask_clear(&mem->kernel.mapped_on, gpu->id);
}

static void mem_clear_mapped_on_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem->user != NULL);

    uvm_processor_mask_clear(&mem->user->mapped_on, gpu->id);
}

static void mem_clear_mapped_on_cpu_user(uvm_mem_t *mem)
{
    UVM_ASSERT(mem->user != NULL);

    uvm_processor_mask_clear(&mem->user->mapped_on, UVM_ID_CPU);
}

static void mem_clear_mapped_on_cpu_kernel(uvm_mem_t *mem)
{
    uvm_processor_mask_clear(&mem->kernel.mapped_on, UVM_ID_CPU);
}

static bool sysmem_mapped_on_gpu_phys(uvm_mem_t *sysmem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));

    return uvm_processor_mask_test(&sysmem->sysmem.mapped_on_phys, gpu->id);
}

static void sysmem_set_mapped_on_gpu_phys(uvm_mem_t *sysmem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
    UVM_ASSERT(!sysmem_mapped_on_gpu_phys(sysmem, gpu));

    uvm_processor_mask_set(&sysmem->sysmem.mapped_on_phys, gpu->id);
}

static void sysmem_clear_mapped_on_gpu_phys(uvm_mem_t *sysmem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_is_sysmem(sysmem));

    uvm_processor_mask_clear(&sysmem->sysmem.mapped_on_phys, gpu->id);
}

NV_STATUS uvm_mem_translate_gpu_attributes(const UvmGpuMappingAttributes *attrs,
                                           uvm_va_space_t *va_space,
                                           uvm_gpu_t **gpu_out,
                                           uvm_mem_gpu_mapping_attrs_t *attrs_out)
{
    uvm_gpu_t *gpu;

    switch (attrs->gpuMappingType) {
        case UvmGpuMappingTypeDefault:
            break;
        case UvmGpuMappingTypeReadWriteAtomic:
            attrs_out->protection = UVM_PROT_READ_WRITE_ATOMIC;
            break;
        case UvmGpuMappingTypeReadWrite:
            attrs_out->protection = UVM_PROT_READ_WRITE;
            break;
        case UvmGpuMappingTypeReadOnly:
            attrs_out->protection = UVM_PROT_READ_ONLY;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    switch (attrs->gpuCachingType) {
        case UvmGpuCachingTypeDefault:
            break;
        case UvmGpuCachingTypeForceUncached:
            attrs_out->is_cacheable = false;
            break;
        case UvmGpuCachingTypeForceCached:
            attrs_out->is_cacheable = true;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    gpu = uvm_va_space_get_gpu_by_uuid(va_space, &attrs->gpuUuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    if (gpu_out)
        *gpu_out = gpu;

    return NV_OK;
}

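// Return the struct page backing a kernel virtual address, handling both
// addresses in the kernel linear map and vmalloc addresses.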
static struct page *uvm_virt_to_page(const void *addr)
{
    if (virt_addr_valid(addr))
        return virt_to_page(addr);

    if (is_vmalloc_addr(addr))
        return vmalloc_to_page(addr);

    return NULL;
}

uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
{
    // Get the mmu mode hal directly as the internal address space tree has not
    // been created yet.
    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
    NvU32 page_sizes = hal->page_sizes();

    return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
}

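// Pick the backing chunk size for an allocation that requested
// UVM_PAGE_SIZE_DEFAULT: PAGE_SIZE for sysmem, and for vidmem the largest GPU
// page size (4K, big page, or the biggest supported size) that the allocation
// size justifies.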
static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
{
    NvU32 biggest_page_size;
    NvU32 chunk_size;

    if (uvm_mem_is_sysmem(mem))
        return PAGE_SIZE;

    biggest_page_size = uvm_mmu_biggest_page_size_up_to(&mem->backing_gpu->address_space_tree, UVM_CHUNK_SIZE_MAX);

    if (mem->size < mem->backing_gpu->big_page.internal_size)
        chunk_size = UVM_PAGE_SIZE_4K;
    else if (mem->size < biggest_page_size)
        chunk_size = mem->backing_gpu->big_page.internal_size;
    else
        chunk_size = biggest_page_size;

    // When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
    // chunk_size to be at least PAGE_SIZE, to allow CPU mappings.
    if (mem->backing_gpu->mem_info.numa.enabled)
        chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);

    return chunk_size;
}

static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
{
    if (uvm_mem_is_vidmem(mem)) {
        // For vidmem allocations the chunk size is picked out of the supported
        // page sizes and can be used directly.
        return mem->chunk_size;
    }

    // For sysmem, check whether the GPU supports mapping it with large pages.
    if (gpu->parent->can_map_sysmem_with_large_pages) {
        // If it's supported, pick the largest page size not bigger than
        // the chunk size.
        return uvm_mmu_biggest_page_size_up_to(gpu_page_tree, mem->chunk_size);
    }

    // Otherwise just use 4K.
    return UVM_PAGE_SIZE_4K;
}

static void mem_free_vidmem_chunks(uvm_mem_t *mem)
{
    size_t i;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    if (!mem->vidmem.chunks)
        return;

    for (i = 0; i < mem->chunks_count; ++i) {
        // On allocation error, PMM guarantees that the chunks array is zeroed,
        // so just check for NULL.
        if (mem->vidmem.chunks[i] == NULL)
            break;
        uvm_pmm_gpu_free(&mem->backing_gpu->pmm, mem->vidmem.chunks[i], NULL);
    }

    uvm_kvfree(mem->vidmem.chunks);
    mem->vidmem.chunks = NULL;
}

static void mem_free_sysmem_dma_chunks(uvm_mem_t *mem)
{
    size_t i;
    NvU32 gpu_index;

    UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));
    gpu_index = uvm_id_gpu_index(mem->dma_owner->id);

    if (!mem->sysmem.pages || !mem->sysmem.va)
        goto end;

    for (i = 0; i < mem->chunks_count; ++i) {
        if (!mem->sysmem.va[i])
            break;

        uvm_parent_gpu_dma_free_page(mem->dma_owner->parent,
                                     mem->sysmem.va[i],
                                     mem->sysmem.dma_addrs[gpu_index][i]);
    }

end:
    sysmem_clear_mapped_on_gpu_phys(mem, mem->dma_owner);

    uvm_kvfree(mem->sysmem.dma_addrs[gpu_index]);
    mem->sysmem.dma_addrs[gpu_index] = NULL;

    uvm_kvfree(mem->sysmem.pages);
    mem->sysmem.pages = NULL;

    uvm_kvfree(mem->sysmem.va);
    mem->sysmem.va = NULL;
}

static void mem_free_sysmem_chunks(uvm_mem_t *mem)
{
    size_t i;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));

    if (!mem->sysmem.pages)
        return;

    for (i = 0; i < mem->chunks_count; ++i) {
        if (!mem->sysmem.pages[i])
            break;
        __free_pages(mem->sysmem.pages[i], get_order(mem->chunk_size));
    }

    uvm_kvfree(mem->sysmem.pages);
    mem->sysmem.pages = NULL;
}

static void mem_free_chunks(uvm_mem_t *mem)
{
    if (uvm_mem_is_vidmem(mem))
        mem_free_vidmem_chunks(mem);
    else if (uvm_mem_is_sysmem_dma(mem))
        mem_free_sysmem_dma_chunks(mem);
    else
        mem_free_sysmem_chunks(mem);
}

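// Allocate the per-GPU array used to track the DMA address of each chunk for
// the given GPU.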
static NV_STATUS mem_alloc_dma_addrs(uvm_mem_t *mem, const uvm_gpu_t *gpu)
{
    NvU64 *dma_addrs = NULL;
    NvU32 gpu_index = uvm_id_gpu_index(gpu->id);

    dma_addrs = uvm_kvmalloc_zero(sizeof(*dma_addrs) * mem->chunks_count);
    if (!dma_addrs)
        return NV_ERR_NO_MEMORY;

    mem->sysmem.dma_addrs[gpu_index] = dma_addrs;

    return NV_OK;
}

static gfp_t sysmem_allocation_gfp_flags(int order, bool zero)
{
    gfp_t gfp_flags = NV_UVM_GFP_FLAGS;

    if (zero)
        gfp_flags |= __GFP_ZERO;

    // High-order page allocations require the __GFP_COMP flag to work with
    // vm_insert_page.
    if (order > 0)
        gfp_flags |= __GFP_COMP;

    return gfp_flags;
}

// This allocation is a non-protected memory allocation under Confidential
// Computing.
//
// There is a tighter coupling between allocation and mapping because of the
// allocator UVM must use. Hence, this function does the equivalent of
// uvm_mem_map_gpu_phys().
//
// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free().
static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
{
    size_t i;
    NV_STATUS status;
    NvU64 *dma_addrs;

    UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
                   "mem->chunk_size is 0x%x. Only PAGE_SIZE is supported.",
                   mem->chunk_size);
    UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));

    mem->sysmem.pages = uvm_kvmalloc_zero(sizeof(*mem->sysmem.pages) * mem->chunks_count);
    mem->sysmem.va = uvm_kvmalloc_zero(sizeof(*mem->sysmem.va) * mem->chunks_count);
    if (!mem->sysmem.pages || !mem->sysmem.va)
        goto err_no_mem;

    status = mem_alloc_dma_addrs(mem, mem->dma_owner);
    if (status != NV_OK)
        goto error;

    dma_addrs = mem->sysmem.dma_addrs[uvm_id_gpu_index(mem->dma_owner->id)];

    for (i = 0; i < mem->chunks_count; ++i) {
        mem->sysmem.va[i] = uvm_parent_gpu_dma_alloc_page(mem->dma_owner->parent, gfp_flags, &dma_addrs[i]);
        if (!mem->sysmem.va[i])
            goto err_no_mem;

        mem->sysmem.pages[i] = uvm_virt_to_page(mem->sysmem.va[i]);
        if (!mem->sysmem.pages[i])
            goto err_no_mem;
    }

    sysmem_set_mapped_on_gpu_phys(mem, mem->dma_owner);

    return NV_OK;

err_no_mem:
    status = NV_ERR_NO_MEMORY;
error:
    mem_free_sysmem_dma_chunks(mem);
    return status;
}

// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free().
static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
{
    size_t i;
    int order;

    UVM_ASSERT(uvm_mem_is_sysmem(mem) && !uvm_mem_is_sysmem_dma(mem));

    mem->sysmem.pages = uvm_kvmalloc_zero(sizeof(*mem->sysmem.pages) * mem->chunks_count);
    if (!mem->sysmem.pages)
        return NV_ERR_NO_MEMORY;

    order = get_order(mem->chunk_size);
    for (i = 0; i < mem->chunks_count; ++i) {
        mem->sysmem.pages[i] = alloc_pages(gfp_flags, order);
        if (!mem->sysmem.pages[i])
            return NV_ERR_NO_MEMORY;
    }

    return NV_OK;
}

// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free().
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
{
    NV_STATUS status;
    uvm_pmm_gpu_memory_type_t mem_type;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    // TODO: Bug 2446832: A non-zeroing request may not be obeyed because PMM
    // does not support explicit allocation of non-zeroed (or zeroed) chunks.
    //
    // The zeroing case can be implemented even without resolving that bug, by
    // clearing the chunks after PMM allocation. But this functionality has not
    // been implemented, because the only expected use case is memory that gets
    // mapped in user space, and vidmem never is.
    UVM_ASSERT(!zero);

    mem->vidmem.chunks = uvm_kvmalloc_zero(mem->chunks_count * sizeof(*mem->vidmem.chunks));
    if (!mem->vidmem.chunks)
        return NV_ERR_NO_MEMORY;

    // When CC is disabled the behavior is identical to that of PMM, and the
    // protection flag is ignored (squashed by PMM internally).
    if (is_unprotected)
        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
    else
        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;

    status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
                               mem->chunks_count,
                               mem->chunk_size,
                               mem_type,
                               UVM_PMM_ALLOC_FLAGS_NONE,
                               mem->vidmem.chunks,
                               NULL);

    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
                      mem->chunks_count,
                      mem->chunk_size,
                      nvstatusToString(status));
        return status;
    }

    return NV_OK;
}

static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
{
    if (uvm_mem_is_sysmem(mem)) {
        gfp_t gfp_flags;
        uvm_memcg_context_t memcg_context;
        NV_STATUS status;

        UVM_ASSERT(PAGE_ALIGNED(mem->chunk_size));
        gfp_flags = sysmem_allocation_gfp_flags(get_order(mem->chunk_size), zero);
        if (UVM_CGROUP_ACCOUNTING_SUPPORTED() && mm)
            gfp_flags |= NV_UVM_GFP_FLAGS_ACCOUNT;

        uvm_memcg_context_start(&memcg_context, mm);
        if (uvm_mem_is_sysmem_dma(mem))
            status = mem_alloc_sysmem_dma_chunks(mem, gfp_flags);
        else
            status = mem_alloc_sysmem_chunks(mem, gfp_flags);

        uvm_memcg_context_end(&memcg_context);
        return status;
    }

    return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
}

NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
{
    uvm_gpu_t *gpu;
    NV_STATUS status;

    if (!mask)
        return NV_OK;

    if (uvm_processor_mask_test(mask, UVM_ID_CPU)) {
        status = uvm_mem_map_cpu_kernel(mem);
        if (status != NV_OK)
            return status;
    }

    for_each_gpu_in_mask(gpu, mask) {
        status = uvm_mem_map_gpu_kernel(mem, gpu);
        if (status != NV_OK)
            return status;
    }
    return NV_OK;
}

NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_out)
{
    NV_STATUS status;
    NvU64 physical_size;
    uvm_mem_t *mem = NULL;
    bool is_unprotected = false;

    UVM_ASSERT(params->size > 0);

    mem = uvm_kvmalloc_zero(sizeof(*mem));
    if (mem == NULL)
        return NV_ERR_NO_MEMORY;

    mem->backing_gpu = params->backing_gpu;
    mem->dma_owner = params->dma_owner;
    UVM_ASSERT(!mem->dma_owner || !mem->backing_gpu);

    mem->size = params->size;
    mem->chunk_size = params->page_size;
    if (mem->chunk_size == UVM_PAGE_SIZE_DEFAULT)
        mem->chunk_size = mem_pick_chunk_size(mem);

    UVM_ASSERT(mem->chunk_size > 0);

    physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
    mem->chunks_count = physical_size / mem->chunk_size;

    if (params->is_unprotected)
        UVM_ASSERT(uvm_mem_is_vidmem(mem));

    is_unprotected = params->is_unprotected;

    status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
    if (status != NV_OK)
        goto error;

    *mem_out = mem;
    return NV_OK;

error:
    uvm_mem_free(mem);
    return status;
}

static NV_STATUS mem_init_user_mapping(uvm_mem_t *mem, uvm_va_space_t *user_va_space, void *user_addr)
{
    UVM_ASSERT(user_va_space);
    UVM_ASSERT(user_addr);

    // If the user structure exists, the VA space and address should match
    if (mem->user != NULL) {
        UVM_ASSERT(mem->user->va_space == user_va_space);
        UVM_ASSERT(mem->user->addr == user_addr);
        return NV_OK;
    }

    UVM_ASSERT(IS_ALIGNED((NvU64)user_addr, mem->chunk_size));
    UVM_ASSERT(uvm_mem_physical_size(mem) == mem->size);

    mem->user = uvm_kvmalloc_zero(sizeof(*mem->user));
    if (mem->user == NULL)
        return NV_ERR_NO_MEMORY;

    mem->user->va_space = user_va_space;
    mem->user->addr = user_addr;

    return NV_OK;
}

static void mem_deinit_user_mapping(uvm_mem_t *mem)
{
    if (mem->user == NULL)
        return;

    if (!uvm_processor_mask_empty(&mem->user->mapped_on))
        return;

    uvm_kvfree(mem->user);
    mem->user = NULL;
}

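// GPU kernel VA reserved for this allocation: the per-GPU uvm_mem VA base plus
// the offset handed out by the global range allocator.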
static NvU64 reserved_gpu_va(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem->kernel.range_alloc.aligned_start + uvm_mem_physical_size(mem) < gpu->parent->uvm_mem_va_size);

    return gpu->parent->uvm_mem_va_base + mem->kernel.range_alloc.aligned_start;
}

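// Return the CPU page backing the given page-aligned offset into the sysmem
// allocation.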
static struct page *mem_cpu_page(uvm_mem_t *mem, NvU64 offset)
{
    struct page *base_page = mem->sysmem.pages[offset / mem->chunk_size];

    UVM_ASSERT_MSG(PAGE_ALIGNED(offset), "offset 0x%llx\n", offset);

    offset = offset % mem->chunk_size;
    return pfn_to_page(page_to_pfn(base_page) + offset / PAGE_SIZE);
}

static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
{
    struct page **pages = mem->sysmem.pages;
    size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
    pgprot_t prot = PAGE_KERNEL;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));

    // If chunk size is different than PAGE_SIZE then create a temporary array
    // of all the pages to map so that vmap() can be used.
    if (mem->chunk_size != PAGE_SIZE) {
        size_t page_index;
        pages = uvm_kvmalloc(sizeof(*pages) * num_pages);
        if (!pages)
            return NV_ERR_NO_MEMORY;
        for (page_index = 0; page_index < num_pages; ++page_index)
            pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
    }

    if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
        prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);

    mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);

    if (mem->chunk_size != PAGE_SIZE)
        uvm_kvfree(pages);

    if (!mem->kernel.cpu_addr)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}

static NV_STATUS mem_map_cpu_to_vidmem_kernel(uvm_mem_t *mem)
{
    struct page **pages;
    size_t num_chunk_pages = mem->chunk_size / PAGE_SIZE;
    size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
    size_t page_index;
    size_t chunk_index;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    pages = uvm_kvmalloc(sizeof(*pages) * num_pages);
    if (!pages)
        return NV_ERR_NO_MEMORY;

    page_index = 0;

    for (chunk_index = 0; chunk_index < mem->chunks_count; ++chunk_index) {
        uvm_gpu_chunk_t *chunk = mem->vidmem.chunks[chunk_index];
        struct page *page = uvm_gpu_chunk_to_page(&mem->backing_gpu->pmm, chunk);
        size_t chunk_page_index;

        for (chunk_page_index = 0; chunk_page_index < num_chunk_pages; ++chunk_page_index)
            pages[page_index++] = page + chunk_page_index;
    }
    UVM_ASSERT(page_index == num_pages);

    mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);

    uvm_kvfree(pages);

    if (!mem->kernel.cpu_addr)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}

void uvm_mem_unmap_cpu_kernel(uvm_mem_t *mem)
{
    if (!uvm_mem_mapped_on_cpu_kernel(mem))
        return;

    vunmap(mem->kernel.cpu_addr);
    mem->kernel.cpu_addr = NULL;
    mem_clear_mapped_on_cpu_kernel(mem);
}

static NV_STATUS mem_map_cpu_to_sysmem_user(uvm_mem_t *mem, struct vm_area_struct *vma)
{
    NV_STATUS status;
    NvU64 offset;

    UVM_ASSERT(mem->user != NULL);
    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    uvm_assert_mmap_lock_locked(vma->vm_mm);

    // TODO: Bug 1995015: high-order page allocations need to be allocated as
    // compound pages in order to be able to use vm_insert_page on them. This
    // is not currently being exercised because the only allocations using this
    // are semaphore pools (which typically use a single page).
    for (offset = 0; offset < uvm_mem_physical_size(mem); offset += PAGE_SIZE) {
        int ret = vm_insert_page(vma, (unsigned long)mem->user->addr + offset, mem_cpu_page(mem, offset));
        if (ret) {
            UVM_ASSERT_MSG(ret == -ENOMEM, "ret: %d\n", ret);
            status = errno_to_nv_status(ret);
            goto error;
        }
    }

    return NV_OK;

error:
    unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
    return status;
}

void uvm_mem_unmap_cpu_user(uvm_mem_t *mem)
{
    if (!uvm_mem_mapped_on_cpu_user(mem))
        return;

    unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
    mem_clear_mapped_on_cpu_user(mem);
    mem_deinit_user_mapping(mem);
}

NV_STATUS uvm_mem_map_cpu_user(uvm_mem_t *mem, uvm_va_space_t *user_va_space, struct vm_area_struct *vma)
{
    NV_STATUS status;
    void *user_addr;

    UVM_ASSERT(mem);
    UVM_ASSERT(mem_can_be_mapped_on_cpu_user(mem));

    if (uvm_mem_mapped_on_cpu_user(mem))
        return NV_OK;

    UVM_ASSERT((vma->vm_end - vma->vm_start) == mem->size);

    user_addr = (void *) (uintptr_t)vma->vm_start;

    status = mem_init_user_mapping(mem, user_va_space, user_addr);
    if (status != NV_OK)
        return status;

    status = mem_map_cpu_to_sysmem_user(mem, vma);
    if (status != NV_OK)
        goto cleanup;

    mem_set_mapped_on_cpu_user(mem);

    return NV_OK;

cleanup:
    mem_deinit_user_mapping(mem);
    return status;
}

NV_STATUS uvm_mem_map_cpu_kernel(uvm_mem_t *mem)
{
    NV_STATUS status;

    UVM_ASSERT(mem);
    UVM_ASSERT(mem_can_be_mapped_on_cpu_kernel(mem));

    if (uvm_mem_mapped_on_cpu_kernel(mem))
        return NV_OK;

    if (uvm_mem_is_sysmem(mem))
        status = mem_map_cpu_to_sysmem_kernel(mem);
    else
        status = mem_map_cpu_to_vidmem_kernel(mem);

    if (status != NV_OK)
        return status;

    mem_set_mapped_on_cpu_kernel(mem);

    return NV_OK;
}

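// Tear down the DMA mappings of the sysmem chunks for a GPU other than the DMA
// owner and free the per-GPU DMA address array.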
static void sysmem_unmap_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NvU64 *dma_addrs = mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)];
    NvU32 i;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    UVM_ASSERT(gpu != mem->dma_owner);
    UVM_ASSERT(dma_addrs);

    for (i = 0; i < mem->chunks_count; ++i) {
        if (dma_addrs[i] == 0) {
            // The DMA address can only be 0 when cleaning up after a failed
            // partial map_gpu_sysmem_iommu() operation.
            break;
        }
        uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_addrs[i], mem->chunk_size);
        dma_addrs[i] = 0;
    }

    uvm_kvfree(dma_addrs);
    mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)] = NULL;
}

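// Create DMA mappings of all sysmem chunks so the given GPU (which is not the
// DMA owner) can access the memory physically.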
static NV_STATUS sysmem_map_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;
    size_t i;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    UVM_ASSERT(gpu != mem->dma_owner);

    status = mem_alloc_dma_addrs(mem, gpu);
    if (status != NV_OK)
        return status;

    for (i = 0; i < mem->chunks_count; ++i) {
        status = uvm_parent_gpu_map_cpu_pages(gpu->parent,
                                              mem->sysmem.pages[i],
                                              mem->chunk_size,
                                              &mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)][i]);
        if (status != NV_OK)
            goto error;
    }

    return NV_OK;

error:
    sysmem_unmap_gpu_phys(mem, gpu);
    return status;
}

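// Look up the vidmem chunk backing the given offset and, optionally, the
// offset within that chunk.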
static uvm_gpu_chunk_t *mem_get_chunk(uvm_mem_t *mem, size_t mem_offset, size_t *offset_in_chunk)
{
    size_t chunk_index = uvm_div_pow2_64(mem_offset, mem->chunk_size);

    if (offset_in_chunk)
        *offset_in_chunk = mem_offset & (mem->chunk_size - 1);

    UVM_ASSERT(uvm_mem_is_vidmem(mem));
    return mem->vidmem.chunks[chunk_index];
}

static uvm_gpu_phys_address_t mem_gpu_physical_vidmem(uvm_mem_t *mem, size_t offset)
{
    size_t chunk_offset;
    uvm_gpu_chunk_t *chunk = mem_get_chunk(mem, offset, &chunk_offset);
    return uvm_gpu_phys_address(UVM_APERTURE_VID, chunk->address + chunk_offset);
}

static uvm_gpu_phys_address_t mem_gpu_physical_sysmem(uvm_mem_t *mem, uvm_gpu_t *gpu, size_t offset)
{
    NvU64 *dma_addrs = mem->sysmem.dma_addrs[uvm_id_gpu_index(gpu->id)];
    NvU64 dma_addr = dma_addrs[offset / mem->chunk_size];

    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    UVM_ASSERT(sysmem_mapped_on_gpu_phys(mem, gpu));

    return uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr + offset % mem->chunk_size);
}

bool uvm_mem_is_physically_contiguous(uvm_mem_t *mem, NvU64 offset, NvU64 size)
{
    UVM_ASSERT(size != 0);
    UVM_ASSERT((offset + size) <= uvm_mem_physical_size(mem));

    return UVM_ALIGN_DOWN(offset, mem->chunk_size) == UVM_ALIGN_DOWN(offset + size - 1, mem->chunk_size);
}

uvm_gpu_phys_address_t uvm_mem_gpu_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size)
{
    UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));

    if (uvm_mem_is_vidmem(mem)) {
        UVM_ASSERT(uvm_mem_is_local_vidmem(mem, gpu));

        return mem_gpu_physical_vidmem(mem, offset);
    }

    return mem_gpu_physical_sysmem(mem, gpu, offset);
}

uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_gpu, NvU64 offset, NvU64 size)
{
    uvm_gpu_address_t copy_addr;
    size_t chunk_offset;
    uvm_gpu_chunk_t *chunk;

    UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));

    if (uvm_mem_is_sysmem(mem) || uvm_mem_is_local_vidmem(mem, accessing_gpu))
        return uvm_gpu_address_copy(accessing_gpu, uvm_mem_gpu_physical(mem, accessing_gpu, offset, size));

    // Peer GPUs may need to use some form of translation (identity mappings,
    // indirect peers) to copy.
    chunk = mem_get_chunk(mem, offset, &chunk_offset);
    copy_addr = uvm_pmm_gpu_peer_copy_address(&mem->backing_gpu->pmm, chunk, accessing_gpu);
    copy_addr.address += chunk_offset;
    return copy_addr;
}

typedef struct uvm_mem_pte_maker_data_struct
{
    uvm_mem_t *mem;
    const uvm_mem_gpu_mapping_attrs_t *attrs;
} uvm_mem_pte_maker_data_t;

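// uvm_page_table_range_vec_write_ptes() callback: build the PTE for the given
// offset from the allocation's physical address and the requested mapping
// attributes.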
static NvU64 mem_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset, void *vp_data)
{
    uvm_mem_pte_maker_data_t *data = (uvm_mem_pte_maker_data_t *)vp_data;
    uvm_page_tree_t *tree = range_vec->tree;
    uvm_gpu_t *gpu = tree->gpu;
    uvm_gpu_phys_address_t phys = uvm_mem_gpu_physical(data->mem, gpu, offset, range_vec->page_size);

    return tree->hal->make_pte(phys.aperture,
                               phys.address,
                               data->attrs->protection,
                               data->attrs->is_cacheable ? UVM_MMU_PTE_FLAGS_CACHED : UVM_MMU_PTE_FLAGS_NONE);
}

static void mem_unmap_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_table_range_vec_t **range_vec)
{
    uvm_membar_t tlb_membar = uvm_hal_downgrade_membar_type(gpu, uvm_mem_is_local_vidmem(mem, gpu));
    NV_STATUS status = uvm_page_table_range_vec_clear_ptes(*range_vec, tlb_membar);
    if (status != NV_OK)
        UVM_ERR_PRINT("Clearing PTEs failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));

    uvm_page_table_range_vec_destroy(*range_vec);
    *range_vec = NULL;
}

static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
                             uvm_gpu_t *gpu,
                             NvU64 gpu_va,
                             uvm_page_tree_t *tree,
                             const uvm_mem_gpu_mapping_attrs_t *attrs,
                             uvm_page_table_range_vec_t **range_vec)
{
    NV_STATUS status;
    NvU32 page_size;
    uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;

    uvm_mem_pte_maker_data_t pte_maker_data = {
            .mem = mem,
            .attrs = attrs
        };

    page_size = mem_pick_gpu_page_size(mem, gpu, tree);
    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);

    // When the Confidential Computing feature is enabled, DMA allocations are
    // mostly allocated and managed by a per-GPU DMA buffer pool
    // (uvm_conf_computing_dma_buffer_pool_t). Because we would typically
    // already hold the DMA_BUFFER_POOL lock at this time, we cannot hold
    // the block lock. Allocate PTEs without eviction in this context.
    //
    // See uvm_pmm_gpu_alloc()
    if (uvm_mem_is_sysmem_dma(mem))
        pmm_flags = UVM_PMM_ALLOC_FLAGS_NONE;

    status = uvm_page_table_range_vec_create(tree,
                                             gpu_va,
                                             uvm_mem_physical_size(mem),
                                             page_size,
                                             pmm_flags,
                                             range_vec);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init page mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
                      gpu_va,
                      gpu_va + uvm_mem_physical_size(mem),
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return status;
    }

    status = uvm_page_table_range_vec_write_ptes(*range_vec, UVM_MEMBAR_NONE, mem_pte_maker, &pte_maker_data);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to write PTEs for mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
                      gpu_va,
                      gpu_va + uvm_mem_physical_size(mem),
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        goto error;
    }

    return NV_OK;

error:
    mem_unmap_gpu(mem, gpu, range_vec);
    return status;
}

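// Reserve a kernel VA range for the allocation from the global range
// allocator. The reservation is shared by all GPU kernel mappings of the
// allocation and is only released once no such mapping remains.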
static NV_STATUS mem_init_gpu_kernel_range(uvm_mem_t *mem)
{
    if (mem->kernel.range_alloc.node != NULL)
        return NV_OK;

    return uvm_range_allocator_alloc(&g_free_ranges,
                                     uvm_mem_physical_size(mem),
                                     mem->chunk_size,
                                     &mem->kernel.range_alloc);
}

static void mem_deinit_gpu_kernel_range(uvm_mem_t *mem)
{
    if (mem->kernel.range_alloc.node == NULL)
        return;

    // Do not remove the range allocation if there is any GPU where the memory
    // is still mapped on kernel space.
    if (UVM_ID_IS_VALID(uvm_processor_mask_find_first_gpu_id(&mem->kernel.mapped_on)))
        return;

    uvm_range_allocator_free(&g_free_ranges, &mem->kernel.range_alloc);
}

NV_STATUS uvm_mem_map_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;
    NvU64 gpu_va;
    uvm_page_table_range_vec_t **range_vec;
    uvm_mem_gpu_mapping_attrs_t attrs = {
            .protection = UVM_PROT_READ_WRITE_ATOMIC,
            .is_cacheable = uvm_mem_is_vidmem(mem)
        };

    UVM_ASSERT(mem_can_be_mapped_on_gpu_kernel(mem, gpu));

    if (uvm_mem_mapped_on_gpu_kernel(mem, gpu))
        return NV_OK;

    status = uvm_mem_map_gpu_phys(mem, gpu);
    if (status != NV_OK)
        return status;

    status = mem_init_gpu_kernel_range(mem);
    if (status != NV_OK)
        return status;

    gpu_va = reserved_gpu_va(mem, gpu);
    range_vec = &mem->kernel.range_vecs[uvm_id_gpu_index(gpu->id)];

    status = mem_map_gpu(mem, gpu, gpu_va, &gpu->address_space_tree, &attrs, range_vec);
    if (status != NV_OK)
        goto cleanup;

    mem_set_mapped_on_gpu_kernel(mem, gpu);

    return NV_OK;

cleanup:
    mem_deinit_gpu_kernel_range(mem);

    return status;
}

NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
                               uvm_gpu_t *gpu,
                               uvm_va_space_t *user_va_space,
                               void *user_addr,
                               const uvm_mem_gpu_mapping_attrs_t *attrs)
{
    NV_STATUS status;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_page_table_range_vec_t **range_vec;
    NvU64 gpu_va;

    UVM_ASSERT(mem_can_be_mapped_on_gpu_user(mem, gpu));
    uvm_assert_rwsem_locked(&user_va_space->lock);

    if (uvm_mem_mapped_on_gpu_user(mem, gpu))
        return NV_OK;

    gpu_va = (NvU64)user_addr;
    if (!uvm_gpu_can_address(gpu, gpu_va, mem->size))
        return NV_ERR_OUT_OF_RANGE;

    status = uvm_mem_map_gpu_phys(mem, gpu);
    if (status != NV_OK)
        return status;

    status = mem_init_user_mapping(mem, user_va_space, user_addr);
    if (status != NV_OK)
        return status;

    gpu_va_space = uvm_gpu_va_space_get(mem->user->va_space, gpu);
    range_vec = &mem->user->range_vecs[uvm_id_gpu_index(gpu->id)];

    status = mem_map_gpu(mem, gpu, gpu_va, &gpu_va_space->page_tables, attrs, range_vec);
    if (status != NV_OK)
        goto cleanup;

    mem_set_mapped_on_gpu_user(mem, gpu);

    return NV_OK;

cleanup:
    mem_deinit_user_mapping(mem);
    return status;
}

void uvm_mem_unmap_gpu_user(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (!uvm_mem_mapped_on_gpu_user(mem, gpu))
        return;

    mem_unmap_gpu(mem, gpu, &mem->user->range_vecs[uvm_id_gpu_index(gpu->id)]);
    mem_clear_mapped_on_gpu_user(mem, gpu);
    mem_deinit_user_mapping(mem);
}

void uvm_mem_unmap_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (!uvm_mem_mapped_on_gpu_kernel(mem, gpu))
        return;

    mem_unmap_gpu(mem, gpu, &mem->kernel.range_vecs[uvm_id_gpu_index(gpu->id)]);
    mem_clear_mapped_on_gpu_kernel(mem, gpu);
    mem_deinit_gpu_kernel_range(mem);
}

static bool mem_can_be_phys_mapped_on_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    if (uvm_mem_is_sysmem(mem))
        return sysmem_can_be_mapped_on_gpu(mem);
    else
        return uvm_mem_is_local_vidmem(mem, gpu);
}

NV_STATUS uvm_mem_map_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;

    UVM_ASSERT(mem_can_be_phys_mapped_on_gpu(mem, gpu));

    if (uvm_mem_is_vidmem(mem))
        return NV_OK;

    if (gpu == mem->dma_owner)
        return NV_OK;

    if (sysmem_mapped_on_gpu_phys(mem, gpu))
        return NV_OK;

    status = sysmem_map_gpu_phys(mem, gpu);
    if (status != NV_OK)
        return status;

    sysmem_set_mapped_on_gpu_phys(mem, gpu);
    return NV_OK;
}

void uvm_mem_unmap_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(mem);
    UVM_ASSERT(gpu);

    if (uvm_mem_is_vidmem(mem))
        return;

    // The mapping for the GPU that owns the allocation (managed by the
    // dma_alloc_coherent API) is torn down when the allocation is freed.
    if (gpu == mem->dma_owner)
        return;

    if (!sysmem_mapped_on_gpu_phys(mem, gpu))
        return;

    uvm_mem_unmap_gpu_user(mem, gpu);
    uvm_mem_unmap_gpu_kernel(mem, gpu);

    sysmem_unmap_gpu_phys(mem, gpu);
    sysmem_clear_mapped_on_gpu_phys(mem, gpu);
}

void uvm_mem_free(uvm_mem_t *mem)
{
    uvm_gpu_t *gpu;

    if (mem == NULL)
        return;

    uvm_mem_unmap_cpu_user(mem);
    uvm_mem_unmap_cpu_kernel(mem);

    if (mem->user != NULL) {
        for_each_gpu_in_mask(gpu, &mem->user->mapped_on) {
            uvm_mem_unmap_gpu_user(mem, gpu);

            // If we unmapped the last device, the user mapping is freed, so
            // exit the loop before the iterator accesses a non-existing mask.
            if (mem->user == NULL)
                break;
        }
    }

    for_each_gpu_in_mask(gpu, &mem->kernel.mapped_on)
        uvm_mem_unmap_gpu_kernel(mem, gpu);

    if (uvm_mem_is_sysmem(mem)) {
        for_each_gpu_in_mask(gpu, &mem->sysmem.mapped_on_phys)
            uvm_mem_unmap_gpu_phys(mem, gpu);
    }

    mem_free_chunks(mem);

    uvm_kvfree(mem);
}

void *uvm_mem_get_cpu_addr_kernel(uvm_mem_t *mem)
{
    UVM_ASSERT(uvm_mem_mapped_on_cpu_kernel(mem));

    return mem->kernel.cpu_addr;
}

NvU64 uvm_mem_get_gpu_va_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_mem_mapped_on_gpu_kernel(mem, gpu));

    return reserved_gpu_va(mem, gpu);
}

uvm_gpu_address_t uvm_mem_gpu_address_virtual_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
{
    uvm_gpu_address_t addr = uvm_gpu_address_virtual(uvm_mem_get_gpu_va_kernel(mem, gpu));

    if (g_uvm_global.conf_computing_enabled && mem->dma_owner)
        addr.is_unprotected = true;

    return addr;
}

uvm_gpu_address_t uvm_mem_gpu_address_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size)
{
    uvm_gpu_address_t addr = uvm_gpu_address_from_phys(uvm_mem_gpu_physical(mem, gpu, offset, size));

    if (g_uvm_global.conf_computing_enabled && mem->dma_owner)
        addr.is_unprotected = true;

    return addr;
}