/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_rm_mem.h"
#include "uvm_gpu.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_linux.h"
#include "nv_uvm_interface.h"

bool uvm_rm_mem_mapped_on_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    return uvm_processor_mask_test(&rm_mem->mapped_on, gpu->id);
}

bool uvm_rm_mem_mapped_on_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    if (rm_mem->proxy_vas == NULL)
        return false;

    if (rm_mem->proxy_vas[uvm_id_value(gpu->id)] == 0)
        return false;

    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
    UVM_ASSERT(uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent));

    return true;
}

bool uvm_rm_mem_mapped_on_cpu(uvm_rm_mem_t *rm_mem)
{
    return uvm_processor_mask_test(&rm_mem->mapped_on, UVM_ID_CPU);
}

static void rm_mem_set_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
    rm_mem->vas[uvm_id_value(gpu->id)] = va;
    uvm_processor_mask_set(&rm_mem->mapped_on, gpu->id);
}

static void rm_mem_set_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
    rm_mem->proxy_vas[uvm_id_value(gpu->id)] = va;
}

static void rm_mem_set_cpu_va(uvm_rm_mem_t *rm_mem, void *va)
{
    rm_mem->vas[UVM_ID_CPU_VALUE] = (uintptr_t) va;
    uvm_processor_mask_set(&rm_mem->mapped_on, UVM_ID_CPU);
}

static void rm_mem_clear_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

    uvm_processor_mask_clear(&rm_mem->mapped_on, gpu->id);
    rm_mem->vas[uvm_id_value(gpu->id)] = 0;
}

static void rm_mem_clear_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    rm_mem->proxy_vas[uvm_id_value(gpu->id)] = 0;
}

static void rm_mem_clear_cpu_va(uvm_rm_mem_t *rm_mem)
{
    uvm_processor_mask_clear(&rm_mem->mapped_on, UVM_ID_CPU);
    rm_mem->vas[UVM_ID_CPU_VALUE] = 0;
}

NvU64 uvm_rm_mem_get_gpu_uvm_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT_MSG(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu), "GPU %s\n", uvm_gpu_name(gpu));

    return rm_mem->vas[uvm_id_value(gpu->id)];
}

NvU64 uvm_rm_mem_get_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

    return rm_mem->proxy_vas[uvm_id_value(gpu->id)];
}

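// Return the allocation's address in the given GPU's VA space (UVM-internal or
// proxy, per is_proxy_va_space), packaged as a virtual uvm_gpu_address_t. When
// Confidential Computing is enabled, sysmem allocations are flagged as
// unprotected.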
uvm_gpu_address_t uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    uvm_gpu_address_t gpu_va = {0};

    gpu_va.aperture = UVM_APERTURE_MAX;
    gpu_va.is_virtual = true;

    if (g_uvm_global.conf_computing_enabled && (rm_mem->type == UVM_RM_MEM_TYPE_SYS))
        gpu_va.is_unprotected = true;

    if (is_proxy_va_space)
        gpu_va.address = uvm_rm_mem_get_gpu_proxy_va(rm_mem, gpu);
    else
        gpu_va.address = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);

    return gpu_va;
}

void *uvm_rm_mem_get_cpu_va(uvm_rm_mem_t *rm_mem)
{
    UVM_ASSERT(uvm_rm_mem_mapped_on_cpu(rm_mem));

    return (void *)(uintptr_t)rm_mem->vas[UVM_ID_CPU_VALUE];
}

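// Map the allocation into the proxy VA space of the given GPU, if that GPU
// requires a proxy channel pool; otherwise this is a no-op. The proxy VA array
// is allocated lazily on first use, and the mapping is created by RM from the
// owning GPU's VA.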
static NV_STATUS rm_mem_map_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;
    NvU64 proxy_va;

    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));

    if (!uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent))
        return NV_OK;

    if (uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu))
        return NV_OK;

    if (rm_mem->proxy_vas == NULL) {
        NvU64 *proxy_vas = uvm_kvmalloc_zero(sizeof(rm_mem->vas));
        if (proxy_vas == NULL)
            return NV_ERR_NO_MEMORY;

        rm_mem->proxy_vas = proxy_vas;
    }

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    status = uvm_rm_locked_call(nvUvmInterfacePagingChannelsMap(gpu_owner->rm_address_space,
                                                                gpu_owner_va,
                                                                uvm_gpu_device_handle(gpu),
                                                                &proxy_va));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfacePagingChannelsMap() failed: %s, src GPU %s, dst GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu_owner),
                      uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_gpu_proxy_va(rm_mem, gpu, proxy_va);

    return NV_OK;
}

static void rm_mem_unmap_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;

    if (!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu))
        return;

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    uvm_rm_locked_call_void(nvUvmInterfacePagingChannelsUnmap(gpu_owner->rm_address_space,
                                                              gpu_owner_va,
                                                              uvm_gpu_device_handle(gpu)));

    rm_mem_clear_gpu_proxy_va(rm_mem, gpu);
}

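// Allocate sysmem or vidmem backing through RM and map it into the owning
// GPU's VA space. The allocation is marked unprotected unless Confidential
// Computing is enabled and the memory is vidmem.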
NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu,
                           uvm_rm_mem_type_t type,
                           NvLength size,
                           NvU64 gpu_alignment,
                           uvm_rm_mem_t **rm_mem_out)
{
    NV_STATUS status = NV_OK;
    uvm_rm_mem_t *rm_mem;
    UvmGpuAllocInfo alloc_info = { 0 };
    NvU64 gpu_va;

    UVM_ASSERT(gpu);
    UVM_ASSERT((type == UVM_RM_MEM_TYPE_SYS) || (type == UVM_RM_MEM_TYPE_GPU));
    UVM_ASSERT(size != 0);

    rm_mem = uvm_kvmalloc_zero(sizeof(*rm_mem));
    if (rm_mem == NULL)
        return NV_ERR_NO_MEMORY;

    if (!g_uvm_global.conf_computing_enabled || type == UVM_RM_MEM_TYPE_SYS)
        alloc_info.bUnprotected = NV_TRUE;

    alloc_info.alignment = gpu_alignment;

    if (type == UVM_RM_MEM_TYPE_SYS)
        status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocSys(gpu->rm_address_space, size, &gpu_va, &alloc_info));
    else
        status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocFB(gpu->rm_address_space, size, &gpu_va, &alloc_info));

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceMemoryAlloc%s() failed: %s, GPU %s\n",
                      type == UVM_RM_MEM_TYPE_SYS ? "Sys" : "FB",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        goto error;
    }

    rm_mem->gpu_owner = gpu;
    rm_mem->type = type;
    rm_mem->size = size;
    rm_mem_set_gpu_va(rm_mem, gpu, gpu_va);

    status = rm_mem_map_gpu_proxy(rm_mem, gpu);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;
    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}

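// Map the allocation on the CPU using the owning GPU's VA. A no-op if the
// allocation is already CPU mapped. Under Confidential Computing, only sysmem
// (unprotected) allocations may be mapped on the CPU.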
NV_STATUS uvm_rm_mem_map_cpu(uvm_rm_mem_t *rm_mem)
{
    NV_STATUS status;
    uvm_gpu_t *gpu;
    NvU64 gpu_va;
    void *cpu_va;

    UVM_ASSERT(rm_mem);

    if (uvm_rm_mem_mapped_on_cpu(rm_mem))
        return NV_OK;

    if (g_uvm_global.conf_computing_enabled)
        UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);

    gpu = rm_mem->gpu_owner;
    gpu_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);

    status = uvm_rm_locked_call(nvUvmInterfaceMemoryCpuMap(gpu->rm_address_space,
                                                           gpu_va,
                                                           rm_mem->size,
                                                           &cpu_va,
                                                           UVM_PAGE_SIZE_DEFAULT));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceMemoryCpuMap() failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_cpu_va(rm_mem, cpu_va);

    return NV_OK;
}

void uvm_rm_mem_unmap_cpu(uvm_rm_mem_t *rm_mem)
{
    UVM_ASSERT(rm_mem);

    if (!uvm_rm_mem_mapped_on_cpu(rm_mem))
        return;

    uvm_rm_locked_call_void(nvUvmInterfaceMemoryCpuUnMap(rm_mem->gpu_owner->rm_address_space,
                                                         uvm_rm_mem_get_cpu_va(rm_mem)));

    rm_mem_clear_cpu_va(rm_mem);
}

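// Map the allocation on a GPU other than its owner by duplicating the owner's
// allocation into the target GPU's VA space, then adding a proxy mapping if
// the target GPU needs one. Mapping vidmem on a non-owner GPU is not
// supported. A no-op if the allocation is already mapped on the GPU.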
NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_alignment)
{
    NV_STATUS status;
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;
    NvU64 gpu_va;

    UVM_ASSERT(rm_mem);
    UVM_ASSERT(gpu);

    if (uvm_rm_mem_mapped_on_gpu(rm_mem, gpu))
        return NV_OK;

    // Peer mappings are not supported yet
    UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    status = uvm_rm_locked_call(nvUvmInterfaceDupAllocation(gpu_owner->rm_address_space,
                                                            gpu_owner_va,
                                                            gpu->rm_address_space,
                                                            gpu_alignment,
                                                            &gpu_va));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceDupAllocation() failed: %s, src GPU %s, dest GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu_owner),
                      uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_gpu_va(rm_mem, gpu, gpu_va);

    // Map to proxy VA space, if applicable
    return rm_mem_map_gpu_proxy(rm_mem, gpu);
}

// This internal unmap variant allows the GPU owner to be unmapped, unlike
// uvm_rm_mem_unmap_gpu
static void rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    NvU64 va;

    if (!uvm_rm_mem_mapped_on_gpu(rm_mem, gpu))
        return;

    // Remove mappings in proxy address space, if any
    rm_mem_unmap_gpu_proxy(rm_mem, gpu);

    va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);
    uvm_rm_locked_call_void(nvUvmInterfaceMemoryFree(gpu->rm_address_space, va));
    rm_mem_clear_gpu_va(rm_mem, gpu);
}

void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(rm_mem);
    UVM_ASSERT(gpu);

    // The GPU owner mapping remains valid until the memory is freed.
    if (gpu == rm_mem->gpu_owner)
        return;

    rm_mem_unmap_gpu(rm_mem, gpu);
}

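// Release the allocation: unmap the CPU mapping, unmap every non-owner GPU,
// and finally unmap the owning GPU, which frees the RM allocation itself.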
void uvm_rm_mem_free(uvm_rm_mem_t *rm_mem)
{
    uvm_gpu_id_t gpu_id;
    uvm_gpu_t *gpu_owner;

    if (rm_mem == NULL)
        return;

    // If the GPU owner is not set, allocation of backing storage by RM failed
    gpu_owner = rm_mem->gpu_owner;
    if (gpu_owner == NULL) {
        uvm_kvfree(rm_mem);
        return;
    }

    uvm_rm_mem_unmap_cpu(rm_mem);

    // Don't use for_each_gpu_in_mask() as the owning GPU might be being
    // destroyed and already removed from the global GPU array causing the
    // iteration to stop prematurely.
    for_each_gpu_id_in_mask(gpu_id, &rm_mem->mapped_on) {
        if (!uvm_id_equal(gpu_id, gpu_owner->id))
            uvm_rm_mem_unmap_gpu(rm_mem, uvm_gpu_get(gpu_id));
    }

    rm_mem_unmap_gpu(rm_mem, gpu_owner);

    UVM_ASSERT_MSG(uvm_processor_mask_empty(&rm_mem->mapped_on),
                   "Left-over %u mappings in rm_mem\n",
                   uvm_processor_mask_get_count(&rm_mem->mapped_on));

    uvm_kvfree(rm_mem->proxy_vas);
    uvm_kvfree(rm_mem);
}

NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu,
                                       uvm_rm_mem_type_t type,
                                       NvLength size,
                                       NvU64 gpu_alignment,
                                       uvm_rm_mem_t **rm_mem_out)
{
    uvm_rm_mem_t *rm_mem;
    NV_STATUS status;

    status = uvm_rm_mem_alloc(gpu, type, size, gpu_alignment, &rm_mem);
    if (status != NV_OK)
        return status;

    status = uvm_rm_mem_map_cpu(rm_mem);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;

    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}

NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem, NvU64 gpu_alignment)
{
    uvm_gpu_t *gpu;

    UVM_ASSERT(rm_mem);

    for_each_gpu(gpu) {
        NV_STATUS status = uvm_rm_mem_map_gpu(rm_mem, gpu, gpu_alignment);
        if (status != NV_OK)
            return status;
    }
    return NV_OK;
}

NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu,
                                       uvm_rm_mem_type_t type,
                                       NvLength size,
                                       NvU64 gpu_alignment,
                                       uvm_rm_mem_t **rm_mem_out)
{
    uvm_rm_mem_t *rm_mem;
    NV_STATUS status;

    UVM_ASSERT(gpu);

    status = uvm_rm_mem_alloc_and_map_cpu(gpu, type, size, gpu_alignment, &rm_mem);
    if (status != NV_OK)
        return status;

    status = uvm_rm_mem_map_all_gpus(rm_mem, gpu_alignment);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;

    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}