/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_rm_mem.h"
#include "uvm_gpu.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_linux.h"
#include "nv_uvm_interface.h"

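// Mapping state is tracked in rm_mem->mapped_on (a global processor mask)
// alongside the parallel vas[]/proxy_vas[] arrays of virtual addresses, all
// indexed by global processor ID. This predicate checks whether the given GPU
// currently has a mapping.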
bool uvm_rm_mem_mapped_on_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    return uvm_global_processor_mask_test(&rm_mem->mapped_on, gpu->global_id);
}

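// A proxy mapping can only exist on a GPU that uses a proxy channel pool, and
// only when the GPU is also mapped in the regular sense (see the asserts
// below).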
bool uvm_rm_mem_mapped_on_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    if (rm_mem->proxy_vas == NULL)
        return false;

    if (rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] == 0)
        return false;

    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
    UVM_ASSERT(uvm_gpu_uses_proxy_channel_pool(gpu));

    return true;
}

bool uvm_rm_mem_mapped_on_cpu(uvm_rm_mem_t *rm_mem)
{
    return uvm_global_processor_mask_test(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU);
}

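// Internal helpers that record or clear a processor's VA and keep the
// mapped_on mask in sync. Proxy VAs are tracked separately and do not affect
// the mask.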
static void rm_mem_set_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
    rm_mem->vas[uvm_global_id_value(gpu->global_id)] = va;
    uvm_global_processor_mask_set(&rm_mem->mapped_on, gpu->global_id);
}

static void rm_mem_set_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va)
{
    rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] = va;
}

static void rm_mem_set_cpu_va(uvm_rm_mem_t *rm_mem, void *va)
{
    rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE] = (uintptr_t)va;
    uvm_global_processor_mask_set(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU);
}

static void rm_mem_clear_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

    uvm_global_processor_mask_clear(&rm_mem->mapped_on, gpu->global_id);
    rm_mem->vas[uvm_global_id_value(gpu->global_id)] = 0;
}

static void rm_mem_clear_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] = 0;
}

static void rm_mem_clear_cpu_va(uvm_rm_mem_t *rm_mem)
{
    uvm_global_processor_mask_clear(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU);
    rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE] = 0;
}

NvU64 uvm_rm_mem_get_gpu_uvm_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT_MSG(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu), "GPU %s\n", uvm_gpu_name(gpu));

    return rm_mem->vas[uvm_global_id_value(gpu->global_id)];
}

NvU64 uvm_rm_mem_get_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

    return rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)];
}

NvU64 uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    if (is_proxy_va_space)
        return uvm_rm_mem_get_gpu_proxy_va(rm_mem, gpu);
    else
        return uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);
}

void *uvm_rm_mem_get_cpu_va(uvm_rm_mem_t *rm_mem)
{
    UVM_ASSERT(uvm_rm_mem_mapped_on_cpu(rm_mem));

    return (void *)(uintptr_t)rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE];
}

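// Map rm_mem into the proxy (paging channel) address space of the given GPU.
// The proxy_vas array is allocated lazily on first use. This is a no-op if the
// GPU does not use a proxy channel pool, or if a proxy mapping already exists.
// The GPU must already be mapped in the regular address space.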
static NV_STATUS rm_mem_map_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;
    NvU64 proxy_va;

    UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));

    if (!uvm_gpu_uses_proxy_channel_pool(gpu))
        return NV_OK;

    if (uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu))
        return NV_OK;

    if (rm_mem->proxy_vas == NULL) {
        NvU64 *proxy_vas = uvm_kvmalloc_zero(sizeof(rm_mem->vas));
        if (proxy_vas == NULL)
            return NV_ERR_NO_MEMORY;

        rm_mem->proxy_vas = proxy_vas;
    }

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    status = uvm_rm_locked_call(nvUvmInterfacePagingChannelsMap(gpu_owner->rm_address_space,
                                                                gpu_owner_va,
                                                                uvm_gpu_device_handle(gpu),
                                                                &proxy_va));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfacePagingChannelsMap() failed: %s, src GPU %s, dst GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu_owner),
                      uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_gpu_proxy_va(rm_mem, gpu, proxy_va);

    return NV_OK;
}

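// Tear down the proxy mapping on the given GPU, if one exists.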
static void rm_mem_unmap_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;

    if (!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu))
        return;

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    uvm_rm_locked_call_void(nvUvmInterfacePagingChannelsUnmap(gpu_owner->rm_address_space,
                                                              gpu_owner_va,
                                                              uvm_gpu_device_handle(gpu)));

    rm_mem_clear_gpu_proxy_va(rm_mem, gpu);
}

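// Allocate backing storage through RM (sysmem or vidmem, depending on type) on
// the owning GPU and map it in that GPU's address space. A proxy mapping is
// also created when the owning GPU requires one. On failure, any partially
// initialized state is released via uvm_rm_mem_free().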
NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu,
                           uvm_rm_mem_type_t type,
                           NvLength size,
                           NvU64 gpu_alignment,
                           uvm_rm_mem_t **rm_mem_out)
{
    NV_STATUS status = NV_OK;
    uvm_rm_mem_t *rm_mem;
    UvmGpuAllocInfo alloc_info = { 0 };
    NvU64 gpu_va;

    UVM_ASSERT(gpu);
    UVM_ASSERT((type == UVM_RM_MEM_TYPE_SYS) || (type == UVM_RM_MEM_TYPE_GPU));
    UVM_ASSERT(size != 0);

    rm_mem = uvm_kvmalloc_zero(sizeof(*rm_mem));
    if (rm_mem == NULL)
        return NV_ERR_NO_MEMORY;

    alloc_info.alignment = gpu_alignment;

    if (type == UVM_RM_MEM_TYPE_SYS)
        status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocSys(gpu->rm_address_space, size, &gpu_va, &alloc_info));
    else
        status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocFB(gpu->rm_address_space, size, &gpu_va, &alloc_info));

    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceMemoryAlloc%s() failed: %s, GPU %s\n",
                      type == UVM_RM_MEM_TYPE_SYS ? "Sys" : "FB",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        goto error;
    }

    rm_mem->gpu_owner = gpu;
    rm_mem->type = type;
    rm_mem->size = size;
    rm_mem_set_gpu_va(rm_mem, gpu, gpu_va);

    status = rm_mem_map_gpu_proxy(rm_mem, gpu);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;
    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}

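// Create a CPU mapping of the allocation through RM. Safe to call when a CPU
// mapping already exists; in that case it returns NV_OK without doing any
// work.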
NV_STATUS uvm_rm_mem_map_cpu(uvm_rm_mem_t *rm_mem)
{
    NV_STATUS status;
    uvm_gpu_t *gpu;
    NvU64 gpu_va;
    void *cpu_va;

    UVM_ASSERT(rm_mem);

    if (uvm_rm_mem_mapped_on_cpu(rm_mem))
        return NV_OK;

    gpu = rm_mem->gpu_owner;
    gpu_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);

    status = uvm_rm_locked_call(nvUvmInterfaceMemoryCpuMap(gpu->rm_address_space,
                                                           gpu_va,
                                                           rm_mem->size,
                                                           &cpu_va,
                                                           UVM_PAGE_SIZE_DEFAULT));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceMemoryCpuMap() failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_cpu_va(rm_mem, cpu_va);

    return NV_OK;
}

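// Remove the CPU mapping, if any.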
void uvm_rm_mem_unmap_cpu(uvm_rm_mem_t *rm_mem)
{
    UVM_ASSERT(rm_mem);

    if (!uvm_rm_mem_mapped_on_cpu(rm_mem))
        return;

    uvm_rm_locked_call_void(nvUvmInterfaceMemoryCpuUnMap(rm_mem->gpu_owner->rm_address_space,
                                                         uvm_rm_mem_get_cpu_va(rm_mem)));

    rm_mem_clear_cpu_va(rm_mem);
}

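// Map the allocation on another GPU by duplicating it into that GPU's address
// space. Only sysmem allocations can be mapped on GPUs other than the owner;
// peer mappings of vidmem are not supported yet. A no-op if the GPU is already
// mapped.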
NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_alignment)
{
    NV_STATUS status;
    uvm_gpu_t *gpu_owner;
    NvU64 gpu_owner_va;
    NvU64 gpu_va;

    UVM_ASSERT(rm_mem);
    UVM_ASSERT(gpu);

    if (uvm_rm_mem_mapped_on_gpu(rm_mem, gpu))
        return NV_OK;

    // Peer mappings are not supported yet
    UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);

    gpu_owner = rm_mem->gpu_owner;
    gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);

    status = uvm_rm_locked_call(nvUvmInterfaceDupAllocation(gpu_owner->rm_address_space,
                                                            gpu_owner_va,
                                                            gpu->rm_address_space,
                                                            gpu_alignment,
                                                            &gpu_va));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceDupAllocation() failed: %s, src GPU %s, dest GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu_owner),
                      uvm_gpu_name(gpu));
        return status;
    }

    rm_mem_set_gpu_va(rm_mem, gpu, gpu_va);

    // Map to proxy VA space, if applicable
    return rm_mem_map_gpu_proxy(rm_mem, gpu);
}

// This internal unmap variant allows the GPU owner to be unmapped, unlike
// uvm_rm_mem_unmap_gpu
static void rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    NvU64 va;

    if (!uvm_rm_mem_mapped_on_gpu(rm_mem, gpu))
        return;

    // Remove mappings in proxy address space, if any
    rm_mem_unmap_gpu_proxy(rm_mem, gpu);

    va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);
    uvm_rm_locked_call_void(nvUvmInterfaceMemoryFree(gpu->rm_address_space, va));
    rm_mem_clear_gpu_va(rm_mem, gpu);
}

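// Public unmap: intentionally a no-op for the owning GPU, whose mapping (and
// the underlying allocation) is only released by uvm_rm_mem_free().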
void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    UVM_ASSERT(rm_mem);
    UVM_ASSERT(gpu);

    // The GPU owner mapping remains valid until the memory is freed.
    if (gpu == rm_mem->gpu_owner)
        return;

    rm_mem_unmap_gpu(rm_mem, gpu);
}

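// Free the allocation and all of its mappings: the CPU mapping first, then all
// non-owner GPU mappings, and the owning GPU last, since the other mappings
// are derived from the owner's allocation.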
void uvm_rm_mem_free(uvm_rm_mem_t *rm_mem)
{
    uvm_global_gpu_id_t gpu_id;
    uvm_gpu_t *gpu_owner;

    if (rm_mem == NULL)
        return;

    // If the GPU owner is not set, allocation of backing storage by RM failed
    gpu_owner = rm_mem->gpu_owner;
    if (gpu_owner == NULL) {
        uvm_kvfree(rm_mem);
        return;
    }

    uvm_rm_mem_unmap_cpu(rm_mem);

    // Don't use for_each_global_gpu_in_mask(): the owning GPU may be in the
    // process of being destroyed and already removed from the global GPU
    // array, which would cut the iteration short.
    for_each_global_gpu_id_in_mask(gpu_id, &rm_mem->mapped_on) {
        if (!uvm_global_id_equal(gpu_id, gpu_owner->global_id))
            uvm_rm_mem_unmap_gpu(rm_mem, uvm_gpu_get(gpu_id));
    }

    rm_mem_unmap_gpu(rm_mem, gpu_owner);

    UVM_ASSERT_MSG(uvm_global_processor_mask_empty(&rm_mem->mapped_on),
                   "Left-over %u mappings in rm_mem\n",
                   uvm_global_processor_mask_get_count(&rm_mem->mapped_on));

    uvm_kvfree(rm_mem->proxy_vas);
    uvm_kvfree(rm_mem);
}

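// Convenience wrapper: allocate and immediately map on the CPU. The allocation
// is freed if the CPU mapping fails.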
NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu,
                                       uvm_rm_mem_type_t type,
                                       NvLength size,
                                       NvU64 gpu_alignment,
                                       uvm_rm_mem_t **rm_mem_out)
{
    uvm_rm_mem_t *rm_mem;
    NV_STATUS status;

    status = uvm_rm_mem_alloc(gpu, type, size, gpu_alignment, &rm_mem);
    if (status != NV_OK)
        return status;

    status = uvm_rm_mem_map_cpu(rm_mem);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;

    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}

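// Map the allocation on every currently registered GPU. Mappings created
// before a failure remain in place and are cleaned up when the allocation is
// freed.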
NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem, NvU64 gpu_alignment)
{
    uvm_gpu_t *gpu;

    UVM_ASSERT(rm_mem);

    for_each_global_gpu(gpu) {
        NV_STATUS status = uvm_rm_mem_map_gpu(rm_mem, gpu, gpu_alignment);
        if (status != NV_OK)
            return status;
    }
    return NV_OK;
}

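// Convenience wrapper: allocate, map on the CPU, then map on all registered
// GPUs. Illustrative usage sketch (the size and alignment values below are
// hypothetical):
//
//     uvm_rm_mem_t *rm_mem;
//     NV_STATUS status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &rm_mem);
//     if (status == NV_OK) {
//         // ... use uvm_rm_mem_get_cpu_va(rm_mem) and
//         // uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) ...
//         uvm_rm_mem_free(rm_mem);
//     }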
NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu,
                                       uvm_rm_mem_type_t type,
                                       NvLength size,
                                       NvU64 gpu_alignment,
                                       uvm_rm_mem_t **rm_mem_out)
{
    uvm_rm_mem_t *rm_mem;
    NV_STATUS status;

    UVM_ASSERT(gpu);

    status = uvm_rm_mem_alloc_and_map_cpu(gpu, type, size, gpu_alignment, &rm_mem);
    if (status != NV_OK)
        return status;

    status = uvm_rm_mem_map_all_gpus(rm_mem, gpu_alignment);
    if (status != NV_OK)
        goto error;

    *rm_mem_out = rm_mem;

    return NV_OK;

error:
    uvm_rm_mem_free(rm_mem);
    return status;
}