1 /******************************************************************************* 2 Copyright (c) 2015-2022 NVIDIA Corporation 3 4 Permission is hereby granted, free of charge, to any person obtaining a copy 5 of this software and associated documentation files (the "Software"), to 6 deal in the Software without restriction, including without limitation the 7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 sell copies of the Software, and to permit persons to whom the Software is 9 furnished to do so, subject to the following conditions: 10 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 DEALINGS IN THE SOFTWARE. 21 22 *******************************************************************************/ 23 24 #include "uvm_rm_mem.h" 25 #include "uvm_gpu.h" 26 #include "uvm_global.h" 27 #include "uvm_kvmalloc.h" 28 #include "uvm_linux.h" 29 #include "nv_uvm_interface.h" 30 31 bool uvm_rm_mem_mapped_on_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 32 { 33 return uvm_global_processor_mask_test(&rm_mem->mapped_on, gpu->global_id); 34 } 35 36 bool uvm_rm_mem_mapped_on_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 37 { 38 if (rm_mem->proxy_vas == NULL) 39 return false; 40 41 if (rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] == 0) 42 return false; 43 44 UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu)); 45 UVM_ASSERT(uvm_gpu_uses_proxy_channel_pool(gpu)); 46 47 return true; 48 } 49 50 bool uvm_rm_mem_mapped_on_cpu(uvm_rm_mem_t *rm_mem) 51 { 52 return uvm_global_processor_mask_test(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU); 53 } 54 55 static void rm_mem_set_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va) 56 { 57 rm_mem->vas[uvm_global_id_value(gpu->global_id)] = va; 58 uvm_global_processor_mask_set(&rm_mem->mapped_on, gpu->global_id); 59 } 60 61 static void rm_mem_set_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 va) 62 { 63 rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] = va; 64 } 65 66 static void rm_mem_set_cpu_va(uvm_rm_mem_t *rm_mem, void *va) 67 { 68 rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE] = (uintptr_t) va; 69 uvm_global_processor_mask_set(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU); 70 } 71 72 static void rm_mem_clear_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 73 { 74 UVM_ASSERT(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu)); 75 76 uvm_global_processor_mask_clear(&rm_mem->mapped_on, gpu->global_id); 77 rm_mem->vas[uvm_global_id_value(gpu->global_id)] = 0; 78 } 79 80 static void rm_mem_clear_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 81 { 82 rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)] = 0; 83 } 84 85 static void rm_mem_clear_cpu_va(uvm_rm_mem_t *rm_mem) 86 { 87 uvm_global_processor_mask_clear(&rm_mem->mapped_on, UVM_GLOBAL_ID_CPU); 88 rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE] = 0; 89 } 90 91 NvU64 uvm_rm_mem_get_gpu_uvm_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 92 { 93 UVM_ASSERT_MSG(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu), "GPU %s\n", uvm_gpu_name(gpu)); 94 95 return rm_mem->vas[uvm_global_id_value(gpu->global_id)]; 96 } 97 98 NvU64 uvm_rm_mem_get_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 99 { 100 UVM_ASSERT(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu)); 101 102 return rm_mem->proxy_vas[uvm_global_id_value(gpu->global_id)]; 103 } 104 105 NvU64 uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, bool is_proxy_va_space) 106 { 107 if (is_proxy_va_space) 108 return uvm_rm_mem_get_gpu_proxy_va(rm_mem, gpu); 109 else 110 return uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu); 111 } 112 113 void *uvm_rm_mem_get_cpu_va(uvm_rm_mem_t *rm_mem) 114 { 115 UVM_ASSERT(uvm_rm_mem_mapped_on_cpu(rm_mem)); 116 117 return (void *)(uintptr_t)rm_mem->vas[UVM_GLOBAL_ID_CPU_VALUE]; 118 } 119 120 static NV_STATUS rm_mem_map_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 121 { 122 NV_STATUS status; 123 uvm_gpu_t *gpu_owner; 124 NvU64 gpu_owner_va; 125 NvU64 proxy_va; 126 127 UVM_ASSERT(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu)); 128 129 if (!uvm_gpu_uses_proxy_channel_pool(gpu)) 130 return NV_OK; 131 132 if (uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu)) 133 return NV_OK; 134 135 if (rm_mem->proxy_vas == NULL) { 136 NvU64 *proxy_vas = uvm_kvmalloc_zero(sizeof(rm_mem->vas)); 137 if (proxy_vas == NULL) 138 return NV_ERR_NO_MEMORY; 139 140 rm_mem->proxy_vas = proxy_vas; 141 } 142 143 gpu_owner = rm_mem->gpu_owner; 144 gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner); 145 146 status = uvm_rm_locked_call(nvUvmInterfacePagingChannelsMap(gpu_owner->rm_address_space, 147 gpu_owner_va, 148 uvm_gpu_device_handle(gpu), 149 &proxy_va)); 150 if (status != NV_OK) { 151 UVM_ERR_PRINT("nvUvmInterfacePagingChannelsMap() failed: %s, src GPU %s, dst GPU %s\n", 152 nvstatusToString(status), 153 uvm_gpu_name(gpu_owner), 154 uvm_gpu_name(gpu)); 155 return status; 156 } 157 158 rm_mem_set_gpu_proxy_va(rm_mem, gpu, proxy_va); 159 160 return NV_OK; 161 } 162 163 static void rm_mem_unmap_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 164 { 165 uvm_gpu_t *gpu_owner; 166 NvU64 gpu_owner_va; 167 168 if (!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu)) 169 return; 170 171 gpu_owner = rm_mem->gpu_owner; 172 gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner); 173 174 175 uvm_rm_locked_call_void(nvUvmInterfacePagingChannelsUnmap(gpu_owner->rm_address_space, 176 gpu_owner_va, 177 uvm_gpu_device_handle(gpu))); 178 179 rm_mem_clear_gpu_proxy_va(rm_mem, gpu); 180 } 181 182 183 NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu, 184 uvm_rm_mem_type_t type, 185 NvLength size, 186 NvU64 gpu_alignment, 187 uvm_rm_mem_t **rm_mem_out) 188 { 189 NV_STATUS status = NV_OK; 190 uvm_rm_mem_t *rm_mem; 191 UvmGpuAllocInfo alloc_info = { 0 }; 192 NvU64 gpu_va; 193 194 UVM_ASSERT(gpu); 195 UVM_ASSERT((type == UVM_RM_MEM_TYPE_SYS) || (type == UVM_RM_MEM_TYPE_GPU)); 196 UVM_ASSERT(size != 0); 197 198 rm_mem = uvm_kvmalloc_zero(sizeof(*rm_mem)); 199 if (rm_mem == NULL) 200 return NV_ERR_NO_MEMORY; 201 202 alloc_info.alignment = gpu_alignment; 203 204 if (type == UVM_RM_MEM_TYPE_SYS) 205 status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocSys(gpu->rm_address_space, size, &gpu_va, &alloc_info)); 206 else 207 status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocFB(gpu->rm_address_space, size, &gpu_va, &alloc_info)); 208 209 if (status != NV_OK) { 210 UVM_ERR_PRINT("nvUvmInterfaceMemoryAlloc%s() failed: %s, GPU %s\n", 211 type == UVM_RM_MEM_TYPE_SYS ? "Sys" : "FB", 212 nvstatusToString(status), 213 uvm_gpu_name(gpu)); 214 goto error; 215 } 216 217 rm_mem->gpu_owner = gpu; 218 rm_mem->type = type; 219 rm_mem->size = size; 220 rm_mem_set_gpu_va(rm_mem, gpu, gpu_va); 221 222 status = rm_mem_map_gpu_proxy(rm_mem, gpu); 223 if (status != NV_OK) 224 goto error; 225 226 *rm_mem_out = rm_mem; 227 return NV_OK; 228 229 error: 230 uvm_rm_mem_free(rm_mem); 231 return status; 232 } 233 234 NV_STATUS uvm_rm_mem_map_cpu(uvm_rm_mem_t *rm_mem) 235 { 236 NV_STATUS status; 237 uvm_gpu_t *gpu; 238 NvU64 gpu_va; 239 void *cpu_va; 240 241 UVM_ASSERT(rm_mem); 242 243 if (uvm_rm_mem_mapped_on_cpu(rm_mem)) 244 return NV_OK; 245 246 gpu = rm_mem->gpu_owner; 247 gpu_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu); 248 249 status = uvm_rm_locked_call(nvUvmInterfaceMemoryCpuMap(gpu->rm_address_space, 250 gpu_va, 251 rm_mem->size, 252 &cpu_va, 253 UVM_PAGE_SIZE_DEFAULT)); 254 if (status != NV_OK) { 255 UVM_ERR_PRINT("nvUvmInterfaceMemoryCpuMap() failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu)); 256 return status; 257 } 258 259 rm_mem_set_cpu_va(rm_mem, cpu_va); 260 261 return NV_OK; 262 } 263 264 void uvm_rm_mem_unmap_cpu(uvm_rm_mem_t *rm_mem) 265 { 266 UVM_ASSERT(rm_mem); 267 268 if (!uvm_rm_mem_mapped_on_cpu(rm_mem)) 269 return; 270 271 uvm_rm_locked_call_void(nvUvmInterfaceMemoryCpuUnMap(rm_mem->gpu_owner->rm_address_space, 272 uvm_rm_mem_get_cpu_va(rm_mem))); 273 274 rm_mem_clear_cpu_va(rm_mem); 275 } 276 277 NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_alignment) 278 { 279 NV_STATUS status; 280 uvm_gpu_t *gpu_owner; 281 NvU64 gpu_owner_va; 282 NvU64 gpu_va; 283 284 UVM_ASSERT(rm_mem); 285 UVM_ASSERT(gpu); 286 287 if (uvm_rm_mem_mapped_on_gpu(rm_mem, gpu)) 288 return NV_OK; 289 290 // Peer mappings are not supported yet 291 UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS); 292 293 gpu_owner = rm_mem->gpu_owner; 294 gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner); 295 296 status = uvm_rm_locked_call(nvUvmInterfaceDupAllocation(gpu_owner->rm_address_space, 297 gpu_owner_va, 298 gpu->rm_address_space, 299 gpu_alignment, 300 &gpu_va)); 301 if (status != NV_OK) { 302 UVM_ERR_PRINT("nvUvmInterfaceDupAllocation() failed: %s, src GPU %s, dest GPU %s\n", 303 nvstatusToString(status), 304 uvm_gpu_name(gpu_owner), 305 uvm_gpu_name(gpu)); 306 return status; 307 } 308 309 rm_mem_set_gpu_va(rm_mem, gpu, gpu_va); 310 311 // Map to proxy VA space, if applicable 312 return rm_mem_map_gpu_proxy(rm_mem, gpu); 313 } 314 315 // This internal unmap variant allows the GPU owner to be unmapped, unlike 316 // uvm_rm_mem_unmap_gpu 317 static void rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 318 { 319 NvU64 va; 320 321 if (!uvm_rm_mem_mapped_on_gpu(rm_mem, gpu)) 322 return; 323 324 // Remove mappings in proxy address space, if any 325 rm_mem_unmap_gpu_proxy(rm_mem, gpu); 326 327 va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu); 328 uvm_rm_locked_call_void(nvUvmInterfaceMemoryFree(gpu->rm_address_space, va)); 329 rm_mem_clear_gpu_va(rm_mem, gpu); 330 } 331 332 void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) 333 { 334 UVM_ASSERT(rm_mem); 335 UVM_ASSERT(gpu); 336 337 // The GPU owner mapping remains valid until the memory is freed. 338 if (gpu == rm_mem->gpu_owner) 339 return; 340 341 rm_mem_unmap_gpu(rm_mem, gpu); 342 } 343 344 void uvm_rm_mem_free(uvm_rm_mem_t *rm_mem) 345 { 346 uvm_global_gpu_id_t gpu_id; 347 uvm_gpu_t *gpu_owner; 348 349 if (rm_mem == NULL) 350 return; 351 352 // If the GPU owner is not set, allocation of backing storage by RM failed 353 gpu_owner = rm_mem->gpu_owner; 354 if (gpu_owner == NULL) { 355 uvm_kvfree(rm_mem); 356 return; 357 } 358 359 uvm_rm_mem_unmap_cpu(rm_mem); 360 361 // Don't use for_each_global_gpu_in_mask() as the owning GPU might be being 362 // destroyed and already removed from the global GPU array causing the iteration 363 // to stop prematurely. 364 for_each_global_gpu_id_in_mask(gpu_id, &rm_mem->mapped_on) { 365 if (!uvm_global_id_equal(gpu_id, gpu_owner->global_id)) 366 uvm_rm_mem_unmap_gpu(rm_mem, uvm_gpu_get(gpu_id)); 367 } 368 369 rm_mem_unmap_gpu(rm_mem, gpu_owner); 370 371 UVM_ASSERT_MSG(uvm_global_processor_mask_empty(&rm_mem->mapped_on), 372 "Left-over %u mappings in rm_mem\n", 373 uvm_global_processor_mask_get_count(&rm_mem->mapped_on)); 374 375 uvm_kvfree(rm_mem->proxy_vas); 376 uvm_kvfree(rm_mem); 377 } 378 379 NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu, 380 uvm_rm_mem_type_t type, 381 NvLength size, 382 NvU64 gpu_alignment, 383 uvm_rm_mem_t **rm_mem_out) 384 { 385 uvm_rm_mem_t *rm_mem; 386 NV_STATUS status; 387 388 status = uvm_rm_mem_alloc(gpu, type, size, gpu_alignment, &rm_mem); 389 if (status != NV_OK) 390 return status; 391 392 status = uvm_rm_mem_map_cpu(rm_mem); 393 if (status != NV_OK) 394 goto error; 395 396 *rm_mem_out = rm_mem; 397 398 return NV_OK; 399 400 error: 401 uvm_rm_mem_free(rm_mem); 402 return status; 403 } 404 405 NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem, NvU64 gpu_alignment) 406 { 407 uvm_gpu_t *gpu; 408 409 UVM_ASSERT(rm_mem); 410 411 for_each_global_gpu(gpu) { 412 NV_STATUS status = uvm_rm_mem_map_gpu(rm_mem, gpu, gpu_alignment); 413 if (status != NV_OK) 414 return status; 415 } 416 return NV_OK; 417 } 418 419 NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu, 420 uvm_rm_mem_type_t type, 421 NvLength size, 422 NvU64 gpu_alignment, 423 uvm_rm_mem_t **rm_mem_out) 424 { 425 uvm_rm_mem_t *rm_mem; 426 NV_STATUS status; 427 428 UVM_ASSERT(gpu); 429 430 status = uvm_rm_mem_alloc_and_map_cpu(gpu, type, size, gpu_alignment, &rm_mem); 431 if (status != NV_OK) 432 return status; 433 434 status = uvm_rm_mem_map_all_gpus(rm_mem, gpu_alignment); 435 if (status != NV_OK) 436 goto error; 437 438 *rm_mem_out = rm_mem; 439 440 return NV_OK; 441 442 error: 443 uvm_rm_mem_free(rm_mem); 444 return status; 445 } 446