/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_kvmalloc.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_tracker.h"
#include "uvm_va_space.h"
#include "uvm_rm_mem.h"
#include "uvm_mem.h"
#include "uvm_gpu.h"

#define CE_TEST_MEM_SIZE (2 * 1024 * 1024)
#define CE_TEST_MEM_END_SIZE 32
#define CE_TEST_MEM_BEGIN_SIZE 32
#define CE_TEST_MEM_MIDDLE_SIZE (CE_TEST_MEM_SIZE - CE_TEST_MEM_BEGIN_SIZE - CE_TEST_MEM_END_SIZE)
#define CE_TEST_MEM_MIDDLE_OFFSET (CE_TEST_MEM_BEGIN_SIZE)
#define CE_TEST_MEM_END_OFFSET (CE_TEST_MEM_SIZE - CE_TEST_MEM_END_SIZE)
#define CE_TEST_MEM_COUNT 5

static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
{
    NvU32 i;
    NV_STATUS status;
    uvm_rm_mem_t *mem[CE_TEST_MEM_COUNT] = { NULL };
    uvm_rm_mem_t *host_mem = NULL;
    NvU32 *host_ptr;
    NvU64 host_mem_gpu_va, mem_gpu_va;
    NvU64 dst_va;
    NvU64 src_va;
    uvm_push_t push;
    bool is_proxy;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
    TEST_CHECK_GOTO(status == NV_OK, done);
    host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
    memset(host_ptr, 0, CE_TEST_MEM_SIZE);

    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
        status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
        TEST_CHECK_GOTO(status == NV_OK, done);
    }

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Non-pipelined test");
    TEST_CHECK_GOTO(status == NV_OK, done);

    is_proxy = uvm_channel_is_proxy(push.channel);
    host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy).address;

    // All of the following CE transfers are done from a single (L)CE and
    // disabling pipelining is enough to order them when needed. Only push_end
    // needs a MEMBAR SYS to order everything with the CPU.
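    //
    // The test sets mem[0] to 1 and then copies it down a chain: mem[0] to
    // mem[1], mem[1] to mem[2], and so on, with the last copy landing in
    // host_mem. Each copy is split into begin/middle/end pieces so that the
    // pipelining flags can be exercised, and the CPU finally checks that
    // every word of host_mem reads 1.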

    // Initialize to a bad value
    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
        mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;

        uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
        uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
        gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1337 + i, CE_TEST_MEM_SIZE);
    }

    // Set the first buffer to 1
    uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
    mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy).address;
    gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1, CE_TEST_MEM_SIZE);

    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
        NvU32 dst = i + 1;
        if (dst == CE_TEST_MEM_COUNT)
            dst_va = host_mem_gpu_va;
        else
            dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy).address;

        src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy).address;

        // The first memcpy needs to be non-pipelined as otherwise the previous
        // memset/memcpy to the source may not be done yet.

        // Alternate the order of copying the beginning and the end
        if (i % 2 == 0) {
            uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
            gpu->parent->ce_hal->memcopy_v_to_v(&push,
                                                dst_va + CE_TEST_MEM_END_OFFSET,
                                                src_va + CE_TEST_MEM_END_OFFSET,
                                                CE_TEST_MEM_END_SIZE);

            uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
            uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
            gpu->parent->ce_hal->memcopy_v_to_v(&push,
                                                dst_va + CE_TEST_MEM_MIDDLE_OFFSET,
                                                src_va + CE_TEST_MEM_MIDDLE_OFFSET,
                                                CE_TEST_MEM_MIDDLE_SIZE);

            uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
            uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
            gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va, src_va, CE_TEST_MEM_BEGIN_SIZE);
        }
        else {
            uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
            gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va, src_va, CE_TEST_MEM_BEGIN_SIZE);

            uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
            uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
            gpu->parent->ce_hal->memcopy_v_to_v(&push,
                                                dst_va + CE_TEST_MEM_MIDDLE_OFFSET,
                                                src_va + CE_TEST_MEM_MIDDLE_OFFSET,
                                                CE_TEST_MEM_MIDDLE_SIZE);

            uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
            uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
            gpu->parent->ce_hal->memcopy_v_to_v(&push,
                                                dst_va + CE_TEST_MEM_END_OFFSET,
                                                src_va + CE_TEST_MEM_END_OFFSET,
                                                CE_TEST_MEM_END_SIZE);
        }
    }

    status = uvm_push_end_and_wait(&push);
    TEST_CHECK_GOTO(status == NV_OK, done);

    // Verify that the value 1 propagated through the whole chain to host_mem
    for (i = 0; i < CE_TEST_MEM_SIZE / sizeof(NvU32); ++i) {
        if (host_ptr[i] != 1) {
            UVM_TEST_PRINT("host_ptr[%u] = %u instead of 1\n", i, host_ptr[i]);
            status = NV_ERR_INVALID_STATE;
            goto done;
        }
    }

done:
    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
        uvm_rm_mem_free(mem[i]);
    }
    uvm_rm_mem_free(host_mem);

    return status;
}

#define REDUCTIONS 32

static NV_STATUS test_membar(uvm_gpu_t *gpu)
{
    NvU32 i;
    NV_STATUS status;
    uvm_rm_mem_t *host_mem = NULL;
    NvU32 *host_ptr;
    NvU64 host_mem_gpu_va;
    uvm_push_t push;
    NvU32 value;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
    TEST_CHECK_GOTO(status == NV_OK, done);
    host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
    *host_ptr = 0;

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Membar test");
    TEST_CHECK_GOTO(status == NV_OK, done);

    host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel)).address;

    for (i = 0; i < REDUCTIONS; ++i) {
        uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
        gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
    }

    // Without a sys membar the channel tracking semaphore can and does
    // complete before all the reductions.
    status = uvm_push_end_and_wait(&push);
    TEST_CHECK_GOTO(status == NV_OK, done);

    value = *host_ptr;
    if (value != REDUCTIONS) {
        UVM_TEST_PRINT("Value = %u instead of %u, GPU %s\n", value, REDUCTIONS, uvm_gpu_name(gpu));
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

done:
    uvm_rm_mem_free(host_mem);

    return status;
}

static void push_memset(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t element_size, size_t size)
{
    switch (element_size) {
        case 1:
            uvm_push_get_gpu(push)->parent->ce_hal->memset_1(push, dst, (NvU8)value, size);
            break;
        case 4:
            uvm_push_get_gpu(push)->parent->ce_hal->memset_4(push, dst, (NvU32)value, size);
            break;
        case 8:
            uvm_push_get_gpu(push)->parent->ce_hal->memset_8(push, dst, value, size);
            break;
        default:
            UVM_ASSERT(0);
    }
}

static NV_STATUS test_unaligned_memset(uvm_gpu_t *gpu,
                                       uvm_gpu_address_t gpu_verif_addr,
                                       NvU8 *cpu_verif_addr,
                                       size_t size,
                                       size_t element_size,
                                       size_t offset)
{
    uvm_push_t push;
    NV_STATUS status;
    size_t i;
    NvU64 value64 = (offset + 2) * (1ull << 32) + (offset + 1);
    NvU64 test_value, expected_value = 0;
    uvm_gpu_address_t dst;

    // Memset a single element at an unaligned position and make sure it
    // doesn't clobber anything else
    TEST_CHECK_RET(gpu_verif_addr.address % element_size == 0);
    TEST_CHECK_RET(offset + element_size <= size);
    dst = gpu_verif_addr;
    dst.address += offset;

    memset(cpu_verif_addr, (NvU8)(~value64), size);

    status = uvm_push_begin(gpu->channel_manager,
                            UVM_CHANNEL_TYPE_GPU_INTERNAL,
                            &push,
                            "memset_%zu offset %zu",
                            element_size,
                            offset);
    TEST_CHECK_RET(status == NV_OK);

    push_memset(&push, dst, value64, element_size, element_size);
    status = uvm_push_end_and_wait(&push);
    TEST_CHECK_RET(status == NV_OK);

    // Make sure all bytes of the element are present
    test_value = 0;
    memcpy(&test_value, cpu_verif_addr + offset, element_size);

    switch (element_size) {
        case 1:
            expected_value = (NvU8)value64;
            break;
        case 4:
            expected_value = (NvU32)value64;
            break;
        case 8:
            expected_value = value64;
            break;
        default:
            UVM_ASSERT(0);
    }

    if (test_value != expected_value) {
        UVM_TEST_PRINT("memset_%zu offset %zu failed, written value is 0x%llx instead of 0x%llx\n",
                       element_size,
                       offset,
                       test_value,
                       expected_value);
        return NV_ERR_INVALID_STATE;
    }

    // Make sure all other bytes are unchanged
    for (i = 0; i < size; i++) {
        if (i >= offset && i < offset + element_size)
            continue;

        if (cpu_verif_addr[i] != (NvU8)(~value64)) {
            UVM_TEST_PRINT("memset_%zu offset %zu failed, immutable byte %zu changed value from 0x%x to 0x%x\n",
                           element_size,
                           offset,
                           i,
                           (NvU8)(~value64),
                           cpu_verif_addr[i]);
            return NV_ERR_INVALID_STATE;
        }
    }

    return NV_OK;
}

static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
                                              uvm_gpu_address_t dst,
                                              uvm_gpu_address_t src,
                                              size_t size,
                                              size_t element_size,
                                              uvm_gpu_address_t gpu_verif_addr,
                                              void *cpu_verif_addr,
                                              int test_iteration)
{
    uvm_push_t push;
    size_t i;
    const char *src_type = src.is_virtual ? "virtual" : "physical";
    const char *src_loc = src.aperture == UVM_APERTURE_SYS ? "sysmem" : "vidmem";
    const char *dst_type = dst.is_virtual ? "virtual" : "physical";
    const char *dst_loc = dst.aperture == UVM_APERTURE_SYS ? "sysmem" : "vidmem";

    NvU64 value64 = (test_iteration + 2) * (1ull << 32) + (test_iteration + 1);
    NvU64 test_value = 0, expected_value = 0;

    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager,
                                     UVM_CHANNEL_TYPE_GPU_INTERNAL,
                                     &push,
                                     "Memset %s %s (0x%llx) and memcopy to %s %s (0x%llx), iter %d",
                                     src_type,
                                     src_loc,
                                     src.address,
                                     dst_type,
                                     dst_loc,
                                     dst.address,
                                     test_iteration));

    // Waive if any of the input addresses is physical but the channel does
    // not support physical addressing
    if (!uvm_channel_is_privileged(push.channel) && (!dst.is_virtual || !src.is_virtual)) {
        TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
        return NV_OK;
    }

    // The input virtual addresses exist in UVM's internal address space, not
    // the proxy address space
    if (uvm_channel_is_proxy(push.channel)) {
        TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
        return NV_ERR_INVALID_STATE;
    }

    TEST_CHECK_RET(gpu_verif_addr.is_virtual);

    // If physical accesses aren't supported, silently convert to virtual to
    // test the flat mapping.
    if (!src.is_virtual)
        src = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(src.aperture, src.address));

    if (!dst.is_virtual)
        dst = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(dst.aperture, dst.address));

    // Memset src with the appropriate element size, then memcpy to dst and
    // from dst to the verif location (physical sysmem).
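    // No pipelining flags are set on any of the three transfers below, so
    // each one waits for the previous: the memset completes before the first
    // copy reads src, and that copy completes before the second copy reads
    // dst (see the pipelining notes in test_non_pipelined).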
    push_memset(&push, src, value64, element_size, size);
    gpu->parent->ce_hal->memcopy(&push, dst, src, size);
    gpu->parent->ce_hal->memcopy(&push, gpu_verif_addr, dst, size);

    TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));

    for (i = 0; i < size / element_size; i++) {
        switch (element_size) {
            case 1:
                expected_value = (NvU8)value64;
                test_value = ((NvU8 *)cpu_verif_addr)[i];
                break;
            case 4:
                expected_value = (NvU32)value64;
                test_value = ((NvU32 *)cpu_verif_addr)[i];
                break;
            case 8:
                expected_value = value64;
                test_value = ((NvU64 *)cpu_verif_addr)[i];
                break;
            default:
                UVM_ASSERT(0);
        }

        if (test_value != expected_value) {
            UVM_TEST_PRINT("memset_%zu of %s %s and memcpy into %s %s failed, value[%zu] = 0x%llx instead of 0x%llx\n",
                           element_size,
                           src_type,
                           src_loc,
                           dst_type,
                           dst_loc,
                           i,
                           test_value,
                           expected_value);
            return NV_ERR_INVALID_STATE;
        }
    }

    return NV_OK;
}

static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    bool is_proxy_va_space = false;
    uvm_gpu_address_t gpu_verif_addr;
    void *cpu_verif_addr;
    uvm_mem_t *verif_mem = NULL;
    uvm_mem_t *sys_uvm_mem = NULL;
    uvm_mem_t *gpu_uvm_mem = NULL;
    uvm_rm_mem_t *sys_rm_mem = NULL;
    uvm_rm_mem_t *gpu_rm_mem = NULL;
    uvm_gpu_address_t gpu_addresses[4] = {0};
    size_t size = gpu->big_page.internal_size;
    static const size_t element_sizes[] = {1, 4, 8};
    const size_t iterations = 4;
    size_t i, j, k, s;
    uvm_mem_alloc_params_t mem_params = {0};

    if (uvm_conf_computing_mode_enabled(gpu))
        TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, &verif_mem), done);
    else
        TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);

    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);

    gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
    cpu_verif_addr = uvm_mem_get_cpu_addr_kernel(verif_mem);

    for (i = 0; i < iterations; ++i) {
        for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
            TEST_NV_CHECK_GOTO(test_unaligned_memset(gpu,
                                                     gpu_verif_addr,
                                                     cpu_verif_addr,
                                                     size,
                                                     element_sizes[s],
                                                     i),
                               done);
        }
    }

    // Virtual address (in UVM's internal address space) backed by sysmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
    gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);

    if (uvm_conf_computing_mode_enabled(gpu)) {
        for (i = 0; i < iterations; ++i) {
            for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
                TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
                                                                gpu_addresses[0],
                                                                gpu_addresses[0],
                                                                size,
                                                                element_sizes[s],
                                                                gpu_verif_addr,
                                                                cpu_verif_addr,
                                                                i),
                                   done);
            }
        }

        // Because gpu_verif_addr is in sysmem, only the previous cases are
        // valid when the Confidential Computing feature is enabled.
        // TODO: Bug 3839176: the test is partially waived on Confidential
        // Computing because it assumes that the GPU can access system memory
        // without using encryption.
        goto done;
    }

    // Using a page size equal to the allocation size ensures that the UVM
    // memories about to be allocated are physically contiguous. And since the
    // size is a valid GPU page size, the memories can be virtually mapped on
    // the GPU if needed.
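    // The same mem_params are reused for the sysmem and vidmem allocations
    // below; only backing_gpu differs. Together with the two rm_mem
    // allocations, they populate gpu_addresses[] with the four addressing
    // modes under test (sysmem virtual, sysmem physical, vidmem physical,
    // vidmem virtual), and the nested loops at the end exercise every
    // (src, dst) combination.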
    mem_params.size = size;
    mem_params.page_size = size;
    mem_params.mm = current->mm;

    // Physical address in sysmem
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
    gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);

    // Physical address in vidmem
    mem_params.backing_gpu = gpu;
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
    gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);

    // Virtual address (in UVM's internal address space) backed by vidmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
    gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);

    for (i = 0; i < iterations; ++i) {
        for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
            for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
                for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
                    TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
                                                                    gpu_addresses[k],
                                                                    gpu_addresses[j],
                                                                    size,
                                                                    element_sizes[s],
                                                                    gpu_verif_addr,
                                                                    cpu_verif_addr,
                                                                    i),
                                       done);
                }
            }
        }
    }

done:
    uvm_rm_mem_free(sys_rm_mem);
    uvm_rm_mem_free(gpu_rm_mem);
    uvm_mem_free(gpu_uvm_mem);
    uvm_mem_free(sys_uvm_mem);
    uvm_mem_free(verif_mem);

    return status;
}

static NV_STATUS test_semaphore_alloc_sem(uvm_gpu_t *gpu, size_t size, uvm_mem_t **mem_out)
{
    NvU64 gpu_va;
    NV_STATUS status = NV_OK;
    uvm_mem_t *mem = NULL;

    TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &mem));

    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(mem, gpu), error);

    gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);

    // This semaphore resides in the uvm_mem region, i.e., it has the GPU VA
    // MSbit set. The intent is to validate semaphore operations when the
    // semaphore's VA is in the high end of the GPU's effective virtual
    // address space, i.e., when its uppermost VA bit is set.
    TEST_CHECK_GOTO(gpu_va & (1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1)), error);

    *mem_out = mem;

    return NV_OK;

error:
    uvm_mem_free(mem);
    return status;
}

// test_semaphore_reduction_inc is similar in concept to test_membar(). It uses
// uvm_mem (instead of uvm_rm_mem) for the semaphore, which assumes the CE HAL
// has already been validated, because uvm_mem relies on working CE
// memset/memcopy for its GPU PTE writes. The purpose of
// test_semaphore_reduction_inc is to validate the reduction inc operation on
// semaphores whose VA has the upper bit set.
static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_mem_t *mem;
    NvU64 gpu_va;
    NvU32 i;
    NvU32 *host_ptr = NULL;
    NvU32 value;

    // Semaphore reduction needs 1 word (4 bytes).
    const size_t size = sizeof(NvU32);

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    status = test_semaphore_alloc_sem(gpu, size, &mem);
    TEST_CHECK_RET(status == NV_OK);

    // Initialize the counter of reductions.
    host_ptr = uvm_mem_get_cpu_addr_kernel(mem);
    TEST_CHECK_GOTO(host_ptr != NULL, done);
    *host_ptr = 0;

    gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_reduction_inc test");
    TEST_CHECK_GOTO(status == NV_OK, done);

    for (i = 0; i < REDUCTIONS; i++) {
        uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
        gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i + 1);
    }

    status = uvm_push_end_and_wait(&push);
    TEST_CHECK_GOTO(status == NV_OK, done);

    value = *host_ptr;
    if (value != REDUCTIONS) {
        UVM_TEST_PRINT("Value = %u instead of %u, GPU %s\n", value, REDUCTIONS, uvm_gpu_name(gpu));
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

done:
    uvm_mem_free(mem);

    return status;
}

static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_mem_t *mem;
    NvU64 gpu_va;
    NvU32 value;
    NvU32 *host_ptr = NULL;
    NvU32 payload = 0xA5A55A5A;

    // Semaphore release needs 1 word (4 bytes).
    const size_t size = sizeof(NvU32);

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    status = test_semaphore_alloc_sem(gpu, size, &mem);
    TEST_CHECK_RET(status == NV_OK);

    // Initialize the payload.
    host_ptr = uvm_mem_get_cpu_addr_kernel(mem);
    TEST_CHECK_GOTO(host_ptr != NULL, done);
    *host_ptr = 0;

    gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_release test");
    TEST_CHECK_GOTO(status == NV_OK, done);

    gpu->parent->ce_hal->semaphore_release(&push, gpu_va, payload);

    status = uvm_push_end_and_wait(&push);
    TEST_CHECK_GOTO(status == NV_OK, done);

    value = *host_ptr;
    if (value != payload) {
        UVM_TEST_PRINT("Semaphore payload = %u instead of %u, GPU %s\n", value, payload, uvm_gpu_name(gpu));
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

done:
    uvm_mem_free(mem);

    return status;
}

static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_mem_t *mem;
    NvU64 gpu_va;
    NvU32 i;
    NvU64 *timestamp;
    NvU64 last_timestamp = 0;

    // 2 iterations:
    //   1: compare the retrieved timestamp with 0;
    //   2: compare the retrieved timestamp with the one obtained in 1.
    const NvU32 iterations = 2;

    // The semaphore is 4 words long (16 bytes).
    const size_t size = 16;

    // TODO: Bug 3839176: the test is waived on Confidential Computing because
    // it assumes that the GPU can access system memory without using
    // encryption.
    if (uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    status = test_semaphore_alloc_sem(gpu, size, &mem);
    TEST_CHECK_RET(status == NV_OK);

    timestamp = uvm_mem_get_cpu_addr_kernel(mem);
    TEST_CHECK_GOTO(timestamp != NULL, done);
    memset(timestamp, 0, size);

    // Shift the timestamp pointer to where the semaphore timestamp info is.
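    // The semaphore_timestamp method is expected to write the timestamp in
    // the second 8-byte word of the semaphore (bytes 8-15), hence the advance
    // by one NvU64.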
    timestamp += 1;

    gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);

    for (i = 0; i < iterations; i++) {
        status = uvm_push_begin(gpu->channel_manager,
                                UVM_CHANNEL_TYPE_GPU_INTERNAL,
                                &push,
                                "semaphore_timestamp test, iter: %u",
                                i);
        TEST_CHECK_GOTO(status == NV_OK, done);

        gpu->parent->ce_hal->semaphore_timestamp(&push, gpu_va);

        status = uvm_push_end_and_wait(&push);
        TEST_CHECK_GOTO(status == NV_OK, done);

        TEST_CHECK_GOTO(*timestamp != 0, done);
        TEST_CHECK_GOTO(*timestamp >= last_timestamp, done);
        last_timestamp = *timestamp;
    }

done:
    uvm_mem_free(mem);

    return status;
}

static bool mem_match(uvm_mem_t *mem1, uvm_mem_t *mem2, size_t size)
{
    void *mem1_addr;
    void *mem2_addr;

    UVM_ASSERT(uvm_mem_is_sysmem(mem1));
    UVM_ASSERT(uvm_mem_is_sysmem(mem2));
    UVM_ASSERT(mem1->size >= size);
    UVM_ASSERT(mem2->size >= size);

    mem1_addr = uvm_mem_get_cpu_addr_kernel(mem1);
    mem2_addr = uvm_mem_get_cpu_addr_kernel(mem2);

    return !memcmp(mem1_addr, mem2_addr, size);
}

static NV_STATUS zero_vidmem(uvm_mem_t *mem)
{
    uvm_push_t push;
    uvm_gpu_address_t gpu_address;
    uvm_gpu_t *gpu = mem->backing_gpu;

    UVM_ASSERT(uvm_mem_is_vidmem(mem));

    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));

    gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
    gpu->parent->ce_hal->memset_1(&push, gpu_address, 0, mem->size);

    TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));

    return NV_OK;
}

static void write_range_cpu(uvm_mem_t *mem, NvU64 base_val)
{
    NvU64 *mem_cpu_va;
    unsigned i;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));
    UVM_ASSERT(IS_ALIGNED(mem->size, sizeof(*mem_cpu_va)));

    mem_cpu_va = (NvU64 *)uvm_mem_get_cpu_addr_kernel(mem);

    for (i = 0; i < (mem->size / sizeof(*mem_cpu_va)); i++)
        mem_cpu_va[i] = base_val++;
}

static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
{
    NV_STATUS status;

    UVM_ASSERT(mem);

    *mem = NULL;

    TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
    TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);

    return NV_OK;

err:
    uvm_mem_free(*mem);

    // Clear the caller's pointer so cleanup paths don't free it again
    *mem = NULL;
    return status;
}

static NV_STATUS alloc_sysmem_unprotected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size)
{
    NV_STATUS status;

    UVM_ASSERT(mem);

    *mem = NULL;

    TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
    TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);

    memset(uvm_mem_get_cpu_addr_kernel(*mem), 0, (*mem)->size);

    return NV_OK;

err:
    uvm_mem_free(*mem);

    // Clear the caller's pointer so cleanup paths don't free it again
    *mem = NULL;
    return status;
}

static void cpu_encrypt(uvm_channel_t *channel,
                        uvm_mem_t *dst_mem,
                        uvm_mem_t *src_mem,
                        uvm_mem_t *auth_tag_mem,
                        size_t size,
                        NvU32 copy_size)
{
    size_t offset = 0;
    char *src_plain = (char *)uvm_mem_get_cpu_addr_kernel(src_mem);
    char *dst_cipher = (char *)uvm_mem_get_cpu_addr_kernel(dst_mem);
    char *auth_tag_buffer = (char *)uvm_mem_get_cpu_addr_kernel(auth_tag_mem);

    while (offset < size) {
        uvm_conf_computing_cpu_encrypt(channel, dst_cipher, src_plain, NULL, copy_size, auth_tag_buffer);
        offset += copy_size;
        dst_cipher += copy_size;
        src_plain += copy_size;
        auth_tag_buffer += UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    }
}

static void cpu_acquire_encryption_ivs(uvm_channel_t *channel,
                                       size_t size,
                                       NvU32 copy_size,
                                       UvmCslIv *ivs)
{
    size_t offset = 0;
    int i = 0;

    for (; offset < size; offset += copy_size)
        uvm_conf_computing_acquire_encryption_iv(channel, &ivs[i++]);
}

static void cpu_encrypt_rev(uvm_channel_t *channel,
                            uvm_mem_t *dst_mem,
                            uvm_mem_t *src_mem,
                            uvm_mem_t *auth_tag_mem,
                            size_t size,
                            NvU32 copy_size,
                            UvmCslIv *encrypt_iv)
{
    char *src_plain = (char *)uvm_mem_get_cpu_addr_kernel(src_mem);
    char *dst_cipher = (char *)uvm_mem_get_cpu_addr_kernel(dst_mem);
    char *auth_tag_buffer = (char *)uvm_mem_get_cpu_addr_kernel(auth_tag_mem);
    int i;

    // CPU encrypt order is the opposite of the GPU decrypt order
    for (i = (size / copy_size) - 1; i >= 0; i--) {
        uvm_conf_computing_cpu_encrypt(channel,
                                       dst_cipher + i * copy_size,
                                       src_plain + i * copy_size,
                                       encrypt_iv + i,
                                       copy_size,
                                       auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
    }
}

static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
                                      uvm_mem_t *dst_mem,
                                      uvm_mem_t *src_mem,
                                      const UvmCslIv *decrypt_iv,
                                      uvm_mem_t *auth_tag_mem,
                                      size_t size,
                                      NvU32 copy_size)
{
    size_t i;
    char *dst_plain = (char *)uvm_mem_get_cpu_addr_kernel(dst_mem);
    char *src_cipher = (char *)uvm_mem_get_cpu_addr_kernel(src_mem);
    char *auth_tag_buffer = (char *)uvm_mem_get_cpu_addr_kernel(auth_tag_mem);

    for (i = 0; i < size / copy_size; i++) {
        TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
                                                         dst_plain + i * copy_size,
                                                         src_cipher + i * copy_size,
                                                         decrypt_iv + i,
                                                         copy_size,
                                                         auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
    }

    return NV_OK;
}

static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
                                          uvm_mem_t *dst_mem,
                                          uvm_mem_t *src_mem,
                                          const UvmCslIv *decrypt_iv,
                                          uvm_mem_t *auth_tag_mem,
                                          size_t size,
                                          NvU32 copy_size)
{
    int i;
    char *dst_plain = (char *)uvm_mem_get_cpu_addr_kernel(dst_mem);
    char *src_cipher = (char *)uvm_mem_get_cpu_addr_kernel(src_mem);
    char *auth_tag_buffer = (char *)uvm_mem_get_cpu_addr_kernel(auth_tag_mem);

    UVM_ASSERT((size / copy_size) <= INT_MAX);

    // CPU decrypt order is the opposite of the GPU encrypt order
    for (i = (size / copy_size) - 1; i >= 0; i--) {
        TEST_NV_CHECK_RET(uvm_conf_computing_cpu_decrypt(channel,
                                                         dst_plain + i * copy_size,
                                                         src_cipher + i * copy_size,
                                                         decrypt_iv + i,
                                                         copy_size,
                                                         auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
    }

    return NV_OK;
}
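
// Note on the two CPU decrypt variants above: each copy_size block is
// decrypted with the IV that was used to encrypt it (decrypt_iv + i), so the
// blocks can be decrypted in any order as long as each block is paired with
// its own IV. Decrypting in reverse is simply the easiest out-of-order
// pattern to generate.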

// GPU address to use as source or destination in CE decrypt/encrypt
// operations. If the uvm_mem backing storage is physically contiguous in the
// [offset, offset + size) interval, the physical address takes priority over
// the virtual counterpart.
static uvm_gpu_address_t gpu_address(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU32 size)
{
    uvm_gpu_address_t gpu_virtual_address;

    if (uvm_mem_is_physically_contiguous(mem, offset, size))
        return uvm_mem_gpu_address_physical(mem, gpu, offset, size);

    gpu_virtual_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
    gpu_virtual_address.address += offset;

    return gpu_virtual_address;
}

// Automatically get the correct address for the authentication tag. The
// addressing mode of the tag should match that of the reference address
// (destination pointer for GPU encrypt, source pointer for GPU decrypt).
static uvm_gpu_address_t auth_tag_gpu_address(uvm_mem_t *auth_tag_mem,
                                              uvm_gpu_t *gpu,
                                              size_t offset,
                                              uvm_gpu_address_t reference)
{
    uvm_gpu_address_t auth_tag_gpu_address;

    if (!reference.is_virtual)
        return uvm_mem_gpu_address_physical(auth_tag_mem, gpu, offset, UVM_CONF_COMPUTING_AUTH_TAG_SIZE);

    auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(auth_tag_mem, gpu);
    auth_tag_gpu_address.address += offset;

    return auth_tag_gpu_address;
}

// Note: no membar is issued in any of the GPU transfers (encryptions)
static void gpu_encrypt(uvm_push_t *push,
                        uvm_mem_t *dst_mem,
                        uvm_mem_t *src_mem,
                        uvm_mem_t *auth_tag_mem,
                        UvmCslIv *decrypt_iv,
                        size_t size,
                        NvU32 copy_size)
{
    size_t i;
    size_t num_iterations = size / copy_size;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    for (i = 0; i < num_iterations; i++) {
        uvm_gpu_address_t dst_cipher = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t src_plain = gpu_address(src_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
                                                          gpu,
                                                          i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                                          dst_cipher);

        uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);

        if (i > 0)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);

        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

        gpu->parent->ce_hal->encrypt(push, dst_cipher, src_plain, copy_size, auth_tag);
        decrypt_iv++;
    }
}

// Note: no membar is issued in any of the GPU transfers (decryptions)
static void gpu_decrypt(uvm_push_t *push,
                        uvm_mem_t *dst_mem,
                        uvm_mem_t *src_mem,
                        uvm_mem_t *auth_tag_mem,
                        size_t size,
                        NvU32 copy_size)
{
    size_t i;
    size_t num_iterations = size / copy_size;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    for (i = 0; i < num_iterations; i++) {
        uvm_gpu_address_t dst_plain = gpu_address(dst_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t src_cipher = gpu_address(src_mem, gpu, i * copy_size, copy_size);
        uvm_gpu_address_t auth_tag = auth_tag_gpu_address(auth_tag_mem,
                                                          gpu,
                                                          i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
                                                          src_cipher);

        if (i > 0)
            uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);

        uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

        gpu->parent->ce_hal->decrypt(push, dst_plain, src_cipher, copy_size, auth_tag);
    }
}

static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
                                           uvm_channel_type_t decrypt_channel_type,
                                           uvm_channel_type_t encrypt_channel_type,
                                           size_t size,
                                           NvU32 copy_size,
                                           bool decrypt_in_order,
                                           bool encrypt_in_order)
{
    uvm_push_t push;
    NvU64 init_value;
    NV_STATUS status = NV_OK;
    uvm_mem_t *src_plain = NULL;
    uvm_mem_t *src_cipher = NULL;
    uvm_mem_t *dst_cipher = NULL;
    uvm_mem_t *dst_plain_gpu = NULL;
    uvm_mem_t *dst_plain = NULL;
    uvm_mem_t *auth_tag_mem = NULL;
    size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    UvmCslIv *decrypt_iv = NULL;
    UvmCslIv *encrypt_iv = NULL;
    uvm_tracker_t tracker;
    size_t src_plain_size;

    TEST_CHECK_RET(copy_size <= size);
    TEST_CHECK_RET(IS_ALIGNED(size, copy_size));

    uvm_tracker_init(&tracker);

    decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
    if (!decrypt_iv) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    encrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
    if (!encrypt_iv) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_cipher, size), out);
    TEST_NV_CHECK_GOTO(alloc_vidmem_protected(gpu, &dst_plain_gpu, size), out);
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_cipher, size), out);
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &dst_plain, size), out);
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &auth_tag_mem, auth_tag_buffer_size), out);

    // The plaintext CPU buffer size should fit the initialization value
    src_plain_size = UVM_ALIGN_UP(size, sizeof(init_value));
    TEST_NV_CHECK_GOTO(alloc_sysmem_unprotected(gpu, &src_plain, src_plain_size), out);

    // Initialize the plaintext CPU buffer with a value that identifies the
    // given inputs
    TEST_CHECK_GOTO((((NvU64)size) < (1ULL << 63)), out);
    init_value = ((NvU64)decrypt_in_order << 63) | ((NvU64)size) | ((NvU64)copy_size);
    write_range_cpu(src_plain, init_value);

    TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager,
                                      decrypt_channel_type,
                                      &push,
                                      "CPU > GPU decrypt"),
                       out);

    // If encrypting in order: CPU (decrypted) -> CPU (encrypted), using the
    // CPU. Otherwise, only acquire the IVs now; the encryption itself is done
    // later, in reverse order.
    if (encrypt_in_order)
        cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
    else
        cpu_acquire_encryption_ivs(push.channel, size, copy_size, encrypt_iv);

    // CPU (encrypted) -> GPU (decrypted), using the GPU
    gpu_decrypt(&push, dst_plain_gpu, src_cipher, auth_tag_mem, size, copy_size);

    // Use the acquired IVs to encrypt in reverse order. The GPU does not
    // start executing the push until it is ended below, so the ciphertext is
    // complete before gpu_decrypt consumes it.
    if (!encrypt_in_order)
        cpu_encrypt_rev(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size, encrypt_iv);

    uvm_push_end(&push);
    TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);

    // GPU (decrypted) -> CPU (encrypted), using the GPU
    TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
                                              encrypt_channel_type,
                                              &tracker,
                                              &push,
                                              "GPU > CPU encrypt"),
                       out);

    gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);

    TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);

    // The ciphertexts should differ from the plaintext
    TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
    TEST_CHECK_GOTO(!mem_match(dst_cipher, src_plain, size), out);

    // CPU (encrypted) -> CPU (decrypted), using the CPU
    if (decrypt_in_order) {
        TEST_NV_CHECK_GOTO(cpu_decrypt_in_order(push.channel,
                                                dst_plain,
                                                dst_cipher,
                                                decrypt_iv,
                                                auth_tag_mem,
                                                size,
                                                copy_size),
                           out);
    }
    else {
        TEST_NV_CHECK_GOTO(cpu_decrypt_out_of_order(push.channel,
                                                    dst_plain,
                                                    dst_cipher,
                                                    decrypt_iv,
                                                    auth_tag_mem,
                                                    size,
                                                    copy_size),
                           out);
    }

    TEST_CHECK_GOTO(mem_match(src_plain, dst_plain, size), out);

out:
    uvm_mem_free(auth_tag_mem);
    uvm_mem_free(dst_plain);
    uvm_mem_free(dst_plain_gpu);
    uvm_mem_free(dst_cipher);
    uvm_mem_free(src_cipher);
    uvm_mem_free(src_plain);
    uvm_tracker_deinit(&tracker);
    uvm_kvfree(decrypt_iv);
    uvm_kvfree(encrypt_iv);

    return status;
}

static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu,
                                            uvm_channel_type_t decrypt_channel_type,
                                            uvm_channel_type_t encrypt_channel_type)
{
    bool cpu_decrypt_in_order = true;
    bool cpu_encrypt_in_order = true;
    size_t size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_4K * 2, UVM_PAGE_SIZE_2M};
    size_t copy_size[] = {UVM_PAGE_SIZE_4K, UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_2M};
    unsigned i;

    struct {
        bool encrypt_in_order;
        bool decrypt_in_order;
    } orders[] = {{true, true}, {true, false}, {false, true}, {false, false}};

    struct {
        size_t size;
        NvU32 copy_size;
    } small_sizes[] = {{1, 1}, {3, 1}, {8, 1}, {2, 2}, {8, 4}, {UVM_PAGE_SIZE_4K - 8, 8}, {UVM_PAGE_SIZE_4K + 8, 8}};

    // Only Confidential Computing uses CE encryption/decryption
    if (!uvm_conf_computing_mode_enabled(gpu))
        return NV_OK;

    // Use sizes and copy sizes that are not multiples of common page sizes.
    for (i = 0; i < ARRAY_SIZE(small_sizes); ++i) {
        // Skip tests that need a large pushbuffer on WLC. Secure work launch
        // needs at least one decrypt operation, so tests that require a
        // single operation are fine; tests using more operations might
        // overflow UVM_MAX_WLC_PUSH_SIZE.
        if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (small_sizes[i].size / small_sizes[i].copy_size > 1))
            continue;

        TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
                                                    decrypt_channel_type,
                                                    encrypt_channel_type,
                                                    small_sizes[i].size,
                                                    small_sizes[i].copy_size,
                                                    cpu_decrypt_in_order,
                                                    cpu_encrypt_in_order));
    }

    // Use sizes, and copy sizes, that are multiples of common page sizes.
    // This is the most typical usage of encrypt/decrypt in the UVM driver.
    for (i = 0; i < ARRAY_SIZE(orders); ++i) {
        unsigned j;

        cpu_encrypt_in_order = orders[i].encrypt_in_order;
        cpu_decrypt_in_order = orders[i].decrypt_in_order;

        for (j = 0; j < ARRAY_SIZE(size); ++j) {
            unsigned k;

            for (k = 0; k < ARRAY_SIZE(copy_size); ++k) {
                if (copy_size[k] > size[j])
                    continue;

                // Skip tests that need a large pushbuffer on WLC. Secure work
                // launch needs at least one decrypt operation, so tests that
                // require a single operation are fine; tests using more
                // operations might overflow UVM_MAX_WLC_PUSH_SIZE.
                if (encrypt_channel_type == UVM_CHANNEL_TYPE_WLC && (size[j] / copy_size[k] > 1))
                    continue;

                // There is no difference between in-order and out-of-order
                // decryption when encrypting once.
                if ((copy_size[k] == size[j]) && !cpu_decrypt_in_order)
                    continue;

                TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu,
                                                            decrypt_channel_type,
                                                            encrypt_channel_type,
                                                            size[j],
                                                            copy_size[k],
                                                            cpu_decrypt_in_order,
                                                            cpu_encrypt_in_order));
            }
        }
    }

    return NV_OK;
}

static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        TEST_NV_CHECK_RET(test_non_pipelined(gpu));
        TEST_NV_CHECK_RET(test_membar(gpu));
        TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
        TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
        TEST_NV_CHECK_RET(test_semaphore_release(gpu));

        if (!skipTimestampTest)
            TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));

        TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_CPU_TO_GPU, UVM_CHANNEL_TYPE_GPU_TO_CPU));
        TEST_NV_CHECK_RET(test_encryption_decryption(gpu, UVM_CHANNEL_TYPE_WLC, UVM_CHANNEL_TYPE_WLC));
    }

    return NV_OK;
}

NV_STATUS uvm_test_ce_sanity(UVM_TEST_CE_SANITY_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    uvm_va_space_down_read_rm(va_space);

    status = test_ce(va_space, params->skipTimestampTest);

    uvm_va_space_up_read_rm(va_space);

    return status;
}