/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include <asm/atomic.h>

#include "uvm_global.h"
#include "uvm_channel.h"
#include "uvm_hal.h"
#include "uvm_mem.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_test_rng.h"
#include "uvm_thread_context.h"
#include "uvm_va_space.h"
#include "uvm_tracker.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_kvmalloc.h"

#define TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES 2

static NvU32 get_push_end_size(uvm_channel_t *channel)
{
    if (uvm_channel_is_ce(channel))
        return UVM_PUSH_CE_END_SIZE;

    return 0;
}

static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_type_t type;

        for (type = 0; type < UVM_CHANNEL_TYPE_COUNT; ++type) {
            uvm_push_t push;
            NvU32 push_size_before;
            NvU32 push_end_size_observed, push_end_size_expected;

            TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager,
                                             type,
                                             &push,
                                             "type %s\n",
                                             uvm_channel_type_to_string(type)));

            push_size_before = uvm_push_get_size(&push);
            uvm_push_end(&push);

            push_end_size_expected = get_push_end_size(push.channel);
            push_end_size_observed = uvm_push_get_size(&push) - push_size_before;

            if (push_end_size_observed != push_end_size_expected) {
                UVM_TEST_PRINT("push_end_size incorrect, %u instead of %u on channel type %s for GPU %s\n",
                               push_end_size_observed,
                               push_end_size_expected,
                               uvm_channel_type_to_string(type),
                               uvm_gpu_name(gpu));

                // The size mismatch error takes precedence over a wait error
                (void)uvm_push_wait(&push);

                return NV_ERR_INVALID_STATE;
            }

            TEST_NV_CHECK_RET(uvm_push_wait(&push));
        }
    }

    return NV_OK;
}

typedef enum {
    TEST_INLINE_ADD,
    TEST_INLINE_GET,
    TEST_INLINE_SINGLE_BUFFER,
    TEST_INLINE_MAX,
} test_inline_type_t;

static NV_STATUS test_push_inline_data_gpu(uvm_gpu_t *gpu)
{
    static const size_t test_sizes[] = { 1, 2, 3, 4, 8, 31, 32, 1023, 1024, 1025, UVM_PUSH_INLINE_DATA_MAX_SIZE };
    NV_STATUS status;
    int i, j;
    int test_inline_type;
    uvm_push_t push;
    uvm_mem_t *mem = NULL;
    char *verif;

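    // The sysmem buffer allocated below backs "verif": each subtest writes
    // inline data into the pushbuffer, copies it into this buffer with the
    // copy engine, and the CPU then verifies the contents via the kernel
    // mapping.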
    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_PUSH_INLINE_DATA_MAX_SIZE, current->mm, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);

    status = uvm_mem_map_gpu_kernel(mem, gpu);
    TEST_CHECK_GOTO(status == NV_OK, done);

    verif = (char *)uvm_mem_get_cpu_addr_kernel(mem);

    for (test_inline_type = 0; test_inline_type < TEST_INLINE_MAX; ++test_inline_type) {
        for (i = 0; i < ARRAY_SIZE(test_sizes); ++i) {
            size_t test_size = test_sizes[i];
            uvm_push_inline_data_t data;
            size_t inline_data_size = 0;
            uvm_gpu_address_t data_gpu_address;
            char *inline_buf;

            status = uvm_push_begin(gpu->channel_manager,
                                    UVM_CHANNEL_TYPE_GPU_INTERNAL,
                                    &push,
                                    "Inline data size %zu",
                                    test_size);
            TEST_CHECK_GOTO(status == NV_OK, done);

            // Do a noop first to test inline data starting at different offsets
            gpu->parent->host_hal->noop(&push, roundup(min(test_size, (size_t)4096), UVM_METHOD_SIZE));

            switch (test_inline_type) {
                case TEST_INLINE_ADD:
                    uvm_push_inline_data_begin(&push, &data);
                    for (j = 0; j < test_size; ++j) {
                        char value = 1 + i + j;
                        uvm_push_inline_data_add(&data, &value, 1);
                    }
                    inline_data_size = uvm_push_inline_data_size(&data);
                    data_gpu_address = uvm_push_inline_data_end(&data);
                    break;
                case TEST_INLINE_GET:
                    uvm_push_inline_data_begin(&push, &data);
                    inline_buf = (char *)uvm_push_inline_data_get(&data, test_size);
                    inline_data_size = uvm_push_inline_data_size(&data);
                    data_gpu_address = uvm_push_inline_data_end(&data);
                    for (j = 0; j < test_size; ++j)
                        inline_buf[j] = 1 + i + j;
                    break;
                case TEST_INLINE_SINGLE_BUFFER:
                    inline_buf = (char *)uvm_push_get_single_inline_buffer(&push, test_size, &data_gpu_address);
                    inline_data_size = test_size;
                    for (j = 0; j < test_size; ++j)
                        inline_buf[j] = 1 + i + j;
                    break;
            }

            gpu->parent->ce_hal->memcopy(&push,
                                         uvm_mem_gpu_address_virtual_kernel(mem, gpu),
                                         data_gpu_address,
                                         test_size);
            status = uvm_push_end_and_wait(&push);
            TEST_CHECK_GOTO(status == NV_OK, done);

            TEST_CHECK_GOTO(inline_data_size == test_size, done);

            for (j = 0; j < test_size; ++j) {
                char expected = 1 + i + j;
                if (verif[j] != expected) {
                    UVM_TEST_PRINT("size %zu verif[%d] = %d instead of %d\n", test_size, j, verif[j], expected);
                    status = NV_ERR_INVALID_STATE;
                    goto done;
                }
            }
        }
    }

done:
    uvm_mem_free(mem);

    return status;
}

static NV_STATUS test_push_inline_data(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        TEST_CHECK_RET(test_push_inline_data_gpu(gpu) == NV_OK);
    }

    return NV_OK;
}

// Test that, on each GPU, begins UVM_PUSH_MAX_CONCURRENT_PUSHES pushes before
// ending any of them.
// Notably, starting more than a single push is not safe to do outside of a
// test: if multiple threads tried doing so, it could easily deadlock.
static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;
    NvU32 i;
    uvm_push_t *pushes;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;

    // As noted above, this test does unsafe things that would be detected by
    // lock tracking, so opt out of the tracking.
    uvm_thread_context_lock_disable_tracking();

    pushes = uvm_kvmalloc_zero(sizeof(*pushes) * UVM_PUSH_MAX_CONCURRENT_PUSHES);
    if (pushes == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    for_each_va_space_gpu(gpu, va_space) {
        for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
            uvm_push_t *push = &pushes[i];
            status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
            TEST_CHECK_GOTO(status == NV_OK, done);
        }

        for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
            uvm_push_t *push = &pushes[i];
            uvm_push_end(push);
            TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, push), done);
        }

        TEST_CHECK_GOTO(tracker.size != 0, done);

        status = uvm_tracker_wait(&tracker);
        TEST_CHECK_GOTO(status == NV_OK, done);
    }

done:
    uvm_thread_context_lock_enable_tracking();

    uvm_tracker_deinit(&tracker);

    uvm_kvfree(pushes);

    return status;
}

static void add_to_counter(void *ptr, int value)
{
    atomic_t *atomic = (atomic_t *)ptr;

    atomic_add(value, atomic);
}

static void add_one_to_counter(void *ptr)
{
    add_to_counter(ptr, 1);
}

static void add_two_to_counter(void *ptr)
{
    add_to_counter(ptr, 2);
}

static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_channel_t *channel;
    uvm_push_t push;
    NvU32 i;
    NvU32 *host_va;
    NvU64 gpu_va;
    NvU32 observed, expected;
    unsigned int num_non_paused_pushes;
    uvm_push_t pushes_not_ended[TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES];
    const NvLength size = sizeof(NvU32) * (1 + TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES);
    uvm_rm_mem_t *mem = NULL;
    atomic_t on_complete_counter = ATOMIC_INIT(0);

    // This test issues virtual memcopies/memsets, which in SR-IOV heavy mode
    // cannot be pushed to a proxy channel. Pushing to a UVM internal CE
    // channel works in all scenarios.
    channel = uvm_channel_any_of_type(gpu->channel_manager, UVM_CHANNEL_POOL_TYPE_CE);
    TEST_CHECK_RET(channel != NULL);

    if (channel->num_gpfifo_entries <= TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES) {
        UVM_TEST_PRINT("Insufficient number of gpfifo entries per channel to run this test. Expected at least %u "
                       "entries, but found %u\n",
                       TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES + 1,
                       channel->num_gpfifo_entries);
        return NV_ERR_INVALID_STATE;
    }
    num_non_paused_pushes = channel->num_gpfifo_entries;

    // The UVM driver only allows push interleaving across separate threads,
    // but it is hard to consistently replicate the interleaving. Instead, we
    // temporarily disable lock tracking, so we can interleave pushes from a
    // single thread.
    uvm_thread_context_lock_disable_tracking();

    status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &mem);
    TEST_CHECK_GOTO(status == NV_OK, done);

    host_va = (NvU32 *)uvm_rm_mem_get_cpu_va(mem);
    gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(channel));
    memset(host_va, 0, size);

    // Begin a few pushes on the channel, but do not end them yet.
    // Each pushed method sets a magic number in an independent memory
    // location.
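    // Each push begun on the channel holds channel resources, including a
    // gpfifo entry, until it is ended, which is why the test requires more
    // gpfifo entries than TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES (see the
    // check above).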
    for (i = 0; i < TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES; ++i) {
        uvm_push_info_t *push_info;

        status = uvm_push_begin_on_channel(channel, pushes_not_ended + i, "Set to 0x%x", 0xDEADBEEF + i);
        TEST_CHECK_GOTO(status == NV_OK, done);

        gpu->parent->ce_hal->memset_v_4(pushes_not_ended + i,
                                        gpu_va + sizeof(NvU32) * (i + 1),
                                        0xDEADBEEF + i,
                                        sizeof(NvU32));

        push_info = uvm_push_info_from_push(pushes_not_ended + i);
        push_info->on_complete = add_two_to_counter;
        push_info->on_complete_data = &on_complete_counter;
    }

    // Push N (N = number of gpfifo entries) value increments to the same
    // channel.
    for (i = 0; i < num_non_paused_pushes; ++i) {
        uvm_push_info_t *push_info;

        status = uvm_push_begin_on_channel(channel, &push, "inc to %u", i + 1);
        TEST_CHECK_GOTO(status == NV_OK, done);

        gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, num_non_paused_pushes);

        push_info = uvm_push_info_from_push(&push);
        push_info->on_complete = add_one_to_counter;
        push_info->on_complete_data = &on_complete_counter;

        uvm_push_end(&push);
    }

    // End the pending pushes
    for (i = 0; i < TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES; ++i)
        uvm_push_end(pushes_not_ended + i);

    // When the channel manager becomes idle, the GPU methods have completed
    // and the CPU completion callbacks associated with each push have been
    // invoked.
    status = uvm_channel_manager_wait(channel->pool->manager);
    TEST_CHECK_GOTO(status == NV_OK, done);

    observed = host_va[0];
    expected = num_non_paused_pushes;
    if (observed != expected) {
        UVM_TEST_PRINT("Observed counter %u but expected %u\n", observed, expected);
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

    for (i = 0; i < TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES; ++i) {
        observed = host_va[i + 1];
        expected = 0xDEADBEEF + i;
        if (observed != expected) {
            UVM_TEST_PRINT("Observed magic number 0x%x but expected 0x%x\n", observed, expected);
            status = NV_ERR_INVALID_STATE;
            goto done;
        }
    }

    observed = atomic_read(&on_complete_counter);
    expected = TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES * 2 + num_non_paused_pushes;
    if (observed != expected) {
        UVM_TEST_PRINT("Wrong value of counter incremented by push info callback. Observed %u but expected %u\n",
                       observed,
                       expected);
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

done:
    uvm_rm_mem_free(mem);
    uvm_thread_context_lock_enable_tracking();

    return status;
}

// Using a single thread, interleave pushes and check that the result is
// consistent with a non-interleaved sequence:
// 1) Begin a few pushes in channel X but do not end them. Each pushed (GPU)
//    method sets an individual value in an independent system memory
//    location. Each push is associated with a push info (CPU) callback that
//    atomically adds 2 to a memory location M.
// 2) Begin and end many pushes in the same channel X such that all the gpfifo
//    entries are filled. All the pushed methods do the same thing: atomically
//    increment a given system memory location.
//    Each push is associated with a push info callback that atomically
//    increments the memory location M.
// 3) End the pending pushes.
//
// The final state should be the same as in the non-interleaved sequence
// (1)-(3)-(2).
//
// Starting more than a single push is not safe to do outside of a test: if
// multiple threads tried doing so, it could easily deadlock.
static NV_STATUS test_push_interleaving(uvm_va_space_t *va_space)
{
    NV_STATUS status;
    uvm_gpu_t *gpu;

    BUILD_BUG_ON(TEST_PUSH_INTERLEAVING_NUM_PAUSED_PUSHES >= UVM_PUSH_MAX_CONCURRENT_PUSHES);

    for_each_va_space_gpu(gpu, va_space) {
        status = test_push_interleaving_on_gpu(gpu);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

// Push methods totaling exactly UVM_MAX_PUSH_SIZE while acquiring a semaphore.
// This is very tightly coupled with the pushbuffer implementation and method
// sizes, which is not ideal, but it allows testing corner cases in the
// pushbuffer management code.
static NV_STATUS test_push_exactly_max_push(uvm_gpu_t *gpu,
                                            uvm_push_t *push,
                                            uvm_channel_type_t channel_type,
                                            uvm_gpu_semaphore_t *sema_to_acquire,
                                            NvU32 value)
{
    NV_STATUS status;
    NvU64 semaphore_gpu_va;
    NvU32 push_end_size;

    status = uvm_push_begin(gpu->channel_manager, channel_type, push, "Test push");
    if (status != NV_OK)
        return status;

    TEST_CHECK_RET(uvm_push_has_space(push, UVM_MAX_PUSH_SIZE));
    TEST_CHECK_RET(!uvm_push_has_space(push, UVM_MAX_PUSH_SIZE + 1));

    semaphore_gpu_va = uvm_gpu_semaphore_get_gpu_va(sema_to_acquire, gpu, uvm_channel_is_proxy(push->channel));
    gpu->parent->host_hal->semaphore_acquire(push, semaphore_gpu_va, value);

    // Push a noop leaving just push_end_size bytes in the pushbuffer.
    push_end_size = get_push_end_size(push->channel);
    gpu->parent->host_hal->noop(push, UVM_MAX_PUSH_SIZE - uvm_push_get_size(push) - push_end_size);

    TEST_CHECK_RET(uvm_push_has_space(push, push_end_size));
    TEST_CHECK_RET(!uvm_push_has_space(push, push_end_size + 1));
    uvm_push_end(push);

    UVM_ASSERT_MSG(uvm_push_get_size(push) == UVM_MAX_PUSH_SIZE, "push_size %u\n", uvm_push_get_size(push));

    return NV_OK;
}

static NvU32 test_count_idle_chunks(uvm_pushbuffer_t *pushbuffer)
{
    NvU32 i;
    NvU32 count = 0;

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i)
        count += test_bit(i, pushbuffer->idle_chunks) ? 1 : 0;

    return count;
}

static NvU32 test_count_available_chunks(uvm_pushbuffer_t *pushbuffer)
{
    NvU32 i;
    NvU32 count = 0;

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i)
        count += test_bit(i, pushbuffer->available_chunks) ? 1 : 0;

    return count;
}

// Reuse the whole pushbuffer 4 times, one UVM_MAX_PUSH_SIZE at a time
#define EXTRA_MAX_PUSHES_WHILE_FULL (4 * UVM_PUSHBUFFER_SIZE / UVM_MAX_PUSH_SIZE)

// Test doing pushes of exactly UVM_MAX_PUSH_SIZE and only allowing them to
// complete one by one.
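// In the function below, the first loop fills the entire pushbuffer with
// pushes that block on a semaphore acquire. The second loop then raises the
// semaphore payload one step at a time, letting a single push complete and
// verifying that exactly one UVM_MAX_PUSH_SIZE slot frees up per step.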
static NV_STATUS test_max_pushes_on_gpu_and_channel_type(uvm_gpu_t *gpu, uvm_channel_type_t channel_type)
{
    NV_STATUS status;
    uvm_tracker_t tracker;
    uvm_gpu_semaphore_t sema;
    NvU32 total_push_size = 0;
    NvU32 push_count = 0;
    NvU32 i;

    uvm_tracker_init(&tracker);

    status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &sema);
    TEST_CHECK_GOTO(status == NV_OK, done);

    uvm_gpu_semaphore_set_payload(&sema, 0);

    // Need to wait for all channels to completely idle so that the pushbuffer
    // is in a completely idle state when we begin.
    status = uvm_channel_manager_wait(gpu->channel_manager);
    TEST_CHECK_GOTO(status == NV_OK, done);

    while (uvm_pushbuffer_has_space(gpu->channel_manager->pushbuffer)) {
        uvm_push_t push;

        ++push_count;

        status = test_push_exactly_max_push(gpu, &push, channel_type, &sema, push_count);
        TEST_CHECK_GOTO(status == NV_OK, done);

        total_push_size += uvm_push_get_size(&push);
        TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
    }

    if (total_push_size != UVM_PUSHBUFFER_SIZE) {
        UVM_TEST_PRINT("Unexpected space in the pushbuffer, total push size %u\n", total_push_size);
        uvm_pushbuffer_print(gpu->channel_manager->pushbuffer);
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

    TEST_CHECK_GOTO(test_count_available_chunks(gpu->channel_manager->pushbuffer) == 0, done);
    TEST_CHECK_GOTO(test_count_idle_chunks(gpu->channel_manager->pushbuffer) == 0, done);

    for (i = 0; i < EXTRA_MAX_PUSHES_WHILE_FULL; ++i) {
        uvm_push_t push;

        // There should be no space for another push until the semaphore is
        // incremented. Incrementing it allows a single push to complete,
        // freeing exactly UVM_MAX_PUSH_SIZE of space.
        if (uvm_pushbuffer_has_space(gpu->channel_manager->pushbuffer)) {
            UVM_TEST_PRINT("Unexpected space in the pushbuffer for iter %d\n", i);
            uvm_pushbuffer_print(gpu->channel_manager->pushbuffer);
            status = NV_ERR_INVALID_STATE;
            goto done;
        }

        uvm_gpu_semaphore_set_payload(&sema, i + 1);

        ++push_count;

        // Take UVM_MAX_PUSH_SIZE of space. This should leave no space left
        // again.
        status = test_push_exactly_max_push(gpu, &push, channel_type, &sema, push_count);
        TEST_CHECK_GOTO(status == NV_OK, done);

        TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
    }

done:
    uvm_gpu_semaphore_set_payload(&sema, push_count);
    uvm_tracker_wait_deinit(&tracker);

    uvm_gpu_semaphore_free(&sema);

    return status;
}

static NV_STATUS test_max_pushes_on_gpu(uvm_gpu_t *gpu)
{
    TEST_NV_CHECK_RET(test_max_pushes_on_gpu_and_channel_type(gpu, UVM_CHANNEL_TYPE_GPU_INTERNAL));

    return NV_OK;
}

// Test doing UVM_PUSHBUFFER_CHUNKS independent pushes, expecting each one to
// use a different chunk in the pushbuffer.
static NV_STATUS test_idle_chunks_on_gpu(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_gpu_semaphore_t sema;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    NvU32 i;

    status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &sema);
    TEST_CHECK_GOTO(status == NV_OK, done);

    uvm_gpu_semaphore_set_payload(&sema, 0);

    // Need to wait for all channels to completely idle so that the pushbuffer
    // is in a completely idle state when we begin.
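    // Each push in the loop below then acquires a semaphore payload that has
    // not been set yet, keeping its pushbuffer chunk in use until the payload
    // is raised once the expected idle-chunk counts have been verified.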
    status = uvm_channel_manager_wait(gpu->channel_manager);
    TEST_CHECK_GOTO(status == NV_OK, done);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i) {
        NvU64 semaphore_gpu_va;
        uvm_push_t push;

        status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Push using chunk %u", i);
        TEST_CHECK_GOTO(status == NV_OK, done);

        semaphore_gpu_va = uvm_gpu_semaphore_get_gpu_va(&sema, gpu, uvm_channel_is_proxy(push.channel));
        gpu->parent->host_hal->semaphore_acquire(&push, semaphore_gpu_va, i + 1);
        uvm_push_end(&push);

        TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);

        if (test_count_idle_chunks(gpu->channel_manager->pushbuffer) != UVM_PUSHBUFFER_CHUNKS - i - 1) {
            UVM_TEST_PRINT("Unexpected count of idle chunks in the pushbuffer: %u instead of %u\n",
                           test_count_idle_chunks(gpu->channel_manager->pushbuffer),
                           UVM_PUSHBUFFER_CHUNKS - i - 1);
            uvm_pushbuffer_print(gpu->channel_manager->pushbuffer);
            status = NV_ERR_INVALID_STATE;
            goto done;
        }
    }

    uvm_gpu_semaphore_set_payload(&sema, UVM_PUSHBUFFER_CHUNKS + 1);

    status = uvm_channel_manager_wait(gpu->channel_manager);
    TEST_CHECK_GOTO(status == NV_OK, done);

    if (test_count_idle_chunks(gpu->channel_manager->pushbuffer) != UVM_PUSHBUFFER_CHUNKS) {
        UVM_TEST_PRINT("Unexpected count of idle chunks in the pushbuffer: %u\n",
                       test_count_idle_chunks(gpu->channel_manager->pushbuffer));
        uvm_pushbuffer_print(gpu->channel_manager->pushbuffer);
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

done:
    uvm_gpu_semaphore_set_payload(&sema, UVM_PUSHBUFFER_CHUNKS + 1);
    (void)uvm_tracker_wait(&tracker);

    uvm_gpu_semaphore_free(&sema);
    uvm_tracker_deinit(&tracker);

    return status;
}

static NV_STATUS test_pushbuffer(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        TEST_NV_CHECK_RET(test_max_pushes_on_gpu(gpu));
        TEST_NV_CHECK_RET(test_idle_chunks_on_gpu(gpu));
    }

    return NV_OK;
}

typedef struct
{
    NvU64 *timestamp_in_pushbuffer;
    NvU64 timestamp;
} timestamp_test_t;

static void timestamp_on_complete(void *void_data)
{
    timestamp_test_t *data = (timestamp_test_t *)void_data;

    if (uvm_global_get_status() != NV_OK) {
        // Do nothing if a global error has been set, as the callback might be
        // called from teardown, where the reference to the test data is no
        // longer valid.
        return;
    }

    data->timestamp = *data->timestamp_in_pushbuffer;
}

static NV_STATUS test_timestamp_on_gpu(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_push_t push;
    timestamp_test_t test_data = {0};
    NvU32 i;
    NvU64 last_stamp = 0;

    for (i = 0; i < 10; ++i) {
        status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Releasing a timestamp");
        if (status != NV_OK)
            return status;

        test_data.timestamp_in_pushbuffer = uvm_push_timestamp(&push);
        uvm_push_info_from_push(&push)->on_complete = timestamp_on_complete;
        uvm_push_info_from_push(&push)->on_complete_data = &test_data;
        uvm_push_end(&push);

        // Synchronize the channel manager to make sure the on_complete
        // callbacks have a chance to run.
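        // Reading the timestamp in the callback, rather than here, captures
        // the pushbuffer location before its space can be recycled by a later
        // push.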
        status = uvm_channel_manager_wait(gpu->channel_manager);
        TEST_CHECK_RET(status == NV_OK);

        TEST_CHECK_RET(test_data.timestamp != 0);
        TEST_CHECK_RET(test_data.timestamp > last_stamp);
        last_stamp = test_data.timestamp;
    }

    return NV_OK;
}

static NV_STATUS test_timestamp(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space)
        TEST_CHECK_RET(test_timestamp_on_gpu(gpu) == NV_OK);

    return NV_OK;
}

static NV_STATUS sync_memcopy(uvm_channel_type_t type, uvm_mem_t *dst, uvm_mem_t *src)
{
    uvm_push_t push;
    uvm_gpu_address_t dst_va;
    uvm_gpu_address_t src_va;
    uvm_gpu_t *gpu;
    NV_STATUS status;

    UVM_ASSERT(uvm_mem_is_vidmem(src) || uvm_mem_is_vidmem(dst));

    if (type == UVM_CHANNEL_TYPE_CPU_TO_GPU || type == UVM_CHANNEL_TYPE_GPU_TO_CPU) {
        gpu = (type == UVM_CHANNEL_TYPE_CPU_TO_GPU) ? dst->backing_gpu : src->backing_gpu;
        status = uvm_push_begin(gpu->channel_manager, type, &push, uvm_channel_type_to_string(type));
        if (status != NV_OK)
            return status;

        dst_va = uvm_mem_gpu_address_virtual_kernel(dst, gpu);
        src_va = uvm_mem_gpu_address_virtual_kernel(src, gpu);
        gpu->parent->ce_hal->memcopy(&push, dst_va, src_va, src->size);
    }
    else {
        unsigned i;
        const NvU32 chunk_size = src->chunk_size;

        UVM_ASSERT((src->size % chunk_size) == 0);

        gpu = src->backing_gpu;
        status = uvm_push_begin_gpu_to_gpu(gpu->channel_manager,
                                           dst->backing_gpu,
                                           &push,
                                           uvm_channel_type_to_string(type));
        if (status != NV_OK)
            return status;

        for (i = 0; i < src->size / chunk_size; i++) {
            dst_va = uvm_mem_gpu_address_copy(dst, gpu, i * chunk_size, chunk_size);
            src_va = uvm_mem_gpu_address_copy(src, gpu, i * chunk_size, chunk_size);
            gpu->parent->ce_hal->memcopy(&push, dst_va, src_va, chunk_size);
        }
    }

    return uvm_push_end_and_wait(&push);
}

static bool can_do_peer_copies(uvm_va_space_t *va_space, uvm_gpu_t *gpu_a, uvm_gpu_t *gpu_b)
{
    if (gpu_a == gpu_b || !uvm_processor_mask_test(&va_space->can_copy_from[uvm_id_value(gpu_a->id)], gpu_b->id))
        return false;

    UVM_ASSERT(uvm_processor_mask_test(&va_space->can_copy_from[uvm_id_value(gpu_b->id)], gpu_a->id));

    // TODO: Bug 2028875. Indirect peers are not supported for now.
    if (uvm_gpus_are_indirect_peers(gpu_a, gpu_b))
        return false;

    return true;
}

// Test the GPU-to-GPU push interface by transferring data between each
// permutation of GPU peers.
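// For each ordered pair of peers (A, B): fill a host buffer with a known
// pattern, copy host->A, zero the host copy, copy A->B, copy B->host, and
// verify that the pattern survived the round trip.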
static NV_STATUS test_push_gpu_to_gpu(uvm_va_space_t *va_space)
{
    NvU32 i;
    NV_STATUS status;
    uvm_gpu_t *gpu, *gpu_a, *gpu_b;
    uvm_mem_t *mem[UVM_ID_MAX_PROCESSORS] = {NULL};
    NvU32 *host_ptr;
    const size_t size = 1024 * 1024;
    bool waive = true;

    for_each_va_space_gpu(gpu_a, va_space) {
        for_each_va_space_gpu(gpu_b, va_space) {
            if (can_do_peer_copies(va_space, gpu_a, gpu_b)) {
                waive = false;
                break;
            }
        }
    }

    if (waive)
        return NV_OK;

    // Allocate and initialize the host buffer
    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &mem[UVM_ID_CPU_VALUE]);
    TEST_CHECK_GOTO(status == NV_OK, done);

    host_ptr = (NvU32 *)uvm_mem_get_cpu_addr_kernel(mem[UVM_ID_CPU_VALUE]);

    for (i = 0; i < size / sizeof(NvU32); ++i)
        host_ptr[i] = i + 1;

    // Allocate vidmem on each GPU, and map the host buffer
    for_each_va_space_gpu(gpu, va_space) {
        status = uvm_mem_alloc_vidmem(size, gpu, &mem[uvm_id_value(gpu->id)]);
        TEST_CHECK_GOTO(status == NV_OK, done);

        status = uvm_mem_map_gpu_kernel(mem[uvm_id_value(gpu->id)], gpu);
        TEST_CHECK_GOTO(status == NV_OK, done);

        status = uvm_mem_map_gpu_kernel(mem[UVM_ID_CPU_VALUE], gpu);
        TEST_CHECK_GOTO(status == NV_OK, done);
    }

    // Copy the buffer between each pair of GPU peers, in both directions
    for_each_va_space_gpu(gpu_a, va_space) {
        for_each_va_space_gpu(gpu_b, va_space) {
            if (!can_do_peer_copies(va_space, gpu_a, gpu_b))
                continue;

            // Copy from the CPU to the first GPU, then zero out the host copy
            status = sync_memcopy(UVM_CHANNEL_TYPE_CPU_TO_GPU,
                                  mem[uvm_id_value(gpu_a->id)],
                                  mem[UVM_ID_CPU_VALUE]);
            TEST_CHECK_GOTO(status == NV_OK, done);

            memset(host_ptr, 0, size);

            // Copy from the first GPU to the second GPU
            status = sync_memcopy(UVM_CHANNEL_TYPE_GPU_TO_GPU,
                                  mem[uvm_id_value(gpu_b->id)],
                                  mem[uvm_id_value(gpu_a->id)]);
            TEST_CHECK_GOTO(status == NV_OK, done);

            // Copy from the second GPU back to the host, and check the result
            status = sync_memcopy(UVM_CHANNEL_TYPE_GPU_TO_CPU,
                                  mem[UVM_ID_CPU_VALUE],
                                  mem[uvm_id_value(gpu_b->id)]);
            TEST_CHECK_GOTO(status == NV_OK, done);

            for (i = 0; i < size / sizeof(NvU32); ++i) {
                if (host_ptr[i] != i + 1) {
                    UVM_TEST_PRINT("host_ptr[%u] = %u instead of %u when copying between %s and %s\n",
                                   i,
                                   host_ptr[i],
                                   i + 1,
                                   uvm_gpu_name(gpu_a),
                                   uvm_gpu_name(gpu_b));
                    status = NV_ERR_INVALID_STATE;
                    goto done;
                }
            }
        }
    }

done:
    for_each_va_space_gpu(gpu, va_space)
        uvm_mem_free(mem[uvm_id_value(gpu->id)]);

    uvm_mem_free(mem[UVM_ID_CPU_VALUE]);

    return status;
}

NV_STATUS uvm_test_push_sanity(UVM_TEST_PUSH_SANITY_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    // Take the global lock as some of the tests rely on being the
    // only thread doing pushes and could deadlock otherwise.
    uvm_mutex_lock(&g_uvm_global.global_lock);
    uvm_va_space_down_read_rm(va_space);

    status = test_push_end_size(va_space);
    if (status != NV_OK)
        goto done;

    status = test_push_inline_data(va_space);
    if (status != NV_OK)
        goto done;

    status = test_concurrent_pushes(va_space);
    if (status != NV_OK)
        goto done;

    status = test_push_interleaving(va_space);
    if (status != NV_OK)
        goto done;

    status = test_push_gpu_to_gpu(va_space);
    if (status != NV_OK)
        goto done;

    status = test_pushbuffer(va_space);
    if (status != NV_OK)
        goto done;

    if (!params->skipTimestampTest) {
        status = test_timestamp(va_space);
        if (status != NV_OK)
            goto done;
    }

done:
    uvm_va_space_up_read_rm(va_space);
    uvm_mutex_unlock(&g_uvm_global.global_lock);

    return status;
}