/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_gpu_semaphore.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#include "uvm_conf_computing.h"

#define UVM_SEMAPHORE_SIZE           4
#define UVM_SEMAPHORE_PAGE_SIZE      PAGE_SIZE
#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
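// Illustrative note (not part of the original file): with the common 4 KiB
// PAGE_SIZE, this layout yields 4096 / 4 = 1024 semaphore slots per pool page.
// A hypothetical sanity check of those assumptions, placed in any init
// function, could use the kernel's BUILD_BUG_ON():
//
//     BUILD_BUG_ON(UVM_SEMAPHORE_PAGE_SIZE % UVM_SEMAPHORE_SIZE != 0);
//     BUILD_BUG_ON(UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_SEMAPHORE_SIZE != UVM_SEMAPHORE_PAGE_SIZE);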
// The top nibble of the canary base is intentionally 0. The rest of the value
// is arbitrary. See the comments below on make_canary.
#define UVM_SEMAPHORE_CANARY_BASE 0x0badc0de
#define UVM_SEMAPHORE_CANARY_MASK 0xf0000000

struct uvm_gpu_semaphore_pool_struct
{
    // The GPU owning the pool
    uvm_gpu_t *gpu;

    // List of all the semaphore pages belonging to the pool
    struct list_head pages;

    // Pages aperture.
    uvm_aperture_t aperture;

    // Count of free semaphores among all the pages
    NvU32 free_semaphores_count;

    // Lock protecting the state of the pool
    uvm_mutex_t mutex;
};

struct uvm_gpu_semaphore_pool_page_struct
{
    // Allocation backing the page
    uvm_rm_mem_t *memory;

    // Pool the page is part of
    uvm_gpu_semaphore_pool_t *pool;

    // Node in the list of all pages in a semaphore pool
    struct list_head all_pages_node;

    // Mask indicating free semaphore indices within the page
    DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};

static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
{
    return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
}

static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
{
    return gpu_semaphore_pool_is_secure(semaphore->page->pool);
}

static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
    NvU32 offset;
    NvU32 index;

    if (gpu_semaphore_is_secure(semaphore))
        return semaphore->conf_computing.index;

    UVM_ASSERT(semaphore->payload != NULL);
    UVM_ASSERT(semaphore->page != NULL);

    offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
    UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);

    index = offset / UVM_SEMAPHORE_SIZE;
    UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);

    return index;
}

// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
// is a little trickier.
//
// In order to make still-pending GEQ acquires stall indefinitely we need to
// reduce the current payload as much as we can, subject to two restrictions:
//
// 1) The pending acquires could be comparing against values much less than and
//    much greater than the current payload, so we have to set the payload to a
//    value reasonably less than the acquires which we might expect to be
//    pending.
//
// 2) Going over halfway past a pending acquire on the 32-bit number wheel will
//    cause Host to wrap and think the acquire succeeded. So we shouldn't reduce
//    by more than 2^31.
//
// To handle these restrictions we'll deal with quadrants of 2^32, under the
// assumption that it's unlikely for a payload to outpace a pending acquire by
// more than 2^30.
//
// We also need for the base value to have some 0s in the upper significant
// bits, otherwise those bits might carry us past the quadrant boundary when we
// OR them in.
static NvU32 make_canary(NvU32 payload)
{
    NvU32 prev_quadrant = payload - (1 << 30);
    return (prev_quadrant & UVM_SEMAPHORE_CANARY_MASK) | UVM_SEMAPHORE_CANARY_BASE;
}

static bool is_canary(NvU32 val)
{
    return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
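// A worked example of the canary scheme above (illustrative, not part of the
// original file). Freeing a semaphore whose payload is 0 stores a value one
// quadrant behind it, so pending GEQ acquires keep waiting, while the low 28
// bits still identify the value as a canary:
//
//     make_canary(0x00000000) == 0xcbadc0de    // previous quadrant 0xc0000000
//     make_canary(0x50001234) == 0x1badc0de    // previous quadrant 0x10000000
//     is_canary(0xcbadc0de)   == true
//     is_canary(0x00001234)   == false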
static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
{
    // A pool allocated in the CPR of vidmem cannot be read/written from the
    // CPU.
    return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
}

// Can the GPU access the semaphore, i.e., can Host/Esched address the
// semaphore pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
{
    return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}

// Secure semaphore pools are allocated in the CPR of vidmem and only mapped to
// the owning GPU, as no other processor has access to them.
static NV_STATUS pool_alloc_secure_page(uvm_gpu_semaphore_pool_t *pool,
                                        uvm_gpu_semaphore_pool_page_t *pool_page,
                                        uvm_rm_mem_type_t memory_type)
{
    NV_STATUS status;

    UVM_ASSERT(gpu_semaphore_pool_is_secure(pool));
    status = uvm_rm_mem_alloc(pool->gpu,
                              memory_type,
                              UVM_SEMAPHORE_PAGE_SIZE,
                              UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                              &pool_page->memory);
    if (status != NV_OK)
        return status;

    return NV_OK;
}

static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
    NV_STATUS status;
    uvm_gpu_semaphore_pool_page_t *pool_page;
    NvU32 *payloads;
    size_t i;
    uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;

    uvm_assert_mutex_locked(&pool->mutex);

    pool_page = uvm_kvmalloc_zero(sizeof(*pool_page));
    if (!pool_page)
        return NV_ERR_NO_MEMORY;

    pool_page->pool = pool;

    // Whenever the Confidential Computing feature is enabled, engines can
    // access semaphores only in the CPR of vidmem. Mapping to other GPUs is
    // also disabled.
    if (gpu_semaphore_pool_is_secure(pool)) {
        status = pool_alloc_secure_page(pool, pool_page, memory_type);
        if (status != NV_OK)
            goto error;
    }
    else {
        status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
                                              memory_type,
                                              UVM_SEMAPHORE_PAGE_SIZE,
                                              0,
                                              &pool_page->memory);
        if (status != NV_OK)
            goto error;
    }

    // Verify the GPU can access the semaphore pool.
    UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));

    // All semaphores are initially free
    bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

    list_add(&pool_page->all_pages_node, &pool->pages);
    pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;

    if (semaphore_uses_canary(pool)) {
        payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            payloads[i] = make_canary(0);
    }

    return NV_OK;

error:
    uvm_kvfree(pool_page);
    return status;
}
static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
    uvm_gpu_semaphore_pool_t *pool;

    UVM_ASSERT(page);
    pool = page->pool;

    uvm_assert_mutex_locked(&pool->mutex);

    // Assert that no semaphores are still allocated
    UVM_ASSERT(bitmap_full(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE));
    UVM_ASSERT_MSG(pool->free_semaphores_count >= UVM_SEMAPHORE_COUNT_PER_PAGE,
                   "count: %u\n",
                   pool->free_semaphores_count);

    if (semaphore_uses_canary(pool)) {
        size_t i;
        NvU32 *payloads = uvm_rm_mem_get_cpu_va(page->memory);
        for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
            UVM_ASSERT(is_canary(payloads[i]));
    }

    pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
    list_del(&page->all_pages_node);
    uvm_rm_mem_free(page->memory);
    uvm_kvfree(page);
}

NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    memset(semaphore, 0, sizeof(*semaphore));

    uvm_mutex_lock(&pool->mutex);

    if (pool->free_semaphores_count == 0)
        status = pool_alloc_page(pool);

    if (status != NV_OK)
        goto done;

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
        if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
            continue;

        if (gpu_semaphore_pool_is_secure(pool)) {
            semaphore->conf_computing.index = semaphore_index;
        }
        else {
            semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) +
                                          semaphore_index * UVM_SEMAPHORE_SIZE);
        }

        semaphore->page = page;

        if (semaphore_uses_canary(pool))
            UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));

        uvm_gpu_semaphore_set_payload(semaphore, 0);

        __clear_bit(semaphore_index, page->free_semaphores);
        --pool->free_semaphores_count;

        goto done;
    }

    UVM_ASSERT_MSG(0, "Failed to find a semaphore after allocating a new page\n");
    status = NV_ERR_GENERIC;

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}

void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_t *pool;
    NvU32 index;

    UVM_ASSERT(semaphore);

    // uvm_gpu_semaphore_t is meant to be embedded in other structures, so it
    // should always be accessible, but it may not have been initialized in
    // error cases. Early out if page is NULL, which indicates the semaphore
    // was never successfully allocated.
    page = semaphore->page;
    if (page == NULL)
        return;

    pool = page->pool;
    index = get_index(semaphore);

    // Write a known value lower than the current payload in an attempt to
    // catch release-after-free and acquire-after-free.
    if (semaphore_uses_canary(pool))
        uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));

    uvm_mutex_lock(&pool->mutex);

    semaphore->page = NULL;
    semaphore->payload = NULL;

    ++pool->free_semaphores_count;
    __set_bit(index, page->free_semaphores);

    uvm_mutex_unlock(&pool->mutex);
}
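// Illustrative usage sketch (not part of the driver): a caller holding a valid
// uvm_gpu_t would typically pair the pool and payload APIs defined in this
// file as follows. Error handling is elided and the "gpu" variable is assumed.
//
//     uvm_gpu_semaphore_pool_t *pool;
//     uvm_gpu_semaphore_t sem;
//
//     uvm_gpu_semaphore_pool_create(gpu, &pool);
//     uvm_gpu_semaphore_alloc(pool, &sem);
//
//     uvm_gpu_semaphore_set_payload(&sem, 42);
//     UVM_ASSERT(uvm_gpu_semaphore_get_payload(&sem) == 42);
//
//     uvm_gpu_semaphore_free(&sem);
//     uvm_gpu_semaphore_pool_destroy(pool);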
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    uvm_gpu_semaphore_pool_t *pool;
    pool = uvm_kvmalloc_zero(sizeof(*pool));
    if (!pool)
        return NV_ERR_NO_MEMORY;

    uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);

    INIT_LIST_HEAD(&pool->pages);

    pool->free_semaphores_count = 0;
    pool->gpu = gpu;
    pool->aperture = UVM_APERTURE_SYS;

    *pool_out = pool;

    return NV_OK;
}

NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    NV_STATUS status;

    UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));

    status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
    if (status == NV_OK)
        (*pool_out)->aperture = UVM_APERTURE_VID;

    return status;
}

void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
    uvm_gpu_semaphore_pool_page_t *page;
    uvm_gpu_semaphore_pool_page_t *next_page;

    if (!pool)
        return;

    // No other thread should be touching the pool once it's being destroyed
    uvm_assert_mutex_unlocked(&pool->mutex);

    // Keep pool_free_page happy
    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry_safe(page, next_page, &pool->pages, all_pages_node)
        pool_free_page(page);

    UVM_ASSERT_MSG(pool->free_semaphores_count == 0, "unused: %u", pool->free_semaphores_count);
    UVM_ASSERT(list_empty(&pool->pages));

    uvm_mutex_unlock(&pool->mutex);

    uvm_kvfree(pool);
}

NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node) {
        status = uvm_rm_mem_map_gpu(page->memory, gpu, 0);
        if (status != NV_OK)
            goto done;
    }

done:
    uvm_mutex_unlock(&pool->mutex);

    return status;
}

void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
    uvm_gpu_semaphore_pool_page_t *page;

    UVM_ASSERT(pool);
    UVM_ASSERT(gpu);

    uvm_mutex_lock(&pool->mutex);

    list_for_each_entry(page, &pool->pages, all_pages_node)
        uvm_rm_mem_unmap_gpu(page->memory, gpu);

    uvm_mutex_unlock(&pool->mutex);
}

NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, false);
}

NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, true);
}

NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
    NvU32 index = get_index(semaphore);
    NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space).address;

    return base_va + UVM_SEMAPHORE_SIZE * index;
}
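// Worked example of the address computation above (illustrative figures, not
// from the original file): if a pool page is mapped at GPU VA 0x7fff0000 and a
// semaphore occupies slot index 3, its GPU VA is 0x7fff0000 + 4 * 3 =
// 0x7fff000c.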
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
    if (gpu_semaphore_is_secure(semaphore))
        return UVM_GPU_READ_ONCE(semaphore->conf_computing.cached_payload);

    return UVM_GPU_READ_ONCE(*semaphore->payload);
}

void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
{
    // Provide a guarantee that all memory accesses prior to setting the payload
    // won't be moved past it.
    // Use a big hammer mb() as set_payload() is not used in any performance
    // path today.
    // This could likely be optimized to be either an smp_store_release() or use
    // an smp_mb__before_atomic() barrier. The former is a recent addition to
    // the kernel though, and it's not clear whether combining the latter with a
    // regular 32-bit store is well defined in all cases. Both also seem to risk
    // being optimized out on non-SMP configs (we need them for interacting with
    // the GPU correctly even on non-SMP).
    mb();

    if (gpu_semaphore_is_secure(semaphore))
        UVM_GPU_WRITE_ONCE(semaphore->conf_computing.cached_payload, payload);
    else
        UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
}
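// For reference, a minimal sketch (assumption, not the driver's choice) of the
// store-release alternative mentioned in the comment above, for the non-secure
// case:
//
//     smp_store_release(semaphore->payload, payload);
//
// The full mb() is kept instead because, as noted, the smp_* variants may be
// weaker or elided on non-SMP configs, while the GPU still requires the
// ordering.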
// This function is intended to catch channels which have been left dangling in
// trackers after their owning GPUs have been destroyed.
static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_t *gpu = tracking_sem->semaphore.page->pool->gpu;
    uvm_gpu_t *table_gpu;

    UVM_ASSERT_MSG(gpu->magic == UVM_GPU_MAGIC_VALUE, "Corruption detected: magic number is 0x%llx\n", gpu->magic);

    // It's ok for the GPU to not be in the global table, since add_gpu operates
    // on trackers before adding the GPU to the table, and remove_gpu operates
    // on trackers after removing the GPU. We rely on the magic value to catch
    // those cases.
    //
    // But if a pointer is in the table it must match.
    table_gpu = uvm_gpu_get(gpu->global_id);
    if (table_gpu)
        UVM_ASSERT(table_gpu == gpu);

    // Return a boolean so this function can be used in assertions for
    // conditional compilation
    return true;
}

bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;

    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
    if (uvm_conf_computing_mode_enabled(gpu))
        return true;

    return false;
}

NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    NV_STATUS status;
    uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;

    memset(tracking_sem, 0, sizeof(*tracking_sem));

    status = uvm_gpu_semaphore_alloc(pool, &tracking_sem->semaphore);
    if (status != NV_OK)
        return status;

    UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);

    if (uvm_conf_computing_mode_enabled(pool->gpu))
        order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;

    if (tracking_semaphore_uses_mutex(tracking_sem))
        uvm_mutex_init(&tracking_sem->m_lock, order);
    else
        uvm_spin_lock_init(&tracking_sem->s_lock, order);

    atomic64_set(&tracking_sem->completed_value, 0);
    tracking_sem->queued_value = 0;

    return NV_OK;
}

void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}

static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
{
    // Skip when there is no new value, or when the GPU is currently writing
    // the new encrypted material (odd notifier), in which case reading it now
    // would yield corrupted data.
    return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
}

static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
{
    UvmCslIv local_iv;
    NvU32 local_payload;
    NvU32 new_sem_value;
    NvU32 gpu_notifier;
    NvU32 last_observed_notifier;
    NvU32 new_gpu_notifier = 0;
    NvU32 iv_index = 0;

    // A channel can have multiple entries pending and the tracking semaphore
    // update of each entry can race with this function. Since the semaphore
    // needs to be updated to release a used entry, we never need more than
    // 'num_gpfifo_entries' retries.
    unsigned tries_left = channel->num_gpfifo_entries;
    NV_STATUS status = NV_OK;
    NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
    UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
    void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
    NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
    NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);

    UVM_ASSERT(uvm_channel_is_secure_ce(channel));

    last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
    gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
    UVM_ASSERT(last_observed_notifier <= gpu_notifier);

    if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
        return;

    do {
        gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);

        // An odd notifier value means there's an update in progress.
        if (gpu_notifier % 2)
            continue;

        // Make sure no memory accesses happen before we read the notifier
        smp_mb__after_atomic();

        iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
        memcpy(local_auth_tag, auth_tag_cpu_addr, sizeof(local_auth_tag));
        local_payload = UVM_READ_ONCE(*payload_cpu_addr);
        memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));

        // Make sure the second read of the notifier happens after all memory
        // accesses.
        smp_mb__before_atomic();
        new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
        tries_left--;
    } while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));

    if (!tries_left) {
        status = NV_ERR_INVALID_STATE;
        goto error;
    }

    if (gpu_notifier == new_gpu_notifier) {
        status = uvm_conf_computing_cpu_decrypt(channel,
                                                &new_sem_value,
                                                &local_payload,
                                                &local_iv,
                                                sizeof(new_sem_value),
                                                &local_auth_tag);
        if (status != NV_OK)
            goto error;

        uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
        UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
    }

    return;

error:
    // Decryption failure is a fatal error, as is running out of retries. In
    // testing, every decryption succeeded within a single try; anything that
    // needed this many retries would indicate active tampering with the data
    // structures.
    uvm_global_set_fatal_error(status);
}
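// The retry loop above follows a seqlock-like protocol. A condensed sketch of
// the pattern (illustrative only; the names are generic, not from this
// driver):
//
//     do {
//         seq_before = READ_ONCE(*notifier);   // odd means a writer is active
//         if (seq_before % 2)
//             continue;
//         copy_snapshot();                     // payload, IV, auth tag
//         seq_after = READ_ONCE(*notifier);
//     } while (seq_before != seq_after);       // retry if a write intervened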
static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
    // The semaphore value is the bottom 32 bits of completed_value
    NvU32 old_sem_value = (NvU32)old_value;
    NvU32 new_sem_value;
    NvU64 new_value;

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_assert_mutex_locked(&tracking_semaphore->m_lock);
    else
        uvm_assert_spinlock_locked(&tracking_semaphore->s_lock);

    if (tracking_semaphore->semaphore.conf_computing.encrypted_payload) {
        // TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
        // mechanism to all semaphores
        uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
        uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
    }

    new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);

    // The following logic to update the completed value is very subtle; it
    // helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
    // before going through this code.

    if (old_sem_value == new_sem_value) {
        // No progress since the last update.
        // No additional memory barrier required in this case as completed_value
        // is always updated under the lock that this thread just acquired.
        // That guarantees full ordering with all the accesses the thread that
        // updated completed_value did under the lock, including the GPU
        // semaphore read.
        return old_value;
    }

    // Replace the bottom 32 bits with the new semaphore value
    new_value = (old_value & 0xFFFFFFFF00000000ull) | new_sem_value;

    // If we've wrapped around, add 2^32 to the value.
    // Notably the user of the GPU tracking semaphore needs to guarantee that
    // the value is updated often enough to notice the wrap around each time it
    // happens. In the case of a channel tracking semaphore that's released for
    // each push, this is easily guaranteed because of the small number of
    // GPFIFO entries available per channel (there can be at most as many
    // pending pushes as GPFIFO entries).
    if (unlikely(new_sem_value < old_sem_value))
        new_value += 1ULL << 32;

    // Check for unexpectedly large jumps of the semaphore value
    UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
                           "GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
                           tracking_semaphore->semaphore.page->pool->gpu->parent->name,
                           (NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
                           old_value, new_value);

    // Use an atomic write even though the lock is held so that the value can
    // be (carefully) read atomically outside of the lock.
    //
    // atomic64_set() on its own doesn't imply any memory barriers and we need
    // prior memory accesses (in particular the read of the GPU semaphore
    // payload) by this thread to be visible to other threads that see the newly
    // set completed_value. smp_mb__before_atomic() provides that ordering.
    //
    // Also see the comment and matching smp_mb__after_atomic() barrier in
    // uvm_gpu_tracking_semaphore_is_value_completed().
    //
    // Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
    // have been added; they are exactly what we need and could be slightly
    // faster on arm and powerpc than the implementation below. But at least in
    // 4.3 the implementation looks broken for arm32 (it maps directly to
    // smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
    // architectures), so instead of dealing with that just use a slightly
    // bigger hammer.
    smp_mb__before_atomic();
    atomic64_set(&tracking_semaphore->completed_value, new_value);

    // For this thread, we don't want any later accesses to be ordered above the
    // GPU semaphore read. This could be accomplished by using a
    // smp_load_acquire() for reading it, but given that it's also a pretty
    // recent addition to the kernel, just leverage smp_mb__after_atomic(),
    // which guarantees that no accesses will be ordered above the atomic (and
    // hence the GPU semaphore read).
    //
    // Notably the soon-following unlock is a release barrier that allows later
    // memory accesses to be reordered above it and hence doesn't provide the
    // necessary ordering with the GPU semaphore read.
    //
    // Also notably this would still need to be handled if we ever switch to
    // atomic64_set_release() and atomic64_read_acquire() for accessing
    // completed_value.
    smp_mb__after_atomic();

    return new_value;
}
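// Worked example of the wrap-around handling above (illustrative figures):
// suppose completed_value is 0x1fffffff0 (upper 32 bits 0x1, payload
// 0xfffffff0) and the GPU semaphore now reads 0x00000010. Since the new 32-bit
// payload is smaller than the old one, the code adds 2^32:
//
//     new_value = (0x1fffffff0 & 0xFFFFFFFF00000000ull) | 0x10;   // 0x100000010
//     new_value += 1ULL << 32;                                    // 0x200000010
//
// i.e. the 64-bit completed value keeps increasing monotonically across the
// 32-bit payload wrap.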
NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
    NvU64 completed;

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_lock(&tracking_semaphore->m_lock);
    else
        uvm_spin_lock(&tracking_semaphore->s_lock);

    completed = update_completed_value_locked(tracking_semaphore);

    if (tracking_semaphore_uses_mutex(tracking_semaphore))
        uvm_mutex_unlock(&tracking_semaphore->m_lock);
    else
        uvm_spin_unlock(&tracking_semaphore->s_lock);

    return completed;
}
bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
    NvU64 completed = atomic64_read(&tracking_sem->completed_value);

    // Check that the GPU which owns the semaphore is still present
    UVM_ASSERT(tracking_semaphore_check_gpu(tracking_sem));

    if (completed >= value) {
        // atomic64_read() doesn't imply any memory barriers and we need all
        // subsequent memory accesses in this thread to be ordered after the
        // atomic read of the completed value above, as that will also order
        // them with any accesses (in particular the GPU semaphore read)
        // performed by the other thread prior to it setting the
        // completed_value we read. smp_mb__after_atomic() provides that
        // ordering.
        //
        // Also see the comment in update_completed_value_locked().
        smp_mb__after_atomic();

        return true;
    }

    return uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) >= value;
}
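// Illustrative usage sketch (assumption, not part of the driver): a caller
// waiting for a previously queued value would typically poll the tracking
// semaphore along these lines, with scheduling or back-off elided:
//
//     while (!uvm_gpu_tracking_semaphore_is_value_completed(&tracking_sem, value))
//         cpu_relax();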