/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_pushbuffer.h"
#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_lock.h"
#include "uvm_procfs.h"
#include "uvm_push.h"
#include "uvm_kvmalloc.h"
#include "uvm_gpu.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_conf_computing.h"

// Print pushbuffer state into a seq_file if provided or with UVM_DBG_PRINT() if not.
static void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s);

static int nv_procfs_read_pushbuffer_info(struct seq_file *s, void *v)
{
    uvm_pushbuffer_t *pushbuffer = (uvm_pushbuffer_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    uvm_pushbuffer_print_common(pushbuffer, s);

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_pushbuffer_info_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_pushbuffer_info(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(pushbuffer_info_entry);

static NV_STATUS create_procfs(uvm_pushbuffer_t *pushbuffer)
{
    uvm_gpu_t *gpu = pushbuffer->channel_manager->gpu;

    // The pushbuffer info file is for debug only
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    pushbuffer->procfs.info_file = NV_CREATE_PROC_FILE("pushbuffer",
                                                       gpu->procfs.dir,
                                                       pushbuffer_info_entry,
                                                       pushbuffer);
    if (pushbuffer->procfs.info_file == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    return NV_OK;
}

NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_pushbuffer_t **pushbuffer_out)
{
    NV_STATUS status;
    int i;
    uvm_gpu_t *gpu = channel_manager->gpu;
    NvU64 pushbuffer_alignment;

    uvm_pushbuffer_t *pushbuffer = uvm_kvmalloc_zero(sizeof(*pushbuffer));
    if (pushbuffer == NULL)
        return NV_ERR_NO_MEMORY;

    pushbuffer->channel_manager = channel_manager;

    uvm_spin_lock_init(&pushbuffer->lock, UVM_LOCK_ORDER_LEAF);

    // Currently the pushbuffer supports UVM_PUSHBUFFER_CHUNKS concurrent
    // pushes.
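    // Each in-flight push claims exactly one chunk, so the semaphore is
    // initialized with one token per chunk: uvm_pushbuffer_begin_push() takes
    // a token and uvm_pushbuffer_end_push() returns it.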
    uvm_sema_init(&pushbuffer->concurrent_pushes_sema, UVM_PUSHBUFFER_CHUNKS, UVM_LOCK_ORDER_PUSH);

    UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS ||
               channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_VID);

    // The pushbuffer allocation is aligned to UVM_PUSHBUFFER_SIZE and its size
    // (UVM_PUSHBUFFER_SIZE) is a power of 2. These constraints guarantee that
    // the entire pushbuffer belongs to a 1TB (2^40) segment. Thus, we can set
    // the Esched/PBDMA segment base for all channels during their
    // initialization and it is immutable for the entire channels' lifetime.
    BUILD_BUG_ON_NOT_POWER_OF_2(UVM_PUSHBUFFER_SIZE);
    BUILD_BUG_ON(UVM_PUSHBUFFER_SIZE >= (1ull << 40));

    if (gpu->uvm_test_force_upper_pushbuffer_segment)
        pushbuffer_alignment = (1ull << 40);
    else
        pushbuffer_alignment = UVM_PUSHBUFFER_SIZE;

    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
                                          (channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS) ?
                                              UVM_RM_MEM_TYPE_SYS :
                                              UVM_RM_MEM_TYPE_GPU,
                                          UVM_PUSHBUFFER_SIZE,
                                          pushbuffer_alignment,
                                          &pushbuffer->memory);
    if (status != NV_OK)
        goto error;

    if (uvm_conf_computing_mode_enabled(gpu)) {
        UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS);

        // Move the above allocation to unprotected_sysmem
        pushbuffer->memory_unprotected_sysmem = pushbuffer->memory;
        pushbuffer->memory = NULL;

        // Make sure the base can be at least 4KB aligned. Pushes can include
        // inline buffers with specific alignment requirements; a different
        // base alignment between backing memory locations would change that.
        pushbuffer->memory_protected_sysmem = uvm_kvmalloc_zero(UVM_PUSHBUFFER_SIZE + UVM_PAGE_SIZE_4K);
        if (!pushbuffer->memory_protected_sysmem) {
            status = NV_ERR_NO_MEMORY;
            goto error;
        }

        status = uvm_rm_mem_alloc(gpu,
                                  UVM_RM_MEM_TYPE_GPU,
                                  UVM_PUSHBUFFER_SIZE,
                                  pushbuffer_alignment,
                                  &pushbuffer->memory);
        if (status != NV_OK)
            goto error;

        status = uvm_rm_mem_map_gpu(pushbuffer->memory_unprotected_sysmem, gpu, pushbuffer_alignment);
        if (status != NV_OK)
            goto error;
    }

    // Verify the GPU can access the pushbuffer.
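    // The whole range [base, base + UVM_PUSHBUFFER_SIZE) must fall below the
    // parent GPU's maximum Host-addressable VA, which the assert below checks.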
    UVM_ASSERT((uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE - 1) < gpu->parent->max_host_va);

    bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
    bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i)
        INIT_LIST_HEAD(&pushbuffer->chunks[i].pending_gpfifos);

    status = create_procfs(pushbuffer);
    if (status != NV_OK)
        goto error;

    *pushbuffer_out = pushbuffer;

    return status;

error:
    uvm_pushbuffer_destroy(pushbuffer);
    return status;
}

static uvm_pushbuffer_chunk_t *get_chunk_in_mask(uvm_pushbuffer_t *pushbuffer, unsigned long *mask)
{
    NvU32 index = find_first_bit(mask, UVM_PUSHBUFFER_CHUNKS);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (index == UVM_PUSHBUFFER_CHUNKS)
        return NULL;

    return &pushbuffer->chunks[index];
}

static uvm_pushbuffer_chunk_t *get_available_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->available_chunks);
}

static uvm_pushbuffer_chunk_t *get_idle_chunk(uvm_pushbuffer_t *pushbuffer)
{
    return get_chunk_in_mask(pushbuffer, pushbuffer->idle_chunks);
}

static NvU32 chunk_get_index(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 index = chunk - pushbuffer->chunks;
    UVM_ASSERT(index < UVM_PUSHBUFFER_CHUNKS);
    return index;
}

static NvU32 chunk_get_offset(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    return chunk_get_index(pushbuffer, chunk) * UVM_PUSHBUFFER_CHUNK_SIZE;
}

static void set_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __set_bit(index, mask);
}

static void clear_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 index = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    __clear_bit(index, mask);
}

static uvm_pushbuffer_chunk_t *pick_chunk(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_chunk_t *chunk = get_idle_chunk(pushbuffer);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (chunk == NULL)
        chunk = get_available_chunk(pushbuffer);

    return chunk;
}

static bool try_claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    uvm_pushbuffer_chunk_t *chunk;

    uvm_spin_lock(&pushbuffer->lock);

    chunk = pick_chunk(pushbuffer);
    if (!chunk)
        goto done;

    chunk->current_push = push;
    clear_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
    clear_chunk(pushbuffer, chunk, pushbuffer->available_chunks);

done:
    uvm_spin_unlock(&pushbuffer->lock);
    *chunk_out = chunk;

    return chunk != NULL;
}

static char *get_base_cpu_va(uvm_pushbuffer_t *pushbuffer)
{
    // Confidential Computing pushes are assembled in protected sysmem and then
    // either safely (through encrypt/decrypt) moved to protected vidmem, or
    // signed and moved to unprotected sysmem.
    if (uvm_conf_computing_mode_enabled(pushbuffer->channel_manager->gpu)) {
        // Align the protected sysmem base to 4KB. This should be enough to give
        // the same alignment behaviour for inline buffers as the other two
        // backing memory locations.
        return (char *)(UVM_ALIGN_UP((uintptr_t)pushbuffer->memory_protected_sysmem, UVM_PAGE_SIZE_4K));
    }

    return (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory);
}

static NvU32 *chunk_get_next_push_start_addr(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    char *push_start = get_base_cpu_va(pushbuffer);
    push_start += chunk_get_offset(pushbuffer, chunk);
    push_start += chunk->next_push_start;

    UVM_ASSERT(((NvU64)push_start) % sizeof(NvU32) == 0);

    return (NvU32 *)push_start;
}

static NV_STATUS claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    NV_STATUS status = NV_OK;
    uvm_channel_manager_t *channel_manager = pushbuffer->channel_manager;
    uvm_spin_loop_t spin;

    if (try_claim_chunk(pushbuffer, push, chunk_out))
        return NV_OK;

    uvm_channel_manager_update_progress(channel_manager);

    uvm_spin_loop_init(&spin);
    while (!try_claim_chunk(pushbuffer, push, chunk_out) && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_manager_check_errors(channel_manager);
        uvm_channel_manager_update_progress(channel_manager);
    }

    return status;
}

NV_STATUS uvm_pushbuffer_begin_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    uvm_pushbuffer_chunk_t *chunk;
    NV_STATUS status;

    UVM_ASSERT(pushbuffer);
    UVM_ASSERT(push);
    UVM_ASSERT(push->channel);

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC pushes use static PB and don't count against max concurrent
        // pushes.
        push->begin = (void *)UVM_ALIGN_UP((uintptr_t)push->channel->conf_computing.static_pb_protected_sysmem,
                                           UVM_PAGE_SIZE_4K);
        push->next = push->begin;
        return NV_OK;
    }

    // Note that this semaphore is uvm_up()ed in end_push().
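    // Holding it bounds the number of concurrent pushes to
    // UVM_PUSHBUFFER_CHUNKS, so claim_chunk() below should eventually succeed
    // unless a channel error is detected.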
    uvm_down(&pushbuffer->concurrent_pushes_sema);

    status = claim_chunk(pushbuffer, push, &chunk);
    if (status != NV_OK) {
        uvm_up(&pushbuffer->concurrent_pushes_sema);
        return status;
    }

    UVM_ASSERT(chunk);

    push->begin = chunk_get_next_push_start_addr(pushbuffer, chunk);
    push->next = push->begin;

    return NV_OK;
}

static uvm_gpfifo_entry_t *chunk_get_first_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_first_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

static uvm_gpfifo_entry_t *chunk_get_last_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    return list_last_entry_or_null(&chunk->pending_gpfifos, uvm_gpfifo_entry_t, pending_list_node);
}

// Get the CPU put within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE])
static NvU32 chunk_get_cpu_put(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_last_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

// Get the GPU get within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE))
static NvU32 chunk_get_gpu_get(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_gpfifo_entry_t *gpfifo = chunk_get_first_gpfifo(chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpfifo != NULL)
        return gpfifo->pushbuffer_offset - chunk_get_offset(pushbuffer, chunk);
    else
        return 0;
}

static void update_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
    NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpu_get == cpu_put) {
        // cpu_put can be equal to gpu_get both when the chunk is full and when
        // it is empty. We can tell the two cases apart by checking whether the
        // pending GPFIFOs list is empty.
        if (!list_empty(&chunk->pending_gpfifos))
            return;

        // Chunk completely idle
        set_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        UVM_ASSERT_MSG(cpu_put == 0, "cpu put %u\n", cpu_put);

        // For a completely idle chunk, always start at the very beginning. This
        // helps avoid the waste that can happen at the very end of the chunk
        // described at the top of uvm_pushbuffer.h.
        chunk->next_push_start = 0;
    }
    else if (gpu_get > cpu_put) {
        if (gpu_get - cpu_put >= UVM_MAX_PUSH_SIZE) {
            // Enough space between put and get
            set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
            chunk->next_push_start = cpu_put;
        }
    }
    else if (UVM_PUSHBUFFER_CHUNK_SIZE >= cpu_put + UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the end
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = cpu_put;
    }
    else if (gpu_get >= UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the beginning
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = 0;
    }
}

void uvm_pushbuffer_destroy(uvm_pushbuffer_t *pushbuffer)
{
    if (pushbuffer == NULL)
        return;

    proc_remove(pushbuffer->procfs.info_file);

    uvm_rm_mem_free(pushbuffer->memory_unprotected_sysmem);
    uvm_kvfree(pushbuffer->memory_protected_sysmem);
    uvm_rm_mem_free(pushbuffer->memory);
    uvm_kvfree(pushbuffer);
}

static uvm_pushbuffer_chunk_t *offset_to_chunk(uvm_pushbuffer_t *pushbuffer, NvU32 offset)
{
    UVM_ASSERT(offset < UVM_PUSHBUFFER_SIZE);
    return &pushbuffer->chunks[offset / UVM_PUSHBUFFER_CHUNK_SIZE];
}

static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk = offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset);
    UVM_ASSERT(offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - 1) == chunk);
    return chunk;
}

static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    NV_STATUS status;
    NvU32 auth_tag_offset;
    void *auth_tag_cpu_va;
    void *push_protected_cpu_va;
    void *push_unprotected_cpu_va;
    NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
    NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
    uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;

    if (channel->conf_computing.push_crypto_bundles == NULL)
        return;

    // When the crypto bundle is used, the push size cannot be zero
    if (crypto_bundle->push_size == 0)
        return;

    UVM_ASSERT(!uvm_channel_is_wlc(channel));
    UVM_ASSERT(!uvm_channel_is_lcic(channel));

    push_protected_cpu_va = (char *)get_base_cpu_va(pushbuffer) + pushbuffer_offset;
    push_unprotected_cpu_va = (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem) + pushbuffer_offset;
    auth_tag_offset = push_info_index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
    auth_tag_cpu_va = (char *)uvm_rm_mem_get_cpu_va(channel->conf_computing.push_crypto_bundle_auth_tags) +
                      auth_tag_offset;

    status = uvm_conf_computing_cpu_decrypt(channel,
                                            push_protected_cpu_va,
                                            push_unprotected_cpu_va,
                                            &crypto_bundle->iv,
                                            crypto_bundle->push_size,
                                            auth_tag_cpu_va);

    // A decryption failure here is not fatal because it does not
    // prevent UVM from running fine in the future and cannot be used
    // maliciously to leak information or otherwise derail UVM from its
    // regular duties.
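    // The failure is still surfaced through the assert below rather than
    // being silently ignored.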
    UVM_ASSERT_MSG_RELEASE(status == NV_OK, "Pushbuffer decryption failure: %s\n", nvstatusToString(status));

    // Avoid reusing the bundle across multiple pushes
    crypto_bundle->push_size = 0;
}

void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;
    bool need_to_update_chunk = false;
    uvm_push_info_t *push_info = gpfifo->push_info;
    uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);

    UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    if (push_info->on_complete != NULL) {
        decrypt_push(channel, gpfifo);
        push_info->on_complete(push_info->on_complete_data);
        push_info->on_complete = NULL;
        push_info->on_complete_data = NULL;
    }

    uvm_spin_lock(&pushbuffer->lock);

    if (gpfifo == chunk_get_first_gpfifo(chunk))
        need_to_update_chunk = true;
    else if (gpfifo == chunk_get_last_gpfifo(chunk))
        need_to_update_chunk = true;

    list_del(&gpfifo->pending_list_node);

    // If current_push is not NULL, updating the chunk is delayed until
    // uvm_pushbuffer_end_push() is called for that push.
    if (need_to_update_chunk && chunk->current_push == NULL)
        update_chunk(pushbuffer, chunk);

    uvm_spin_unlock(&pushbuffer->lock);
}

NvU32 uvm_pushbuffer_get_offset_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU32 offset;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a private static PB and their gpfifo entries are
        // not added to any chunk's list, so this only needs to return a legal
        // offset. Completion cleanup will not find WLC gpfifo entries as
        // either the first or last entry of any chunk.
        return 0;
    }

    offset = (char *)push->begin - get_base_cpu_va(pushbuffer);

    UVM_ASSERT(((NvU64)offset) % sizeof(NvU32) == 0);

    return offset;
}

NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    bool is_proxy_channel = uvm_channel_is_proxy(push->channel);

    pushbuffer_base = uvm_rm_mem_get_gpu_va(pushbuffer->memory, gpu, is_proxy_channel).address;

    if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel)) {
        // We need to use the same static locations for the PB as the fixed
        // schedule because that's what the channels are initialized to use.
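        // The per-push offset is therefore ignored here; the returned VA is
        // always the channel's static protected vidmem buffer.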
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_protected_vidmem, gpu);
    }
    else if (uvm_channel_is_sec2(push->channel)) {
        // SEC2 PBs are in unprotected sysmem
        pushbuffer_base = uvm_pushbuffer_get_sec2_gpu_va_base(pushbuffer);
    }

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    char *pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static PB for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return push->channel->conf_computing.static_pb_unprotected_sysmem_cpu;
    }

    pushbuffer_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 pushbuffer_base;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse the existing WLC static PB for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_unprotected_sysmem,
                                         uvm_push_get_gpu(push));
    }

    pushbuffer_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));

    return pushbuffer_base + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}

void uvm_pushbuffer_end_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a static pushbuffer and don't count towards max
        // concurrent pushes. Initializing the list node makes sure the
        // deletion in uvm_pushbuffer_mark_completed() doesn't crash.
        INIT_LIST_HEAD(&gpfifo->pending_list_node);
        return;
    }

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    uvm_channel_pool_assert_locked(push->channel->pool);

    uvm_spin_lock(&pushbuffer->lock);

    list_add_tail(&gpfifo->pending_list_node, &chunk->pending_gpfifos);

    update_chunk(pushbuffer, chunk);

    UVM_ASSERT(chunk->current_push == push);
    chunk->current_push = NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    // uvm_pushbuffer_end_push() needs to be called with the channel lock held
    // while the concurrent pushes sema has a higher lock order. To keep the
    // code structure simple, just up out of order here.
    uvm_up_out_of_order(&pushbuffer->concurrent_pushes_sema);
}

bool uvm_pushbuffer_has_space(uvm_pushbuffer_t *pushbuffer)
{
    bool has_space;

    uvm_spin_lock(&pushbuffer->lock);

    has_space = pick_chunk(pushbuffer) != NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    return has_space;
}

void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s)
{
    NvU32 i;

    UVM_SEQ_OR_DBG_PRINT(s, "Pushbuffer for GPU %s\n", uvm_gpu_name(pushbuffer->channel_manager->gpu));
    UVM_SEQ_OR_DBG_PRINT(s, " has space: %d\n", uvm_pushbuffer_has_space(pushbuffer));

    uvm_spin_lock(&pushbuffer->lock);

    for (i = 0; i < UVM_PUSHBUFFER_CHUNKS; ++i) {
        uvm_pushbuffer_chunk_t *chunk = &pushbuffer->chunks[i];
        NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);
        NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
        UVM_SEQ_OR_DBG_PRINT(s, " chunk %u put %u get %u next %u available %d idle %d\n",
                             i,
                             cpu_put, gpu_get, chunk->next_push_start,
                             test_bit(i, pushbuffer->available_chunks) ? 1 : 0,
                             test_bit(i, pushbuffer->idle_chunks) ? 1 : 0);
    }

    uvm_spin_unlock(&pushbuffer->lock);
}

void uvm_pushbuffer_print(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_print_common(pushbuffer, NULL);
}

NvU64 uvm_pushbuffer_get_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory, pushbuffer->channel_manager->gpu);
}

NvU64 uvm_pushbuffer_get_sec2_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    UVM_ASSERT(uvm_conf_computing_mode_enabled(pushbuffer->channel_manager->gpu));

    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, pushbuffer->channel_manager->gpu);
}