1 /******************************************************************************* 2 Copyright (c) 2016-2023 NVIDIA Corporation 3 4 Permission is hereby granted, free of charge, to any person obtaining a copy 5 of this software and associated documentation files (the "Software"), to 6 deal in the Software without restriction, including without limitation the 7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 sell copies of the Software, and to permit persons to whom the Software is 9 furnished to do so, subject to the following conditions: 10 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 DEALINGS IN THE SOFTWARE. 21 22 *******************************************************************************/ 23 #include "uvm_common.h" 24 #include "uvm_ioctl.h" 25 #include "uvm_global.h" 26 #include "uvm_gpu.h" 27 #include "uvm_hal.h" 28 #include "uvm_tools.h" 29 #include "uvm_va_space.h" 30 #include "uvm_api.h" 31 #include "uvm_hal_types.h" 32 #include "uvm_va_block.h" 33 #include "uvm_va_range.h" 34 #include "uvm_push.h" 35 #include "uvm_forward_decl.h" 36 #include "uvm_range_group.h" 37 #include "uvm_mem.h" 38 #include "nv_speculation_barrier.h" 39 40 // We limit the number of times a page can be retained by the kernel 41 // to prevent the user from maliciously passing UVM tools the same page 42 // over and over again in an attempt to overflow the refcount. 43 #define MAX_PAGE_COUNT (1 << 20) 44 45 typedef struct 46 { 47 NvU32 get_ahead; 48 NvU32 get_behind; 49 NvU32 put_ahead; 50 NvU32 put_behind; 51 } uvm_tools_queue_snapshot_t; 52 53 typedef struct 54 { 55 uvm_spinlock_t lock; 56 NvU64 subscribed_queues; 57 struct list_head queue_nodes[UvmEventNumTypesAll]; 58 59 struct page **queue_buffer_pages; 60 UvmEventEntry *queue; 61 NvU32 queue_buffer_count; 62 NvU32 notification_threshold; 63 64 struct page **control_buffer_pages; 65 UvmToolsEventControlData *control; 66 67 wait_queue_head_t wait_queue; 68 bool is_wakeup_get_valid; 69 NvU32 wakeup_get; 70 } uvm_tools_queue_t; 71 72 typedef struct 73 { 74 struct list_head counter_nodes[UVM_TOTAL_COUNTERS]; 75 NvU64 subscribed_counters; 76 77 struct page **counter_buffer_pages; 78 NvU64 *counters; 79 80 bool all_processors; 81 NvProcessorUuid processor; 82 } uvm_tools_counter_t; 83 84 // private_data for /dev/nvidia-uvm-tools 85 typedef struct 86 { 87 bool is_queue; 88 struct file *uvm_file; 89 union 90 { 91 uvm_tools_queue_t queue; 92 uvm_tools_counter_t counter; 93 }; 94 } uvm_tools_event_tracker_t; 95 96 // Delayed events 97 // 98 // Events that require gpu timestamps for asynchronous operations use a delayed 99 // notification mechanism. Each event type registers a callback that is invoked 100 // from the update_progress channel routines. The callback then enqueues a 101 // work item that takes care of notifying the events. This module keeps a 102 // global list of channels with pending events. Other modules or user apps (via 103 // ioctl) may call uvm_tools_flush_events to update the progress of the channels 104 // in the list, as needed. 105 // 106 // User apps will need to flush events before removing gpus to avoid getting 107 // events with gpus ids that have been removed. 108 109 // This object describes the pending migrations operations within a VA block 110 typedef struct 111 { 112 nv_kthread_q_item_t queue_item; 113 uvm_processor_id_t dst; 114 uvm_processor_id_t src; 115 uvm_va_space_t *va_space; 116 117 uvm_channel_t *channel; 118 struct list_head events; 119 NvU64 start_timestamp_cpu; 120 NvU64 end_timestamp_cpu; 121 NvU64 *start_timestamp_gpu_addr; 122 NvU64 start_timestamp_gpu; 123 NvU64 range_group_id; 124 } block_migration_data_t; 125 126 // This object represents a specific pending migration within a VA block 127 typedef struct 128 { 129 struct list_head events_node; 130 NvU64 bytes; 131 NvU64 address; 132 NvU64 *end_timestamp_gpu_addr; 133 NvU64 end_timestamp_gpu; 134 UvmEventMigrationCause cause; 135 } migration_data_t; 136 137 // This object represents a pending gpu faut replay operation 138 typedef struct 139 { 140 nv_kthread_q_item_t queue_item; 141 uvm_channel_t *channel; 142 uvm_gpu_id_t gpu_id; 143 NvU32 batch_id; 144 uvm_fault_client_type_t client_type; 145 NvU64 timestamp; 146 NvU64 timestamp_gpu; 147 NvU64 *timestamp_gpu_addr; 148 } replay_data_t; 149 150 // This object describes the pending map remote operations within a VA block 151 typedef struct 152 { 153 nv_kthread_q_item_t queue_item; 154 uvm_processor_id_t src; 155 uvm_processor_id_t dst; 156 UvmEventMapRemoteCause cause; 157 NvU64 timestamp; 158 uvm_va_space_t *va_space; 159 160 uvm_channel_t *channel; 161 struct list_head events; 162 } block_map_remote_data_t; 163 164 // This object represents a pending map remote operation 165 typedef struct 166 { 167 struct list_head events_node; 168 169 NvU64 address; 170 NvU64 size; 171 NvU64 timestamp_gpu; 172 NvU64 *timestamp_gpu_addr; 173 } map_remote_data_t; 174 175 176 static struct cdev g_uvm_tools_cdev; 177 static LIST_HEAD(g_tools_va_space_list); 178 static NvU32 g_tools_enabled_event_count[UvmEventNumTypesAll]; 179 static uvm_rw_semaphore_t g_tools_va_space_list_lock; 180 static struct kmem_cache *g_tools_event_tracker_cache __read_mostly = NULL; 181 static struct kmem_cache *g_tools_block_migration_data_cache __read_mostly = NULL; 182 static struct kmem_cache *g_tools_migration_data_cache __read_mostly = NULL; 183 static struct kmem_cache *g_tools_replay_data_cache __read_mostly = NULL; 184 static struct kmem_cache *g_tools_block_map_remote_data_cache __read_mostly = NULL; 185 static struct kmem_cache *g_tools_map_remote_data_cache __read_mostly = NULL; 186 static uvm_spinlock_t g_tools_channel_list_lock; 187 static LIST_HEAD(g_tools_channel_list); 188 static nv_kthread_q_t g_tools_queue; 189 190 static NV_STATUS tools_update_status(uvm_va_space_t *va_space); 191 192 static uvm_tools_event_tracker_t *tools_event_tracker(struct file *filp) 193 { 194 return (uvm_tools_event_tracker_t *)atomic_long_read((atomic_long_t *)&filp->private_data); 195 } 196 197 static bool tracker_is_queue(uvm_tools_event_tracker_t *event_tracker) 198 { 199 return event_tracker != NULL && event_tracker->is_queue; 200 } 201 202 static bool tracker_is_counter(uvm_tools_event_tracker_t *event_tracker) 203 { 204 return event_tracker != NULL && !event_tracker->is_queue; 205 } 206 207 static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *event_tracker) 208 { 209 uvm_va_space_t *va_space; 210 UVM_ASSERT(event_tracker->uvm_file); 211 va_space = uvm_va_space_get(event_tracker->uvm_file); 212 return va_space; 213 } 214 215 static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count) 216 { 217 NvU64 i; 218 219 for (i = 0; i < page_count; i++) { 220 set_page_dirty(pages[i]); 221 NV_UNPIN_USER_PAGE(pages[i]); 222 } 223 } 224 225 static void unmap_user_pages(struct page **pages, void *addr, NvU64 size) 226 { 227 size = DIV_ROUND_UP(size, PAGE_SIZE); 228 vunmap((NvU8 *)addr); 229 uvm_put_user_pages_dirty(pages, size); 230 uvm_kvfree(pages); 231 } 232 233 // This must be called with the mmap_lock held in read mode or better. 234 static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size) 235 { 236 struct vm_area_struct *vma; 237 NvU64 addr = start_va; 238 NvU64 region_end = start_va + size; 239 240 do { 241 vma = find_vma(mm, addr); 242 if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file)) 243 return NV_ERR_INVALID_ARGUMENT; 244 245 addr = vma->vm_end; 246 } while (addr < region_end); 247 248 return NV_OK; 249 } 250 251 // Map virtual memory of data from [user_va, user_va + size) of current process into kernel. 252 // Sets *addr to kernel mapping and *pages to the array of struct pages that contain the memory. 253 static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages) 254 { 255 NV_STATUS status = NV_OK; 256 long ret = 0; 257 long num_pages; 258 long i; 259 260 *addr = NULL; 261 *pages = NULL; 262 num_pages = DIV_ROUND_UP(size, PAGE_SIZE); 263 264 if (uvm_api_range_invalid(user_va, num_pages * PAGE_SIZE)) { 265 status = NV_ERR_INVALID_ADDRESS; 266 goto fail; 267 } 268 269 *pages = uvm_kvmalloc(sizeof(struct page *) * num_pages); 270 if (*pages == NULL) { 271 status = NV_ERR_NO_MEMORY; 272 goto fail; 273 } 274 275 // Although uvm_down_read_mmap_lock() is preferable due to its participation 276 // in the UVM lock dependency tracker, it cannot be used here. That's 277 // because pin_user_pages() may fault in HMM pages which are GPU-resident. 278 // When that happens, the UVM page fault handler would record another 279 // mmap_read_lock() on the same thread as this one, leading to a false 280 // positive lock dependency report. 281 // 282 // Therefore, use the lower level nv_mmap_read_lock() here. 283 nv_mmap_read_lock(current->mm); 284 status = check_vmas(current->mm, user_va, size); 285 if (status != NV_OK) { 286 nv_mmap_read_unlock(current->mm); 287 goto fail; 288 } 289 ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages); 290 nv_mmap_read_unlock(current->mm); 291 292 if (ret != num_pages) { 293 status = NV_ERR_INVALID_ARGUMENT; 294 goto fail; 295 } 296 297 for (i = 0; i < num_pages; i++) { 298 if (page_count((*pages)[i]) > MAX_PAGE_COUNT) { 299 status = NV_ERR_INVALID_ARGUMENT; 300 goto fail; 301 } 302 } 303 304 *addr = vmap(*pages, num_pages, VM_MAP, PAGE_KERNEL); 305 if (*addr == NULL) 306 goto fail; 307 308 return NV_OK; 309 310 fail: 311 if (*pages == NULL) 312 return status; 313 314 if (ret > 0) 315 uvm_put_user_pages_dirty(*pages, ret); 316 else if (ret < 0) 317 status = errno_to_nv_status(ret); 318 319 uvm_kvfree(*pages); 320 *pages = NULL; 321 return status; 322 } 323 324 static void insert_event_tracker(uvm_va_space_t *va_space, 325 struct list_head *node, 326 NvU32 list_count, 327 NvU64 list_mask, 328 NvU64 *subscribed_mask, 329 struct list_head *lists, 330 NvU64 *inserted_lists) 331 { 332 NvU32 i; 333 NvU64 insertable_lists = list_mask & ~*subscribed_mask; 334 335 uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock); 336 uvm_assert_rwsem_locked_write(&va_space->tools.lock); 337 338 for (i = 0; i < list_count; i++) { 339 if (insertable_lists & (1ULL << i)) { 340 ++g_tools_enabled_event_count[i]; 341 list_add(node + i, lists + i); 342 } 343 } 344 345 *subscribed_mask |= list_mask; 346 *inserted_lists = insertable_lists; 347 } 348 349 static void remove_event_tracker(uvm_va_space_t *va_space, 350 struct list_head *node, 351 NvU32 list_count, 352 NvU64 list_mask, 353 NvU64 *subscribed_mask) 354 { 355 NvU32 i; 356 NvU64 removable_lists = list_mask & *subscribed_mask; 357 358 uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock); 359 uvm_assert_rwsem_locked_write(&va_space->tools.lock); 360 361 for (i = 0; i < list_count; i++) { 362 if (removable_lists & (1ULL << i)) { 363 UVM_ASSERT(g_tools_enabled_event_count[i] > 0); 364 --g_tools_enabled_event_count[i]; 365 list_del(node + i); 366 } 367 } 368 369 *subscribed_mask &= ~list_mask; 370 } 371 372 static bool queue_needs_wakeup(uvm_tools_queue_t *queue, uvm_tools_queue_snapshot_t *sn) 373 { 374 NvU32 queue_mask = queue->queue_buffer_count - 1; 375 376 uvm_assert_spinlock_locked(&queue->lock); 377 return ((queue->queue_buffer_count + sn->put_behind - sn->get_ahead) & queue_mask) >= queue->notification_threshold; 378 } 379 380 static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker) 381 { 382 if (event_tracker->uvm_file != NULL) { 383 NV_STATUS status; 384 uvm_va_space_t *va_space = tools_event_tracker_va_space(event_tracker); 385 386 uvm_down_write(&g_tools_va_space_list_lock); 387 uvm_down_write(&va_space->perf_events.lock); 388 uvm_down_write(&va_space->tools.lock); 389 390 if (event_tracker->is_queue) { 391 uvm_tools_queue_t *queue = &event_tracker->queue; 392 393 remove_event_tracker(va_space, 394 queue->queue_nodes, 395 UvmEventNumTypesAll, 396 queue->subscribed_queues, 397 &queue->subscribed_queues); 398 399 if (queue->queue != NULL) { 400 unmap_user_pages(queue->queue_buffer_pages, 401 queue->queue, 402 queue->queue_buffer_count * sizeof(UvmEventEntry)); 403 } 404 405 if (queue->control != NULL) { 406 unmap_user_pages(queue->control_buffer_pages, 407 queue->control, 408 sizeof(UvmToolsEventControlData)); 409 } 410 } 411 else { 412 uvm_tools_counter_t *counters = &event_tracker->counter; 413 414 remove_event_tracker(va_space, 415 counters->counter_nodes, 416 UVM_TOTAL_COUNTERS, 417 counters->subscribed_counters, 418 &counters->subscribed_counters); 419 420 if (counters->counters != NULL) { 421 unmap_user_pages(counters->counter_buffer_pages, 422 counters->counters, 423 UVM_TOTAL_COUNTERS * sizeof(NvU64)); 424 } 425 } 426 427 // de-registration should not fail 428 status = tools_update_status(va_space); 429 UVM_ASSERT(status == NV_OK); 430 431 uvm_up_write(&va_space->tools.lock); 432 uvm_up_write(&va_space->perf_events.lock); 433 uvm_up_write(&g_tools_va_space_list_lock); 434 435 fput(event_tracker->uvm_file); 436 } 437 kmem_cache_free(g_tools_event_tracker_cache, event_tracker); 438 } 439 440 static void enqueue_event(const UvmEventEntry *entry, uvm_tools_queue_t *queue) 441 { 442 UvmToolsEventControlData *ctrl = queue->control; 443 uvm_tools_queue_snapshot_t sn; 444 NvU32 queue_size = queue->queue_buffer_count; 445 NvU32 queue_mask = queue_size - 1; 446 447 // Prevent processor speculation prior to accessing user-mapped memory to 448 // avoid leaking information from side-channel attacks. There are many 449 // possible paths leading to this point and it would be difficult and error- 450 // prone to audit all of them to determine whether user mode could guide 451 // this access to kernel memory under speculative execution, so to be on the 452 // safe side we'll just always block speculation. 453 nv_speculation_barrier(); 454 455 uvm_spin_lock(&queue->lock); 456 457 // ctrl is mapped into user space with read and write permissions, 458 // so its values cannot be trusted. 459 sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask; 460 sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask; 461 sn.put_ahead = (sn.put_behind + 1) & queue_mask; 462 463 // one free element means that the queue is full 464 if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) { 465 atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType); 466 goto unlock; 467 } 468 469 memcpy(queue->queue + sn.put_behind, entry, sizeof(*entry)); 470 471 sn.put_behind = sn.put_ahead; 472 // put_ahead and put_behind will always be the same outside of queue->lock 473 // this allows the user-space consumer to choose either a 2 or 4 pointer synchronization approach 474 atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind); 475 atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind); 476 477 sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead); 478 // if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead 479 if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) { 480 queue->is_wakeup_get_valid = true; 481 queue->wakeup_get = sn.get_ahead; 482 wake_up_all(&queue->wait_queue); 483 } 484 485 unlock: 486 uvm_spin_unlock(&queue->lock); 487 } 488 489 static void uvm_tools_record_event(uvm_va_space_t *va_space, const UvmEventEntry *entry) 490 { 491 NvU8 eventType = entry->eventData.eventType; 492 uvm_tools_queue_t *queue; 493 494 UVM_ASSERT(eventType < UvmEventNumTypesAll); 495 496 uvm_assert_rwsem_locked(&va_space->tools.lock); 497 498 list_for_each_entry(queue, va_space->tools.queues + eventType, queue_nodes[eventType]) 499 enqueue_event(entry, queue); 500 } 501 502 static void uvm_tools_broadcast_event(const UvmEventEntry *entry) 503 { 504 uvm_va_space_t *va_space; 505 506 uvm_down_read(&g_tools_va_space_list_lock); 507 list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) { 508 uvm_down_read(&va_space->tools.lock); 509 uvm_tools_record_event(va_space, entry); 510 uvm_up_read(&va_space->tools.lock); 511 } 512 uvm_up_read(&g_tools_va_space_list_lock); 513 } 514 515 static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor) 516 { 517 // For compatibility with older counters, CPU faults for memory with a preferred location are reported 518 // for their preferred location as well as for the CPU device itself. 519 // This check prevents double counting in the aggregate count. 520 if (counter == UvmCounterNameCpuPageFaultCount) 521 return uvm_uuid_eq(processor, &NV_PROCESSOR_UUID_CPU_DEFAULT); 522 return true; 523 } 524 525 static void uvm_tools_inc_counter(uvm_va_space_t *va_space, 526 UvmCounterName counter, 527 NvU64 amount, 528 const NvProcessorUuid *processor) 529 { 530 UVM_ASSERT((NvU32)counter < UVM_TOTAL_COUNTERS); 531 uvm_assert_rwsem_locked(&va_space->tools.lock); 532 533 if (amount > 0) { 534 uvm_tools_counter_t *counters; 535 536 // Prevent processor speculation prior to accessing user-mapped memory 537 // to avoid leaking information from side-channel attacks. There are 538 // many possible paths leading to this point and it would be difficult 539 // and error-prone to audit all of them to determine whether user mode 540 // could guide this access to kernel memory under speculative execution, 541 // so to be on the safe side we'll just always block speculation. 542 nv_speculation_barrier(); 543 544 list_for_each_entry(counters, va_space->tools.counters + counter, counter_nodes[counter]) { 545 if ((counters->all_processors && counter_matches_processor(counter, processor)) || 546 uvm_uuid_eq(&counters->processor, processor)) { 547 atomic64_add(amount, (atomic64_t *)(counters->counters + counter)); 548 } 549 } 550 } 551 } 552 553 static bool tools_is_counter_enabled(uvm_va_space_t *va_space, UvmCounterName counter) 554 { 555 uvm_assert_rwsem_locked(&va_space->tools.lock); 556 557 UVM_ASSERT(counter < UVM_TOTAL_COUNTERS); 558 return !list_empty(va_space->tools.counters + counter); 559 } 560 561 static bool tools_is_event_enabled(uvm_va_space_t *va_space, UvmEventType event) 562 { 563 uvm_assert_rwsem_locked(&va_space->tools.lock); 564 565 UVM_ASSERT(event < UvmEventNumTypesAll); 566 return !list_empty(va_space->tools.queues + event); 567 } 568 569 static bool tools_is_event_enabled_in_any_va_space(UvmEventType event) 570 { 571 bool ret = false; 572 573 uvm_down_read(&g_tools_va_space_list_lock); 574 ret = g_tools_enabled_event_count[event] != 0; 575 uvm_up_read(&g_tools_va_space_list_lock); 576 577 return ret; 578 } 579 580 static bool tools_are_enabled(uvm_va_space_t *va_space) 581 { 582 NvU32 i; 583 584 uvm_assert_rwsem_locked(&va_space->tools.lock); 585 586 for (i = 0; i < UVM_TOTAL_COUNTERS; i++) { 587 if (tools_is_counter_enabled(va_space, i)) 588 return true; 589 } 590 for (i = 0; i < UvmEventNumTypesAll; i++) { 591 if (tools_is_event_enabled(va_space, i)) 592 return true; 593 } 594 return false; 595 } 596 597 static bool tools_is_fault_callback_needed(uvm_va_space_t *va_space) 598 { 599 return tools_is_event_enabled(va_space, UvmEventTypeCpuFault) || 600 tools_is_event_enabled(va_space, UvmEventTypeGpuFault) || 601 tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount) || 602 tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount); 603 } 604 605 static bool tools_is_migration_callback_needed(uvm_va_space_t *va_space) 606 { 607 return tools_is_event_enabled(va_space, UvmEventTypeMigration) || 608 tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate) || 609 tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH) || 610 tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD); 611 } 612 613 static int uvm_tools_open(struct inode *inode, struct file *filp) 614 { 615 filp->private_data = NULL; 616 return -nv_status_to_errno(uvm_global_get_status()); 617 } 618 619 static int uvm_tools_open_entry(struct inode *inode, struct file *filp) 620 { 621 UVM_ENTRY_RET(uvm_tools_open(inode, filp)); 622 } 623 624 static int uvm_tools_release(struct inode *inode, struct file *filp) 625 { 626 uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp); 627 if (event_tracker != NULL) { 628 destroy_event_tracker(event_tracker); 629 filp->private_data = NULL; 630 } 631 return -nv_status_to_errno(uvm_global_get_status()); 632 } 633 634 static int uvm_tools_release_entry(struct inode *inode, struct file *filp) 635 { 636 UVM_ENTRY_RET(uvm_tools_release(inode, filp)); 637 } 638 639 static long uvm_tools_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 640 { 641 switch (cmd) { 642 UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_INIT_EVENT_TRACKER, uvm_api_tools_init_event_tracker); 643 UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD, uvm_api_tools_set_notification_threshold); 644 UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS, uvm_api_tools_event_queue_enable_events); 645 UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS, uvm_api_tools_event_queue_disable_events); 646 UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_ENABLE_COUNTERS, uvm_api_tools_enable_counters); 647 UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_DISABLE_COUNTERS, uvm_api_tools_disable_counters); 648 } 649 650 uvm_thread_assert_all_unlocked(); 651 652 return -EINVAL; 653 } 654 655 static long uvm_tools_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg) 656 { 657 UVM_ENTRY_RET(uvm_tools_unlocked_ioctl(filp, cmd, arg)); 658 } 659 660 static unsigned uvm_tools_poll(struct file *filp, poll_table *wait) 661 { 662 int flags = 0; 663 uvm_tools_queue_snapshot_t sn; 664 uvm_tools_event_tracker_t *event_tracker; 665 UvmToolsEventControlData *ctrl; 666 667 if (uvm_global_get_status() != NV_OK) 668 return POLLERR; 669 670 event_tracker = tools_event_tracker(filp); 671 if (!tracker_is_queue(event_tracker)) 672 return POLLERR; 673 674 uvm_spin_lock(&event_tracker->queue.lock); 675 676 event_tracker->queue.is_wakeup_get_valid = false; 677 ctrl = event_tracker->queue.control; 678 sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead); 679 sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind); 680 681 if (queue_needs_wakeup(&event_tracker->queue, &sn)) 682 flags = POLLIN | POLLRDNORM; 683 684 uvm_spin_unlock(&event_tracker->queue.lock); 685 686 poll_wait(filp, &event_tracker->queue.wait_queue, wait); 687 return flags; 688 } 689 690 static unsigned uvm_tools_poll_entry(struct file *filp, poll_table *wait) 691 { 692 UVM_ENTRY_RET(uvm_tools_poll(filp, wait)); 693 } 694 695 static UvmEventFaultType g_hal_to_tools_fault_type_table[UVM_FAULT_TYPE_COUNT] = { 696 [UVM_FAULT_TYPE_INVALID_PDE] = UvmFaultTypeInvalidPde, 697 [UVM_FAULT_TYPE_INVALID_PTE] = UvmFaultTypeInvalidPte, 698 [UVM_FAULT_TYPE_ATOMIC] = UvmFaultTypeAtomic, 699 [UVM_FAULT_TYPE_WRITE] = UvmFaultTypeWrite, 700 [UVM_FAULT_TYPE_PDE_SIZE] = UvmFaultTypeInvalidPdeSize, 701 [UVM_FAULT_TYPE_VA_LIMIT_VIOLATION] = UvmFaultTypeLimitViolation, 702 [UVM_FAULT_TYPE_UNBOUND_INST_BLOCK] = UvmFaultTypeUnboundInstBlock, 703 [UVM_FAULT_TYPE_PRIV_VIOLATION] = UvmFaultTypePrivViolation, 704 [UVM_FAULT_TYPE_PITCH_MASK_VIOLATION] = UvmFaultTypePitchMaskViolation, 705 [UVM_FAULT_TYPE_WORK_CREATION] = UvmFaultTypeWorkCreation, 706 [UVM_FAULT_TYPE_UNSUPPORTED_APERTURE] = UvmFaultTypeUnsupportedAperture, 707 [UVM_FAULT_TYPE_COMPRESSION_FAILURE] = UvmFaultTypeCompressionFailure, 708 [UVM_FAULT_TYPE_UNSUPPORTED_KIND] = UvmFaultTypeUnsupportedKind, 709 [UVM_FAULT_TYPE_REGION_VIOLATION] = UvmFaultTypeRegionViolation, 710 [UVM_FAULT_TYPE_POISONED] = UvmFaultTypePoison, 711 }; 712 713 // TODO: add new value for weak atomics in tools 714 static UvmEventMemoryAccessType g_hal_to_tools_fault_access_type_table[UVM_FAULT_ACCESS_TYPE_COUNT] = { 715 [UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG] = UvmEventMemoryAccessTypeAtomic, 716 [UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK] = UvmEventMemoryAccessTypeAtomic, 717 [UVM_FAULT_ACCESS_TYPE_WRITE] = UvmEventMemoryAccessTypeWrite, 718 [UVM_FAULT_ACCESS_TYPE_READ] = UvmEventMemoryAccessTypeRead, 719 [UVM_FAULT_ACCESS_TYPE_PREFETCH] = UvmEventMemoryAccessTypePrefetch 720 }; 721 722 static UvmEventApertureType g_hal_to_tools_aperture_table[UVM_APERTURE_MAX] = { 723 [UVM_APERTURE_PEER_0] = UvmEventAperturePeer0, 724 [UVM_APERTURE_PEER_1] = UvmEventAperturePeer1, 725 [UVM_APERTURE_PEER_2] = UvmEventAperturePeer2, 726 [UVM_APERTURE_PEER_3] = UvmEventAperturePeer3, 727 [UVM_APERTURE_PEER_4] = UvmEventAperturePeer4, 728 [UVM_APERTURE_PEER_5] = UvmEventAperturePeer5, 729 [UVM_APERTURE_PEER_6] = UvmEventAperturePeer6, 730 [UVM_APERTURE_PEER_7] = UvmEventAperturePeer7, 731 [UVM_APERTURE_SYS] = UvmEventApertureSys, 732 [UVM_APERTURE_VID] = UvmEventApertureVid, 733 }; 734 735 static UvmEventFaultClientType g_hal_to_tools_fault_client_type_table[UVM_FAULT_CLIENT_TYPE_COUNT] = { 736 [UVM_FAULT_CLIENT_TYPE_GPC] = UvmEventFaultClientTypeGpc, 737 [UVM_FAULT_CLIENT_TYPE_HUB] = UvmEventFaultClientTypeHub, 738 }; 739 740 static void record_gpu_fault_instance(uvm_gpu_t *gpu, 741 uvm_va_space_t *va_space, 742 const uvm_fault_buffer_entry_t *fault_entry, 743 NvU64 batch_id, 744 NvU64 timestamp) 745 { 746 UvmEventEntry entry; 747 UvmEventGpuFaultInfo *info = &entry.eventData.gpuFault; 748 memset(&entry, 0, sizeof(entry)); 749 750 info->eventType = UvmEventTypeGpuFault; 751 info->gpuIndex = uvm_parent_id_value_from_processor_id(gpu->id); 752 info->faultType = g_hal_to_tools_fault_type_table[fault_entry->fault_type]; 753 info->accessType = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type]; 754 info->clientType = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type]; 755 if (fault_entry->is_replayable) 756 info->gpcId = fault_entry->fault_source.gpc_id; 757 else 758 info->channelId = fault_entry->fault_source.channel_id; 759 info->clientId = fault_entry->fault_source.client_id; 760 info->address = fault_entry->fault_address; 761 info->timeStamp = timestamp; 762 info->timeStampGpu = fault_entry->timestamp; 763 info->batchId = batch_id; 764 765 uvm_tools_record_event(va_space, &entry); 766 } 767 768 static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data) 769 { 770 uvm_va_space_t *va_space = event_data->fault.space; 771 772 UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT); 773 UVM_ASSERT(event_data->fault.space); 774 775 uvm_assert_rwsem_locked(&va_space->lock); 776 uvm_assert_rwsem_locked(&va_space->perf_events.lock); 777 UVM_ASSERT(va_space->tools.enabled); 778 779 uvm_down_read(&va_space->tools.lock); 780 UVM_ASSERT(tools_is_fault_callback_needed(va_space)); 781 782 if (UVM_ID_IS_CPU(event_data->fault.proc_id)) { 783 if (tools_is_event_enabled(va_space, UvmEventTypeCpuFault)) { 784 UvmEventEntry entry; 785 UvmEventCpuFaultInfo *info = &entry.eventData.cpuFault; 786 memset(&entry, 0, sizeof(entry)); 787 788 info->eventType = UvmEventTypeCpuFault; 789 if (event_data->fault.cpu.is_write) 790 info->accessType = UvmEventMemoryAccessTypeWrite; 791 else 792 info->accessType = UvmEventMemoryAccessTypeRead; 793 794 info->address = event_data->fault.cpu.fault_va; 795 info->timeStamp = NV_GETTIME(); 796 // assume that current owns va_space 797 info->pid = uvm_get_stale_process_id(); 798 info->threadId = uvm_get_stale_thread_id(); 799 info->pc = event_data->fault.cpu.pc; 800 801 uvm_tools_record_event(va_space, &entry); 802 } 803 if (tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount)) { 804 uvm_processor_id_t preferred_location; 805 806 // The UVM Lite tools interface did not represent the CPU as a UVM 807 // device. It reported CPU faults against the corresponding 808 // allocation's 'home location'. Though this driver's tools 809 // interface does include a CPU device, for compatibility, the 810 // driver still reports faults against a buffer's preferred 811 // location, in addition to the CPU. 812 uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &NV_PROCESSOR_UUID_CPU_DEFAULT); 813 814 preferred_location = event_data->fault.preferred_location; 815 if (UVM_ID_IS_GPU(preferred_location)) { 816 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, preferred_location); 817 uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, uvm_gpu_uuid(gpu)); 818 } 819 } 820 } 821 else { 822 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->fault.proc_id); 823 UVM_ASSERT(gpu); 824 825 if (tools_is_event_enabled(va_space, UvmEventTypeGpuFault)) { 826 NvU64 timestamp = NV_GETTIME(); 827 uvm_fault_buffer_entry_t *fault_entry = event_data->fault.gpu.buffer_entry; 828 uvm_fault_buffer_entry_t *fault_instance; 829 830 record_gpu_fault_instance(gpu, va_space, fault_entry, event_data->fault.gpu.batch_id, timestamp); 831 832 list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list) 833 record_gpu_fault_instance(gpu, va_space, fault_instance, event_data->fault.gpu.batch_id, timestamp); 834 } 835 836 if (tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount)) 837 uvm_tools_inc_counter(va_space, UvmCounterNameGpuPageFaultCount, 1, uvm_gpu_uuid(gpu)); 838 } 839 uvm_up_read(&va_space->tools.lock); 840 } 841 842 static void add_pending_event_for_channel(uvm_channel_t *channel) 843 { 844 uvm_assert_spinlock_locked(&g_tools_channel_list_lock); 845 846 if (channel->tools.pending_event_count++ == 0) 847 list_add_tail(&channel->tools.channel_list_node, &g_tools_channel_list); 848 } 849 850 static void remove_pending_event_for_channel(uvm_channel_t *channel) 851 { 852 uvm_assert_spinlock_locked(&g_tools_channel_list_lock); 853 UVM_ASSERT(channel->tools.pending_event_count > 0); 854 if (--channel->tools.pending_event_count == 0) 855 list_del_init(&channel->tools.channel_list_node); 856 } 857 858 859 static void record_migration_events(void *args) 860 { 861 block_migration_data_t *block_mig = (block_migration_data_t *)args; 862 migration_data_t *mig; 863 migration_data_t *next; 864 UvmEventEntry entry; 865 UvmEventMigrationInfo *info = &entry.eventData.migration; 866 uvm_va_space_t *va_space = block_mig->va_space; 867 868 NvU64 gpu_timestamp = block_mig->start_timestamp_gpu; 869 870 // Initialize fields that are constant throughout the whole block 871 memset(&entry, 0, sizeof(entry)); 872 info->eventType = UvmEventTypeMigration; 873 info->srcIndex = uvm_parent_id_value_from_processor_id(block_mig->src); 874 info->dstIndex = uvm_parent_id_value_from_processor_id(block_mig->dst); 875 info->beginTimeStamp = block_mig->start_timestamp_cpu; 876 info->endTimeStamp = block_mig->end_timestamp_cpu; 877 info->rangeGroupId = block_mig->range_group_id; 878 879 uvm_down_read(&va_space->tools.lock); 880 list_for_each_entry_safe(mig, next, &block_mig->events, events_node) { 881 UVM_ASSERT(mig->bytes > 0); 882 list_del(&mig->events_node); 883 884 info->address = mig->address; 885 info->migratedBytes = mig->bytes; 886 info->beginTimeStampGpu = gpu_timestamp; 887 info->endTimeStampGpu = mig->end_timestamp_gpu; 888 info->migrationCause = mig->cause; 889 gpu_timestamp = mig->end_timestamp_gpu; 890 kmem_cache_free(g_tools_migration_data_cache, mig); 891 892 uvm_tools_record_event(va_space, &entry); 893 } 894 uvm_up_read(&va_space->tools.lock); 895 896 UVM_ASSERT(list_empty(&block_mig->events)); 897 kmem_cache_free(g_tools_block_migration_data_cache, block_mig); 898 } 899 900 static void record_migration_events_entry(void *args) 901 { 902 UVM_ENTRY_VOID(record_migration_events(args)); 903 } 904 905 static void on_block_migration_complete(void *ptr) 906 { 907 migration_data_t *mig; 908 block_migration_data_t *block_mig = (block_migration_data_t *)ptr; 909 910 block_mig->end_timestamp_cpu = NV_GETTIME(); 911 block_mig->start_timestamp_gpu = *block_mig->start_timestamp_gpu_addr; 912 list_for_each_entry(mig, &block_mig->events, events_node) 913 mig->end_timestamp_gpu = *mig->end_timestamp_gpu_addr; 914 915 nv_kthread_q_item_init(&block_mig->queue_item, record_migration_events_entry, block_mig); 916 917 // The UVM driver may notice that work in a channel is complete in a variety of situations 918 // and the va_space lock is not always held in all of them, nor can it always be taken safely on them. 919 // Dispatching events requires the va_space lock to be held in at least read mode, so 920 // this callback simply enqueues the dispatching onto a queue, where the 921 // va_space lock is always safe to acquire. 922 uvm_spin_lock(&g_tools_channel_list_lock); 923 remove_pending_event_for_channel(block_mig->channel); 924 nv_kthread_q_schedule_q_item(&g_tools_queue, &block_mig->queue_item); 925 uvm_spin_unlock(&g_tools_channel_list_lock); 926 } 927 928 static void record_replay_event_helper(uvm_gpu_id_t gpu_id, 929 NvU32 batch_id, 930 uvm_fault_client_type_t client_type, 931 NvU64 timestamp, 932 NvU64 timestamp_gpu) 933 { 934 UvmEventEntry entry; 935 936 memset(&entry, 0, sizeof(entry)); 937 entry.eventData.gpuFaultReplay.eventType = UvmEventTypeGpuFaultReplay; 938 entry.eventData.gpuFaultReplay.gpuIndex = uvm_parent_id_value_from_processor_id(gpu_id); 939 entry.eventData.gpuFaultReplay.batchId = batch_id; 940 entry.eventData.gpuFaultReplay.clientType = g_hal_to_tools_fault_client_type_table[client_type]; 941 entry.eventData.gpuFaultReplay.timeStamp = timestamp; 942 entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu; 943 944 uvm_tools_broadcast_event(&entry); 945 } 946 947 static void record_replay_events(void *args) 948 { 949 replay_data_t *replay = (replay_data_t *)args; 950 951 record_replay_event_helper(replay->gpu_id, 952 replay->batch_id, 953 replay->client_type, 954 replay->timestamp, 955 replay->timestamp_gpu); 956 957 kmem_cache_free(g_tools_replay_data_cache, replay); 958 } 959 960 static void record_replay_events_entry(void *args) 961 { 962 UVM_ENTRY_VOID(record_replay_events(args)); 963 } 964 965 static void on_replay_complete(void *ptr) 966 { 967 replay_data_t *replay = (replay_data_t *)ptr; 968 replay->timestamp_gpu = *replay->timestamp_gpu_addr; 969 970 nv_kthread_q_item_init(&replay->queue_item, record_replay_events_entry, ptr); 971 972 uvm_spin_lock(&g_tools_channel_list_lock); 973 remove_pending_event_for_channel(replay->channel); 974 nv_kthread_q_schedule_q_item(&g_tools_queue, &replay->queue_item); 975 uvm_spin_unlock(&g_tools_channel_list_lock); 976 977 } 978 979 static UvmEventMigrationCause g_make_resident_to_tools_migration_cause[UVM_MAKE_RESIDENT_CAUSE_MAX] = { 980 [UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence, 981 [UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence, 982 [UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER] = UvmEventMigrationCauseAccessCounters, 983 [UVM_MAKE_RESIDENT_CAUSE_PREFETCH] = UvmEventMigrationCausePrefetch, 984 [UVM_MAKE_RESIDENT_CAUSE_EVICTION] = UvmEventMigrationCauseEviction, 985 [UVM_MAKE_RESIDENT_CAUSE_API_TOOLS] = UvmEventMigrationCauseInvalid, 986 [UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE] = UvmEventMigrationCauseUser, 987 [UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP] = UvmEventMigrationCauseCoherence, 988 [UVM_MAKE_RESIDENT_CAUSE_API_HINT] = UvmEventMigrationCauseUser, 989 }; 990 991 // For non-CPU-to-CPU migrations (or CPU-to-CPU copies using CEs), this event is 992 // notified asynchronously when all the migrations pushed to the same uvm_push_t 993 // object in a call to block_copy_resident_pages_between have finished. 994 // For CPU-to-CPU copies using memcpy, this event is notified when all of the 995 // page copies does by block_copy_resident_pages have finished. 996 static void uvm_tools_record_migration(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data) 997 { 998 uvm_va_block_t *va_block = event_data->migration.block; 999 uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block); 1000 1001 UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION); 1002 1003 uvm_assert_mutex_locked(&va_block->lock); 1004 uvm_assert_rwsem_locked(&va_space->perf_events.lock); 1005 UVM_ASSERT(va_space->tools.enabled); 1006 1007 uvm_down_read(&va_space->tools.lock); 1008 UVM_ASSERT(tools_is_migration_callback_needed(va_space)); 1009 1010 if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) { 1011 if (!UVM_ID_IS_CPU(event_data->migration.src) || !UVM_ID_IS_CPU(event_data->migration.dst)) { 1012 migration_data_t *mig; 1013 uvm_push_info_t *push_info = uvm_push_info_from_push(event_data->migration.push); 1014 block_migration_data_t *block_mig = (block_migration_data_t *)push_info->on_complete_data; 1015 1016 if (push_info->on_complete != NULL) { 1017 mig = kmem_cache_alloc(g_tools_migration_data_cache, NV_UVM_GFP_FLAGS); 1018 if (mig == NULL) 1019 goto done_unlock; 1020 1021 mig->address = event_data->migration.address; 1022 mig->bytes = event_data->migration.bytes; 1023 mig->end_timestamp_gpu_addr = uvm_push_timestamp(event_data->migration.push); 1024 mig->cause = g_make_resident_to_tools_migration_cause[event_data->migration.cause]; 1025 1026 list_add_tail(&mig->events_node, &block_mig->events); 1027 } 1028 } 1029 else { 1030 UvmEventEntry entry; 1031 UvmEventMigrationInfo *info = &entry.eventData.migration; 1032 uvm_va_space_t *va_space = uvm_va_block_get_va_space(event_data->migration.block); 1033 1034 // CPU-to-CPU migration events can be added directly to the queue. 1035 memset(&entry, 0, sizeof(entry)); 1036 info->eventType = UvmEventTypeMigration; 1037 info->srcIndex = uvm_parent_id_value_from_processor_id(event_data->migration.src); 1038 info->dstIndex = uvm_parent_id_value_from_processor_id(event_data->migration.dst); 1039 // TODO: Bug 4232310: Add src and dst NUMA node IDS to event data. 1040 //info->srcNid = event_data->migration.src_nid; 1041 //info->dstNid = event_data->migration.dst_nid; 1042 info->address = event_data->migration.address; 1043 info->migratedBytes = event_data->migration.bytes; 1044 info->beginTimeStamp = event_data->migration.cpu_start_timestamp; 1045 info->endTimeStamp = NV_GETTIME(); 1046 info->migrationCause = event_data->migration.cause; 1047 info->rangeGroupId = UVM_RANGE_GROUP_ID_NONE; 1048 1049 // During evictions, it is not safe to uvm_range_group_range_find() because the va_space lock is not held. 1050 if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) { 1051 uvm_range_group_range_t *range = uvm_range_group_range_find(va_space, event_data->migration.address); 1052 if (range != NULL) 1053 info->rangeGroupId = range->range_group->id; 1054 } 1055 1056 uvm_tools_record_event(va_space, &entry); 1057 } 1058 } 1059 1060 // We don't want to increment neither UvmCounterNameBytesXferDtH nor 1061 // UvmCounterNameBytesXferHtD in a CPU-to-CPU migration. 1062 if (UVM_ID_IS_CPU(event_data->migration.src) && UVM_ID_IS_CPU(event_data->migration.dst)) 1063 goto done_unlock; 1064 1065 // Increment counters 1066 if (UVM_ID_IS_CPU(event_data->migration.src) && 1067 tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD)) { 1068 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.dst); 1069 uvm_tools_inc_counter(va_space, 1070 UvmCounterNameBytesXferHtD, 1071 event_data->migration.bytes, 1072 uvm_gpu_uuid(gpu)); 1073 } 1074 if (UVM_ID_IS_CPU(event_data->migration.dst) && 1075 tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH)) { 1076 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.src); 1077 uvm_tools_inc_counter(va_space, 1078 UvmCounterNameBytesXferDtH, 1079 event_data->migration.bytes, 1080 uvm_gpu_uuid(gpu)); 1081 } 1082 1083 done_unlock: 1084 uvm_up_read(&va_space->tools.lock); 1085 } 1086 1087 // This event is notified asynchronously when it is marked as completed in the 1088 // pushbuffer the replay method belongs to. 1089 void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, 1090 uvm_push_t *push, 1091 NvU32 batch_id, 1092 uvm_fault_client_type_t client_type) 1093 { 1094 uvm_push_info_t *push_info = uvm_push_info_from_push(push); 1095 replay_data_t *replay; 1096 1097 // Perform delayed notification only if some VA space has signed up for 1098 // UvmEventTypeGpuFaultReplay 1099 if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay)) 1100 return; 1101 1102 replay = kmem_cache_alloc(g_tools_replay_data_cache, NV_UVM_GFP_FLAGS); 1103 if (replay == NULL) 1104 return; 1105 1106 UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL); 1107 1108 replay->timestamp_gpu_addr = uvm_push_timestamp(push); 1109 replay->gpu_id = gpu->id; 1110 replay->batch_id = batch_id; 1111 replay->client_type = client_type; 1112 replay->timestamp = NV_GETTIME(); 1113 replay->channel = push->channel; 1114 1115 push_info->on_complete_data = replay; 1116 push_info->on_complete = on_replay_complete; 1117 1118 uvm_spin_lock(&g_tools_channel_list_lock); 1119 add_pending_event_for_channel(replay->channel); 1120 uvm_spin_unlock(&g_tools_channel_list_lock); 1121 } 1122 1123 1124 void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type) 1125 { 1126 UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method); 1127 1128 if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay)) 1129 return; 1130 1131 record_replay_event_helper(gpu->id, batch_id, client_type, NV_GETTIME(), gpu->parent->host_hal->get_time(gpu)); 1132 } 1133 1134 void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, 1135 const uvm_access_counter_buffer_entry_t *buffer_entry, 1136 bool on_managed_phys) 1137 { 1138 UvmEventEntry entry; 1139 UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter; 1140 1141 // Perform delayed notification only if some VA space has signed up for 1142 // UvmEventTypeAccessCounter 1143 if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeTestAccessCounter)) 1144 return; 1145 1146 if (!buffer_entry->address.is_virtual) 1147 UVM_ASSERT(UVM_ID_IS_VALID(buffer_entry->physical_info.resident_id)); 1148 1149 memset(&entry, 0, sizeof(entry)); 1150 1151 info->eventType = UvmEventTypeTestAccessCounter; 1152 info->srcIndex = uvm_parent_id_value_from_processor_id(gpu->id); 1153 info->address = buffer_entry->address.address; 1154 info->isVirtual = buffer_entry->address.is_virtual? 1: 0; 1155 1156 if (buffer_entry->address.is_virtual) { 1157 info->instancePtr = buffer_entry->virtual_info.instance_ptr.address; 1158 info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture]; 1159 info->veId = buffer_entry->virtual_info.ve_id; 1160 } 1161 else { 1162 info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture]; 1163 info->physOnManaged = on_managed_phys? 1 : 0; 1164 } 1165 1166 info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0; 1167 info->value = buffer_entry->counter_value; 1168 info->subGranularity = buffer_entry->sub_granularity; 1169 info->bank = buffer_entry->bank; 1170 info->tag = buffer_entry->tag; 1171 1172 uvm_tools_broadcast_event(&entry); 1173 } 1174 1175 void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space) 1176 { 1177 UvmEventEntry entry; 1178 1179 if (!va_space->tools.enabled) 1180 return; 1181 1182 entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate; 1183 uvm_down_read(&va_space->tools.lock); 1184 uvm_tools_record_event(va_space, &entry); 1185 uvm_up_read(&va_space->tools.lock); 1186 } 1187 1188 // This function is used as a begin marker to group all migrations within a VA 1189 // block that are performed in the same call to 1190 // block_copy_resident_pages_between. All of these are pushed to the same 1191 // uvm_push_t object, and will be notified in burst when the last one finishes. 1192 void uvm_tools_record_block_migration_begin(uvm_va_block_t *va_block, 1193 uvm_push_t *push, 1194 uvm_processor_id_t dst_id, 1195 uvm_processor_id_t src_id, 1196 NvU64 start, 1197 uvm_make_resident_cause_t cause) 1198 { 1199 uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block); 1200 uvm_range_group_range_t *range; 1201 1202 // Calls from tools read/write functions to make_resident must not trigger 1203 // any migration 1204 UVM_ASSERT(cause != UVM_MAKE_RESIDENT_CAUSE_API_TOOLS); 1205 1206 // During evictions the va_space lock is not held. 1207 if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) 1208 uvm_assert_rwsem_locked(&va_space->lock); 1209 1210 if (!va_space->tools.enabled) 1211 return; 1212 1213 uvm_down_read(&va_space->tools.lock); 1214 1215 // Perform delayed notification only if the VA space has signed up for 1216 // UvmEventTypeMigration 1217 if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) { 1218 block_migration_data_t *block_mig; 1219 uvm_push_info_t *push_info = uvm_push_info_from_push(push); 1220 1221 UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL); 1222 1223 block_mig = kmem_cache_alloc(g_tools_block_migration_data_cache, NV_UVM_GFP_FLAGS); 1224 if (block_mig == NULL) 1225 goto done_unlock; 1226 1227 block_mig->start_timestamp_gpu_addr = uvm_push_timestamp(push); 1228 block_mig->channel = push->channel; 1229 block_mig->start_timestamp_cpu = NV_GETTIME(); 1230 block_mig->dst = dst_id; 1231 block_mig->src = src_id; 1232 block_mig->range_group_id = UVM_RANGE_GROUP_ID_NONE; 1233 1234 // During evictions, it is not safe to uvm_range_group_range_find() because the va_space lock is not held. 1235 if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) { 1236 range = uvm_range_group_range_find(va_space, start); 1237 if (range != NULL) 1238 block_mig->range_group_id = range->range_group->id; 1239 } 1240 block_mig->va_space = va_space; 1241 1242 INIT_LIST_HEAD(&block_mig->events); 1243 push_info->on_complete_data = block_mig; 1244 push_info->on_complete = on_block_migration_complete; 1245 1246 uvm_spin_lock(&g_tools_channel_list_lock); 1247 add_pending_event_for_channel(block_mig->channel); 1248 uvm_spin_unlock(&g_tools_channel_list_lock); 1249 } 1250 1251 done_unlock: 1252 uvm_up_read(&va_space->tools.lock); 1253 } 1254 1255 void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block, 1256 uvm_processor_id_t dst, 1257 uvm_va_block_region_t region, 1258 const uvm_page_mask_t *page_mask) 1259 { 1260 uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block); 1261 1262 if (!va_space->tools.enabled) 1263 return; 1264 1265 uvm_down_read(&va_space->tools.lock); 1266 if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate)) { 1267 // Read-duplication events 1268 UvmEventEntry entry; 1269 UvmEventReadDuplicateInfo *info_read_duplicate = &entry.eventData.readDuplicate; 1270 uvm_page_index_t page_index; 1271 memset(&entry, 0, sizeof(entry)); 1272 1273 info_read_duplicate->eventType = UvmEventTypeReadDuplicate; 1274 info_read_duplicate->size = PAGE_SIZE; 1275 info_read_duplicate->timeStamp = NV_GETTIME(); 1276 1277 for_each_va_block_page_in_region_mask(page_index, page_mask, region) { 1278 uvm_processor_id_t id; 1279 uvm_processor_mask_t resident_processors; 1280 1281 info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index); 1282 info_read_duplicate->processors = 0; 1283 1284 uvm_va_block_page_resident_processors(va_block, page_index, &resident_processors); 1285 for_each_id_in_mask(id, &resident_processors) 1286 info_read_duplicate->processors |= (1 << uvm_parent_id_value_from_processor_id(id)); 1287 1288 uvm_tools_record_event(va_space, &entry); 1289 } 1290 } 1291 uvm_up_read(&va_space->tools.lock); 1292 } 1293 1294 void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block, 1295 uvm_processor_id_t dst, 1296 uvm_va_block_region_t region, 1297 const uvm_page_mask_t *page_mask) 1298 { 1299 uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block); 1300 1301 if (!va_space->tools.enabled) 1302 return; 1303 1304 uvm_down_read(&va_space->tools.lock); 1305 if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicateInvalidate)) { 1306 UvmEventEntry entry; 1307 uvm_page_index_t page_index; 1308 UvmEventReadDuplicateInvalidateInfo *info = &entry.eventData.readDuplicateInvalidate; 1309 memset(&entry, 0, sizeof(entry)); 1310 1311 info->eventType = UvmEventTypeReadDuplicateInvalidate; 1312 info->residentIndex = uvm_parent_id_value_from_processor_id(dst); 1313 info->size = PAGE_SIZE; 1314 info->timeStamp = NV_GETTIME(); 1315 1316 for_each_va_block_page_in_region_mask(page_index, page_mask, region) { 1317 UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index)); 1318 1319 info->address = uvm_va_block_cpu_page_address(va_block, page_index); 1320 uvm_tools_record_event(va_space, &entry); 1321 } 1322 } 1323 uvm_up_read(&va_space->tools.lock); 1324 } 1325 1326 static void tools_schedule_completed_events(void) 1327 { 1328 uvm_channel_t *channel; 1329 uvm_channel_t *next_channel; 1330 NvU64 channel_count = 0; 1331 NvU64 i; 1332 1333 uvm_spin_lock(&g_tools_channel_list_lock); 1334 1335 // retain every channel list entry currently in the list and keep track of their count. 1336 list_for_each_entry(channel, &g_tools_channel_list, tools.channel_list_node) { 1337 ++channel->tools.pending_event_count; 1338 ++channel_count; 1339 } 1340 uvm_spin_unlock(&g_tools_channel_list_lock); 1341 1342 if (channel_count == 0) 1343 return; 1344 1345 // new entries always appear at the end, and all the entries seen in the first loop have been retained 1346 // so it is safe to go through them 1347 channel = list_first_entry(&g_tools_channel_list, uvm_channel_t, tools.channel_list_node); 1348 for (i = 0; i < channel_count; i++) { 1349 uvm_channel_update_progress_all(channel); 1350 channel = list_next_entry(channel, tools.channel_list_node); 1351 } 1352 1353 // now release all the entries we retained in the beginning 1354 i = 0; 1355 uvm_spin_lock(&g_tools_channel_list_lock); 1356 list_for_each_entry_safe(channel, next_channel, &g_tools_channel_list, tools.channel_list_node) { 1357 if (i++ == channel_count) 1358 break; 1359 1360 remove_pending_event_for_channel(channel); 1361 } 1362 uvm_spin_unlock(&g_tools_channel_list_lock); 1363 } 1364 1365 void uvm_tools_record_cpu_fatal_fault(uvm_va_space_t *va_space, 1366 NvU64 address, 1367 bool is_write, 1368 UvmEventFatalReason reason) 1369 { 1370 uvm_assert_rwsem_locked(&va_space->lock); 1371 1372 if (!va_space->tools.enabled) 1373 return; 1374 1375 uvm_down_read(&va_space->tools.lock); 1376 if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) { 1377 UvmEventEntry entry; 1378 UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault; 1379 memset(&entry, 0, sizeof(entry)); 1380 1381 info->eventType = UvmEventTypeFatalFault; 1382 info->processorIndex = UVM_ID_CPU_VALUE; 1383 info->timeStamp = NV_GETTIME(); 1384 info->address = address; 1385 info->accessType = is_write? UvmEventMemoryAccessTypeWrite: UvmEventMemoryAccessTypeRead; 1386 // info->faultType is not valid for cpu faults 1387 info->reason = reason; 1388 1389 uvm_tools_record_event(va_space, &entry); 1390 } 1391 uvm_up_read(&va_space->tools.lock); 1392 } 1393 1394 void uvm_tools_record_gpu_fatal_fault(uvm_gpu_id_t gpu_id, 1395 uvm_va_space_t *va_space, 1396 const uvm_fault_buffer_entry_t *buffer_entry, 1397 UvmEventFatalReason reason) 1398 { 1399 uvm_assert_rwsem_locked(&va_space->lock); 1400 1401 if (!va_space->tools.enabled) 1402 return; 1403 1404 uvm_down_read(&va_space->tools.lock); 1405 if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) { 1406 UvmEventEntry entry; 1407 UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault; 1408 memset(&entry, 0, sizeof(entry)); 1409 1410 info->eventType = UvmEventTypeFatalFault; 1411 info->processorIndex = uvm_parent_id_value_from_processor_id(gpu_id); 1412 info->timeStamp = NV_GETTIME(); 1413 info->address = buffer_entry->fault_address; 1414 info->accessType = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type]; 1415 info->faultType = g_hal_to_tools_fault_type_table[buffer_entry->fault_type]; 1416 info->reason = reason; 1417 1418 uvm_tools_record_event(va_space, &entry); 1419 } 1420 uvm_up_read(&va_space->tools.lock); 1421 } 1422 1423 void uvm_tools_record_thrashing(uvm_va_space_t *va_space, 1424 NvU64 address, 1425 size_t region_size, 1426 const uvm_processor_mask_t *processors) 1427 { 1428 UVM_ASSERT(address); 1429 UVM_ASSERT(PAGE_ALIGNED(address)); 1430 UVM_ASSERT(region_size > 0); 1431 1432 uvm_assert_rwsem_locked(&va_space->lock); 1433 1434 if (!va_space->tools.enabled) 1435 return; 1436 1437 uvm_down_read(&va_space->tools.lock); 1438 if (tools_is_event_enabled(va_space, UvmEventTypeThrashingDetected)) { 1439 uvm_processor_id_t id; 1440 UvmEventEntry entry; 1441 UvmEventThrashingDetectedInfo *info = &entry.eventData.thrashing; 1442 memset(&entry, 0, sizeof(entry)); 1443 1444 info->eventType = UvmEventTypeThrashingDetected; 1445 info->address = address; 1446 info->size = region_size; 1447 info->timeStamp = NV_GETTIME(); 1448 1449 for_each_id_in_mask(id, processors) 1450 __set_bit(uvm_parent_id_value_from_processor_id(id), 1451 (unsigned long *)&info->processors); 1452 1453 uvm_tools_record_event(va_space, &entry); 1454 } 1455 uvm_up_read(&va_space->tools.lock); 1456 } 1457 1458 void uvm_tools_record_throttling_start(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor) 1459 { 1460 UVM_ASSERT(address); 1461 UVM_ASSERT(PAGE_ALIGNED(address)); 1462 UVM_ASSERT(UVM_ID_IS_VALID(processor)); 1463 1464 uvm_assert_rwsem_locked(&va_space->lock); 1465 1466 if (!va_space->tools.enabled) 1467 return; 1468 1469 uvm_down_read(&va_space->tools.lock); 1470 if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingStart)) { 1471 UvmEventEntry entry; 1472 UvmEventThrottlingStartInfo *info = &entry.eventData.throttlingStart; 1473 memset(&entry, 0, sizeof(entry)); 1474 1475 info->eventType = UvmEventTypeThrottlingStart; 1476 info->processorIndex = uvm_parent_id_value_from_processor_id(processor); 1477 info->address = address; 1478 info->timeStamp = NV_GETTIME(); 1479 1480 uvm_tools_record_event(va_space, &entry); 1481 } 1482 uvm_up_read(&va_space->tools.lock); 1483 } 1484 1485 void uvm_tools_record_throttling_end(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor) 1486 { 1487 UVM_ASSERT(address); 1488 UVM_ASSERT(PAGE_ALIGNED(address)); 1489 UVM_ASSERT(UVM_ID_IS_VALID(processor)); 1490 1491 uvm_assert_rwsem_locked(&va_space->lock); 1492 1493 if (!va_space->tools.enabled) 1494 return; 1495 1496 uvm_down_read(&va_space->tools.lock); 1497 if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingEnd)) { 1498 UvmEventEntry entry; 1499 UvmEventThrottlingEndInfo *info = &entry.eventData.throttlingEnd; 1500 memset(&entry, 0, sizeof(entry)); 1501 1502 info->eventType = UvmEventTypeThrottlingEnd; 1503 info->processorIndex = uvm_parent_id_value_from_processor_id(processor); 1504 info->address = address; 1505 info->timeStamp = NV_GETTIME(); 1506 1507 uvm_tools_record_event(va_space, &entry); 1508 } 1509 uvm_up_read(&va_space->tools.lock); 1510 } 1511 1512 static void record_map_remote_events(void *args) 1513 { 1514 block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)args; 1515 map_remote_data_t *map_remote, *next; 1516 UvmEventEntry entry; 1517 uvm_va_space_t *va_space = block_map_remote->va_space; 1518 1519 memset(&entry, 0, sizeof(entry)); 1520 1521 entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote; 1522 entry.eventData.mapRemote.srcIndex = uvm_parent_id_value_from_processor_id(block_map_remote->src); 1523 entry.eventData.mapRemote.dstIndex = uvm_parent_id_value_from_processor_id(block_map_remote->dst); 1524 entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause; 1525 entry.eventData.mapRemote.timeStamp = block_map_remote->timestamp; 1526 1527 uvm_down_read(&va_space->tools.lock); 1528 list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) { 1529 list_del(&map_remote->events_node); 1530 1531 entry.eventData.mapRemote.address = map_remote->address; 1532 entry.eventData.mapRemote.size = map_remote->size; 1533 entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu; 1534 kmem_cache_free(g_tools_map_remote_data_cache, map_remote); 1535 1536 uvm_tools_record_event(va_space, &entry); 1537 } 1538 uvm_up_read(&va_space->tools.lock); 1539 1540 UVM_ASSERT(list_empty(&block_map_remote->events)); 1541 kmem_cache_free(g_tools_block_map_remote_data_cache, block_map_remote); 1542 } 1543 1544 static void record_map_remote_events_entry(void *args) 1545 { 1546 UVM_ENTRY_VOID(record_map_remote_events(args)); 1547 } 1548 1549 static void on_map_remote_complete(void *ptr) 1550 { 1551 block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)ptr; 1552 map_remote_data_t *map_remote; 1553 1554 // Only GPU mappings use the deferred mechanism 1555 UVM_ASSERT(UVM_ID_IS_GPU(block_map_remote->src)); 1556 list_for_each_entry(map_remote, &block_map_remote->events, events_node) 1557 map_remote->timestamp_gpu = *map_remote->timestamp_gpu_addr; 1558 1559 nv_kthread_q_item_init(&block_map_remote->queue_item, record_map_remote_events_entry, ptr); 1560 1561 uvm_spin_lock(&g_tools_channel_list_lock); 1562 remove_pending_event_for_channel(block_map_remote->channel); 1563 nv_kthread_q_schedule_q_item(&g_tools_queue, &block_map_remote->queue_item); 1564 uvm_spin_unlock(&g_tools_channel_list_lock); 1565 } 1566 1567 void uvm_tools_record_map_remote(uvm_va_block_t *va_block, 1568 uvm_push_t *push, 1569 uvm_processor_id_t processor, 1570 uvm_processor_id_t residency, 1571 NvU64 address, 1572 size_t region_size, 1573 UvmEventMapRemoteCause cause) 1574 { 1575 uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block); 1576 1577 UVM_ASSERT(UVM_ID_IS_VALID(processor)); 1578 UVM_ASSERT(UVM_ID_IS_VALID(residency)); 1579 UVM_ASSERT(cause != UvmEventMapRemoteCauseInvalid); 1580 1581 uvm_assert_rwsem_locked(&va_space->lock); 1582 1583 if (!va_space->tools.enabled) 1584 return; 1585 1586 uvm_down_read(&va_space->tools.lock); 1587 if (!tools_is_event_enabled(va_space, UvmEventTypeMapRemote)) 1588 goto done; 1589 1590 if (UVM_ID_IS_CPU(processor)) { 1591 UvmEventEntry entry; 1592 memset(&entry, 0, sizeof(entry)); 1593 1594 entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote; 1595 entry.eventData.mapRemote.srcIndex = uvm_parent_id_value_from_processor_id(processor); 1596 entry.eventData.mapRemote.dstIndex = uvm_parent_id_value_from_processor_id(residency); 1597 entry.eventData.mapRemote.mapRemoteCause = cause; 1598 entry.eventData.mapRemote.timeStamp = NV_GETTIME(); 1599 entry.eventData.mapRemote.address = address; 1600 entry.eventData.mapRemote.size = region_size; 1601 entry.eventData.mapRemote.timeStampGpu = 0; 1602 1603 UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid); 1604 1605 uvm_tools_record_event(va_space, &entry); 1606 } 1607 else { 1608 uvm_push_info_t *push_info = uvm_push_info_from_push(push); 1609 block_map_remote_data_t *block_map_remote; 1610 map_remote_data_t *map_remote; 1611 1612 // The first call on this pushbuffer creates the per-VA block structure 1613 if (push_info->on_complete == NULL) { 1614 UVM_ASSERT(push_info->on_complete_data == NULL); 1615 1616 block_map_remote = kmem_cache_alloc(g_tools_block_map_remote_data_cache, NV_UVM_GFP_FLAGS); 1617 if (block_map_remote == NULL) 1618 goto done; 1619 1620 block_map_remote->src = processor; 1621 block_map_remote->dst = residency; 1622 block_map_remote->cause = cause; 1623 block_map_remote->timestamp = NV_GETTIME(); 1624 block_map_remote->va_space = va_space; 1625 block_map_remote->channel = push->channel; 1626 INIT_LIST_HEAD(&block_map_remote->events); 1627 1628 push_info->on_complete_data = block_map_remote; 1629 push_info->on_complete = on_map_remote_complete; 1630 1631 uvm_spin_lock(&g_tools_channel_list_lock); 1632 add_pending_event_for_channel(block_map_remote->channel); 1633 uvm_spin_unlock(&g_tools_channel_list_lock); 1634 } 1635 else { 1636 block_map_remote = push_info->on_complete_data; 1637 } 1638 UVM_ASSERT(block_map_remote); 1639 1640 map_remote = kmem_cache_alloc(g_tools_map_remote_data_cache, NV_UVM_GFP_FLAGS); 1641 if (map_remote == NULL) 1642 goto done; 1643 1644 map_remote->address = address; 1645 map_remote->size = region_size; 1646 map_remote->timestamp_gpu_addr = uvm_push_timestamp(push); 1647 1648 list_add_tail(&map_remote->events_node, &block_map_remote->events); 1649 } 1650 1651 done: 1652 uvm_up_read(&va_space->tools.lock); 1653 } 1654 1655 NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp) 1656 { 1657 NV_STATUS status = NV_OK; 1658 uvm_tools_event_tracker_t *event_tracker; 1659 1660 event_tracker = nv_kmem_cache_zalloc(g_tools_event_tracker_cache, NV_UVM_GFP_FLAGS); 1661 if (event_tracker == NULL) 1662 return NV_ERR_NO_MEMORY; 1663 1664 event_tracker->uvm_file = fget(params->uvmFd); 1665 if (event_tracker->uvm_file == NULL) { 1666 status = NV_ERR_INSUFFICIENT_PERMISSIONS; 1667 goto fail; 1668 } 1669 1670 if (!uvm_file_is_nvidia_uvm(event_tracker->uvm_file)) { 1671 fput(event_tracker->uvm_file); 1672 event_tracker->uvm_file = NULL; 1673 status = NV_ERR_INSUFFICIENT_PERMISSIONS; 1674 goto fail; 1675 } 1676 1677 // We don't use uvm_fd_va_space() here because tools can work 1678 // without an associated va_space_mm. 1679 if (!uvm_fd_get_type(event_tracker->uvm_file, UVM_FD_VA_SPACE)) { 1680 fput(event_tracker->uvm_file); 1681 event_tracker->uvm_file = NULL; 1682 status = NV_ERR_ILLEGAL_ACTION; 1683 goto fail; 1684 } 1685 1686 event_tracker->is_queue = params->queueBufferSize != 0; 1687 if (event_tracker->is_queue) { 1688 uvm_tools_queue_t *queue = &event_tracker->queue; 1689 uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF); 1690 init_waitqueue_head(&queue->wait_queue); 1691 1692 if (params->queueBufferSize > UINT_MAX) { 1693 status = NV_ERR_INVALID_ARGUMENT; 1694 goto fail; 1695 } 1696 1697 queue->queue_buffer_count = (NvU32)params->queueBufferSize; 1698 queue->notification_threshold = queue->queue_buffer_count / 2; 1699 1700 // queue_buffer_count must be a power of 2, of at least 2 1701 if (!is_power_of_2(queue->queue_buffer_count) || queue->queue_buffer_count < 2) { 1702 status = NV_ERR_INVALID_ARGUMENT; 1703 goto fail; 1704 } 1705 1706 status = map_user_pages(params->queueBuffer, 1707 queue->queue_buffer_count * sizeof(UvmEventEntry), 1708 (void **)&queue->queue, 1709 &queue->queue_buffer_pages); 1710 if (status != NV_OK) 1711 goto fail; 1712 1713 status = map_user_pages(params->controlBuffer, 1714 sizeof(UvmToolsEventControlData), 1715 (void **)&queue->control, 1716 &queue->control_buffer_pages); 1717 1718 if (status != NV_OK) 1719 goto fail; 1720 } 1721 else { 1722 uvm_tools_counter_t *counter = &event_tracker->counter; 1723 counter->all_processors = params->allProcessors; 1724 counter->processor = params->processor; 1725 status = map_user_pages(params->controlBuffer, 1726 sizeof(NvU64) * UVM_TOTAL_COUNTERS, 1727 (void **)&counter->counters, 1728 &counter->counter_buffer_pages); 1729 if (status != NV_OK) 1730 goto fail; 1731 } 1732 1733 if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) { 1734 status = NV_ERR_INVALID_ARGUMENT; 1735 goto fail; 1736 } 1737 1738 return NV_OK; 1739 1740 fail: 1741 destroy_event_tracker(event_tracker); 1742 return status; 1743 } 1744 1745 NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp) 1746 { 1747 UvmToolsEventControlData *ctrl; 1748 uvm_tools_queue_snapshot_t sn; 1749 uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp); 1750 1751 if (!tracker_is_queue(event_tracker)) 1752 return NV_ERR_INVALID_ARGUMENT; 1753 1754 uvm_spin_lock(&event_tracker->queue.lock); 1755 1756 event_tracker->queue.notification_threshold = params->notificationThreshold; 1757 1758 ctrl = event_tracker->queue.control; 1759 sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind); 1760 sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead); 1761 1762 if (queue_needs_wakeup(&event_tracker->queue, &sn)) 1763 wake_up_all(&event_tracker->queue.wait_queue); 1764 1765 uvm_spin_unlock(&event_tracker->queue.lock); 1766 1767 return NV_OK; 1768 } 1769 1770 static NV_STATUS tools_update_perf_events_callbacks(uvm_va_space_t *va_space) 1771 { 1772 NV_STATUS status; 1773 1774 uvm_assert_rwsem_locked_write(&va_space->perf_events.lock); 1775 uvm_assert_rwsem_locked_write(&va_space->tools.lock); 1776 1777 if (tools_is_fault_callback_needed(va_space)) { 1778 if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) { 1779 status = uvm_perf_register_event_callback_locked(&va_space->perf_events, 1780 UVM_PERF_EVENT_FAULT, 1781 uvm_tools_record_fault); 1782 1783 if (status != NV_OK) 1784 return status; 1785 } 1786 } 1787 else { 1788 if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) { 1789 uvm_perf_unregister_event_callback_locked(&va_space->perf_events, 1790 UVM_PERF_EVENT_FAULT, 1791 uvm_tools_record_fault); 1792 } 1793 } 1794 1795 if (tools_is_migration_callback_needed(va_space)) { 1796 if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) { 1797 status = uvm_perf_register_event_callback_locked(&va_space->perf_events, 1798 UVM_PERF_EVENT_MIGRATION, 1799 uvm_tools_record_migration); 1800 1801 if (status != NV_OK) 1802 return status; 1803 } 1804 } 1805 else { 1806 if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) { 1807 uvm_perf_unregister_event_callback_locked(&va_space->perf_events, 1808 UVM_PERF_EVENT_MIGRATION, 1809 uvm_tools_record_migration); 1810 } 1811 } 1812 1813 return NV_OK; 1814 } 1815 1816 static NV_STATUS tools_update_status(uvm_va_space_t *va_space) 1817 { 1818 NV_STATUS status; 1819 bool should_be_enabled; 1820 uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock); 1821 uvm_assert_rwsem_locked_write(&va_space->perf_events.lock); 1822 uvm_assert_rwsem_locked_write(&va_space->tools.lock); 1823 1824 status = tools_update_perf_events_callbacks(va_space); 1825 if (status != NV_OK) 1826 return status; 1827 1828 should_be_enabled = tools_are_enabled(va_space); 1829 if (should_be_enabled != va_space->tools.enabled) { 1830 if (should_be_enabled) 1831 list_add(&va_space->tools.node, &g_tools_va_space_list); 1832 else 1833 list_del(&va_space->tools.node); 1834 1835 va_space->tools.enabled = should_be_enabled; 1836 } 1837 1838 return NV_OK; 1839 } 1840 1841 #define EVENT_FLAGS_BITS (sizeof(NvU64) * 8) 1842 1843 static bool mask_contains_invalid_events(NvU64 event_flags) 1844 { 1845 const unsigned long *event_mask = (const unsigned long *)&event_flags; 1846 DECLARE_BITMAP(helper_mask, EVENT_FLAGS_BITS); 1847 DECLARE_BITMAP(valid_events_mask, EVENT_FLAGS_BITS); 1848 DECLARE_BITMAP(tests_events_mask, EVENT_FLAGS_BITS); 1849 1850 bitmap_zero(tests_events_mask, EVENT_FLAGS_BITS); 1851 bitmap_set(tests_events_mask, 1852 UvmEventTestTypesFirst, 1853 UvmEventTestTypesLast - UvmEventTestTypesFirst + 1); 1854 1855 bitmap_zero(valid_events_mask, EVENT_FLAGS_BITS); 1856 bitmap_set(valid_events_mask, 1, UvmEventNumTypes - 1); 1857 1858 if (uvm_enable_builtin_tests) 1859 bitmap_or(valid_events_mask, valid_events_mask, tests_events_mask, EVENT_FLAGS_BITS); 1860 1861 // Make sure that test event ids do not overlap with regular events 1862 BUILD_BUG_ON(UvmEventTestTypesFirst < UvmEventNumTypes); 1863 BUILD_BUG_ON(UvmEventTestTypesFirst > UvmEventTestTypesLast); 1864 BUILD_BUG_ON(UvmEventTestTypesLast >= UvmEventNumTypesAll); 1865 1866 // Make sure that no test event ever changes the size of UvmEventEntry 1867 BUILD_BUG_ON(sizeof(((UvmEventEntry *)NULL)->testEventData) > 1868 sizeof(((UvmEventEntry *)NULL)->eventData)); 1869 BUILD_BUG_ON(UvmEventNumTypesAll > EVENT_FLAGS_BITS); 1870 1871 if (!bitmap_andnot(helper_mask, event_mask, valid_events_mask, EVENT_FLAGS_BITS)) 1872 return false; 1873 1874 if (!uvm_enable_builtin_tests && bitmap_and(helper_mask, event_mask, tests_events_mask, EVENT_FLAGS_BITS)) 1875 UVM_INFO_PRINT("Event index not found. Did you mean to insmod with uvm_enable_builtin_tests=1?\n"); 1876 1877 return true; 1878 } 1879 1880 NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp) 1881 { 1882 uvm_va_space_t *va_space; 1883 uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp); 1884 NV_STATUS status = NV_OK; 1885 NvU64 inserted_lists; 1886 1887 if (!tracker_is_queue(event_tracker)) 1888 return NV_ERR_INVALID_ARGUMENT; 1889 1890 if (mask_contains_invalid_events(params->eventTypeFlags)) 1891 return NV_ERR_INVALID_ARGUMENT; 1892 1893 va_space = tools_event_tracker_va_space(event_tracker); 1894 1895 uvm_down_write(&g_tools_va_space_list_lock); 1896 uvm_down_write(&va_space->perf_events.lock); 1897 uvm_down_write(&va_space->tools.lock); 1898 1899 insert_event_tracker(va_space, 1900 event_tracker->queue.queue_nodes, 1901 UvmEventNumTypesAll, 1902 params->eventTypeFlags, 1903 &event_tracker->queue.subscribed_queues, 1904 va_space->tools.queues, 1905 &inserted_lists); 1906 1907 // perform any necessary registration 1908 status = tools_update_status(va_space); 1909 if (status != NV_OK) { 1910 // on error, unregister any newly registered event 1911 remove_event_tracker(va_space, 1912 event_tracker->queue.queue_nodes, 1913 UvmEventNumTypes, 1914 inserted_lists, 1915 &event_tracker->queue.subscribed_queues); 1916 } 1917 1918 uvm_up_write(&va_space->tools.lock); 1919 uvm_up_write(&va_space->perf_events.lock); 1920 uvm_up_write(&g_tools_va_space_list_lock); 1921 1922 return status; 1923 } 1924 1925 NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp) 1926 { 1927 NV_STATUS status; 1928 uvm_va_space_t *va_space; 1929 uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp); 1930 1931 if (!tracker_is_queue(event_tracker)) 1932 return NV_ERR_INVALID_ARGUMENT; 1933 1934 va_space = tools_event_tracker_va_space(event_tracker); 1935 1936 uvm_down_write(&g_tools_va_space_list_lock); 1937 uvm_down_write(&va_space->perf_events.lock); 1938 uvm_down_write(&va_space->tools.lock); 1939 remove_event_tracker(va_space, 1940 event_tracker->queue.queue_nodes, 1941 UvmEventNumTypesAll, 1942 params->eventTypeFlags, 1943 &event_tracker->queue.subscribed_queues); 1944 1945 // de-registration should not fail 1946 status = tools_update_status(va_space); 1947 UVM_ASSERT(status == NV_OK); 1948 1949 uvm_up_write(&va_space->tools.lock); 1950 uvm_up_write(&va_space->perf_events.lock); 1951 uvm_up_write(&g_tools_va_space_list_lock); 1952 return NV_OK; 1953 } 1954 1955 NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp) 1956 { 1957 uvm_va_space_t *va_space; 1958 uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp); 1959 NV_STATUS status = NV_OK; 1960 NvU64 inserted_lists; 1961 1962 if (!tracker_is_counter(event_tracker)) 1963 return NV_ERR_INVALID_ARGUMENT; 1964 1965 va_space = tools_event_tracker_va_space(event_tracker); 1966 1967 uvm_down_write(&g_tools_va_space_list_lock); 1968 uvm_down_write(&va_space->perf_events.lock); 1969 uvm_down_write(&va_space->tools.lock); 1970 1971 insert_event_tracker(va_space, 1972 event_tracker->counter.counter_nodes, 1973 UVM_TOTAL_COUNTERS, 1974 params->counterTypeFlags, 1975 &event_tracker->counter.subscribed_counters, 1976 va_space->tools.counters, 1977 &inserted_lists); 1978 1979 // perform any necessary registration 1980 status = tools_update_status(va_space); 1981 if (status != NV_OK) { 1982 remove_event_tracker(va_space, 1983 event_tracker->counter.counter_nodes, 1984 UVM_TOTAL_COUNTERS, 1985 inserted_lists, 1986 &event_tracker->counter.subscribed_counters); 1987 } 1988 1989 uvm_up_write(&va_space->tools.lock); 1990 uvm_up_write(&va_space->perf_events.lock); 1991 uvm_up_write(&g_tools_va_space_list_lock); 1992 1993 return status; 1994 } 1995 1996 NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp) 1997 { 1998 NV_STATUS status; 1999 uvm_va_space_t *va_space; 2000 uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp); 2001 2002 if (!tracker_is_counter(event_tracker)) 2003 return NV_ERR_INVALID_ARGUMENT; 2004 2005 va_space = tools_event_tracker_va_space(event_tracker); 2006 2007 uvm_down_write(&g_tools_va_space_list_lock); 2008 uvm_down_write(&va_space->perf_events.lock); 2009 uvm_down_write(&va_space->tools.lock); 2010 remove_event_tracker(va_space, 2011 event_tracker->counter.counter_nodes, 2012 UVM_TOTAL_COUNTERS, 2013 params->counterTypeFlags, 2014 &event_tracker->counter.subscribed_counters); 2015 2016 // de-registration should not fail 2017 status = tools_update_status(va_space); 2018 UVM_ASSERT(status == NV_OK); 2019 2020 uvm_up_write(&va_space->tools.lock); 2021 uvm_up_write(&va_space->perf_events.lock); 2022 uvm_up_write(&g_tools_va_space_list_lock); 2023 2024 return NV_OK; 2025 } 2026 2027 static NV_STATUS tools_access_va_block(uvm_va_block_t *va_block, 2028 uvm_va_block_context_t *block_context, 2029 NvU64 target_va, 2030 NvU64 size, 2031 bool is_write, 2032 uvm_mem_t *stage_mem) 2033 { 2034 if (is_write) { 2035 return UVM_VA_BLOCK_LOCK_RETRY(va_block, 2036 NULL, 2037 uvm_va_block_write_from_cpu(va_block, block_context, target_va, stage_mem, size)); 2038 } 2039 else { 2040 return UVM_VA_BLOCK_LOCK_RETRY(va_block, 2041 NULL, 2042 uvm_va_block_read_to_cpu(va_block, stage_mem, target_va, size)); 2043 2044 } 2045 } 2046 2047 static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space, 2048 NvU64 target_va, 2049 NvU64 size, 2050 NvU64 user_va, 2051 NvU64 *bytes, 2052 bool is_write) 2053 { 2054 NV_STATUS status; 2055 uvm_mem_t *stage_mem = NULL; 2056 void *stage_addr; 2057 uvm_processor_mask_t *retained_gpus = NULL; 2058 uvm_va_block_context_t *block_context = NULL; 2059 struct mm_struct *mm = NULL; 2060 2061 retained_gpus = uvm_processor_mask_cache_alloc(); 2062 if (!retained_gpus) 2063 return NV_ERR_NO_MEMORY; 2064 2065 uvm_processor_mask_zero(retained_gpus); 2066 2067 mm = uvm_va_space_mm_or_current_retain(va_space); 2068 2069 status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, mm, &stage_mem); 2070 if (status != NV_OK) 2071 goto exit; 2072 2073 block_context = uvm_va_block_context_alloc(mm); 2074 if (!block_context) { 2075 status = NV_ERR_NO_MEMORY; 2076 goto exit; 2077 } 2078 2079 stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem); 2080 *bytes = 0; 2081 2082 while (*bytes < size) { 2083 uvm_gpu_t *gpu; 2084 uvm_va_block_t *block; 2085 void *user_va_start = (void *) (user_va + *bytes); 2086 NvU64 target_va_start = target_va + *bytes; 2087 NvU64 bytes_left = size - *bytes; 2088 NvU64 page_offset = target_va_start & (PAGE_SIZE - 1); 2089 NvU64 bytes_now = min(bytes_left, (NvU64)(PAGE_SIZE - page_offset)); 2090 bool map_stage_mem_on_gpus = true; 2091 2092 if (is_write) { 2093 NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now); 2094 if (remaining != 0) { 2095 status = NV_ERR_INVALID_ARGUMENT; 2096 goto exit; 2097 } 2098 } 2099 2100 if (mm) 2101 uvm_down_read_mmap_lock(mm); 2102 2103 // The RM flavor of the lock is needed to perform ECC checks. 2104 uvm_va_space_down_read_rm(va_space); 2105 if (mm) 2106 status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block_context->hmm.vma, &block); 2107 else 2108 status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block); 2109 2110 if (status != NV_OK) 2111 goto unlock_and_exit; 2112 2113 // When CC is enabled, the staging memory cannot be mapped on the GPU 2114 // (it is protected sysmem), but it is still used to store the 2115 // unencrypted version of the page contents when the page is resident 2116 // on vidmem. 2117 if (g_uvm_global.conf_computing_enabled) 2118 map_stage_mem_on_gpus = false; 2119 2120 if (map_stage_mem_on_gpus) { 2121 for_each_gpu_in_mask(gpu, &va_space->registered_gpus) { 2122 if (uvm_processor_mask_test_and_set(retained_gpus, gpu->id)) 2123 continue; 2124 2125 // The retention of each GPU ensures that the staging memory is 2126 // freed before the unregistration of any of the GPUs is mapped 2127 // on. Each GPU is retained once. 2128 uvm_gpu_retain(gpu); 2129 2130 // Accessing the VA block may result in copying data between the 2131 // CPU and a GPU. Conservatively add virtual mappings to all the 2132 // GPUs (even if those mappings may never be used) as tools 2133 // read/write is not on a performance critical path. 2134 status = uvm_mem_map_gpu_kernel(stage_mem, gpu); 2135 if (status != NV_OK) 2136 goto unlock_and_exit; 2137 } 2138 } 2139 else { 2140 UVM_ASSERT(uvm_processor_mask_empty(retained_gpus)); 2141 } 2142 2143 // Make sure a CPU resident page has an up to date struct page pointer. 2144 if (uvm_va_block_is_hmm(block)) { 2145 status = uvm_hmm_va_block_update_residency_info(block, mm, UVM_PAGE_ALIGN_DOWN(target_va_start), true); 2146 if (status != NV_OK) 2147 goto unlock_and_exit; 2148 } 2149 2150 status = tools_access_va_block(block, block_context, target_va_start, bytes_now, is_write, stage_mem); 2151 2152 // For simplicity, check for ECC errors on all GPUs registered in the VA 2153 // space 2154 if (status == NV_OK) 2155 status = uvm_global_gpu_check_ecc_error(&va_space->registered_gpus); 2156 2157 uvm_va_space_up_read_rm(va_space); 2158 if (mm) 2159 uvm_up_read_mmap_lock(mm); 2160 2161 if (status != NV_OK) 2162 goto exit; 2163 2164 if (!is_write) { 2165 NvU64 remaining; 2166 2167 // Prevent processor speculation prior to accessing user-mapped 2168 // memory to avoid leaking information from side-channel attacks. 2169 // Under speculation, a valid VA range which does not contain 2170 // target_va could be used, and the block index could run off the 2171 // end of the array. Information about the state of that kernel 2172 // memory could be inferred if speculative execution gets to the 2173 // point where the data is copied out. 2174 nv_speculation_barrier(); 2175 2176 remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now); 2177 if (remaining > 0) { 2178 status = NV_ERR_INVALID_ARGUMENT; 2179 goto exit; 2180 } 2181 } 2182 2183 *bytes += bytes_now; 2184 } 2185 2186 unlock_and_exit: 2187 if (status != NV_OK) { 2188 uvm_va_space_up_read_rm(va_space); 2189 if (mm) 2190 uvm_up_read_mmap_lock(mm); 2191 } 2192 2193 exit: 2194 uvm_va_block_context_free(block_context); 2195 2196 uvm_mem_free(stage_mem); 2197 2198 uvm_global_gpu_release(retained_gpus); 2199 2200 uvm_va_space_mm_or_current_release(va_space, mm); 2201 2202 uvm_processor_mask_cache_free(retained_gpus); 2203 2204 return status; 2205 } 2206 2207 NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp) 2208 { 2209 return tools_access_process_memory(uvm_va_space_get(filp), 2210 params->targetVa, 2211 params->size, 2212 params->buffer, 2213 ¶ms->bytesRead, 2214 false); 2215 } 2216 2217 NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp) 2218 { 2219 return tools_access_process_memory(uvm_va_space_get(filp), 2220 params->targetVa, 2221 params->size, 2222 params->buffer, 2223 ¶ms->bytesWritten, 2224 true); 2225 } 2226 2227 NV_STATUS uvm_test_inject_tools_event(UVM_TEST_INJECT_TOOLS_EVENT_PARAMS *params, struct file *filp) 2228 { 2229 NvU32 i; 2230 uvm_va_space_t *va_space = uvm_va_space_get(filp); 2231 2232 if (params->entry.eventData.eventType >= UvmEventNumTypesAll) 2233 return NV_ERR_INVALID_ARGUMENT; 2234 2235 uvm_down_read(&va_space->tools.lock); 2236 for (i = 0; i < params->count; i++) 2237 uvm_tools_record_event(va_space, ¶ms->entry); 2238 uvm_up_read(&va_space->tools.lock); 2239 return NV_OK; 2240 } 2241 2242 NV_STATUS uvm_test_increment_tools_counter(UVM_TEST_INCREMENT_TOOLS_COUNTER_PARAMS *params, struct file *filp) 2243 { 2244 NvU32 i; 2245 uvm_va_space_t *va_space = uvm_va_space_get(filp); 2246 2247 if (params->counter >= UVM_TOTAL_COUNTERS) 2248 return NV_ERR_INVALID_ARGUMENT; 2249 2250 uvm_down_read(&va_space->tools.lock); 2251 for (i = 0; i < params->count; i++) 2252 uvm_tools_inc_counter(va_space, params->counter, params->amount, ¶ms->processor); 2253 uvm_up_read(&va_space->tools.lock); 2254 2255 return NV_OK; 2256 } 2257 2258 NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS *params, struct file *filp) 2259 { 2260 NvProcessorUuid *uuids; 2261 NvU64 remaining; 2262 uvm_gpu_t *gpu; 2263 NvU32 count = params->count; 2264 uvm_va_space_t *va_space = uvm_va_space_get(filp); 2265 2266 // Prior to Multi-MIG support, params->count was always zero meaning the 2267 // input array was size UVM_MAX_PROCESSORS or 33 at that time. 2268 if (count == 0) 2269 count = 33; 2270 else if (count > UVM_ID_MAX_PROCESSORS) 2271 count = UVM_ID_MAX_PROCESSORS; 2272 2273 uuids = uvm_kvmalloc_zero(sizeof(NvProcessorUuid) * count); 2274 if (uuids == NULL) 2275 return NV_ERR_NO_MEMORY; 2276 2277 uvm_uuid_copy(&uuids[UVM_ID_CPU_VALUE], &NV_PROCESSOR_UUID_CPU_DEFAULT); 2278 params->count = 1; 2279 2280 uvm_va_space_down_read(va_space); 2281 for_each_va_space_gpu(gpu, va_space) { 2282 NvU32 id_value; 2283 const NvProcessorUuid *uuid; 2284 2285 id_value = uvm_parent_id_value(gpu->parent->id); 2286 uuid = &gpu->parent->uuid; 2287 2288 if (id_value < count) 2289 uvm_uuid_copy(&uuids[id_value], uuid); 2290 2291 // Return the actual count even if the UUID isn't returned due to 2292 // limited input array size. 2293 if (id_value + 1 > params->count) 2294 params->count = id_value + 1; 2295 } 2296 uvm_va_space_up_read(va_space); 2297 2298 remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * count); 2299 uvm_kvfree(uuids); 2300 2301 if (remaining != 0) 2302 return NV_ERR_INVALID_ADDRESS; 2303 2304 return NV_OK; 2305 } 2306 2307 void uvm_tools_flush_events(void) 2308 { 2309 tools_schedule_completed_events(); 2310 2311 nv_kthread_q_flush(&g_tools_queue); 2312 } 2313 2314 NV_STATUS uvm_api_tools_flush_events(UVM_TOOLS_FLUSH_EVENTS_PARAMS *params, struct file *filp) 2315 { 2316 uvm_tools_flush_events(); 2317 return NV_OK; 2318 } 2319 2320 NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp) 2321 { 2322 NV_STATUS status = NV_OK; 2323 uvm_gpu_t *gpu = NULL; 2324 uvm_va_space_t *va_space = uvm_va_space_get(filp); 2325 2326 gpu = uvm_va_space_retain_gpu_by_uuid(va_space, ¶ms->gpuUuid); 2327 if (!gpu) 2328 return NV_ERR_INVALID_DEVICE; 2329 2330 // Wait for register-based fault clears to queue the replay event 2331 if (!gpu->parent->has_clear_faulted_channel_method) { 2332 uvm_parent_gpu_non_replayable_faults_isr_lock(gpu->parent); 2333 uvm_parent_gpu_non_replayable_faults_isr_unlock(gpu->parent); 2334 } 2335 2336 // Wait for pending fault replay methods to complete (replayable faults on 2337 // all GPUs, and non-replayable faults on method-based GPUs). 2338 status = uvm_channel_manager_wait(gpu->channel_manager); 2339 2340 // Flush any pending events even if (status != NV_OK) 2341 uvm_tools_flush_events(); 2342 uvm_gpu_release(gpu); 2343 2344 return status; 2345 } 2346 2347 static const struct file_operations uvm_tools_fops = 2348 { 2349 .open = uvm_tools_open_entry, 2350 .release = uvm_tools_release_entry, 2351 .unlocked_ioctl = uvm_tools_unlocked_ioctl_entry, 2352 #if NVCPU_IS_X86_64 2353 .compat_ioctl = uvm_tools_unlocked_ioctl_entry, 2354 #endif 2355 .poll = uvm_tools_poll_entry, 2356 .owner = THIS_MODULE, 2357 }; 2358 2359 static void _uvm_tools_destroy_cache_all(void) 2360 { 2361 // The pointers are initialized to NULL, 2362 // it's safe to call destroy on all of them. 2363 kmem_cache_destroy_safe(&g_tools_event_tracker_cache); 2364 kmem_cache_destroy_safe(&g_tools_block_migration_data_cache); 2365 kmem_cache_destroy_safe(&g_tools_migration_data_cache); 2366 kmem_cache_destroy_safe(&g_tools_replay_data_cache); 2367 kmem_cache_destroy_safe(&g_tools_block_map_remote_data_cache); 2368 kmem_cache_destroy_safe(&g_tools_map_remote_data_cache); 2369 } 2370 2371 int uvm_tools_init(dev_t uvm_base_dev) 2372 { 2373 dev_t uvm_tools_dev = MKDEV(MAJOR(uvm_base_dev), NVIDIA_UVM_TOOLS_MINOR_NUMBER); 2374 int ret = -ENOMEM; // This will be updated later if allocations succeed 2375 2376 uvm_init_rwsem(&g_tools_va_space_list_lock, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST); 2377 2378 g_tools_event_tracker_cache = NV_KMEM_CACHE_CREATE("uvm_tools_event_tracker_t", 2379 uvm_tools_event_tracker_t); 2380 if (!g_tools_event_tracker_cache) 2381 goto err_cache_destroy; 2382 2383 g_tools_block_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_migration_data_t", 2384 block_migration_data_t); 2385 if (!g_tools_block_migration_data_cache) 2386 goto err_cache_destroy; 2387 2388 g_tools_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_migration_data_t", 2389 migration_data_t); 2390 if (!g_tools_migration_data_cache) 2391 goto err_cache_destroy; 2392 2393 g_tools_replay_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_replay_data_t", 2394 replay_data_t); 2395 if (!g_tools_replay_data_cache) 2396 goto err_cache_destroy; 2397 2398 g_tools_block_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_map_remote_data_t", 2399 block_map_remote_data_t); 2400 if (!g_tools_block_map_remote_data_cache) 2401 goto err_cache_destroy; 2402 2403 g_tools_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_map_remote_data_t", 2404 map_remote_data_t); 2405 if (!g_tools_map_remote_data_cache) 2406 goto err_cache_destroy; 2407 2408 uvm_spin_lock_init(&g_tools_channel_list_lock, UVM_LOCK_ORDER_LEAF); 2409 2410 ret = nv_kthread_q_init(&g_tools_queue, "UVM Tools Event Queue"); 2411 if (ret < 0) 2412 goto err_cache_destroy; 2413 2414 uvm_init_character_device(&g_uvm_tools_cdev, &uvm_tools_fops); 2415 ret = cdev_add(&g_uvm_tools_cdev, uvm_tools_dev, 1); 2416 if (ret != 0) { 2417 UVM_ERR_PRINT("cdev_add (major %u, minor %u) failed: %d\n", MAJOR(uvm_tools_dev), 2418 MINOR(uvm_tools_dev), ret); 2419 goto err_stop_thread; 2420 } 2421 2422 return ret; 2423 2424 err_stop_thread: 2425 nv_kthread_q_stop(&g_tools_queue); 2426 2427 err_cache_destroy: 2428 _uvm_tools_destroy_cache_all(); 2429 return ret; 2430 } 2431 2432 void uvm_tools_exit(void) 2433 { 2434 unsigned i; 2435 cdev_del(&g_uvm_tools_cdev); 2436 2437 nv_kthread_q_stop(&g_tools_queue); 2438 2439 for (i = 0; i < UvmEventNumTypesAll; ++i) 2440 UVM_ASSERT(g_tools_enabled_event_count[i] == 0); 2441 2442 UVM_ASSERT(list_empty(&g_tools_va_space_list)); 2443 2444 _uvm_tools_destroy_cache_all(); 2445 } 2446