/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/
#include "uvm_common.h"
#include "uvm_ioctl.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_tools.h"
#include "uvm_va_space.h"
#include "uvm_api.h"
#include "uvm_hal_types.h"
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_push.h"
#include "uvm_forward_decl.h"
#include "uvm_range_group.h"
#include "uvm_mem.h"
#include "nv_speculation_barrier.h"

// We limit the number of times a page can be retained by the kernel
// to prevent the user from maliciously passing UVM tools the same page
// over and over again in an attempt to overflow the refcount.
#define MAX_PAGE_COUNT (1 << 20)

typedef struct
{
    NvU32 get_ahead;
    NvU32 get_behind;
    NvU32 put_ahead;
    NvU32 put_behind;
} uvm_tools_queue_snapshot_t;

typedef struct
{
    uvm_spinlock_t lock;
    NvU64 subscribed_queues;
    struct list_head queue_nodes[UvmEventNumTypesAll];

    struct page **queue_buffer_pages;
    UvmEventEntry *queue;
    NvU32 queue_buffer_count;
    NvU32 notification_threshold;

    struct page **control_buffer_pages;
    UvmToolsEventControlData *control;

    wait_queue_head_t wait_queue;
    bool is_wakeup_get_valid;
    NvU32 wakeup_get;
} uvm_tools_queue_t;

typedef struct
{
    struct list_head counter_nodes[UVM_TOTAL_COUNTERS];
    NvU64 subscribed_counters;

    struct page **counter_buffer_pages;
    NvU64 *counters;

    bool all_processors;
    NvProcessorUuid processor;
} uvm_tools_counter_t;

// private_data for /dev/nvidia-uvm-tools
typedef struct
{
    bool is_queue;
    struct file *uvm_file;
    union
    {
        uvm_tools_queue_t queue;
        uvm_tools_counter_t counter;
    };
} uvm_tools_event_tracker_t;

// Delayed events
//
// Events that require gpu timestamps for asynchronous operations use a delayed
// notification mechanism. Each event type registers a callback that is invoked
// from the update_progress channel routines. The callback then enqueues a
// work item that takes care of notifying the events. This module keeps a
// global list of channels with pending events.
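//
// Illustrative flow for a delayed, GPU-timestamped event (see the migration
// path in this file): uvm_tools_record_block_migration_begin() allocates a
// block_migration_data_t, registers on_block_migration_complete() as the
// push's on_complete callback, and adds the channel to the pending list.
// When channel progress sees the push complete, the callback snapshots the
// GPU timestamps and schedules record_migration_events() on g_tools_queue,
// which finally enqueues the UvmEventEntry records under the tools lock.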
// Other modules or user apps (via ioctl) may call uvm_tools_flush_events to
// update the progress of the channels in the list, as needed.
//
// User apps will need to flush events before removing GPUs to avoid getting
// events with GPU ids that have been removed.

// This object describes the pending migration operations within a VA block
typedef struct
{
    nv_kthread_q_item_t queue_item;
    uvm_processor_id_t dst;
    uvm_processor_id_t src;
    uvm_va_space_t *va_space;

    uvm_channel_t *channel;
    struct list_head events;
    NvU64 start_timestamp_cpu;
    NvU64 end_timestamp_cpu;
    NvU64 *start_timestamp_gpu_addr;
    NvU64 start_timestamp_gpu;
    NvU64 range_group_id;
} block_migration_data_t;

// This object represents a specific pending migration within a VA block
typedef struct
{
    struct list_head events_node;
    NvU64 bytes;
    NvU64 address;
    NvU64 *end_timestamp_gpu_addr;
    NvU64 end_timestamp_gpu;
    UvmEventMigrationCause cause;
} migration_data_t;

// This object represents a pending gpu fault replay operation
typedef struct
{
    nv_kthread_q_item_t queue_item;
    uvm_channel_t *channel;
    uvm_gpu_id_t gpu_id;
    NvU32 batch_id;
    uvm_fault_client_type_t client_type;
    NvU64 timestamp;
    NvU64 timestamp_gpu;
    NvU64 *timestamp_gpu_addr;
} replay_data_t;

// This object describes the pending map remote operations within a VA block
typedef struct
{
    nv_kthread_q_item_t queue_item;
    uvm_processor_id_t src;
    uvm_processor_id_t dst;
    UvmEventMapRemoteCause cause;
    NvU64 timestamp;
    uvm_va_space_t *va_space;

    uvm_channel_t *channel;
    struct list_head events;
} block_map_remote_data_t;

// This object represents a pending map remote operation
typedef struct
{
    struct list_head events_node;

    NvU64 address;
    NvU64 size;
    NvU64 timestamp_gpu;
    NvU64 *timestamp_gpu_addr;
} map_remote_data_t;


static struct cdev g_uvm_tools_cdev;
static LIST_HEAD(g_tools_va_space_list);
static NvU32 g_tools_enabled_event_count[UvmEventNumTypesAll];
static uvm_rw_semaphore_t g_tools_va_space_list_lock;
static struct kmem_cache *g_tools_event_tracker_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_block_migration_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_migration_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_replay_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_block_map_remote_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_map_remote_data_cache __read_mostly = NULL;
static uvm_spinlock_t g_tools_channel_list_lock;
static LIST_HEAD(g_tools_channel_list);
static nv_kthread_q_t g_tools_queue;

static NV_STATUS tools_update_status(uvm_va_space_t *va_space);

static uvm_tools_event_tracker_t *tools_event_tracker(struct file *filp)
{
    return (uvm_tools_event_tracker_t *)atomic_long_read((atomic_long_t *)&filp->private_data);
}

static bool tracker_is_queue(uvm_tools_event_tracker_t *event_tracker)
{
    return event_tracker != NULL && event_tracker->is_queue;
}

static bool tracker_is_counter(uvm_tools_event_tracker_t *event_tracker)
{
    return event_tracker != NULL && !event_tracker->is_queue;
}

static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *event_tracker)
{
    uvm_va_space_t *va_space;
    UVM_ASSERT(event_tracker->uvm_file);
    va_space = uvm_va_space_get(event_tracker->uvm_file);
    return va_space;
}

static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)
{
    NvU64 i;

    for (i = 0; i < page_count; i++) {
        set_page_dirty(pages[i]);
        NV_UNPIN_USER_PAGE(pages[i]);
    }
}

static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
{
    size = DIV_ROUND_UP(size, PAGE_SIZE);
    vunmap((NvU8 *)addr);
    uvm_put_user_pages_dirty(pages, size);
    uvm_kvfree(pages);
}

// This must be called with the mmap_lock held in read mode or better.
static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
{
    struct vm_area_struct *vma;
    NvU64 addr = start_va;
    NvU64 region_end = start_va + size;

    do {
        vma = find_vma(mm, addr);
        if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
            return NV_ERR_INVALID_ARGUMENT;

        addr = vma->vm_end;
    } while (addr < region_end);

    return NV_OK;
}

// Map virtual memory of data from [user_va, user_va + size) of current process into kernel.
// Sets *addr to kernel mapping and *pages to the array of struct pages that contain the memory.
static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
{
    NV_STATUS status = NV_OK;
    long ret = 0;
    long num_pages;
    long i;

    *addr = NULL;
    *pages = NULL;
    num_pages = DIV_ROUND_UP(size, PAGE_SIZE);

    if (uvm_api_range_invalid(user_va, num_pages * PAGE_SIZE)) {
        status = NV_ERR_INVALID_ADDRESS;
        goto fail;
    }

    *pages = uvm_kvmalloc(sizeof(struct page *) * num_pages);
    if (*pages == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    // Although uvm_down_read_mmap_lock() is preferable due to its participation
    // in the UVM lock dependency tracker, it cannot be used here. That's
    // because pin_user_pages() may fault in HMM pages which are GPU-resident.
    // When that happens, the UVM page fault handler would record another
    // mmap_read_lock() on the same thread as this one, leading to a false
    // positive lock dependency report.
    //
    // Therefore, use the lower level nv_mmap_read_lock() here.
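    //
    // Note: check_vmas() below, called under the same lock, rejects ranges
    // that are not fully covered by VMAs or that overlap nvidia-uvm file
    // mappings before any pages are pinned.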
    nv_mmap_read_lock(current->mm);
    status = check_vmas(current->mm, user_va, size);
    if (status != NV_OK) {
        nv_mmap_read_unlock(current->mm);
        goto fail;
    }
    ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, NULL);
    nv_mmap_read_unlock(current->mm);

    if (ret != num_pages) {
        status = NV_ERR_INVALID_ARGUMENT;
        goto fail;
    }

    for (i = 0; i < num_pages; i++) {
        if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
            status = NV_ERR_INVALID_ARGUMENT;
            goto fail;
        }
    }

    *addr = vmap(*pages, num_pages, VM_MAP, PAGE_KERNEL);
    if (*addr == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    return NV_OK;

fail:
    if (*pages == NULL)
        return status;

    if (ret > 0)
        uvm_put_user_pages_dirty(*pages, ret);
    else if (ret < 0)
        status = errno_to_nv_status(ret);

    uvm_kvfree(*pages);
    *pages = NULL;
    return status;
}

static void insert_event_tracker(uvm_va_space_t *va_space,
                                 struct list_head *node,
                                 NvU32 list_count,
                                 NvU64 list_mask,
                                 NvU64 *subscribed_mask,
                                 struct list_head *lists,
                                 NvU64 *inserted_lists)
{
    NvU32 i;
    NvU64 insertable_lists = list_mask & ~*subscribed_mask;

    uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    for (i = 0; i < list_count; i++) {
        if (insertable_lists & (1ULL << i)) {
            ++g_tools_enabled_event_count[i];
            list_add(node + i, lists + i);
        }
    }

    *subscribed_mask |= list_mask;
    *inserted_lists = insertable_lists;
}

static void remove_event_tracker(uvm_va_space_t *va_space,
                                 struct list_head *node,
                                 NvU32 list_count,
                                 NvU64 list_mask,
                                 NvU64 *subscribed_mask)
{
    NvU32 i;
    NvU64 removable_lists = list_mask & *subscribed_mask;

    uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    for (i = 0; i < list_count; i++) {
        if (removable_lists & (1ULL << i)) {
            UVM_ASSERT(g_tools_enabled_event_count[i] > 0);
            --g_tools_enabled_event_count[i];
            list_del(node + i);
        }
    }

    *subscribed_mask &= ~list_mask;
}

static bool queue_needs_wakeup(uvm_tools_queue_t *queue, uvm_tools_queue_snapshot_t *sn)
{
    NvU32 queue_mask = queue->queue_buffer_count - 1;

    uvm_assert_spinlock_locked(&queue->lock);
    return ((queue->queue_buffer_count + sn->put_behind - sn->get_ahead) & queue_mask) >= queue->notification_threshold;
}

static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
{
    if (event_tracker->uvm_file != NULL) {
        NV_STATUS status;
        uvm_va_space_t *va_space = tools_event_tracker_va_space(event_tracker);

        uvm_down_write(&g_tools_va_space_list_lock);
        uvm_down_write(&va_space->perf_events.lock);
        uvm_down_write(&va_space->tools.lock);

        if (event_tracker->is_queue) {
            uvm_tools_queue_t *queue = &event_tracker->queue;

            remove_event_tracker(va_space,
                                 queue->queue_nodes,
                                 UvmEventNumTypesAll,
                                 queue->subscribed_queues,
                                 &queue->subscribed_queues);

            if (queue->queue != NULL) {
                unmap_user_pages(queue->queue_buffer_pages,
                                 queue->queue,
                                 queue->queue_buffer_count * sizeof(UvmEventEntry));
            }

            if (queue->control != NULL) {
                unmap_user_pages(queue->control_buffer_pages,
                                 queue->control,
                                 sizeof(UvmToolsEventControlData));
            }
        }
        else {
            uvm_tools_counter_t *counters = &event_tracker->counter;

            remove_event_tracker(va_space,
                                 counters->counter_nodes,
                                 UVM_TOTAL_COUNTERS,
                                 counters->subscribed_counters,
                                 &counters->subscribed_counters);

            if (counters->counters != NULL) {
                unmap_user_pages(counters->counter_buffer_pages,
                                 counters->counters,
                                 UVM_TOTAL_COUNTERS * sizeof(NvU64));
            }
        }

        // de-registration should not fail
        status = tools_update_status(va_space);
        UVM_ASSERT(status == NV_OK);

        uvm_up_write(&va_space->tools.lock);
        uvm_up_write(&va_space->perf_events.lock);
        uvm_up_write(&g_tools_va_space_list_lock);

        fput(event_tracker->uvm_file);
    }
    kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
}

static void enqueue_event(const UvmEventEntry *entry, uvm_tools_queue_t *queue)
{
    UvmToolsEventControlData *ctrl = queue->control;
    uvm_tools_queue_snapshot_t sn;
    NvU32 queue_size = queue->queue_buffer_count;
    NvU32 queue_mask = queue_size - 1;

    // Prevent processor speculation prior to accessing user-mapped memory to
    // avoid leaking information from side-channel attacks. There are many
    // possible paths leading to this point and it would be difficult and
    // error-prone to audit all of them to determine whether user mode could
    // guide this access to kernel memory under speculative execution, so to be
    // on the safe side we'll just always block speculation.
    nv_speculation_barrier();

    uvm_spin_lock(&queue->lock);

    // ctrl is mapped into user space with read and write permissions,
    // so its values cannot be trusted.
    sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
    sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
    sn.put_ahead = (sn.put_behind + 1) & queue_mask;

    // one free element means that the queue is full
    if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
        atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
        goto unlock;
    }

    memcpy(queue->queue + sn.put_behind, entry, sizeof(*entry));

    sn.put_behind = sn.put_ahead;
    // put_ahead and put_behind will always be the same outside of queue->lock.
    // This allows the user-space consumer to choose either a 2 or 4 pointer synchronization approach.
    atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
    atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);

    sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
    // if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead
    if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
        queue->is_wakeup_get_valid = true;
        queue->wakeup_get = sn.get_ahead;
        wake_up_all(&queue->wait_queue);
    }

unlock:
    uvm_spin_unlock(&queue->lock);
}

static void uvm_tools_record_event(uvm_va_space_t *va_space, const UvmEventEntry *entry)
{
    NvU8 eventType = entry->eventData.eventType;
    uvm_tools_queue_t *queue;

    UVM_ASSERT(eventType < UvmEventNumTypesAll);

    uvm_assert_rwsem_locked(&va_space->tools.lock);

    list_for_each_entry(queue, va_space->tools.queues + eventType, queue_nodes[eventType])
        enqueue_event(entry, queue);
}

static void uvm_tools_broadcast_event(const UvmEventEntry *entry)
{
    uvm_va_space_t *va_space;

    uvm_down_read(&g_tools_va_space_list_lock);
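    // Deliver the entry to every VA space that currently has tools enabled;
    // uvm_tools_record_event() only copies it into queues subscribed to this
    // event type.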
    list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
        uvm_down_read(&va_space->tools.lock);
        uvm_tools_record_event(va_space, entry);
        uvm_up_read(&va_space->tools.lock);
    }
    uvm_up_read(&g_tools_va_space_list_lock);
}

static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
{
    // For compatibility with older counters, CPU faults for memory with a preferred location are reported
    // for their preferred location as well as for the CPU device itself.
    // This check prevents double counting in the aggregate count.
    if (counter == UvmCounterNameCpuPageFaultCount)
        return uvm_processor_uuid_eq(processor, &NV_PROCESSOR_UUID_CPU_DEFAULT);
    return true;
}

static void uvm_tools_inc_counter(uvm_va_space_t *va_space,
                                  UvmCounterName counter,
                                  NvU64 amount,
                                  const NvProcessorUuid *processor)
{
    UVM_ASSERT((NvU32)counter < UVM_TOTAL_COUNTERS);
    uvm_assert_rwsem_locked(&va_space->tools.lock);

    if (amount > 0) {
        uvm_tools_counter_t *counters;

        // Prevent processor speculation prior to accessing user-mapped memory
        // to avoid leaking information from side-channel attacks. There are
        // many possible paths leading to this point and it would be difficult
        // and error-prone to audit all of them to determine whether user mode
        // could guide this access to kernel memory under speculative execution,
        // so to be on the safe side we'll just always block speculation.
        nv_speculation_barrier();

        list_for_each_entry(counters, va_space->tools.counters + counter, counter_nodes[counter]) {
            if ((counters->all_processors && counter_matches_processor(counter, processor)) ||
                uvm_processor_uuid_eq(&counters->processor, processor)) {
                atomic64_add(amount, (atomic64_t *)(counters->counters + counter));
            }
        }
    }
}

static bool tools_is_counter_enabled(uvm_va_space_t *va_space, UvmCounterName counter)
{
    uvm_assert_rwsem_locked(&va_space->tools.lock);

    UVM_ASSERT(counter < UVM_TOTAL_COUNTERS);
    return !list_empty(va_space->tools.counters + counter);
}

static bool tools_is_event_enabled(uvm_va_space_t *va_space, UvmEventType event)
{
    uvm_assert_rwsem_locked(&va_space->tools.lock);

    UVM_ASSERT(event < UvmEventNumTypesAll);
    return !list_empty(va_space->tools.queues + event);
}

static bool tools_is_event_enabled_in_any_va_space(UvmEventType event)
{
    bool ret = false;

    uvm_down_read(&g_tools_va_space_list_lock);
    ret = g_tools_enabled_event_count[event] != 0;
    uvm_up_read(&g_tools_va_space_list_lock);

    return ret;
}

static bool tools_are_enabled(uvm_va_space_t *va_space)
{
    NvU32 i;

    uvm_assert_rwsem_locked(&va_space->tools.lock);

    for (i = 0; i < UVM_TOTAL_COUNTERS; i++) {
        if (tools_is_counter_enabled(va_space, i))
            return true;
    }
    for (i = 0; i < UvmEventNumTypesAll; i++) {
        if (tools_is_event_enabled(va_space, i))
            return true;
    }
    return false;
}

static bool tools_is_fault_callback_needed(uvm_va_space_t *va_space)
{
    return tools_is_event_enabled(va_space, UvmEventTypeCpuFault) ||
           tools_is_event_enabled(va_space, UvmEventTypeGpuFault) ||
           tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount) ||
           tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount);
}

static bool tools_is_migration_callback_needed(uvm_va_space_t *va_space)
{
    return tools_is_event_enabled(va_space, UvmEventTypeMigration) ||
           tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate) ||
           tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH) ||
           tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD);
}

static int uvm_tools_open(struct inode *inode, struct file *filp)
{
    filp->private_data = NULL;
    return -nv_status_to_errno(uvm_global_get_status());
}

static int uvm_tools_open_entry(struct inode *inode, struct file *filp)
{
    UVM_ENTRY_RET(uvm_tools_open(inode, filp));
}

static int uvm_tools_release(struct inode *inode, struct file *filp)
{
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
    if (event_tracker != NULL) {
        destroy_event_tracker(event_tracker);
        filp->private_data = NULL;
    }
    return -nv_status_to_errno(uvm_global_get_status());
}

static int uvm_tools_release_entry(struct inode *inode, struct file *filp)
{
    UVM_ENTRY_RET(uvm_tools_release(inode, filp));
}

static long uvm_tools_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    switch (cmd) {
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_INIT_EVENT_TRACKER, uvm_api_tools_init_event_tracker);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD, uvm_api_tools_set_notification_threshold);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS, uvm_api_tools_event_queue_enable_events);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS, uvm_api_tools_event_queue_disable_events);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_ENABLE_COUNTERS, uvm_api_tools_enable_counters);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_DISABLE_COUNTERS, uvm_api_tools_disable_counters);
    }

    uvm_thread_assert_all_unlocked();

    return -EINVAL;
}

static long uvm_tools_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
{
    UVM_ENTRY_RET(uvm_tools_unlocked_ioctl(filp, cmd, arg));
}

static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
{
    int flags = 0;
    uvm_tools_queue_snapshot_t sn;
    uvm_tools_event_tracker_t *event_tracker;
    UvmToolsEventControlData *ctrl;

    if (uvm_global_get_status() != NV_OK)
        return POLLERR;

    event_tracker = tools_event_tracker(filp);
    if (!tracker_is_queue(event_tracker))
        return POLLERR;

    uvm_spin_lock(&event_tracker->queue.lock);

    event_tracker->queue.is_wakeup_get_valid = false;
    ctrl = event_tracker->queue.control;
    sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
    sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);

    if (queue_needs_wakeup(&event_tracker->queue, &sn))
        flags = POLLIN | POLLRDNORM;

    uvm_spin_unlock(&event_tracker->queue.lock);

    poll_wait(filp, &event_tracker->queue.wait_queue, wait);
    return flags;
}

static unsigned uvm_tools_poll_entry(struct file *filp, poll_table *wait)
{
    UVM_ENTRY_RET(uvm_tools_poll(filp, wait));
}

static UvmEventFaultType g_hal_to_tools_fault_type_table[UVM_FAULT_TYPE_COUNT] = {
    [UVM_FAULT_TYPE_INVALID_PDE] = UvmFaultTypeInvalidPde,
    [UVM_FAULT_TYPE_INVALID_PTE] = UvmFaultTypeInvalidPte,
    [UVM_FAULT_TYPE_ATOMIC] = UvmFaultTypeAtomic,
    [UVM_FAULT_TYPE_WRITE] = UvmFaultTypeWrite,
    [UVM_FAULT_TYPE_PDE_SIZE] = UvmFaultTypeInvalidPdeSize,
    [UVM_FAULT_TYPE_VA_LIMIT_VIOLATION] = UvmFaultTypeLimitViolation,
    [UVM_FAULT_TYPE_UNBOUND_INST_BLOCK] = UvmFaultTypeUnboundInstBlock,
    [UVM_FAULT_TYPE_PRIV_VIOLATION] = UvmFaultTypePrivViolation,
    [UVM_FAULT_TYPE_PITCH_MASK_VIOLATION] = UvmFaultTypePitchMaskViolation,
    [UVM_FAULT_TYPE_WORK_CREATION] = UvmFaultTypeWorkCreation,
    [UVM_FAULT_TYPE_UNSUPPORTED_APERTURE] = UvmFaultTypeUnsupportedAperture,
    [UVM_FAULT_TYPE_COMPRESSION_FAILURE] = UvmFaultTypeCompressionFailure,
    [UVM_FAULT_TYPE_UNSUPPORTED_KIND] = UvmFaultTypeUnsupportedKind,
    [UVM_FAULT_TYPE_REGION_VIOLATION] = UvmFaultTypeRegionViolation,
    [UVM_FAULT_TYPE_POISONED] = UvmFaultTypePoison,
};

// TODO: add new value for weak atomics in tools
static UvmEventMemoryAccessType g_hal_to_tools_fault_access_type_table[UVM_FAULT_ACCESS_TYPE_COUNT] = {
    [UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG] = UvmEventMemoryAccessTypeAtomic,
    [UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK] = UvmEventMemoryAccessTypeAtomic,
    [UVM_FAULT_ACCESS_TYPE_WRITE] = UvmEventMemoryAccessTypeWrite,
    [UVM_FAULT_ACCESS_TYPE_READ] = UvmEventMemoryAccessTypeRead,
    [UVM_FAULT_ACCESS_TYPE_PREFETCH] = UvmEventMemoryAccessTypePrefetch
};

static UvmEventApertureType g_hal_to_tools_aperture_table[UVM_APERTURE_MAX] = {
    [UVM_APERTURE_PEER_0] = UvmEventAperturePeer0,
    [UVM_APERTURE_PEER_1] = UvmEventAperturePeer1,
    [UVM_APERTURE_PEER_2] = UvmEventAperturePeer2,
    [UVM_APERTURE_PEER_3] = UvmEventAperturePeer3,
    [UVM_APERTURE_PEER_4] = UvmEventAperturePeer4,
    [UVM_APERTURE_PEER_5] = UvmEventAperturePeer5,
    [UVM_APERTURE_PEER_6] = UvmEventAperturePeer6,
    [UVM_APERTURE_PEER_7] = UvmEventAperturePeer7,
    [UVM_APERTURE_SYS] = UvmEventApertureSys,
    [UVM_APERTURE_VID] = UvmEventApertureVid,
};

static UvmEventFaultClientType g_hal_to_tools_fault_client_type_table[UVM_FAULT_CLIENT_TYPE_COUNT] = {
    [UVM_FAULT_CLIENT_TYPE_GPC] = UvmEventFaultClientTypeGpc,
    [UVM_FAULT_CLIENT_TYPE_HUB] = UvmEventFaultClientTypeHub,
};

static void record_gpu_fault_instance(uvm_gpu_t *gpu,
                                      uvm_va_space_t *va_space,
                                      const uvm_fault_buffer_entry_t *fault_entry,
                                      NvU64 batch_id,
                                      NvU64 timestamp)
{
    UvmEventEntry entry;
    UvmEventGpuFaultInfo *info = &entry.eventData.gpuFault;
    memset(&entry, 0, sizeof(entry));

    info->eventType = UvmEventTypeGpuFault;
    info->gpuIndex = uvm_id_value(gpu->id);
    info->faultType = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
    info->accessType = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
    info->clientType = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
    if (fault_entry->is_replayable)
        info->gpcId = fault_entry->fault_source.gpc_id;
    else
        info->channelId = fault_entry->fault_source.channel_id;
    info->clientId = fault_entry->fault_source.client_id;
    info->address = fault_entry->fault_address;
    info->timeStamp = timestamp;
    info->timeStampGpu = fault_entry->timestamp;
    info->batchId = batch_id;

    uvm_tools_record_event(va_space, &entry);
}

static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    uvm_va_space_t *va_space = event_data->fault.space;

    UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT);
    UVM_ASSERT(event_data->fault.space);

    uvm_assert_rwsem_locked(&va_space->lock);
    uvm_assert_rwsem_locked(&va_space->perf_events.lock);
    UVM_ASSERT(va_space->tools.enabled);

    uvm_down_read(&va_space->tools.lock);
    UVM_ASSERT(tools_is_fault_callback_needed(va_space));

    if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
        if (tools_is_event_enabled(va_space, UvmEventTypeCpuFault)) {
            UvmEventEntry entry;
            UvmEventCpuFaultInfo *info = &entry.eventData.cpuFault;
            memset(&entry, 0, sizeof(entry));

            info->eventType = UvmEventTypeCpuFault;
            if (event_data->fault.cpu.is_write)
                info->accessType = UvmEventMemoryAccessTypeWrite;
            else
                info->accessType = UvmEventMemoryAccessTypeRead;

            info->address = event_data->fault.cpu.fault_va;
            info->timeStamp = NV_GETTIME();
            // assume that current owns va_space
            info->pid = uvm_get_stale_process_id();
            info->threadId = uvm_get_stale_thread_id();
            info->pc = event_data->fault.cpu.pc;

            uvm_tools_record_event(va_space, &entry);
        }
        if (tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount)) {
            uvm_processor_id_t preferred_location;

            // The UVM Lite tools interface did not represent the CPU as a UVM
            // device. It reported CPU faults against the corresponding
            // allocation's 'home location'. Though this driver's tools
            // interface does include a CPU device, for compatibility, the
            // driver still reports faults against a buffer's preferred
            // location, in addition to the CPU.
            uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &NV_PROCESSOR_UUID_CPU_DEFAULT);

            preferred_location = event_data->fault.preferred_location;
            if (UVM_ID_IS_GPU(preferred_location)) {
                uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, preferred_location);
                uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
            }
        }
    }
    else {
        uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->fault.proc_id);
        UVM_ASSERT(gpu);

        if (tools_is_event_enabled(va_space, UvmEventTypeGpuFault)) {
            NvU64 timestamp = NV_GETTIME();
            uvm_fault_buffer_entry_t *fault_entry = event_data->fault.gpu.buffer_entry;
            uvm_fault_buffer_entry_t *fault_instance;

            record_gpu_fault_instance(gpu, va_space, fault_entry, event_data->fault.gpu.batch_id, timestamp);

            list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list)
                record_gpu_fault_instance(gpu, va_space, fault_instance, event_data->fault.gpu.batch_id, timestamp);
        }

        if (tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount))
            uvm_tools_inc_counter(va_space, UvmCounterNameGpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
    }
    uvm_up_read(&va_space->tools.lock);
}

static void add_pending_event_for_channel(uvm_channel_t *channel)
{
    uvm_assert_spinlock_locked(&g_tools_channel_list_lock);

    if (channel->tools.pending_event_count++ == 0)
        list_add_tail(&channel->tools.channel_list_node, &g_tools_channel_list);
}

static void remove_pending_event_for_channel(uvm_channel_t *channel)
{
    uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
    UVM_ASSERT(channel->tools.pending_event_count > 0);
    if (--channel->tools.pending_event_count == 0)
        list_del_init(&channel->tools.channel_list_node);
}


static void record_migration_events(void *args)
{
    block_migration_data_t *block_mig = (block_migration_data_t *)args;
    migration_data_t *mig;
    migration_data_t *next;
    UvmEventEntry entry;
    UvmEventMigrationInfo *info = &entry.eventData.migration;
    uvm_va_space_t *va_space = block_mig->va_space;

    NvU64 gpu_timestamp = block_mig->start_timestamp_gpu;

    // Initialize fields that are constant throughout the whole block
    memset(&entry, 0, sizeof(entry));
    info->eventType = UvmEventTypeMigration;
    info->srcIndex = uvm_id_value(block_mig->src);
    info->dstIndex = uvm_id_value(block_mig->dst);
    info->beginTimeStamp = block_mig->start_timestamp_cpu;
    info->endTimeStamp = block_mig->end_timestamp_cpu;
    info->rangeGroupId = block_mig->range_group_id;

    uvm_down_read(&va_space->tools.lock);
    list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
        UVM_ASSERT(mig->bytes > 0);
        list_del(&mig->events_node);

        info->address = mig->address;
        info->migratedBytes = mig->bytes;
        info->beginTimeStampGpu = gpu_timestamp;
        info->endTimeStampGpu = mig->end_timestamp_gpu;
        info->migrationCause = mig->cause;
        gpu_timestamp = mig->end_timestamp_gpu;
        kmem_cache_free(g_tools_migration_data_cache, mig);

        uvm_tools_record_event(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);

    UVM_ASSERT(list_empty(&block_mig->events));
    kmem_cache_free(g_tools_block_migration_data_cache, block_mig);
}

static void record_migration_events_entry(void *args)
{
    UVM_ENTRY_VOID(record_migration_events(args));
}

static void on_block_migration_complete(void *ptr)
{
    migration_data_t *mig;
    block_migration_data_t *block_mig = (block_migration_data_t *)ptr;

    block_mig->end_timestamp_cpu = NV_GETTIME();
    block_mig->start_timestamp_gpu = *block_mig->start_timestamp_gpu_addr;
    list_for_each_entry(mig, &block_mig->events, events_node)
        mig->end_timestamp_gpu = *mig->end_timestamp_gpu_addr;

    nv_kthread_q_item_init(&block_mig->queue_item, record_migration_events_entry, block_mig);

    // The UVM driver may notice that work in a channel is complete in a variety
    // of situations, and the va_space lock is not always held in all of them,
    // nor can it always be taken safely in them. Dispatching events requires
    // the va_space lock to be held in at least read mode, so this callback
    // simply enqueues the dispatching onto a queue, where the va_space lock is
    // always safe to acquire.
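    //
    // The GPU timestamps were already copied out of the pushbuffer above, so
    // the deferred record_migration_events() work item does not need to touch
    // the channel again.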
    uvm_spin_lock(&g_tools_channel_list_lock);
    remove_pending_event_for_channel(block_mig->channel);
    nv_kthread_q_schedule_q_item(&g_tools_queue, &block_mig->queue_item);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

static void record_replay_event_helper(uvm_gpu_id_t gpu_id,
                                       NvU32 batch_id,
                                       uvm_fault_client_type_t client_type,
                                       NvU64 timestamp,
                                       NvU64 timestamp_gpu)
{
    UvmEventEntry entry;

    memset(&entry, 0, sizeof(entry));
    entry.eventData.gpuFaultReplay.eventType = UvmEventTypeGpuFaultReplay;
    entry.eventData.gpuFaultReplay.gpuIndex = uvm_id_value(gpu_id);
    entry.eventData.gpuFaultReplay.batchId = batch_id;
    entry.eventData.gpuFaultReplay.clientType = g_hal_to_tools_fault_client_type_table[client_type];
    entry.eventData.gpuFaultReplay.timeStamp = timestamp;
    entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;

    uvm_tools_broadcast_event(&entry);
}

static void record_replay_events(void *args)
{
    replay_data_t *replay = (replay_data_t *)args;

    record_replay_event_helper(replay->gpu_id,
                               replay->batch_id,
                               replay->client_type,
                               replay->timestamp,
                               replay->timestamp_gpu);

    kmem_cache_free(g_tools_replay_data_cache, replay);
}

static void record_replay_events_entry(void *args)
{
    UVM_ENTRY_VOID(record_replay_events(args));
}

static void on_replay_complete(void *ptr)
{
    replay_data_t *replay = (replay_data_t *)ptr;
    replay->timestamp_gpu = *replay->timestamp_gpu_addr;

    nv_kthread_q_item_init(&replay->queue_item, record_replay_events_entry, ptr);

    uvm_spin_lock(&g_tools_channel_list_lock);
    remove_pending_event_for_channel(replay->channel);
    nv_kthread_q_schedule_q_item(&g_tools_queue, &replay->queue_item);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

static UvmEventMigrationCause g_make_resident_to_tools_migration_cause[UVM_MAKE_RESIDENT_CAUSE_MAX] = {
    [UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence,
    [UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence,
    [UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER] = UvmEventMigrationCauseAccessCounters,
    [UVM_MAKE_RESIDENT_CAUSE_PREFETCH] = UvmEventMigrationCausePrefetch,
    [UVM_MAKE_RESIDENT_CAUSE_EVICTION] = UvmEventMigrationCauseEviction,
    [UVM_MAKE_RESIDENT_CAUSE_API_TOOLS] = UvmEventMigrationCauseInvalid,
    [UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE] = UvmEventMigrationCauseUser,
    [UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP] = UvmEventMigrationCauseCoherence,
    [UVM_MAKE_RESIDENT_CAUSE_API_HINT] = UvmEventMigrationCauseUser,
};

// This event is notified asynchronously when all the migrations pushed to the
// same uvm_push_t object in a call to block_copy_resident_pages_between have
// finished
static void uvm_tools_record_migration(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    uvm_va_block_t *va_block = event_data->migration.block;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION);

    uvm_assert_mutex_locked(&va_block->lock);
    uvm_assert_rwsem_locked(&va_space->perf_events.lock);
    UVM_ASSERT(va_space->tools.enabled);

    uvm_down_read(&va_space->tools.lock);
    UVM_ASSERT(tools_is_migration_callback_needed(va_space));

    if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
        migration_data_t *mig;
        uvm_push_info_t *push_info = uvm_push_info_from_push(event_data->migration.push);
        block_migration_data_t *block_mig = (block_migration_data_t *)push_info->on_complete_data;

        if (push_info->on_complete != NULL) {
            mig = kmem_cache_alloc(g_tools_migration_data_cache, NV_UVM_GFP_FLAGS);
            if (mig == NULL)
                goto done_unlock;

            mig->address = event_data->migration.address;
            mig->bytes = event_data->migration.bytes;
            mig->end_timestamp_gpu_addr = uvm_push_timestamp(event_data->migration.push);
            mig->cause = g_make_resident_to_tools_migration_cause[event_data->migration.cause];

            list_add_tail(&mig->events_node, &block_mig->events);
        }
    }

    // Increment counters
    if (UVM_ID_IS_CPU(event_data->migration.src) &&
        tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD)) {
        uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.dst);
        uvm_tools_inc_counter(va_space,
                              UvmCounterNameBytesXferHtD,
                              event_data->migration.bytes,
                              uvm_gpu_uuid(gpu));
    }
    if (UVM_ID_IS_CPU(event_data->migration.dst) &&
        tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH)) {
        uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.src);
        uvm_tools_inc_counter(va_space,
                              UvmCounterNameBytesXferDtH,
                              event_data->migration.bytes,
                              uvm_gpu_uuid(gpu));
    }

done_unlock:
    uvm_up_read(&va_space->tools.lock);
}

// This event is notified asynchronously when it is marked as completed in the
// pushbuffer the replay method belongs to.
void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
                                uvm_push_t *push,
                                NvU32 batch_id,
                                uvm_fault_client_type_t client_type)
{
    uvm_push_info_t *push_info = uvm_push_info_from_push(push);
    replay_data_t *replay;

    // Perform delayed notification only if some VA space has signed up for
    // UvmEventTypeGpuFaultReplay
    if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
        return;

    replay = kmem_cache_alloc(g_tools_replay_data_cache, NV_UVM_GFP_FLAGS);
    if (replay == NULL)
        return;

    UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);

    replay->timestamp_gpu_addr = uvm_push_timestamp(push);
    replay->gpu_id = gpu->id;
    replay->batch_id = batch_id;
    replay->client_type = client_type;
    replay->timestamp = NV_GETTIME();
    replay->channel = push->channel;

    push_info->on_complete_data = replay;
    push_info->on_complete = on_replay_complete;

    uvm_spin_lock(&g_tools_channel_list_lock);
    add_pending_event_for_channel(replay->channel);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}


void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
                                     NvU32 batch_id,
                                     uvm_fault_client_type_t client_type)
{
    UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);

    if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
        return;

    record_replay_event_helper(gpu->id,
                               batch_id,
                               client_type,
                               NV_GETTIME(),
                               gpu->parent->host_hal->get_time(gpu));
}

void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
                                        const uvm_access_counter_buffer_entry_t *buffer_entry,
                                        bool on_managed)
{
    UvmEventEntry entry;
    UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter;

    // Perform delayed notification only if some VA space has signed up for
    // UvmEventTypeTestAccessCounter
    if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeTestAccessCounter))
        return;

    if (!buffer_entry->address.is_virtual)
        UVM_ASSERT(UVM_ID_IS_VALID(buffer_entry->physical_info.resident_id));

    memset(&entry, 0, sizeof(entry));

    info->eventType = UvmEventTypeTestAccessCounter;
    info->srcIndex = uvm_id_value(gpu->id);
    info->address = buffer_entry->address.address;
    info->isVirtual = buffer_entry->address.is_virtual ? 1 : 0;
    if (buffer_entry->address.is_virtual) {
        info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
        info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
        info->veId = buffer_entry->virtual_info.ve_id;
    }
    else {
        info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
    }
    info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC ? 1 : 0;
    info->onManaged = on_managed ? 1 : 0;
    info->value = buffer_entry->counter_value;
    info->subGranularity = buffer_entry->sub_granularity;
    info->bank = buffer_entry->bank;
    info->tag = buffer_entry->tag;

    uvm_tools_broadcast_event(&entry);
}

void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
{
    UvmEventEntry entry;

    if (!va_space->tools.enabled)
        return;

    entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
    uvm_down_read(&va_space->tools.lock);
    uvm_tools_record_event(va_space, &entry);
    uvm_up_read(&va_space->tools.lock);
}

// This function is used as a begin marker to group all migrations within a VA
// block that are performed in the same call to
// block_copy_resident_pages_between. All of these are pushed to the same
// uvm_push_t object, and will be notified in burst when the last one finishes.
void uvm_tools_record_block_migration_begin(uvm_va_block_t *va_block,
                                            uvm_push_t *push,
                                            uvm_processor_id_t dst_id,
                                            uvm_processor_id_t src_id,
                                            NvU64 start,
                                            uvm_make_resident_cause_t cause)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_range_group_range_t *range;

    // Calls from tools read/write functions to make_resident must not trigger
    // any migration
    UVM_ASSERT(cause != UVM_MAKE_RESIDENT_CAUSE_API_TOOLS);

    // During evictions the va_space lock is not held.
    if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION)
        uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);

    // Perform delayed notification only if the VA space has signed up for
    // UvmEventTypeMigration
    if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
        block_migration_data_t *block_mig;
        uvm_push_info_t *push_info = uvm_push_info_from_push(push);

        UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);

        block_mig = kmem_cache_alloc(g_tools_block_migration_data_cache, NV_UVM_GFP_FLAGS);
        if (block_mig == NULL)
            goto done_unlock;

        block_mig->start_timestamp_gpu_addr = uvm_push_timestamp(push);
        block_mig->channel = push->channel;
        block_mig->start_timestamp_cpu = NV_GETTIME();
        block_mig->dst = dst_id;
        block_mig->src = src_id;
        block_mig->range_group_id = UVM_RANGE_GROUP_ID_NONE;

        // During evictions, it is not safe to call uvm_range_group_range_find()
        // because the va_space lock is not held.
        if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
            range = uvm_range_group_range_find(va_space, start);
            if (range != NULL)
                block_mig->range_group_id = range->range_group->id;
        }
        block_mig->va_space = va_space;

        INIT_LIST_HEAD(&block_mig->events);
        push_info->on_complete_data = block_mig;
        push_info->on_complete = on_block_migration_complete;

        uvm_spin_lock(&g_tools_channel_list_lock);
        add_pending_event_for_channel(block_mig->channel);
        uvm_spin_unlock(&g_tools_channel_list_lock);
    }

done_unlock:
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
                                     uvm_processor_id_t dst,
                                     uvm_va_block_region_t region,
                                     const uvm_page_mask_t *page_mask)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate)) {
        // Read-duplication events
        UvmEventEntry entry;
        UvmEventReadDuplicateInfo *info_read_duplicate = &entry.eventData.readDuplicate;
        uvm_page_index_t page_index;
        memset(&entry, 0, sizeof(entry));

        info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
        info_read_duplicate->size = PAGE_SIZE;
        info_read_duplicate->timeStamp = NV_GETTIME();

        for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
            uvm_processor_id_t id;
            uvm_processor_mask_t resident_processors;

            info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index);
            info_read_duplicate->processors = 0;

            uvm_va_block_page_resident_processors(va_block, page_index, &resident_processors);
            for_each_id_in_mask(id, &resident_processors)
                info_read_duplicate->processors |= (1 << uvm_id_value(id));

            uvm_tools_record_event(va_space, &entry);
        }
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
                                                uvm_processor_id_t dst,
                                                uvm_va_block_region_t region,
                                                const uvm_page_mask_t *page_mask)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicateInvalidate)) {
        UvmEventEntry entry;
        uvm_page_index_t page_index;
        UvmEventReadDuplicateInvalidateInfo *info = &entry.eventData.readDuplicateInvalidate;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeReadDuplicateInvalidate;
        info->residentIndex = uvm_id_value(dst);
        info->size = PAGE_SIZE;
        info->timeStamp = NV_GETTIME();

        for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
            UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));

            info->address = uvm_va_block_cpu_page_address(va_block, page_index);
            uvm_tools_record_event(va_space, &entry);
        }
    }
    uvm_up_read(&va_space->tools.lock);
}

static void tools_schedule_completed_events(void)
{
    uvm_channel_t *channel;
    uvm_channel_t *next_channel;
    NvU64 channel_count = 0;
    NvU64 i;

    uvm_spin_lock(&g_tools_channel_list_lock);

    // retain every channel list entry currently in the list and keep track of their count.
    list_for_each_entry(channel, &g_tools_channel_list, tools.channel_list_node) {
        ++channel->tools.pending_event_count;
        ++channel_count;
    }
    uvm_spin_unlock(&g_tools_channel_list_lock);

    if (channel_count == 0)
        return;

    // new entries always appear at the end, and all the entries seen in the first loop have been retained
    // so it is safe to go through them
    channel = list_first_entry(&g_tools_channel_list, uvm_channel_t, tools.channel_list_node);
    for (i = 0; i < channel_count; i++) {
        uvm_channel_update_progress_all(channel);
        channel = list_next_entry(channel, tools.channel_list_node);
    }

    // now release all the entries we retained in the beginning
    i = 0;
    uvm_spin_lock(&g_tools_channel_list_lock);
    list_for_each_entry_safe(channel, next_channel, &g_tools_channel_list, tools.channel_list_node) {
        if (i++ == channel_count)
            break;

        remove_pending_event_for_channel(channel);
    }
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

void uvm_tools_record_cpu_fatal_fault(uvm_va_space_t *va_space,
                                      NvU64 address,
                                      bool is_write,
                                      UvmEventFatalReason reason)
{
    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
        UvmEventEntry entry;
        UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeFatalFault;
        info->processorIndex = UVM_ID_CPU_VALUE;
        info->timeStamp = NV_GETTIME();
        info->address = address;
        info->accessType = is_write ? UvmEventMemoryAccessTypeWrite : UvmEventMemoryAccessTypeRead;
        // info->faultType is not valid for cpu faults
        info->reason = reason;

        uvm_tools_record_event(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_gpu_fatal_fault(uvm_gpu_id_t gpu_id,
                                      uvm_va_space_t *va_space,
                                      const uvm_fault_buffer_entry_t *buffer_entry,
                                      UvmEventFatalReason reason)
{
    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
        UvmEventEntry entry;
        UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeFatalFault;
        info->processorIndex = uvm_id_value(gpu_id);
        info->timeStamp = NV_GETTIME();
        info->address = buffer_entry->fault_address;
        info->accessType = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
        info->faultType = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
        info->reason = reason;

        uvm_tools_record_event(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
                                NvU64 address,
                                size_t region_size,
                                const uvm_processor_mask_t *processors)
{
    UVM_ASSERT(address);
    UVM_ASSERT(PAGE_ALIGNED(address));
    UVM_ASSERT(region_size > 0);

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled(va_space, UvmEventTypeThrashingDetected)) {
        UvmEventEntry entry;
        UvmEventThrashingDetectedInfo *info = &entry.eventData.thrashing;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrashingDetected;
        info->address = address;
        info->size = region_size;
        info->timeStamp = NV_GETTIME();
        bitmap_copy((long unsigned *)&info->processors, processors->bitmap, UVM_ID_MAX_PROCESSORS);

        uvm_tools_record_event(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_throttling_start(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
{
    UVM_ASSERT(address);
    UVM_ASSERT(PAGE_ALIGNED(address));
    UVM_ASSERT(UVM_ID_IS_VALID(processor));

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingStart)) {
        UvmEventEntry entry;
        UvmEventThrottlingStartInfo *info = &entry.eventData.throttlingStart;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrottlingStart;
        info->processorIndex = uvm_id_value(processor);
        info->address = address;
        info->timeStamp = NV_GETTIME();

        uvm_tools_record_event(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_throttling_end(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
{
    UVM_ASSERT(address);
    UVM_ASSERT(PAGE_ALIGNED(address));
    UVM_ASSERT(UVM_ID_IS_VALID(processor));

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingEnd)) {
        UvmEventEntry entry;
        UvmEventThrottlingEndInfo *info = &entry.eventData.throttlingEnd;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrottlingEnd;
        info->processorIndex = uvm_id_value(processor);
        info->address = address;
        info->timeStamp = NV_GETTIME();

        uvm_tools_record_event(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

static void record_map_remote_events(void *args)
{
    block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)args;
    map_remote_data_t *map_remote, *next;
    UvmEventEntry entry;
    uvm_va_space_t *va_space = block_map_remote->va_space;

    memset(&entry, 0, sizeof(entry));

    entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote;
    entry.eventData.mapRemote.srcIndex = uvm_id_value(block_map_remote->src);
    entry.eventData.mapRemote.dstIndex = uvm_id_value(block_map_remote->dst);
    entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
    entry.eventData.mapRemote.timeStamp = block_map_remote->timestamp;

    uvm_down_read(&va_space->tools.lock);
    list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
        list_del(&map_remote->events_node);

        entry.eventData.mapRemote.address = map_remote->address;
        entry.eventData.mapRemote.size = map_remote->size;
        entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
        kmem_cache_free(g_tools_map_remote_data_cache, map_remote);

        uvm_tools_record_event(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);

    UVM_ASSERT(list_empty(&block_map_remote->events));
    kmem_cache_free(g_tools_block_map_remote_data_cache, block_map_remote);
}

static void record_map_remote_events_entry(void *args)
{
    UVM_ENTRY_VOID(record_map_remote_events(args));
}

static void on_map_remote_complete(void *ptr)
{
    block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)ptr;
    map_remote_data_t *map_remote;

    // Only GPU mappings use the deferred mechanism
    UVM_ASSERT(UVM_ID_IS_GPU(block_map_remote->src));
    list_for_each_entry(map_remote, &block_map_remote->events, events_node)
        map_remote->timestamp_gpu = *map_remote->timestamp_gpu_addr;

    nv_kthread_q_item_init(&block_map_remote->queue_item, record_map_remote_events_entry, ptr);

    uvm_spin_lock(&g_tools_channel_list_lock);
    remove_pending_event_for_channel(block_map_remote->channel);
    nv_kthread_q_schedule_q_item(&g_tools_queue, &block_map_remote->queue_item);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

void uvm_tools_record_map_remote(uvm_va_block_t *va_block,
                                 uvm_push_t *push,
                                 uvm_processor_id_t processor,
                                 uvm_processor_id_t residency,
                                 NvU64 address,
                                 size_t region_size,
                                 UvmEventMapRemoteCause cause)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    UVM_ASSERT(UVM_ID_IS_VALID(processor));
    UVM_ASSERT(UVM_ID_IS_VALID(residency));
    UVM_ASSERT(cause != UvmEventMapRemoteCauseInvalid);

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (!tools_is_event_enabled(va_space, UvmEventTypeMapRemote))
        goto done;

    if (UVM_ID_IS_CPU(processor)) {
        UvmEventEntry entry;
        memset(&entry, 0, sizeof(entry));

        entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote;
        entry.eventData.mapRemote.srcIndex = uvm_id_value(processor);
        entry.eventData.mapRemote.dstIndex = uvm_id_value(residency);
        entry.eventData.mapRemote.mapRemoteCause = cause;
        entry.eventData.mapRemote.timeStamp = NV_GETTIME();
        entry.eventData.mapRemote.address = address;
        entry.eventData.mapRemote.size = region_size;
        entry.eventData.mapRemote.timeStampGpu = 0;

        UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);

        uvm_tools_record_event(va_space, &entry);
    }
    else {
        uvm_push_info_t *push_info = uvm_push_info_from_push(push);
        block_map_remote_data_t *block_map_remote;
        map_remote_data_t *map_remote;

        // The first call on this pushbuffer creates the per-VA block structure
        if (push_info->on_complete == NULL) {
            UVM_ASSERT(push_info->on_complete_data == NULL);

            block_map_remote = kmem_cache_alloc(g_tools_block_map_remote_data_cache, NV_UVM_GFP_FLAGS);
            if (block_map_remote == NULL)
                goto done;

            block_map_remote->src = processor;
            block_map_remote->dst = residency;
            block_map_remote->cause = cause;
            block_map_remote->timestamp = NV_GETTIME();
            block_map_remote->va_space = va_space;
            block_map_remote->channel = push->channel;
            INIT_LIST_HEAD(&block_map_remote->events);

            push_info->on_complete_data = block_map_remote;
            push_info->on_complete = on_map_remote_complete;

            uvm_spin_lock(&g_tools_channel_list_lock);
            add_pending_event_for_channel(block_map_remote->channel);
            uvm_spin_unlock(&g_tools_channel_list_lock);
        }
        else {
            block_map_remote = push_info->on_complete_data;
        }
        UVM_ASSERT(block_map_remote);

        map_remote = kmem_cache_alloc(g_tools_map_remote_data_cache, NV_UVM_GFP_FLAGS);
        if (map_remote == NULL)
            goto done;

        map_remote->address = address;
        map_remote->size = region_size;
        map_remote->timestamp_gpu_addr = uvm_push_timestamp(push);

        list_add_tail(&map_remote->events_node, &block_map_remote->events);
    }

done:
    uvm_up_read(&va_space->tools.lock);
}

NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_tools_event_tracker_t *event_tracker;

    event_tracker = nv_kmem_cache_zalloc(g_tools_event_tracker_cache, NV_UVM_GFP_FLAGS);
    if (event_tracker == NULL)
        return NV_ERR_NO_MEMORY;

    event_tracker->uvm_file = fget(params->uvmFd);
    if (event_tracker->uvm_file == NULL) {
        status = NV_ERR_INSUFFICIENT_PERMISSIONS;
        goto fail;
    }

    if (!uvm_file_is_nvidia_uvm(event_tracker->uvm_file)) {
        fput(event_tracker->uvm_file);
        event_tracker->uvm_file = NULL;
        status = NV_ERR_INSUFFICIENT_PERMISSIONS;
        goto fail;
    }

    // We don't use uvm_fd_va_space() here because tools can work
    // without an associated va_space_mm.
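    // Only the fd type is validated here; the va_space itself is looked up
    // lazily via uvm_va_space_get() in tools_event_tracker_va_space().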
    // We don't use uvm_fd_va_space() here because tools can work
    // without an associated va_space_mm.
    if (!uvm_fd_get_type(event_tracker->uvm_file, UVM_FD_VA_SPACE)) {
        fput(event_tracker->uvm_file);
        event_tracker->uvm_file = NULL;
        status = NV_ERR_ILLEGAL_ACTION;
        goto fail;
    }

    event_tracker->is_queue = params->queueBufferSize != 0;
    if (event_tracker->is_queue) {
        uvm_tools_queue_t *queue = &event_tracker->queue;
        uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
        init_waitqueue_head(&queue->wait_queue);

        if (params->queueBufferSize > UINT_MAX) {
            status = NV_ERR_INVALID_ARGUMENT;
            goto fail;
        }

        queue->queue_buffer_count = (NvU32)params->queueBufferSize;
        queue->notification_threshold = queue->queue_buffer_count / 2;

        // queue_buffer_count must be a power of 2, and at least 2
        if (!is_power_of_2(queue->queue_buffer_count) || queue->queue_buffer_count < 2) {
            status = NV_ERR_INVALID_ARGUMENT;
            goto fail;
        }

        status = map_user_pages(params->queueBuffer,
                                queue->queue_buffer_count * sizeof(UvmEventEntry),
                                (void **)&queue->queue,
                                &queue->queue_buffer_pages);
        if (status != NV_OK)
            goto fail;

        status = map_user_pages(params->controlBuffer,
                                sizeof(UvmToolsEventControlData),
                                (void **)&queue->control,
                                &queue->control_buffer_pages);

        if (status != NV_OK)
            goto fail;
    }
    else {
        uvm_tools_counter_t *counter = &event_tracker->counter;
        counter->all_processors = params->allProcessors;
        counter->processor = params->processor;
        status = map_user_pages(params->controlBuffer,
                                sizeof(NvU64) * UVM_TOTAL_COUNTERS,
                                (void **)&counter->counters,
                                &counter->counter_buffer_pages);
        if (status != NV_OK)
            goto fail;
    }

    if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
        status = NV_ERR_INVALID_ARGUMENT;
        goto fail;
    }

    return NV_OK;

fail:
    destroy_event_tracker(event_tracker);
    return status;
}

NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp)
{
    UvmToolsEventControlData *ctrl;
    uvm_tools_queue_snapshot_t sn;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);

    if (!tracker_is_queue(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    uvm_spin_lock(&event_tracker->queue.lock);

    event_tracker->queue.notification_threshold = params->notificationThreshold;

    ctrl = event_tracker->queue.control;
    sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
    sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);

    if (queue_needs_wakeup(&event_tracker->queue, &sn))
        wake_up_all(&event_tracker->queue.wait_queue);

    uvm_spin_unlock(&event_tracker->queue.lock);

    return NV_OK;
}

static NV_STATUS tools_update_perf_events_callbacks(uvm_va_space_t *va_space)
{
    NV_STATUS status;

    uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    if (tools_is_fault_callback_needed(va_space)) {
        if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
            status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
                                                             UVM_PERF_EVENT_FAULT,
                                                             uvm_tools_record_fault);

            if (status != NV_OK)
                return status;
        }
    }
    else {
        if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
            uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
                                                      UVM_PERF_EVENT_FAULT,
                                                      uvm_tools_record_fault);
        }
    }

    if (tools_is_migration_callback_needed(va_space)) {
        if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
            status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
                                                             UVM_PERF_EVENT_MIGRATION,
                                                             uvm_tools_record_migration);

            if (status != NV_OK)
                return status;
        }
    }
    else {
        if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
            uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
                                                      UVM_PERF_EVENT_MIGRATION,
                                                      uvm_tools_record_migration);
        }
    }

    return NV_OK;
}

static NV_STATUS tools_update_status(uvm_va_space_t *va_space)
{
    NV_STATUS status;
    bool should_be_enabled;
    uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
    uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    status = tools_update_perf_events_callbacks(va_space);
    if (status != NV_OK)
        return status;

    should_be_enabled = tools_are_enabled(va_space);
    if (should_be_enabled != va_space->tools.enabled) {
        if (should_be_enabled)
            list_add(&va_space->tools.node, &g_tools_va_space_list);
        else
            list_del(&va_space->tools.node);

        va_space->tools.enabled = should_be_enabled;
    }

    return NV_OK;
}

#define EVENT_FLAGS_BITS (sizeof(NvU64) * 8)

static bool mask_contains_invalid_events(NvU64 event_flags)
{
    const unsigned long *event_mask = (const unsigned long *)&event_flags;
    DECLARE_BITMAP(helper_mask, EVENT_FLAGS_BITS);
    DECLARE_BITMAP(valid_events_mask, EVENT_FLAGS_BITS);
    DECLARE_BITMAP(tests_events_mask, EVENT_FLAGS_BITS);

    bitmap_zero(tests_events_mask, EVENT_FLAGS_BITS);
    bitmap_set(tests_events_mask,
               UvmEventTestTypesFirst,
               UvmEventTestTypesLast - UvmEventTestTypesFirst + 1);

    bitmap_zero(valid_events_mask, EVENT_FLAGS_BITS);
    bitmap_set(valid_events_mask, 1, UvmEventNumTypes - 1);

    if (uvm_enable_builtin_tests)
        bitmap_or(valid_events_mask, valid_events_mask, tests_events_mask, EVENT_FLAGS_BITS);

    // Make sure that test event ids do not overlap with regular events
    BUILD_BUG_ON(UvmEventTestTypesFirst < UvmEventNumTypes);
    BUILD_BUG_ON(UvmEventTestTypesFirst > UvmEventTestTypesLast);
    BUILD_BUG_ON(UvmEventTestTypesLast >= UvmEventNumTypesAll);

    // Make sure that no test event ever changes the size of UvmEventEntry
    BUILD_BUG_ON(sizeof(((UvmEventEntry *)NULL)->testEventData) >
                 sizeof(((UvmEventEntry *)NULL)->eventData));
    BUILD_BUG_ON(UvmEventNumTypesAll > EVENT_FLAGS_BITS);

    if (!bitmap_andnot(helper_mask, event_mask, valid_events_mask, EVENT_FLAGS_BITS))
        return false;

    if (!uvm_enable_builtin_tests && bitmap_and(helper_mask, event_mask, tests_events_mask, EVENT_FLAGS_BITS))
        UVM_INFO_PRINT("Event index not found. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");

    return true;
}

NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
    NV_STATUS status = NV_OK;
    NvU64 inserted_lists;

    if (!tracker_is_queue(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    if (mask_contains_invalid_events(params->eventTypeFlags))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);

    insert_event_tracker(va_space,
                         event_tracker->queue.queue_nodes,
                         UvmEventNumTypesAll,
                         params->eventTypeFlags,
                         &event_tracker->queue.subscribed_queues,
                         va_space->tools.queues,
                         &inserted_lists);

    // perform any necessary registration
    status = tools_update_status(va_space);
    if (status != NV_OK) {
        // on error, unregister any newly registered event
        remove_event_tracker(va_space,
                             event_tracker->queue.queue_nodes,
                             UvmEventNumTypes,
                             inserted_lists,
                             &event_tracker->queue.subscribed_queues);
    }

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return status;
}

NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);

    if (!tracker_is_queue(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);
    remove_event_tracker(va_space,
                         event_tracker->queue.queue_nodes,
                         UvmEventNumTypesAll,
                         params->eventTypeFlags,
                         &event_tracker->queue.subscribed_queues);

    // de-registration should not fail
    status = tools_update_status(va_space);
    UVM_ASSERT(status == NV_OK);

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);
    return NV_OK;
}

NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
    NV_STATUS status = NV_OK;
    NvU64 inserted_lists;

    if (!tracker_is_counter(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);

    insert_event_tracker(va_space,
                         event_tracker->counter.counter_nodes,
                         UVM_TOTAL_COUNTERS,
                         params->counterTypeFlags,
                         &event_tracker->counter.subscribed_counters,
                         va_space->tools.counters,
                         &inserted_lists);

    // perform any necessary registration
    status = tools_update_status(va_space);
    if (status != NV_OK) {
        remove_event_tracker(va_space,
                             event_tracker->counter.counter_nodes,
                             UVM_TOTAL_COUNTERS,
                             inserted_lists,
                             &event_tracker->counter.subscribed_counters);
    }

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return status;
}

NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);

    if (!tracker_is_counter(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);
    remove_event_tracker(va_space,
                         event_tracker->counter.counter_nodes,
                         UVM_TOTAL_COUNTERS,
                         params->counterTypeFlags,
                         &event_tracker->counter.subscribed_counters);

    // de-registration should not fail
    status = tools_update_status(va_space);
    UVM_ASSERT(status == NV_OK);

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return NV_OK;
}

static NV_STATUS tools_access_va_block(uvm_va_block_t *va_block,
                                       uvm_va_block_context_t *block_context,
                                       NvU64 target_va,
                                       NvU64 size,
                                       bool is_write,
                                       uvm_mem_t *stage_mem)
{
    if (is_write) {
        return UVM_VA_BLOCK_LOCK_RETRY(va_block,
                                       NULL,
                                       uvm_va_block_write_from_cpu(va_block, block_context, target_va, stage_mem, size));
    }
    else {
        return UVM_VA_BLOCK_LOCK_RETRY(va_block,
                                       NULL,
                                       uvm_va_block_read_to_cpu(va_block, stage_mem, target_va, size));
    }
}

static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
                                             NvU64 target_va,
                                             NvU64 size,
                                             NvU64 user_va,
                                             NvU64 *bytes,
                                             bool is_write)
{
    NV_STATUS status;
    uvm_mem_t *stage_mem = NULL;
    void *stage_addr;
    uvm_global_processor_mask_t *retained_global_gpus = NULL;
    uvm_global_processor_mask_t *global_gpus = NULL;
    uvm_va_block_context_t *block_context = NULL;
    struct mm_struct *mm = NULL;

    retained_global_gpus = uvm_kvmalloc(sizeof(*retained_global_gpus));
    if (retained_global_gpus == NULL)
        return NV_ERR_NO_MEMORY;

    uvm_global_processor_mask_zero(retained_global_gpus);

    global_gpus = uvm_kvmalloc(sizeof(*global_gpus));
    if (global_gpus == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto exit;
    }

    mm = uvm_va_space_mm_or_current_retain(va_space);

    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, mm, &stage_mem);
    if (status != NV_OK)
        goto exit;

    block_context = uvm_va_block_context_alloc(mm);
    if (!block_context) {
        status = NV_ERR_NO_MEMORY;
        goto exit;
    }

    stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem);
    *bytes = 0;

    while (*bytes < size) {
        uvm_gpu_t *gpu;
        uvm_va_block_t *block;
        void *user_va_start = (void *)(user_va + *bytes);
        NvU64 target_va_start = target_va + *bytes;
        NvU64 bytes_left = size - *bytes;
        NvU64 page_offset = target_va_start & (PAGE_SIZE - 1);
        NvU64 bytes_now = min(bytes_left, (NvU64)(PAGE_SIZE - page_offset));

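        // Each iteration stages at most the remainder of the current target
        // page through the single stage_mem page. As an illustration (values
        // are hypothetical), with PAGE_SIZE == 4096 and target_va_start ==
        // 0x10234, page_offset is 0x234 and bytes_now is
        // min(bytes_left, 0x1000 - 0x234) == min(bytes_left, 0xdcc), so the
        // next iteration starts page-aligned.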
        if (is_write) {
            NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now);
            if (remaining != 0) {
                status = NV_ERR_INVALID_ARGUMENT;
                goto exit;
            }
        }

        if (mm)
            uvm_down_read_mmap_lock(mm);

        // The RM flavor of the lock is needed to perform ECC checks.
        uvm_va_space_down_read_rm(va_space);
        if (mm)
            status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block_context->hmm.vma, &block);
        else
            status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);

        if (status != NV_OK)
            goto unlock_and_exit;

        uvm_va_space_global_gpus(va_space, global_gpus);

        for_each_global_gpu_in_mask(gpu, global_gpus) {

            // When CC is enabled, the staging memory cannot be mapped on the
            // GPU (it is protected sysmem), but it is still used to store the
            // unencrypted version of the page contents when the page is
            // resident on vidmem.
            if (uvm_conf_computing_mode_enabled(gpu)) {
                UVM_ASSERT(uvm_global_processor_mask_empty(retained_global_gpus));

                break;
            }
            if (uvm_global_processor_mask_test_and_set(retained_global_gpus, gpu->global_id))
                continue;

            // The retention of each GPU ensures that the staging memory is
            // freed before the unregistration of any of the GPUs it is mapped
            // on. Each GPU is retained once.
            uvm_gpu_retain(gpu);

            // Accessing the VA block may result in copying data between the
            // CPU and a GPU. Conservatively add virtual mappings to all the
            // GPUs (even if those mappings may never be used) as tools
            // read/write is not on a performance critical path.
            status = uvm_mem_map_gpu_kernel(stage_mem, gpu);
            if (status != NV_OK)
                goto unlock_and_exit;
        }

        // Make sure a CPU resident page has an up-to-date struct page pointer.
        if (uvm_va_block_is_hmm(block)) {
            status = uvm_hmm_va_block_update_residency_info(block, mm, UVM_PAGE_ALIGN_DOWN(target_va_start), true);
            if (status != NV_OK)
                goto unlock_and_exit;
        }

        status = tools_access_va_block(block, block_context, target_va_start, bytes_now, is_write, stage_mem);

        // For simplicity, check for ECC errors on all GPUs registered in the
        // VA space
        if (status == NV_OK)
            status = uvm_global_mask_check_ecc_error(global_gpus);

        uvm_va_space_up_read_rm(va_space);
        if (mm)
            uvm_up_read_mmap_lock(mm);

        if (status != NV_OK)
            goto exit;

        if (!is_write) {
            NvU64 remaining;

            // Prevent processor speculation prior to accessing user-mapped
            // memory to avoid leaking information from side-channel attacks.
            // Under speculation, a valid VA range which does not contain
            // target_va could be used, and the block index could run off the
            // end of the array. Information about the state of that kernel
            // memory could be inferred if speculative execution gets to the
            // point where the data is copied out.
            nv_speculation_barrier();

            remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now);
            if (remaining > 0) {
                status = NV_ERR_INVALID_ARGUMENT;
                goto exit;
            }
        }

        *bytes += bytes_now;
    }

unlock_and_exit:
    if (status != NV_OK) {
        uvm_va_space_up_read_rm(va_space);
        if (mm)
            uvm_up_read_mmap_lock(mm);
    }

exit:
    uvm_va_block_context_free(block_context);

    uvm_mem_free(stage_mem);

    uvm_global_mask_release(retained_global_gpus);

    uvm_va_space_mm_or_current_release(va_space, mm);

    uvm_kvfree(global_gpus);
    uvm_kvfree(retained_global_gpus);

    return status;
}

NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp)
{
    return tools_access_process_memory(uvm_va_space_get(filp),
                                       params->targetVa,
                                       params->size,
                                       params->buffer,
                                       &params->bytesRead,
                                       false);
}

NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp)
{
    return tools_access_process_memory(uvm_va_space_get(filp),
                                       params->targetVa,
                                       params->size,
                                       params->buffer,
                                       &params->bytesWritten,
                                       true);
}

NV_STATUS uvm_test_inject_tools_event(UVM_TEST_INJECT_TOOLS_EVENT_PARAMS *params, struct file *filp)
{
    NvU32 i;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->entry.eventData.eventType >= UvmEventNumTypesAll)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_down_read(&va_space->tools.lock);
    for (i = 0; i < params->count; i++)
        uvm_tools_record_event(va_space, &params->entry);
    uvm_up_read(&va_space->tools.lock);
    return NV_OK;
}

NV_STATUS uvm_test_increment_tools_counter(UVM_TEST_INCREMENT_TOOLS_COUNTER_PARAMS *params, struct file *filp)
{
    NvU32 i;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->counter >= UVM_TOTAL_COUNTERS)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_down_read(&va_space->tools.lock);
    for (i = 0; i < params->count; i++)
        uvm_tools_inc_counter(va_space, params->counter, params->amount, &params->processor);
    uvm_up_read(&va_space->tools.lock);

    return NV_OK;
}

NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS *params, struct file *filp)
{
    NvProcessorUuid *uuids;
    NvU64 remaining;
    uvm_gpu_t *gpu;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    uuids = uvm_kvmalloc_zero(sizeof(NvProcessorUuid) * UVM_ID_MAX_PROCESSORS);
    if (uuids == NULL)
        return NV_ERR_NO_MEMORY;

    uvm_processor_uuid_copy(&uuids[UVM_ID_CPU_VALUE], &NV_PROCESSOR_UUID_CPU_DEFAULT);
    params->count = 1;

    uvm_va_space_down_read(va_space);
    for_each_va_space_gpu(gpu, va_space) {
        uvm_processor_uuid_copy(&uuids[uvm_id_value(gpu->id)], uvm_gpu_uuid(gpu));
        if (uvm_id_value(gpu->id) + 1 > params->count)
            params->count = uvm_id_value(gpu->id) + 1;
    }
    uvm_va_space_up_read(va_space);

    remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * params->count);
    uvm_kvfree(uuids);

    if (remaining != 0)
        return NV_ERR_INVALID_ADDRESS;

    return NV_OK;
}

void uvm_tools_flush_events(void)
{
    tools_schedule_completed_events();

    nv_kthread_q_flush(&g_tools_queue);
}
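
// Flush ordering in uvm_tools_flush_events() above: channels with pending
// deferred events are first given a chance to schedule their notification
// work (via tools_schedule_completed_events()), and nv_kthread_q_flush() then
// drains g_tools_queue, so work items queued by completion callbacks such as
// on_map_remote_complete() have finished by the time the flush returns.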

NV_STATUS uvm_api_tools_flush_events(UVM_TOOLS_FLUSH_EVENTS_PARAMS *params, struct file *filp)
{
    uvm_tools_flush_events();
    return NV_OK;
}

NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpuUuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    // Wait for register-based fault clears to queue the replay event
    if (!gpu->parent->has_clear_faulted_channel_method) {
        uvm_gpu_non_replayable_faults_isr_lock(gpu->parent);
        uvm_gpu_non_replayable_faults_isr_unlock(gpu->parent);
    }

    // Wait for pending fault replay methods to complete (replayable faults on
    // all GPUs, and non-replayable faults on method-based GPUs).
    status = uvm_channel_manager_wait(gpu->channel_manager);

    // Flush any pending events even if (status != NV_OK)
    uvm_tools_flush_events();
    uvm_gpu_release(gpu);

    return status;
}

static const struct file_operations uvm_tools_fops =
{
    .open = uvm_tools_open_entry,
    .release = uvm_tools_release_entry,
    .unlocked_ioctl = uvm_tools_unlocked_ioctl_entry,
#if NVCPU_IS_X86_64
    .compat_ioctl = uvm_tools_unlocked_ioctl_entry,
#endif
    .poll = uvm_tools_poll_entry,
    .owner = THIS_MODULE,
};

static void _uvm_tools_destroy_cache_all(void)
{
    // The pointers are initialized to NULL, so it's safe to call destroy on
    // all of them.
    kmem_cache_destroy_safe(&g_tools_event_tracker_cache);
    kmem_cache_destroy_safe(&g_tools_block_migration_data_cache);
    kmem_cache_destroy_safe(&g_tools_migration_data_cache);
    kmem_cache_destroy_safe(&g_tools_replay_data_cache);
    kmem_cache_destroy_safe(&g_tools_block_map_remote_data_cache);
    kmem_cache_destroy_safe(&g_tools_map_remote_data_cache);
}

int uvm_tools_init(dev_t uvm_base_dev)
{
    dev_t uvm_tools_dev = MKDEV(MAJOR(uvm_base_dev), NVIDIA_UVM_TOOLS_MINOR_NUMBER);
    int ret = -ENOMEM; // This will be updated later if allocations succeed

    uvm_init_rwsem(&g_tools_va_space_list_lock, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);

    g_tools_event_tracker_cache = NV_KMEM_CACHE_CREATE("uvm_tools_event_tracker_t",
                                                       uvm_tools_event_tracker_t);
    if (!g_tools_event_tracker_cache)
        goto err_cache_destroy;

    g_tools_block_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_migration_data_t",
                                                              block_migration_data_t);
    if (!g_tools_block_migration_data_cache)
        goto err_cache_destroy;

    g_tools_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_migration_data_t",
                                                        migration_data_t);
    if (!g_tools_migration_data_cache)
        goto err_cache_destroy;

    g_tools_replay_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_replay_data_t",
                                                     replay_data_t);
    if (!g_tools_replay_data_cache)
        goto err_cache_destroy;

    g_tools_block_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_map_remote_data_t",
                                                               block_map_remote_data_t);
    if (!g_tools_block_map_remote_data_cache)
        goto err_cache_destroy;

    g_tools_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_map_remote_data_t",
                                                         map_remote_data_t);
    if (!g_tools_map_remote_data_cache)
        goto err_cache_destroy;

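    // g_tools_channel_list_lock serializes add/remove_pending_event_for_channel
    // and the scheduling of deferred notification work onto g_tools_queue
    // (see on_map_remote_complete() above).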
    uvm_spin_lock_init(&g_tools_channel_list_lock, UVM_LOCK_ORDER_LEAF);

    ret = nv_kthread_q_init(&g_tools_queue, "UVM Tools Event Queue");
    if (ret < 0)
        goto err_cache_destroy;

    uvm_init_character_device(&g_uvm_tools_cdev, &uvm_tools_fops);
    ret = cdev_add(&g_uvm_tools_cdev, uvm_tools_dev, 1);
    if (ret != 0) {
        UVM_ERR_PRINT("cdev_add (major %u, minor %u) failed: %d\n",
                      MAJOR(uvm_tools_dev),
                      MINOR(uvm_tools_dev),
                      ret);
        goto err_stop_thread;
    }

    return ret;

err_stop_thread:
    nv_kthread_q_stop(&g_tools_queue);

err_cache_destroy:
    _uvm_tools_destroy_cache_all();
    return ret;
}

void uvm_tools_exit(void)
{
    unsigned i;
    cdev_del(&g_uvm_tools_cdev);

    nv_kthread_q_stop(&g_tools_queue);

    for (i = 0; i < UvmEventNumTypesAll; ++i)
        UVM_ASSERT(g_tools_enabled_event_count[i] == 0);

    UVM_ASSERT(list_empty(&g_tools_va_space_list));

    _uvm_tools_destroy_cache_all();
}