/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/
#include "uvm_common.h"
#include "uvm_ioctl.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_tools.h"
#include "uvm_va_space.h"
#include "uvm_api.h"
#include "uvm_hal_types.h"
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_push.h"
#include "uvm_forward_decl.h"
#include "uvm_range_group.h"
#include "uvm_mem.h"
#include "nv_speculation_barrier.h"

// We limit the number of times a page can be retained by the kernel
// to prevent the user from maliciously passing UVM tools the same page
// over and over again in an attempt to overflow the refcount.
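// The limit is enforced in map_user_pages() below, which refuses to map any
// page whose refcount already exceeds it.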
#define MAX_PAGE_COUNT (1 << 20)
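
// Snapshot of the get/put indices published through the user-mapped
// UvmToolsEventControlData. The queue buffer holds a power-of-two number of
// entries (relied upon by the masking in the enqueue paths below), so
// positions wrap using the mask (queue_buffer_count - 1). As an illustrative
// example: with queue_buffer_count == 8, get_behind == 1 and put_behind == 6,
// the queue holds (8 + 6 - 1) & 7 == 5 unread entries, and
// put_ahead == (6 + 1) & 7 == 7 is the slot the next entry will occupy.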
typedef struct
{
    NvU32 get_ahead;
    NvU32 get_behind;
    NvU32 put_ahead;
    NvU32 put_behind;
} uvm_tools_queue_snapshot_t;

typedef struct
{
    uvm_spinlock_t lock;
    NvU64 subscribed_queues;
    struct list_head queue_nodes[UvmEventNumTypesAll];

    struct page **queue_buffer_pages;
    union
    {
        UvmEventEntry_V1 *queue_v1;
        UvmEventEntry_V2 *queue_v2;
    };
    NvU32 queue_buffer_count;
    NvU32 notification_threshold;

    struct page **control_buffer_pages;
    union
    {
        UvmToolsEventControlData_V1 *control_v1;
        UvmToolsEventControlData_V2 *control_v2;
    };

    wait_queue_head_t wait_queue;
    bool is_wakeup_get_valid;
    NvU32 wakeup_get;
} uvm_tools_queue_t;

typedef struct
{
    struct list_head counter_nodes[UVM_TOTAL_COUNTERS];
    NvU64 subscribed_counters;

    struct page **counter_buffer_pages;
    NvU64 *counters;

    bool all_processors;
    NvProcessorUuid processor;
} uvm_tools_counter_t;

// private_data for /dev/nvidia-uvm-tools
typedef struct
{
    NvU32 version;
    bool is_queue;
    struct file *uvm_file;
    union
    {
        uvm_tools_queue_t queue;
        uvm_tools_counter_t counter;
    };
} uvm_tools_event_tracker_t;

// Delayed events
//
// Events that require gpu timestamps for asynchronous operations use a delayed
// notification mechanism. Each event type registers a callback that is invoked
// from the update_progress channel routines. The callback then enqueues a
// work item that takes care of notifying the events. This module keeps a
// global list of channels with pending events. Other modules or user apps (via
// ioctl) may call uvm_tools_flush_events to update the progress of the channels
// in the list, as needed.
//
// User apps will need to flush events before removing gpus to avoid getting
// events with gpu ids that have been removed.
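//
// The resulting flow, as implemented below, is roughly:
//   1) A push completes and its push_info->on_complete callback runs (e.g.
//      on_block_migration_complete or on_replay_complete).
//   2) The callback reads the GPU timestamps, removes the channel from the
//      pending list and schedules a work item on g_tools_queue.
//   3) The work item (record_migration_events, record_replay_events, ...)
//      runs in kthread context, where it can safely take the va_space tools
//      lock in read mode and dispatch the event entries.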

// This object describes the pending migration operations within a VA block
typedef struct
{
    nv_kthread_q_item_t queue_item;
    uvm_processor_id_t dst;
    uvm_processor_id_t src;
    uvm_va_space_t *va_space;

    uvm_channel_t *channel;
    struct list_head events;
    NvU64 start_timestamp_cpu;
    NvU64 end_timestamp_cpu;
    NvU64 *start_timestamp_gpu_addr;
    NvU64 start_timestamp_gpu;
    NvU64 range_group_id;
} block_migration_data_t;

// This object represents a specific pending migration within a VA block
typedef struct
{
    struct list_head events_node;
    NvU64 bytes;
    NvU64 address;
    NvU64 *end_timestamp_gpu_addr;
    NvU64 end_timestamp_gpu;
    UvmEventMigrationCause cause;
} migration_data_t;

// This object represents a pending gpu fault replay operation
typedef struct
{
    nv_kthread_q_item_t queue_item;
    uvm_channel_t *channel;
    uvm_gpu_id_t gpu_id;
    NvU32 batch_id;
    uvm_fault_client_type_t client_type;
    NvU64 timestamp;
    NvU64 timestamp_gpu;
    NvU64 *timestamp_gpu_addr;
} replay_data_t;

// This object describes the pending map remote operations within a VA block
typedef struct
{
    nv_kthread_q_item_t queue_item;
    uvm_processor_id_t src;
    uvm_processor_id_t dst;
    UvmEventMapRemoteCause cause;
    NvU64 timestamp;
    uvm_va_space_t *va_space;

    uvm_channel_t *channel;
    struct list_head events;
} block_map_remote_data_t;

// This object represents a pending map remote operation
typedef struct
{
    struct list_head events_node;

    NvU64 address;
    NvU64 size;
    NvU64 timestamp_gpu;
    NvU64 *timestamp_gpu_addr;
} map_remote_data_t;


static struct cdev g_uvm_tools_cdev;
static LIST_HEAD(g_tools_va_space_list);
static NvU32 g_tools_enabled_event_count[UvmEventNumTypesAll];
static uvm_rw_semaphore_t g_tools_va_space_list_lock;
static struct kmem_cache *g_tools_event_tracker_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_block_migration_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_migration_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_replay_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_block_map_remote_data_cache __read_mostly = NULL;
static struct kmem_cache *g_tools_map_remote_data_cache __read_mostly = NULL;
static uvm_spinlock_t g_tools_channel_list_lock;
static LIST_HEAD(g_tools_channel_list);
static nv_kthread_q_t g_tools_queue;

static NV_STATUS tools_update_status(uvm_va_space_t *va_space);

static uvm_tools_event_tracker_t *tools_event_tracker(struct file *filp)
{
    return (uvm_tools_event_tracker_t *)atomic_long_read((atomic_long_t *)&filp->private_data);
}

static bool tracker_is_queue(uvm_tools_event_tracker_t *event_tracker)
{
    return event_tracker != NULL && event_tracker->is_queue;
}

static bool tracker_is_counter(uvm_tools_event_tracker_t *event_tracker)
{
    return event_tracker != NULL && !event_tracker->is_queue;
}

static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *event_tracker)
{
    uvm_va_space_t *va_space;
    UVM_ASSERT(event_tracker->uvm_file);
    va_space = uvm_va_space_get(event_tracker->uvm_file);
    return va_space;
}

static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)
{
    NvU64 i;

    for (i = 0; i < page_count; i++) {
        set_page_dirty(pages[i]);
        NV_UNPIN_USER_PAGE(pages[i]);
    }
}

static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
{
    size = DIV_ROUND_UP(size, PAGE_SIZE);
    vunmap((NvU8 *)addr);
    uvm_put_user_pages_dirty(pages, size);
    uvm_kvfree(pages);
}

// This must be called with the mmap_lock held in read mode or better.
static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
{
    struct vm_area_struct *vma;
    NvU64 addr = start_va;
    NvU64 region_end = start_va + size;

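    // find_vma() returns the first VMA whose end is above addr, so a result
    // with vma->vm_start > addr means there is an unmapped hole at addr. The
    // loop below therefore checks that [start_va, start_va + size) is fully
    // covered by VMAs, none of which map nvidia-uvm itself.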
    do {
        vma = find_vma(mm, addr);
        if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
            return NV_ERR_INVALID_ARGUMENT;

        addr = vma->vm_end;
    } while (addr < region_end);

    return NV_OK;
}

// Map the current process's virtual memory in [user_va, user_va + size) into
// the kernel. Sets *addr to the kernel mapping and *pages to the array of
// struct pages that contain the memory.
static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
{
    NV_STATUS status = NV_OK;
    long ret = 0;
    long num_pages;
    long i;

    *addr = NULL;
    *pages = NULL;
    num_pages = DIV_ROUND_UP(size, PAGE_SIZE);

    if (uvm_api_range_invalid(user_va, num_pages * PAGE_SIZE)) {
        status = NV_ERR_INVALID_ADDRESS;
        goto fail;
    }

    *pages = uvm_kvmalloc(sizeof(struct page *) * num_pages);
    if (*pages == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    // Although uvm_down_read_mmap_lock() is preferable due to its
    // participation in the UVM lock dependency tracker, it cannot be used
    // here. That's because pin_user_pages() may fault in HMM pages which are
    // GPU-resident. When that happens, the UVM page fault handler would
    // record another mmap_read_lock() on the same thread as this one, leading
    // to a false positive lock dependency report.
    //
    // Therefore, use the lower level nv_mmap_read_lock() here.
    nv_mmap_read_lock(current->mm);
    status = check_vmas(current->mm, user_va, size);
    if (status != NV_OK) {
        nv_mmap_read_unlock(current->mm);
        goto fail;
    }
    ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages);
    nv_mmap_read_unlock(current->mm);

    if (ret != num_pages) {
        status = NV_ERR_INVALID_ARGUMENT;
        goto fail;
    }

    for (i = 0; i < num_pages; i++) {
        if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
            status = NV_ERR_INVALID_ARGUMENT;
            goto fail;
        }
    }

    *addr = vmap(*pages, num_pages, VM_MAP, PAGE_KERNEL);
    if (*addr == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    return NV_OK;

fail:
    if (*pages == NULL)
        return status;

    if (ret > 0)
        uvm_put_user_pages_dirty(*pages, ret);
    else if (ret < 0)
        status = errno_to_nv_status(ret);

    uvm_kvfree(*pages);
    *pages = NULL;
    return status;
}
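
// For reference, a minimal usage sketch of this pair (user_va and buffer_size
// are hypothetical values; destroy_event_tracker() below shows the real
// callers of unmap_user_pages()):
//
//     void *addr;
//     struct page **pages;
//     NV_STATUS status = map_user_pages(user_va, buffer_size, &addr, &pages);
//     if (status == NV_OK) {
//         // ... access the user buffer through addr ...
//         unmap_user_pages(pages, addr, buffer_size);
//     }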

static void insert_event_tracker(uvm_va_space_t *va_space,
                                 struct list_head *node,
                                 NvU32 list_count,
                                 NvU64 list_mask,
                                 NvU64 *subscribed_mask,
                                 struct list_head *lists,
                                 NvU64 *inserted_lists)
{
    NvU32 i;
    NvU64 insertable_lists = list_mask & ~*subscribed_mask;

    uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    for (i = 0; i < list_count; i++) {
        if (insertable_lists & (1ULL << i)) {
            ++g_tools_enabled_event_count[i];
            list_add(node + i, lists + i);
        }
    }

    *subscribed_mask |= list_mask;
    *inserted_lists = insertable_lists;
}

static void remove_event_tracker(uvm_va_space_t *va_space,
                                 struct list_head *node,
                                 NvU32 list_count,
                                 NvU64 list_mask,
                                 NvU64 *subscribed_mask)
{
    NvU32 i;
    NvU64 removable_lists = list_mask & *subscribed_mask;

    uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    for (i = 0; i < list_count; i++) {
        if (removable_lists & (1ULL << i)) {
            UVM_ASSERT(g_tools_enabled_event_count[i] > 0);
            --g_tools_enabled_event_count[i];
            list_del(node + i);
        }
    }

    *subscribed_mask &= ~list_mask;
}

static bool queue_needs_wakeup(uvm_tools_queue_t *queue, uvm_tools_queue_snapshot_t *sn)
{
    NvU32 queue_mask = queue->queue_buffer_count - 1;

    uvm_assert_spinlock_locked(&queue->lock);
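    // The number of unread entries is counted modulo the queue size. As an
    // illustrative example: with queue_buffer_count == 8, put_behind == 5 and
    // get_ahead == 3, there are (8 + 5 - 3) & 7 == 2 entries pending, and the
    // consumer is woken once this count reaches notification_threshold.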
    return ((queue->queue_buffer_count + sn->put_behind - sn->get_ahead) & queue_mask) >= queue->notification_threshold;
}

static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
{
    if (event_tracker->uvm_file != NULL) {
        NV_STATUS status;
        uvm_va_space_t *va_space = tools_event_tracker_va_space(event_tracker);

        uvm_down_write(&g_tools_va_space_list_lock);
        uvm_down_write(&va_space->perf_events.lock);
        uvm_down_write(&va_space->tools.lock);

        if (event_tracker->is_queue) {
            uvm_tools_queue_t *queue = &event_tracker->queue;
            NvU64 buffer_size, control_size;

            if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
                buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
                control_size = sizeof(UvmToolsEventControlData_V1);
            }
            else {
                buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
                control_size = sizeof(UvmToolsEventControlData_V2);
            }

            remove_event_tracker(va_space,
                                 queue->queue_nodes,
                                 UvmEventNumTypesAll,
                                 queue->subscribed_queues,
                                 &queue->subscribed_queues);

            if (queue->queue_v2 != NULL) {
                unmap_user_pages(queue->queue_buffer_pages,
                                 queue->queue_v2,
                                 buffer_size);
            }

            if (queue->control_v2 != NULL) {
                unmap_user_pages(queue->control_buffer_pages,
                                 queue->control_v2,
                                 control_size);
            }
        }
        else {
            uvm_tools_counter_t *counters = &event_tracker->counter;

            remove_event_tracker(va_space,
                                 counters->counter_nodes,
                                 UVM_TOTAL_COUNTERS,
                                 counters->subscribed_counters,
                                 &counters->subscribed_counters);

            if (counters->counters != NULL) {
                unmap_user_pages(counters->counter_buffer_pages,
                                 counters->counters,
                                 UVM_TOTAL_COUNTERS * sizeof(NvU64));
            }
        }

        // de-registration should not fail
        status = tools_update_status(va_space);
        UVM_ASSERT(status == NV_OK);

        uvm_up_write(&va_space->tools.lock);
        uvm_up_write(&va_space->perf_events.lock);
        uvm_up_write(&g_tools_va_space_list_lock);

        fput(event_tracker->uvm_file);
    }
    kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
}

static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *queue)
{
    UvmToolsEventControlData_V1 *ctrl = queue->control_v1;
    uvm_tools_queue_snapshot_t sn;
    NvU32 queue_size = queue->queue_buffer_count;
    NvU32 queue_mask = queue_size - 1;

    // Prevent processor speculation prior to accessing user-mapped memory to
    // avoid leaking information from side-channel attacks. There are many
    // possible paths leading to this point and it would be difficult and
    // error-prone to audit all of them to determine whether user mode could
    // guide this access to kernel memory under speculative execution, so to
    // be on the safe side we'll just always block speculation.
    nv_speculation_barrier();

    uvm_spin_lock(&queue->lock);

    // ctrl is mapped into user space with read and write permissions,
    // so its values cannot be trusted.
    sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
    sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
    sn.put_ahead = (sn.put_behind + 1) & queue_mask;

    // one free element means that the queue is full
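    // As an illustrative example: with queue_size == 8, get_behind == 2 and
    // put_behind == 1, the buffer already holds (8 + 1 - 2) & 7 == 7 unread
    // entries, so (8 + 2 - 1) & 7 == 1 and the new entry is dropped rather
    // than overwriting unread data.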
    if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
        atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
        goto unlock;
    }

    memcpy(queue->queue_v1 + sn.put_behind, entry, sizeof(*entry));

    sn.put_behind = sn.put_ahead;

    // put_ahead and put_behind will always be the same outside of queue->lock.
    // This allows the user-space consumer to choose either a 2- or 4-pointer
    // synchronization approach.
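    // (Presumably: a 2-pointer consumer reads put_behind and advances only
    // get_behind, while a 4-pointer consumer also publishes get_ahead to
    // reserve entries before it finishes consuming them. The user-space queue
    // implementation defines the authoritative protocol.)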
    atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
    atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);

    sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);

    // If the queue needs to be woken up, only signal if we haven't signaled
    // before for this value of get_ahead.
    if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
        queue->is_wakeup_get_valid = true;
        queue->wakeup_get = sn.get_ahead;
        wake_up_all(&queue->wait_queue);
    }

unlock:
    uvm_spin_unlock(&queue->lock);
}

static void enqueue_event_v2(const UvmEventEntry_V2 *entry, uvm_tools_queue_t *queue)
{
    UvmToolsEventControlData_V2 *ctrl = queue->control_v2;
    uvm_tools_queue_snapshot_t sn;
    NvU32 queue_size = queue->queue_buffer_count;
    NvU32 queue_mask = queue_size - 1;

    // Prevent processor speculation prior to accessing user-mapped memory to
    // avoid leaking information from side-channel attacks. There are many
    // possible paths leading to this point and it would be difficult and
    // error-prone to audit all of them to determine whether user mode could
    // guide this access to kernel memory under speculative execution, so to
    // be on the safe side we'll just always block speculation.
    nv_speculation_barrier();

    uvm_spin_lock(&queue->lock);

    // ctrl is mapped into user space with read and write permissions,
    // so its values cannot be trusted.
    sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
    sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
    sn.put_ahead = (sn.put_behind + 1) & queue_mask;

    // one free element means that the queue is full
    if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
        atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
        goto unlock;
    }

    memcpy(queue->queue_v2 + sn.put_behind, entry, sizeof(*entry));

    sn.put_behind = sn.put_ahead;
    // put_ahead and put_behind will always be the same outside of queue->lock.
    // This allows the user-space consumer to choose either a 2- or 4-pointer
    // synchronization approach.
    atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
    atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);

    sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
    // If the queue needs to be woken up, only signal if we haven't signaled
    // before for this value of get_ahead.
    if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
        queue->is_wakeup_get_valid = true;
        queue->wakeup_get = sn.get_ahead;
        wake_up_all(&queue->wait_queue);
    }

unlock:
    uvm_spin_unlock(&queue->lock);
}

static void uvm_tools_record_event_v1(uvm_va_space_t *va_space, const UvmEventEntry_V1 *entry)
{
    NvU8 eventType = entry->eventData.eventType;
    uvm_tools_queue_t *queue;

    UVM_ASSERT(eventType < UvmEventNumTypesAll);

    uvm_assert_rwsem_locked(&va_space->tools.lock);

    list_for_each_entry(queue, va_space->tools.queues_v1 + eventType, queue_nodes[eventType])
        enqueue_event_v1(entry, queue);
}

static void uvm_tools_record_event_v2(uvm_va_space_t *va_space, const UvmEventEntry_V2 *entry)
{
    NvU8 eventType = entry->eventData.eventType;
    uvm_tools_queue_t *queue;

    UVM_ASSERT(eventType < UvmEventNumTypesAll);

    uvm_assert_rwsem_locked(&va_space->tools.lock);

    list_for_each_entry(queue, va_space->tools.queues_v2 + eventType, queue_nodes[eventType])
        enqueue_event_v2(entry, queue);
}

static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
{
    // For compatibility with older counters, CPU faults for memory with a
    // preferred location are reported for their preferred location as well as
    // for the CPU device itself.
    // This check prevents double counting in the aggregate count.
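    // For example, a CPU fault on a buffer whose preferred location is GPU X
    // is counted in uvm_tools_record_fault() against both the CPU UUID and
    // GPU X's UUID; an all-processors subscription must count that fault only
    // once, so it only accepts the CPU-UUID report here.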
    if (counter == UvmCounterNameCpuPageFaultCount)
        return uvm_uuid_eq(processor, &NV_PROCESSOR_UUID_CPU_DEFAULT);
    return true;
}

static void uvm_tools_inc_counter(uvm_va_space_t *va_space,
                                  UvmCounterName counter,
                                  NvU64 amount,
                                  const NvProcessorUuid *processor)
{
    UVM_ASSERT((NvU32)counter < UVM_TOTAL_COUNTERS);
    uvm_assert_rwsem_locked(&va_space->tools.lock);

    if (amount > 0) {
        uvm_tools_counter_t *counters;

        // Prevent processor speculation prior to accessing user-mapped memory
        // to avoid leaking information from side-channel attacks. There are
        // many possible paths leading to this point and it would be difficult
        // and error-prone to audit all of them to determine whether user mode
        // could guide this access to kernel memory under speculative
        // execution, so to be on the safe side we'll just always block
        // speculation.
        nv_speculation_barrier();

        list_for_each_entry(counters, va_space->tools.counters + counter, counter_nodes[counter]) {
            if ((counters->all_processors && counter_matches_processor(counter, processor)) ||
                uvm_uuid_eq(&counters->processor, processor)) {
                atomic64_add(amount, (atomic64_t *)(counters->counters + counter));
            }
        }
    }
}

static bool tools_is_counter_enabled(uvm_va_space_t *va_space, UvmCounterName counter)
{
    uvm_assert_rwsem_locked(&va_space->tools.lock);

    UVM_ASSERT(counter < UVM_TOTAL_COUNTERS);
    return !list_empty(va_space->tools.counters + counter);
}

static bool tools_is_event_enabled_version(uvm_va_space_t *va_space,
                                           UvmEventType event,
                                           UvmToolsEventQueueVersion version)
{
    uvm_assert_rwsem_locked(&va_space->tools.lock);

    UVM_ASSERT(event < UvmEventNumTypesAll);

    if (version == UvmToolsEventQueueVersion_V1)
        return !list_empty(va_space->tools.queues_v1 + event);
    else
        return !list_empty(va_space->tools.queues_v2 + event);
}

static bool tools_is_event_enabled(uvm_va_space_t *va_space, UvmEventType event)
{
    uvm_assert_rwsem_locked(&va_space->tools.lock);

    UVM_ASSERT(event < UvmEventNumTypesAll);

    return !list_empty(va_space->tools.queues_v1 + event) ||
           !list_empty(va_space->tools.queues_v2 + event);
}

static bool tools_is_event_enabled_in_any_va_space(UvmEventType event)
{
    bool ret = false;

    uvm_down_read(&g_tools_va_space_list_lock);
    ret = g_tools_enabled_event_count[event] != 0;
    uvm_up_read(&g_tools_va_space_list_lock);

    return ret;
}

static bool tools_are_enabled(uvm_va_space_t *va_space)
{
    NvU32 i;

    uvm_assert_rwsem_locked(&va_space->tools.lock);

    for (i = 0; i < UVM_TOTAL_COUNTERS; i++) {
        if (tools_is_counter_enabled(va_space, i))
            return true;
    }
    for (i = 0; i < UvmEventNumTypesAll; i++) {
        if (tools_is_event_enabled(va_space, i))
            return true;
    }
    return false;
}

static bool tools_is_fault_callback_needed(uvm_va_space_t *va_space)
{
    return tools_is_event_enabled(va_space, UvmEventTypeCpuFault) ||
           tools_is_event_enabled(va_space, UvmEventTypeGpuFault) ||
           tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount) ||
           tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount);
}

static bool tools_is_migration_callback_needed(uvm_va_space_t *va_space)
{
    return tools_is_event_enabled(va_space, UvmEventTypeMigration) ||
           tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate) ||
           tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH) ||
           tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD);
}

static int uvm_tools_open(struct inode *inode, struct file *filp)
{
    filp->private_data = NULL;
    return -nv_status_to_errno(uvm_global_get_status());
}

static int uvm_tools_open_entry(struct inode *inode, struct file *filp)
{
    UVM_ENTRY_RET(uvm_tools_open(inode, filp));
}

static int uvm_tools_release(struct inode *inode, struct file *filp)
{
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
    if (event_tracker != NULL) {
        destroy_event_tracker(event_tracker);
        filp->private_data = NULL;
    }
    return -nv_status_to_errno(uvm_global_get_status());
}

static int uvm_tools_release_entry(struct inode *inode, struct file *filp)
{
    UVM_ENTRY_RET(uvm_tools_release(inode, filp));
}

static long uvm_tools_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    switch (cmd) {
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_INIT_EVENT_TRACKER, uvm_api_tools_init_event_tracker);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD, uvm_api_tools_set_notification_threshold);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS, uvm_api_tools_event_queue_enable_events);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS, uvm_api_tools_event_queue_disable_events);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_ENABLE_COUNTERS, uvm_api_tools_enable_counters);
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_DISABLE_COUNTERS, uvm_api_tools_disable_counters);
    }

    uvm_thread_assert_all_unlocked();

    return -EINVAL;
}

static long uvm_tools_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
{
    UVM_ENTRY_RET(uvm_tools_unlocked_ioctl(filp, cmd, arg));
}

static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
{
    int flags = 0;
    uvm_tools_queue_snapshot_t sn;
    uvm_tools_event_tracker_t *event_tracker;
    UvmToolsEventControlData_V2 *ctrl;

    if (uvm_global_get_status() != NV_OK)
        return POLLERR;

    event_tracker = tools_event_tracker(filp);
    if (!tracker_is_queue(event_tracker))
        return POLLERR;

    uvm_spin_lock(&event_tracker->queue.lock);

    event_tracker->queue.is_wakeup_get_valid = false;
    ctrl = event_tracker->queue.control_v2;
    sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
    sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);

    if (queue_needs_wakeup(&event_tracker->queue, &sn))
        flags = POLLIN | POLLRDNORM;

    uvm_spin_unlock(&event_tracker->queue.lock);

    poll_wait(filp, &event_tracker->queue.wait_queue, wait);
    return flags;
}

static unsigned uvm_tools_poll_entry(struct file *filp, poll_table *wait)
{
    UVM_ENTRY_RET(uvm_tools_poll(filp, wait));
}

static UvmEventFaultType g_hal_to_tools_fault_type_table[UVM_FAULT_TYPE_COUNT] = {
    [UVM_FAULT_TYPE_INVALID_PDE]          = UvmFaultTypeInvalidPde,
    [UVM_FAULT_TYPE_INVALID_PTE]          = UvmFaultTypeInvalidPte,
    [UVM_FAULT_TYPE_ATOMIC]               = UvmFaultTypeAtomic,
    [UVM_FAULT_TYPE_WRITE]                = UvmFaultTypeWrite,
    [UVM_FAULT_TYPE_PDE_SIZE]             = UvmFaultTypeInvalidPdeSize,
    [UVM_FAULT_TYPE_VA_LIMIT_VIOLATION]   = UvmFaultTypeLimitViolation,
    [UVM_FAULT_TYPE_UNBOUND_INST_BLOCK]   = UvmFaultTypeUnboundInstBlock,
    [UVM_FAULT_TYPE_PRIV_VIOLATION]       = UvmFaultTypePrivViolation,
    [UVM_FAULT_TYPE_PITCH_MASK_VIOLATION] = UvmFaultTypePitchMaskViolation,
    [UVM_FAULT_TYPE_WORK_CREATION]        = UvmFaultTypeWorkCreation,
    [UVM_FAULT_TYPE_UNSUPPORTED_APERTURE] = UvmFaultTypeUnsupportedAperture,
    [UVM_FAULT_TYPE_COMPRESSION_FAILURE]  = UvmFaultTypeCompressionFailure,
    [UVM_FAULT_TYPE_UNSUPPORTED_KIND]     = UvmFaultTypeUnsupportedKind,
    [UVM_FAULT_TYPE_REGION_VIOLATION]     = UvmFaultTypeRegionViolation,
    [UVM_FAULT_TYPE_POISONED]             = UvmFaultTypePoison,
};

// TODO: add new value for weak atomics in tools
static UvmEventMemoryAccessType g_hal_to_tools_fault_access_type_table[UVM_FAULT_ACCESS_TYPE_COUNT] = {
    [UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG] = UvmEventMemoryAccessTypeAtomic,
    [UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK]   = UvmEventMemoryAccessTypeAtomic,
    [UVM_FAULT_ACCESS_TYPE_WRITE]         = UvmEventMemoryAccessTypeWrite,
    [UVM_FAULT_ACCESS_TYPE_READ]          = UvmEventMemoryAccessTypeRead,
    [UVM_FAULT_ACCESS_TYPE_PREFETCH]      = UvmEventMemoryAccessTypePrefetch
};

static UvmEventApertureType g_hal_to_tools_aperture_table[UVM_APERTURE_MAX] = {
    [UVM_APERTURE_PEER_0] = UvmEventAperturePeer0,
    [UVM_APERTURE_PEER_1] = UvmEventAperturePeer1,
    [UVM_APERTURE_PEER_2] = UvmEventAperturePeer2,
    [UVM_APERTURE_PEER_3] = UvmEventAperturePeer3,
    [UVM_APERTURE_PEER_4] = UvmEventAperturePeer4,
    [UVM_APERTURE_PEER_5] = UvmEventAperturePeer5,
    [UVM_APERTURE_PEER_6] = UvmEventAperturePeer6,
    [UVM_APERTURE_PEER_7] = UvmEventAperturePeer7,
    [UVM_APERTURE_SYS]    = UvmEventApertureSys,
    [UVM_APERTURE_VID]    = UvmEventApertureVid,
};

static UvmEventFaultClientType g_hal_to_tools_fault_client_type_table[UVM_FAULT_CLIENT_TYPE_COUNT] = {
    [UVM_FAULT_CLIENT_TYPE_GPC] = UvmEventFaultClientTypeGpc,
    [UVM_FAULT_CLIENT_TYPE_HUB] = UvmEventFaultClientTypeHub,
};

static void record_gpu_fault_instance(uvm_gpu_t *gpu,
                                      uvm_va_space_t *va_space,
                                      const uvm_fault_buffer_entry_t *fault_entry,
                                      NvU64 batch_id,
                                      NvU64 timestamp)
{
    if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFault, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventGpuFaultInfo_V1 *info = &entry.eventData.gpuFault;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeGpuFault;
        info->gpuIndex = uvm_parent_id_value_from_processor_id(gpu->id);
        info->faultType = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
        info->accessType = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
        info->clientType = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
        if (fault_entry->is_replayable)
            info->gpcId = fault_entry->fault_source.gpc_id;
        else
            info->channelId = fault_entry->fault_source.channel_id;
        info->clientId = fault_entry->fault_source.client_id;
        info->address = fault_entry->fault_address;
        info->timeStamp = timestamp;
        info->timeStampGpu = fault_entry->timestamp;
        info->batchId = batch_id;

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFault, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventGpuFaultInfo_V2 *info = &entry.eventData.gpuFault;
        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeGpuFault;
        info->gpuIndex = uvm_id_value(gpu->id);
        info->faultType = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
        info->accessType = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
        info->clientType = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
        if (fault_entry->is_replayable)
            info->gpcId = fault_entry->fault_source.gpc_id;
        else
            info->channelId = fault_entry->fault_source.channel_id;
        info->clientId = fault_entry->fault_source.client_id;
        info->address = fault_entry->fault_address;
        info->timeStamp = timestamp;
        info->timeStampGpu = fault_entry->timestamp;
        info->batchId = batch_id;

        uvm_tools_record_event_v2(va_space, &entry);
    }
}

static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    uvm_va_space_t *va_space = event_data->fault.space;

    UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT);
    UVM_ASSERT(event_data->fault.space);

    uvm_assert_rwsem_locked(&va_space->lock);
    uvm_assert_rwsem_locked(&va_space->perf_events.lock);
    UVM_ASSERT(va_space->tools.enabled);

    uvm_down_read(&va_space->tools.lock);
    UVM_ASSERT(tools_is_fault_callback_needed(va_space));

    if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
        if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V1)) {
            UvmEventEntry_V1 entry;
            UvmEventCpuFaultInfo_V1 *info = &entry.eventData.cpuFault;
            memset(&entry, 0, sizeof(entry));

            info->eventType = UvmEventTypeCpuFault;
            if (event_data->fault.cpu.is_write)
                info->accessType = UvmEventMemoryAccessTypeWrite;
            else
                info->accessType = UvmEventMemoryAccessTypeRead;

            info->address = event_data->fault.cpu.fault_va;
            info->timeStamp = NV_GETTIME();
            // assume that current owns va_space
            info->pid = uvm_get_stale_process_id();
            info->threadId = uvm_get_stale_thread_id();
            info->pc = event_data->fault.cpu.pc;

            uvm_tools_record_event_v1(va_space, &entry);
        }
        if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V2)) {
            UvmEventEntry_V2 entry;
            UvmEventCpuFaultInfo_V2 *info = &entry.eventData.cpuFault;
            memset(&entry, 0, sizeof(entry));

            info->eventType = UvmEventTypeCpuFault;
            if (event_data->fault.cpu.is_write)
                info->accessType = UvmEventMemoryAccessTypeWrite;
            else
                info->accessType = UvmEventMemoryAccessTypeRead;

            info->address = event_data->fault.cpu.fault_va;
            info->timeStamp = NV_GETTIME();
            // assume that current owns va_space
            info->pid = uvm_get_stale_process_id();
            info->threadId = uvm_get_stale_thread_id();
            info->pc = event_data->fault.cpu.pc;

            uvm_tools_record_event_v2(va_space, &entry);
        }
        if (tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount)) {
            uvm_processor_id_t preferred_location;

            // The UVM Lite tools interface did not represent the CPU as a UVM
            // device. It reported CPU faults against the corresponding
            // allocation's 'home location'. Though this driver's tools
            // interface does include a CPU device, for compatibility, the
            // driver still reports faults against a buffer's preferred
            // location, in addition to the CPU.
            uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &NV_PROCESSOR_UUID_CPU_DEFAULT);

            preferred_location = event_data->fault.preferred_location;
            if (UVM_ID_IS_GPU(preferred_location)) {
                uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, preferred_location);
                uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &gpu->uuid);
            }
        }
    }
    else {
        uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->fault.proc_id);
        UVM_ASSERT(gpu);

        if (tools_is_event_enabled(va_space, UvmEventTypeGpuFault)) {
            NvU64 timestamp = NV_GETTIME();
            uvm_fault_buffer_entry_t *fault_entry = event_data->fault.gpu.buffer_entry;
            uvm_fault_buffer_entry_t *fault_instance;

            record_gpu_fault_instance(gpu, va_space, fault_entry, event_data->fault.gpu.batch_id, timestamp);

            list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list)
                record_gpu_fault_instance(gpu, va_space, fault_instance, event_data->fault.gpu.batch_id, timestamp);
        }

        if (tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount))
            uvm_tools_inc_counter(va_space, UvmCounterNameGpuPageFaultCount, 1, &gpu->uuid);
    }
    uvm_up_read(&va_space->tools.lock);
}

static void add_pending_event_for_channel(uvm_channel_t *channel)
{
    uvm_assert_spinlock_locked(&g_tools_channel_list_lock);

    if (channel->tools.pending_event_count++ == 0)
        list_add_tail(&channel->tools.channel_list_node, &g_tools_channel_list);
}

static void remove_pending_event_for_channel(uvm_channel_t *channel)
{
    uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
    UVM_ASSERT(channel->tools.pending_event_count > 0);
    if (--channel->tools.pending_event_count == 0)
        list_del_init(&channel->tools.channel_list_node);
}

static void record_migration_events(void *args)
{
    block_migration_data_t *block_mig = (block_migration_data_t *)args;
    migration_data_t *mig;
    migration_data_t *next;
    uvm_va_space_t *va_space = block_mig->va_space;

    NvU64 gpu_timestamp = block_mig->start_timestamp_gpu;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventMigrationInfo_V1 *info = &entry.eventData.migration;

        // Initialize fields that are constant throughout the whole block
        memset(&entry, 0, sizeof(entry));
        info->eventType = UvmEventTypeMigration;
        info->srcIndex = uvm_parent_id_value_from_processor_id(block_mig->src);
        info->dstIndex = uvm_parent_id_value_from_processor_id(block_mig->dst);
        info->beginTimeStamp = block_mig->start_timestamp_cpu;
        info->endTimeStamp = block_mig->end_timestamp_cpu;
        info->rangeGroupId = block_mig->range_group_id;

        list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
            UVM_ASSERT(mig->bytes > 0);
            list_del(&mig->events_node);

            info->address = mig->address;
            info->migratedBytes = mig->bytes;
            info->beginTimeStampGpu = gpu_timestamp;
            info->endTimeStampGpu = mig->end_timestamp_gpu;
            info->migrationCause = mig->cause;
            gpu_timestamp = mig->end_timestamp_gpu;
            kmem_cache_free(g_tools_migration_data_cache, mig);

            uvm_tools_record_event_v1(va_space, &entry);
        }
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventMigrationInfo_V2 *info = &entry.eventData.migration;

        // Initialize fields that are constant throughout the whole block
        memset(&entry, 0, sizeof(entry));
        info->eventType = UvmEventTypeMigration;
        info->srcIndex = uvm_id_value(block_mig->src);
        info->dstIndex = uvm_id_value(block_mig->dst);
        info->beginTimeStamp = block_mig->start_timestamp_cpu;
        info->endTimeStamp = block_mig->end_timestamp_cpu;
        info->rangeGroupId = block_mig->range_group_id;

        list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
            UVM_ASSERT(mig->bytes > 0);
            list_del(&mig->events_node);

            info->address = mig->address;
            info->migratedBytes = mig->bytes;
            info->beginTimeStampGpu = gpu_timestamp;
            info->endTimeStampGpu = mig->end_timestamp_gpu;
            info->migrationCause = mig->cause;
            gpu_timestamp = mig->end_timestamp_gpu;
            kmem_cache_free(g_tools_migration_data_cache, mig);

            uvm_tools_record_event_v2(va_space, &entry);
        }
    }
    uvm_up_read(&va_space->tools.lock);

    UVM_ASSERT(list_empty(&block_mig->events));
    kmem_cache_free(g_tools_block_migration_data_cache, block_mig);
}

static void record_migration_events_entry(void *args)
{
    UVM_ENTRY_VOID(record_migration_events(args));
}

static void on_block_migration_complete(void *ptr)
{
    migration_data_t *mig;
    block_migration_data_t *block_mig = (block_migration_data_t *)ptr;

    block_mig->end_timestamp_cpu = NV_GETTIME();
    block_mig->start_timestamp_gpu = *block_mig->start_timestamp_gpu_addr;
    list_for_each_entry(mig, &block_mig->events, events_node)
        mig->end_timestamp_gpu = *mig->end_timestamp_gpu_addr;

    nv_kthread_q_item_init(&block_mig->queue_item, record_migration_events_entry, block_mig);

    // The UVM driver may notice that work in a channel is complete in a
    // variety of situations, and the va_space lock is not always held in all
    // of them, nor can it always be taken safely there. Dispatching events
    // requires the va_space lock to be held in at least read mode, so this
    // callback simply enqueues the dispatching onto a queue, where the
    // va_space lock is always safe to acquire.
    uvm_spin_lock(&g_tools_channel_list_lock);
    remove_pending_event_for_channel(block_mig->channel);
    nv_kthread_q_schedule_q_item(&g_tools_queue, &block_mig->queue_item);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

static void record_replay_event_helper(uvm_va_space_t *va_space,
                                       uvm_gpu_id_t gpu_id,
                                       NvU32 batch_id,
                                       uvm_fault_client_type_t client_type,
                                       NvU64 timestamp,
                                       NvU64 timestamp_gpu)
{
    uvm_down_read(&va_space->tools.lock);

    if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFaultReplay, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;

        memset(&entry, 0, sizeof(entry));
        entry.eventData.gpuFaultReplay.eventType = UvmEventTypeGpuFaultReplay;
        entry.eventData.gpuFaultReplay.gpuIndex = uvm_parent_id_value_from_processor_id(gpu_id);
        entry.eventData.gpuFaultReplay.batchId = batch_id;
        entry.eventData.gpuFaultReplay.clientType = g_hal_to_tools_fault_client_type_table[client_type];
        entry.eventData.gpuFaultReplay.timeStamp = timestamp;
        entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFaultReplay, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;

        memset(&entry, 0, sizeof(entry));
        entry.eventData.gpuFaultReplay.eventType = UvmEventTypeGpuFaultReplay;
        entry.eventData.gpuFaultReplay.gpuIndex = uvm_id_value(gpu_id);
        entry.eventData.gpuFaultReplay.batchId = batch_id;
        entry.eventData.gpuFaultReplay.clientType = g_hal_to_tools_fault_client_type_table[client_type];
        entry.eventData.gpuFaultReplay.timeStamp = timestamp;
        entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;

        uvm_tools_record_event_v2(va_space, &entry);
    }

    uvm_up_read(&va_space->tools.lock);
}

static void record_replay_event_broadcast(uvm_gpu_id_t gpu_id,
                                          NvU32 batch_id,
                                          uvm_fault_client_type_t client_type,
                                          NvU64 timestamp,
                                          NvU64 timestamp_gpu)
{
    uvm_va_space_t *va_space;

    uvm_down_read(&g_tools_va_space_list_lock);

    list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
        record_replay_event_helper(va_space,
                                   gpu_id,
                                   batch_id,
                                   client_type,
                                   timestamp,
                                   timestamp_gpu);
    }

    uvm_up_read(&g_tools_va_space_list_lock);
}

static void record_replay_events(void *args)
{
    replay_data_t *replay = (replay_data_t *)args;

    record_replay_event_broadcast(replay->gpu_id,
                                  replay->batch_id,
                                  replay->client_type,
                                  replay->timestamp,
                                  replay->timestamp_gpu);

    kmem_cache_free(g_tools_replay_data_cache, replay);
}

static void record_replay_events_entry(void *args)
{
    UVM_ENTRY_VOID(record_replay_events(args));
}

static void on_replay_complete(void *ptr)
{
    replay_data_t *replay = (replay_data_t *)ptr;
    replay->timestamp_gpu = *replay->timestamp_gpu_addr;

    nv_kthread_q_item_init(&replay->queue_item, record_replay_events_entry, ptr);

    uvm_spin_lock(&g_tools_channel_list_lock);
    remove_pending_event_for_channel(replay->channel);
    nv_kthread_q_schedule_q_item(&g_tools_queue, &replay->queue_item);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

static UvmEventMigrationCause g_make_resident_to_tools_migration_cause[UVM_MAKE_RESIDENT_CAUSE_MAX] = {
    [UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT]     = UvmEventMigrationCauseCoherence,
    [UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence,
    [UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER]       = UvmEventMigrationCauseAccessCounters,
    [UVM_MAKE_RESIDENT_CAUSE_PREFETCH]             = UvmEventMigrationCausePrefetch,
    [UVM_MAKE_RESIDENT_CAUSE_EVICTION]             = UvmEventMigrationCauseEviction,
    [UVM_MAKE_RESIDENT_CAUSE_API_TOOLS]            = UvmEventMigrationCauseInvalid,
    [UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE]          = UvmEventMigrationCauseUser,
    [UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP]  = UvmEventMigrationCauseCoherence,
    [UVM_MAKE_RESIDENT_CAUSE_API_HINT]             = UvmEventMigrationCauseUser,
};

static void uvm_tools_record_migration_cpu_to_cpu(uvm_va_space_t *va_space,
                                                  uvm_perf_event_data_t *event_data)
{
    if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventMigrationInfo_V1 *info = &entry.eventData.migration;

        // CPU-to-CPU migration events can be added directly to the queue.
        memset(&entry, 0, sizeof(entry));
        info->eventType = UvmEventTypeMigration;
        info->srcIndex = uvm_parent_id_value_from_processor_id(event_data->migration.src);
        info->dstIndex = uvm_parent_id_value_from_processor_id(event_data->migration.dst);
        info->address = event_data->migration.address;
        info->migratedBytes = event_data->migration.bytes;
        info->beginTimeStamp = event_data->migration.cpu_start_timestamp;
        info->endTimeStamp = NV_GETTIME();
        info->migrationCause = event_data->migration.cause;
        info->rangeGroupId = UVM_RANGE_GROUP_ID_NONE;

        // During evictions, it is not safe to call
        // uvm_range_group_range_find() because the va_space lock is not held.
        if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
            uvm_range_group_range_t *range = uvm_range_group_range_find(va_space, event_data->migration.address);
            if (range != NULL)
                info->rangeGroupId = range->range_group->id;
        }

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventMigrationInfo_V2 *info = &entry.eventData.migration;

        // CPU-to-CPU migration events can be added directly to the queue.
        memset(&entry, 0, sizeof(entry));
        info->eventType = UvmEventTypeMigration;
        info->srcIndex = uvm_id_value(event_data->migration.src);
        info->dstIndex = uvm_id_value(event_data->migration.dst);
        info->srcNid = event_data->migration.src_nid;
        info->dstNid = event_data->migration.dst_nid;
        info->address = event_data->migration.address;
        info->migratedBytes = event_data->migration.bytes;
        info->beginTimeStamp = event_data->migration.cpu_start_timestamp;
        info->endTimeStamp = NV_GETTIME();
        info->migrationCause = event_data->migration.cause;
        info->rangeGroupId = UVM_RANGE_GROUP_ID_NONE;

        // During evictions, it is not safe to call
        // uvm_range_group_range_find() because the va_space lock is not held.
        if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
            uvm_range_group_range_t *range = uvm_range_group_range_find(va_space, event_data->migration.address);
            if (range != NULL)
                info->rangeGroupId = range->range_group->id;
        }

        uvm_tools_record_event_v2(va_space, &entry);
    }
}

// For non-CPU-to-CPU migrations (or CPU-to-CPU copies using CEs), this event
// is notified asynchronously when all the migrations pushed to the same
// uvm_push_t object in a call to block_copy_resident_pages_between have
// finished. For CPU-to-CPU copies using memcpy, this event is notified when
// all of the page copies done by block_copy_resident_pages have finished.
static void uvm_tools_record_migration(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    uvm_va_block_t *va_block = event_data->migration.block;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION);

    uvm_assert_mutex_locked(&va_block->lock);
    uvm_assert_rwsem_locked(&va_space->perf_events.lock);
    UVM_ASSERT(va_space->tools.enabled);

    uvm_down_read(&va_space->tools.lock);
    UVM_ASSERT(tools_is_migration_callback_needed(va_space));

    if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
        if (!UVM_ID_IS_CPU(event_data->migration.src) || !UVM_ID_IS_CPU(event_data->migration.dst)) {
            migration_data_t *mig;
            uvm_push_info_t *push_info = uvm_push_info_from_push(event_data->migration.push);
            block_migration_data_t *block_mig = (block_migration_data_t *)push_info->on_complete_data;

            if (push_info->on_complete != NULL) {
                mig = kmem_cache_alloc(g_tools_migration_data_cache, NV_UVM_GFP_FLAGS);
                if (mig == NULL)
                    goto done_unlock;

                mig->address = event_data->migration.address;
                mig->bytes = event_data->migration.bytes;
                mig->end_timestamp_gpu_addr = uvm_push_timestamp(event_data->migration.push);
                mig->cause = g_make_resident_to_tools_migration_cause[event_data->migration.cause];

                list_add_tail(&mig->events_node, &block_mig->events);
            }
        }
        else {
            uvm_tools_record_migration_cpu_to_cpu(va_space, event_data);
        }
    }

    // We don't want to increment either UvmCounterNameBytesXferDtH or
    // UvmCounterNameBytesXferHtD in a CPU-to-CPU migration.
    if (UVM_ID_IS_CPU(event_data->migration.src) && UVM_ID_IS_CPU(event_data->migration.dst))
        goto done_unlock;

    // Increment counters
    if (UVM_ID_IS_CPU(event_data->migration.src) &&
        tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD)) {
        uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.dst);
        uvm_tools_inc_counter(va_space,
                              UvmCounterNameBytesXferHtD,
                              event_data->migration.bytes,
                              &gpu->uuid);
    }
    if (UVM_ID_IS_CPU(event_data->migration.dst) &&
        tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH)) {
        uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.src);
        uvm_tools_inc_counter(va_space,
                              UvmCounterNameBytesXferDtH,
                              event_data->migration.bytes,
                              &gpu->uuid);
    }

done_unlock:
    uvm_up_read(&va_space->tools.lock);
}

// This event is notified asynchronously when it is marked as completed in the
// pushbuffer the replay method belongs to.
void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
                                uvm_push_t *push,
                                NvU32 batch_id,
                                uvm_fault_client_type_t client_type)
{
    uvm_push_info_t *push_info = uvm_push_info_from_push(push);
    replay_data_t *replay;

    // Perform delayed notification only if some VA space has signed up for
    // UvmEventTypeGpuFaultReplay
    if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
        return;

    replay = kmem_cache_alloc(g_tools_replay_data_cache, NV_UVM_GFP_FLAGS);
    if (replay == NULL)
        return;

    UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);

    replay->timestamp_gpu_addr = uvm_push_timestamp(push);
    replay->gpu_id = gpu->id;
    replay->batch_id = batch_id;
    replay->client_type = client_type;
    replay->timestamp = NV_GETTIME();
    replay->channel = push->channel;

    push_info->on_complete_data = replay;
    push_info->on_complete = on_replay_complete;

    uvm_spin_lock(&g_tools_channel_list_lock);
    add_pending_event_for_channel(replay->channel);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type)
{
    UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);

    if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
        return;

    record_replay_event_broadcast(gpu->id,
                                  batch_id,
                                  client_type,
                                  NV_GETTIME(),
                                  gpu->parent->host_hal->get_time(gpu));
}

void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
                                     uvm_gpu_id_t gpu_id,
                                     const uvm_access_counter_buffer_entry_t *buffer_entry,
                                     bool on_managed_phys)
{
    uvm_down_read(&va_space->tools.lock);

    if (tools_is_event_enabled_version(va_space, UvmEventTypeTestAccessCounter, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventTestAccessCounterInfo_V1 *info = &entry.testEventData.accessCounter;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeTestAccessCounter;
        info->srcIndex = uvm_parent_id_value_from_processor_id(gpu_id);
        info->address = buffer_entry->address.address;
        info->isVirtual = buffer_entry->address.is_virtual ? 1 : 0;
        if (buffer_entry->address.is_virtual) {
            info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
            info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
            info->veId = buffer_entry->virtual_info.ve_id;
        }
        else {
            info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
        }
        info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC ? 1 : 0;
        info->physOnManaged = on_managed_phys ? 1 : 0;
        info->value = buffer_entry->counter_value;
        info->subGranularity = buffer_entry->sub_granularity;
        info->bank = buffer_entry->bank;
        info->tag = buffer_entry->tag;

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeTestAccessCounter, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventTestAccessCounterInfo_V2 *info = &entry.testEventData.accessCounter;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeTestAccessCounter;
        info->srcIndex = uvm_id_value(gpu_id);
        info->address = buffer_entry->address.address;
        info->isVirtual = buffer_entry->address.is_virtual ? 1 : 0;
        if (buffer_entry->address.is_virtual) {
            info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
            info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
            info->veId = buffer_entry->virtual_info.ve_id;
        }
        else {
            info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
        }
        info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC ? 1 : 0;
        info->physOnManaged = on_managed_phys ? 1 : 0;
        info->value = buffer_entry->counter_value;
        info->subGranularity = buffer_entry->sub_granularity;
        info->bank = buffer_entry->bank;
        info->tag = buffer_entry->tag;

        uvm_tools_record_event_v2(va_space, &entry);
    }

    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
                                        const uvm_access_counter_buffer_entry_t *buffer_entry,
                                        bool on_managed_phys)
{
    uvm_va_space_t *va_space;

    uvm_down_read(&g_tools_va_space_list_lock);
    list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
        uvm_tools_record_access_counter(va_space,
                                        gpu->id,
                                        buffer_entry,
                                        on_managed_phys);
    }
    uvm_up_read(&g_tools_va_space_list_lock);
}

void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
{
    UvmEventEntry_V2 entry;

    if (!va_space->tools.enabled)
        return;

    entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
    uvm_down_read(&va_space->tools.lock);
    uvm_tools_record_event_v2(va_space, &entry);
    uvm_up_read(&va_space->tools.lock);
}

// This function is used as a begin marker to group all migrations within a VA
// block that are performed in the same call to
// block_copy_resident_pages_between. All of these are pushed to the same
// uvm_push_t object, and will be notified in burst when the last one finishes.
void uvm_tools_record_block_migration_begin(uvm_va_block_t *va_block,
                                            uvm_push_t *push,
                                            uvm_processor_id_t dst_id,
                                            uvm_processor_id_t src_id,
                                            NvU64 start,
                                            uvm_make_resident_cause_t cause)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_range_group_range_t *range;

    // Calls from tools read/write functions to make_resident must not trigger
    // any migration
    UVM_ASSERT(cause != UVM_MAKE_RESIDENT_CAUSE_API_TOOLS);

    // During evictions the va_space lock is not held.
    if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION)
        uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);

    // Perform delayed notification only if the VA space has signed up for
1497 // UvmEventTypeMigration
1498 if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1499 block_migration_data_t *block_mig;
1500 uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1501
1502 UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1503
1504 block_mig = kmem_cache_alloc(g_tools_block_migration_data_cache, NV_UVM_GFP_FLAGS);
1505 if (block_mig == NULL)
1506 goto done_unlock;
1507
1508 block_mig->start_timestamp_gpu_addr = uvm_push_timestamp(push);
1509 block_mig->channel = push->channel;
1510 block_mig->start_timestamp_cpu = NV_GETTIME();
1511 block_mig->dst = dst_id;
1512 block_mig->src = src_id;
1513 block_mig->range_group_id = UVM_RANGE_GROUP_ID_NONE;
1514
1515 // During evictions, it is not safe to uvm_range_group_range_find() because the va_space lock is not held.
1516 if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1517 range = uvm_range_group_range_find(va_space, start);
1518 if (range != NULL)
1519 block_mig->range_group_id = range->range_group->id;
1520 }
1521 block_mig->va_space = va_space;
1522
1523 INIT_LIST_HEAD(&block_mig->events);
1524 push_info->on_complete_data = block_mig;
1525 push_info->on_complete = on_block_migration_complete;
1526
1527 uvm_spin_lock(&g_tools_channel_list_lock);
1528 add_pending_event_for_channel(block_mig->channel);
1529 uvm_spin_unlock(&g_tools_channel_list_lock);
1530 }
1531
1532 done_unlock:
1533 uvm_up_read(&va_space->tools.lock);
1534 }
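
// Illustrative sketch (not driver code): the delayed-notification pattern
// shared by the replay and block-migration events above, reduced to plain C.
// The producer records the CPU timestamp and the address where the GPU will
// write its own timestamp, then registers a completion callback on the push;
// the callback runs once the pushed work retires, when the GPU timestamp is
// finally valid, and only then is the event published. All fake_* names are
// hypothetical and exist only for this example.
//
//     typedef struct
//     {
//         NvU64 timestamp_cpu;                   // captured at submission
//         volatile NvU64 *timestamp_gpu_addr;    // written by the GPU
//     } fake_event_t;
//
//     static void fake_on_complete(void *data)
//     {
//         fake_event_t *event = data;
//         NvU64 timestamp_gpu = *event->timestamp_gpu_addr; // valid only now
//
//         // fake_publish_event(event->timestamp_cpu, timestamp_gpu);
//     }
//
//     static void fake_submit(uvm_push_info_t *push_info, fake_event_t *event)
//     {
//         event->timestamp_cpu = NV_GETTIME();
//         push_info->on_complete_data = event;
//         push_info->on_complete = fake_on_complete;
//     }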

void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
                                     uvm_processor_id_t dst,
                                     uvm_va_block_region_t region,
                                     const uvm_page_mask_t *page_mask)
{
    uvm_processor_mask_t *resident_processors;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    if (!va_space->tools.enabled)
        return;

    resident_processors = uvm_processor_mask_cache_alloc();
    if (!resident_processors)
        return;

    uvm_down_read(&va_space->tools.lock);

    if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicate, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventReadDuplicateInfo_V1 *info_read_duplicate = &entry.eventData.readDuplicate;
        uvm_page_index_t page_index;

        memset(&entry, 0, sizeof(entry));

        info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
        info_read_duplicate->size = PAGE_SIZE;
        info_read_duplicate->timeStamp = NV_GETTIME();

        for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
            uvm_processor_id_t id;

            info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index);
            info_read_duplicate->processors = 0;

            uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);

            for_each_id_in_mask(id, resident_processors)
                __set_bit(uvm_parent_id_value_from_processor_id(id),
                          (unsigned long *)&info_read_duplicate->processors);

            uvm_tools_record_event_v1(va_space, &entry);
        }
    }

    if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicate, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventReadDuplicateInfo_V2 *info_read_duplicate = &entry.eventData.readDuplicate;
        uvm_page_index_t page_index;

        memset(&entry, 0, sizeof(entry));

        info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
        info_read_duplicate->size = PAGE_SIZE;
        info_read_duplicate->timeStamp = NV_GETTIME();

        for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
            uvm_processor_id_t id;

            info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index);
            memset(info_read_duplicate->processors, 0, sizeof(info_read_duplicate->processors));

            uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);

            for_each_id_in_mask(id, resident_processors)
                __set_bit(uvm_id_value(id), (unsigned long *)info_read_duplicate->processors);

            uvm_tools_record_event_v2(va_space, &entry);
        }
    }

    uvm_up_read(&va_space->tools.lock);

    uvm_processor_mask_cache_free(resident_processors);
}

void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
                                                uvm_processor_id_t dst,
                                                uvm_va_block_region_t region,
                                                const uvm_page_mask_t *page_mask)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicateInvalidate, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        uvm_page_index_t page_index;
        UvmEventReadDuplicateInvalidateInfo_V1 *info = &entry.eventData.readDuplicateInvalidate;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeReadDuplicateInvalidate;
        info->residentIndex = uvm_parent_id_value_from_processor_id(dst);
        info->size = PAGE_SIZE;
        info->timeStamp = NV_GETTIME();

        for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
            UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));

            info->address = uvm_va_block_cpu_page_address(va_block, page_index);
            uvm_tools_record_event_v1(va_space, &entry);
        }
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicateInvalidate, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        uvm_page_index_t page_index;
        UvmEventReadDuplicateInvalidateInfo_V2 *info = &entry.eventData.readDuplicateInvalidate;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeReadDuplicateInvalidate;
        info->residentIndex = uvm_id_value(dst);
        info->size = PAGE_SIZE;
        info->timeStamp = NV_GETTIME();

        for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
            UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));

            info->address = uvm_va_block_cpu_page_address(va_block, page_index);
            uvm_tools_record_event_v2(va_space, &entry);
        }
    }
    uvm_up_read(&va_space->tools.lock);
}

static void tools_schedule_completed_events(void)
{
    uvm_channel_t *channel;
    uvm_channel_t *next_channel;
    NvU64 channel_count = 0;
    NvU64 i;

    uvm_spin_lock(&g_tools_channel_list_lock);

    // Retain every channel list entry currently in the list and keep track
    // of their count.
    list_for_each_entry(channel, &g_tools_channel_list, tools.channel_list_node) {
        ++channel->tools.pending_event_count;
        ++channel_count;
    }
    uvm_spin_unlock(&g_tools_channel_list_lock);

    if (channel_count == 0)
        return;

    // New entries always appear at the end, and all the entries seen in the
    // first loop have been retained, so it is safe to go through them.
    channel = list_first_entry(&g_tools_channel_list, uvm_channel_t, tools.channel_list_node);
    for (i = 0; i < channel_count; i++) {
        uvm_channel_update_progress_all(channel);
        channel = list_next_entry(channel, tools.channel_list_node);
    }

    // Now release all the entries we retained in the beginning.
    i = 0;
    uvm_spin_lock(&g_tools_channel_list_lock);
    list_for_each_entry_safe(channel, next_channel, &g_tools_channel_list, tools.channel_list_node) {
        if (i++ == channel_count)
            break;

        remove_pending_event_for_channel(channel);
    }
    uvm_spin_unlock(&g_tools_channel_list_lock);
}
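
// Illustrative sketch (not driver code): the retain/iterate/release idiom
// used by tools_schedule_completed_events() above. Pinning every current
// entry under the lock guarantees that none of the first channel_count
// entries can be removed while the list is walked without the lock held;
// new entries only ever append, so the walk stays bounded. The fake_* names
// are hypothetical.
//
//     uvm_spin_lock(&lock);
//     list_for_each_entry(entry, &list, node)
//         entry->refcount++;                     // pin every current entry
//     uvm_spin_unlock(&lock);
//
//     fake_process_first_n(&list, n);            // safe without the lock
//
//     uvm_spin_lock(&lock);
//     list_for_each_entry_safe(entry, next, &list, node) {
//         if (i++ == n)
//             break;                             // never touch newer entries
//         fake_release(entry);                   // may remove it from the list
//     }
//     uvm_spin_unlock(&lock);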

void uvm_tools_record_cpu_fatal_fault(uvm_va_space_t *va_space,
                                      NvU64 address,
                                      bool is_write,
                                      UvmEventFatalReason reason)
{
    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventFatalFaultInfo_V1 *info = &entry.eventData.fatalFault;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeFatalFault;
        info->processorIndex = UVM_ID_CPU_VALUE;
        info->timeStamp = NV_GETTIME();
        info->address = address;
        info->accessType = is_write ? UvmEventMemoryAccessTypeWrite : UvmEventMemoryAccessTypeRead;
        // info->faultType is not valid for CPU faults
        info->reason = reason;

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventFatalFaultInfo_V2 *info = &entry.eventData.fatalFault;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeFatalFault;
        info->processorIndex = UVM_ID_CPU_VALUE;
        info->timeStamp = NV_GETTIME();
        info->address = address;
        info->accessType = is_write ? UvmEventMemoryAccessTypeWrite : UvmEventMemoryAccessTypeRead;
        // info->faultType is not valid for CPU faults
        info->reason = reason;

        uvm_tools_record_event_v2(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_gpu_fatal_fault(uvm_gpu_id_t gpu_id,
                                      uvm_va_space_t *va_space,
                                      const uvm_fault_buffer_entry_t *buffer_entry,
                                      UvmEventFatalReason reason)
{
    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventFatalFaultInfo_V1 *info = &entry.eventData.fatalFault;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeFatalFault;
        info->processorIndex = uvm_parent_id_value_from_processor_id(gpu_id);
        info->timeStamp = NV_GETTIME();
        info->address = buffer_entry->fault_address;
        info->accessType = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
        info->faultType = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
        info->reason = reason;

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventFatalFaultInfo_V2 *info = &entry.eventData.fatalFault;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeFatalFault;
        info->processorIndex = uvm_id_value(gpu_id);
        info->timeStamp = NV_GETTIME();
        info->address = buffer_entry->fault_address;
        info->accessType = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
        info->faultType = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
        info->reason = reason;

        uvm_tools_record_event_v2(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
                                NvU64 address,
                                size_t region_size,
                                const uvm_processor_mask_t *processors)
{
    UVM_ASSERT(address);
    UVM_ASSERT(PAGE_ALIGNED(address));
    UVM_ASSERT(region_size > 0);

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeThrashingDetected, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventThrashingDetectedInfo_V1 *info = &entry.eventData.thrashing;
        uvm_processor_id_t id;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrashingDetected;
        info->address = address;
        info->size = region_size;
        info->timeStamp = NV_GETTIME();

        for_each_id_in_mask(id, processors)
            __set_bit(uvm_parent_id_value_from_processor_id(id),
                      (unsigned long *)&info->processors);

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeThrashingDetected, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventThrashingDetectedInfo_V2 *info = &entry.eventData.thrashing;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrashingDetected;
        info->address = address;
        info->size = region_size;
        info->timeStamp = NV_GETTIME();

        BUILD_BUG_ON(UVM_MAX_PROCESSORS_V2 < UVM_ID_MAX_PROCESSORS);
        bitmap_copy((unsigned long *)&info->processors, processors->bitmap, UVM_ID_MAX_PROCESSORS);

        uvm_tools_record_event_v2(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_throttling_start(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
{
    UVM_ASSERT(address);
    UVM_ASSERT(PAGE_ALIGNED(address));
    UVM_ASSERT(UVM_ID_IS_VALID(processor));

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingStart, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventThrottlingStartInfo_V1 *info = &entry.eventData.throttlingStart;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrottlingStart;
        info->processorIndex = uvm_parent_id_value_from_processor_id(processor);
        info->address = address;
        info->timeStamp = NV_GETTIME();

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingStart, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventThrottlingStartInfo_V2 *info = &entry.eventData.throttlingStart;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrottlingStart;
        info->processorIndex = uvm_id_value(processor);
        info->address = address;
        info->timeStamp = NV_GETTIME();

        uvm_tools_record_event_v2(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

void uvm_tools_record_throttling_end(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
{
    UVM_ASSERT(address);
    UVM_ASSERT(PAGE_ALIGNED(address));
    UVM_ASSERT(UVM_ID_IS_VALID(processor));

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingEnd, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;
        UvmEventThrottlingEndInfo_V1 *info = &entry.eventData.throttlingEnd;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrottlingEnd;
        info->processorIndex = uvm_parent_id_value_from_processor_id(processor);
        info->address = address;
        info->timeStamp = NV_GETTIME();

        uvm_tools_record_event_v1(va_space, &entry);
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingEnd, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;
        UvmEventThrottlingEndInfo_V2 *info = &entry.eventData.throttlingEnd;

        memset(&entry, 0, sizeof(entry));

        info->eventType = UvmEventTypeThrottlingEnd;
        info->processorIndex = uvm_id_value(processor);
        info->address = address;
        info->timeStamp = NV_GETTIME();

        uvm_tools_record_event_v2(va_space, &entry);
    }
    uvm_up_read(&va_space->tools.lock);
}

static void record_map_remote_events(void *args)
{
    block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)args;
    map_remote_data_t *map_remote, *next;
    uvm_va_space_t *va_space = block_map_remote->va_space;

    uvm_down_read(&va_space->tools.lock);
    if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V1)) {
        UvmEventEntry_V1 entry;

        memset(&entry, 0, sizeof(entry));

        entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote;
        entry.eventData.mapRemote.srcIndex = uvm_parent_id_value_from_processor_id(block_map_remote->src);
        entry.eventData.mapRemote.dstIndex = uvm_parent_id_value_from_processor_id(block_map_remote->dst);
        entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
        entry.eventData.mapRemote.timeStamp = block_map_remote->timestamp;

        list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
            list_del(&map_remote->events_node);

            entry.eventData.mapRemote.address = map_remote->address;
            entry.eventData.mapRemote.size = map_remote->size;
            entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
            kmem_cache_free(g_tools_map_remote_data_cache, map_remote);

            uvm_tools_record_event_v1(va_space, &entry);
        }
    }
    if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V2)) {
        UvmEventEntry_V2 entry;

        memset(&entry, 0, sizeof(entry));

        entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote;
        entry.eventData.mapRemote.srcIndex = uvm_id_value(block_map_remote->src);
        entry.eventData.mapRemote.dstIndex = uvm_id_value(block_map_remote->dst);
        entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
        entry.eventData.mapRemote.timeStamp = block_map_remote->timestamp;

        list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
            list_del(&map_remote->events_node);

            entry.eventData.mapRemote.address = map_remote->address;
            entry.eventData.mapRemote.size = map_remote->size;
            entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
            kmem_cache_free(g_tools_map_remote_data_cache, map_remote);

            uvm_tools_record_event_v2(va_space, &entry);
        }
    }
    uvm_up_read(&va_space->tools.lock);

    UVM_ASSERT(list_empty(&block_map_remote->events));
    kmem_cache_free(g_tools_block_map_remote_data_cache, block_map_remote);
}

static void record_map_remote_events_entry(void *args)
{
    UVM_ENTRY_VOID(record_map_remote_events(args));
}

static void on_map_remote_complete(void *ptr)
{
    block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)ptr;
    map_remote_data_t *map_remote;

    // Only GPU mappings use the deferred mechanism
    UVM_ASSERT(UVM_ID_IS_GPU(block_map_remote->src));
    list_for_each_entry(map_remote, &block_map_remote->events, events_node)
        map_remote->timestamp_gpu = *map_remote->timestamp_gpu_addr;

    nv_kthread_q_item_init(&block_map_remote->queue_item, record_map_remote_events_entry, ptr);

    uvm_spin_lock(&g_tools_channel_list_lock);
    remove_pending_event_for_channel(block_map_remote->channel);
    nv_kthread_q_schedule_q_item(&g_tools_queue, &block_map_remote->queue_item);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}

void uvm_tools_record_map_remote(uvm_va_block_t *va_block,
                                 uvm_push_t *push,
                                 uvm_processor_id_t processor,
                                 uvm_processor_id_t residency,
                                 NvU64 address,
                                 size_t region_size,
                                 UvmEventMapRemoteCause cause)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

    UVM_ASSERT(UVM_ID_IS_VALID(processor));
    UVM_ASSERT(UVM_ID_IS_VALID(residency));
    UVM_ASSERT(cause != UvmEventMapRemoteCauseInvalid);

    uvm_assert_rwsem_locked(&va_space->lock);

    if (!va_space->tools.enabled)
        return;

    uvm_down_read(&va_space->tools.lock);

    if (UVM_ID_IS_CPU(processor)) {
        if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V1)) {
            UvmEventEntry_V1 entry;

            memset(&entry, 0, sizeof(entry));

            entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote;
            entry.eventData.mapRemote.srcIndex = uvm_parent_id_value_from_processor_id(processor);
            entry.eventData.mapRemote.dstIndex = uvm_parent_id_value_from_processor_id(residency);
            entry.eventData.mapRemote.mapRemoteCause = cause;
            entry.eventData.mapRemote.timeStamp = NV_GETTIME();
            entry.eventData.mapRemote.address = address;
            entry.eventData.mapRemote.size = region_size;
            entry.eventData.mapRemote.timeStampGpu = 0;

            UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);

            uvm_tools_record_event_v1(va_space, &entry);
        }
        if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V2)) {
            UvmEventEntry_V2 entry;

            memset(&entry, 0, sizeof(entry));

            entry.eventData.mapRemote.eventType = UvmEventTypeMapRemote;
            entry.eventData.mapRemote.srcIndex = uvm_id_value(processor);
            entry.eventData.mapRemote.dstIndex = uvm_id_value(residency);
            entry.eventData.mapRemote.mapRemoteCause = cause;
            entry.eventData.mapRemote.timeStamp = NV_GETTIME();
            entry.eventData.mapRemote.address = address;
            entry.eventData.mapRemote.size = region_size;
            entry.eventData.mapRemote.timeStampGpu = 0;

            UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);

            uvm_tools_record_event_v2(va_space, &entry);
        }
    }
    else if (tools_is_event_enabled(va_space, UvmEventTypeMapRemote)) {
        uvm_push_info_t *push_info = uvm_push_info_from_push(push);
        block_map_remote_data_t *block_map_remote;
        map_remote_data_t *map_remote;

        // The first call on this pushbuffer creates the per-VA block structure
        if (push_info->on_complete == NULL) {
            UVM_ASSERT(push_info->on_complete_data == NULL);

            block_map_remote = kmem_cache_alloc(g_tools_block_map_remote_data_cache, NV_UVM_GFP_FLAGS);
            if (block_map_remote == NULL)
                goto done;

            block_map_remote->src = processor;
            block_map_remote->dst = residency;
            block_map_remote->cause = cause;
            block_map_remote->timestamp = NV_GETTIME();
            block_map_remote->va_space = va_space;
            block_map_remote->channel = push->channel;
            INIT_LIST_HEAD(&block_map_remote->events);

            push_info->on_complete_data = block_map_remote;
            push_info->on_complete = on_map_remote_complete;

            uvm_spin_lock(&g_tools_channel_list_lock);
            add_pending_event_for_channel(block_map_remote->channel);
            uvm_spin_unlock(&g_tools_channel_list_lock);
        }
        else {
            block_map_remote = push_info->on_complete_data;
        }
        UVM_ASSERT(block_map_remote);

        map_remote = kmem_cache_alloc(g_tools_map_remote_data_cache, NV_UVM_GFP_FLAGS);
        if (map_remote == NULL)
            goto done;

        map_remote->address = address;
        map_remote->size = region_size;
        map_remote->timestamp_gpu_addr = uvm_push_timestamp(push);

        list_add_tail(&map_remote->events_node, &block_map_remote->events);
    }

done:
    uvm_up_read(&va_space->tools.lock);
}
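
// Illustrative sketch (not driver code): the lazy per-push aggregation used
// by uvm_tools_record_map_remote() above. The first GPU MapRemote event on a
// push allocates the aggregate and registers the completion callback; every
// later event on the same push only appends to the aggregate's list, so the
// whole batch is published together when the push retires. fake_* names are
// hypothetical.
//
//     if (push_info->on_complete == NULL) {
//         aggregate = fake_alloc_aggregate();        // first event on this push
//         INIT_LIST_HEAD(&aggregate->events);
//         push_info->on_complete_data = aggregate;
//         push_info->on_complete = fake_flush_aggregate;
//     }
//     else {
//         aggregate = push_info->on_complete_data;   // later events reuse it
//     }
//
//     list_add_tail(&event->node, &aggregate->events);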

NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_tools_event_tracker_t *event_tracker;

    if (params->requestedVersion != UvmToolsEventQueueVersion_V1 &&
        params->requestedVersion != UvmToolsEventQueueVersion_V2)
        return NV_ERR_INVALID_ARGUMENT;

    event_tracker = nv_kmem_cache_zalloc(g_tools_event_tracker_cache, NV_UVM_GFP_FLAGS);
    if (event_tracker == NULL)
        return NV_ERR_NO_MEMORY;

    event_tracker->version = params->requestedVersion;

    event_tracker->uvm_file = fget(params->uvmFd);
    if (event_tracker->uvm_file == NULL) {
        status = NV_ERR_INSUFFICIENT_PERMISSIONS;
        goto fail;
    }

    if (!uvm_file_is_nvidia_uvm(event_tracker->uvm_file)) {
        fput(event_tracker->uvm_file);
        event_tracker->uvm_file = NULL;
        status = NV_ERR_INSUFFICIENT_PERMISSIONS;
        goto fail;
    }

    // We don't use uvm_fd_va_space() here because tools can work
    // without an associated va_space_mm.
    if (!uvm_fd_get_type(event_tracker->uvm_file, UVM_FD_VA_SPACE)) {
        fput(event_tracker->uvm_file);
        event_tracker->uvm_file = NULL;
        status = NV_ERR_ILLEGAL_ACTION;
        goto fail;
    }

    event_tracker->is_queue = params->queueBufferSize != 0;
    if (event_tracker->is_queue) {
        uvm_tools_queue_t *queue = &event_tracker->queue;
        NvU64 buffer_size, control_size;

        uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
        init_waitqueue_head(&queue->wait_queue);

        if (params->queueBufferSize > UINT_MAX) {
            status = NV_ERR_INVALID_ARGUMENT;
            goto fail;
        }

        queue->queue_buffer_count = (NvU32)params->queueBufferSize;
        queue->notification_threshold = queue->queue_buffer_count / 2;

        // queue_buffer_count must be a power of 2, of at least 2
        if (!is_power_of_2(queue->queue_buffer_count) || queue->queue_buffer_count < 2) {
            status = NV_ERR_INVALID_ARGUMENT;
            goto fail;
        }

        if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
            buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
            control_size = sizeof(UvmToolsEventControlData_V1);
        }
        else {
            buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
            control_size = sizeof(UvmToolsEventControlData_V2);
        }

        status = map_user_pages(params->queueBuffer,
                                buffer_size,
                                (void **)&queue->queue_v2,
                                &queue->queue_buffer_pages);
        if (status != NV_OK)
            goto fail;

        status = map_user_pages(params->controlBuffer,
                                control_size,
                                (void **)&queue->control_v2,
                                &queue->control_buffer_pages);
        if (status != NV_OK)
            goto fail;
    }
    else {
        uvm_tools_counter_t *counter = &event_tracker->counter;

        counter->all_processors = params->allProcessors;
        counter->processor = params->processor;
        status = map_user_pages(params->controlBuffer,
                                sizeof(NvU64) * UVM_TOTAL_COUNTERS,
                                (void **)&counter->counters,
                                &counter->counter_buffer_pages);
        if (status != NV_OK)
            goto fail;
    }

    if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
        status = NV_ERR_INVALID_ARGUMENT;
        goto fail;
    }

    params->grantedVersion = params->requestedVersion;

    return NV_OK;

fail:
    destroy_event_tracker(event_tracker);
    return status;
}
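
// Illustrative sketch (not driver code): one common reason ring buffers
// require a power-of-2 entry count, shown with hypothetical helpers. With
// count == 2^n, free-running 32-bit put/get counters reduce to buffer
// indices with a mask instead of a division, and the pending-entry
// arithmetic stays correct across counter wrap-around. Whether this is the
// exact motivation for the check above is an assumption.
//
//     static NvU32 fake_queue_index(NvU32 counter, NvU32 queue_buffer_count)
//     {
//         return counter & (queue_buffer_count - 1);   // count is 2^n
//     }
//
//     static NvU32 fake_queue_pending(NvU32 put, NvU32 get)
//     {
//         return put - get;   // correct modulo 2^32 even across wrap
//     }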

NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp)
{
    uvm_tools_queue_snapshot_t sn;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);

    if (!tracker_is_queue(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    uvm_spin_lock(&event_tracker->queue.lock);

    event_tracker->queue.notification_threshold = params->notificationThreshold;

    if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
        UvmToolsEventControlData_V1 *ctrl = event_tracker->queue.control_v1;

        sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
        sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
    }
    else {
        UvmToolsEventControlData_V2 *ctrl = event_tracker->queue.control_v2;

        sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
        sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
    }

    if (queue_needs_wakeup(&event_tracker->queue, &sn))
        wake_up_all(&event_tracker->queue.wait_queue);

    uvm_spin_unlock(&event_tracker->queue.lock);

    return NV_OK;
}
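
// Illustrative sketch (not driver code): a plausible shape for the
// queue_needs_wakeup() test used above. Assuming put_behind/get_ahead behave
// as producer/consumer counters, waiters are woken once the number of
// unconsumed entries reaches the (possibly just-lowered) notification
// threshold. The fake_ prefix marks this as a hypothetical reconstruction,
// not the actual helper.
//
//     static bool fake_queue_needs_wakeup(const uvm_tools_queue_snapshot_t *sn,
//                                         NvU32 notification_threshold)
//     {
//         return (NvU32)(sn->put_behind - sn->get_ahead) >= notification_threshold;
//     }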

static NV_STATUS tools_update_perf_events_callbacks(uvm_va_space_t *va_space)
{
    NV_STATUS status;

    uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    if (tools_is_fault_callback_needed(va_space)) {
        if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
            status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
                                                             UVM_PERF_EVENT_FAULT,
                                                             uvm_tools_record_fault);

            if (status != NV_OK)
                return status;
        }
    }
    else {
        if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
            uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
                                                      UVM_PERF_EVENT_FAULT,
                                                      uvm_tools_record_fault);
        }
    }

    if (tools_is_migration_callback_needed(va_space)) {
        if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
            status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
                                                             UVM_PERF_EVENT_MIGRATION,
                                                             uvm_tools_record_migration);

            if (status != NV_OK)
                return status;
        }
    }
    else {
        if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
            uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
                                                      UVM_PERF_EVENT_MIGRATION,
                                                      uvm_tools_record_migration);
        }
    }

    return NV_OK;
}

static NV_STATUS tools_update_status(uvm_va_space_t *va_space)
{
    NV_STATUS status;
    bool should_be_enabled;

    uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
    uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
    uvm_assert_rwsem_locked_write(&va_space->tools.lock);

    status = tools_update_perf_events_callbacks(va_space);
    if (status != NV_OK)
        return status;

    should_be_enabled = tools_are_enabled(va_space);
    if (should_be_enabled != va_space->tools.enabled) {
        if (should_be_enabled)
            list_add(&va_space->tools.node, &g_tools_va_space_list);
        else
            list_del(&va_space->tools.node);

        va_space->tools.enabled = should_be_enabled;
    }

    return NV_OK;
}

#define EVENT_FLAGS_BITS (sizeof(NvU64) * 8)

static bool mask_contains_invalid_events(NvU64 event_flags)
{
    const unsigned long *event_mask = (const unsigned long *)&event_flags;
    DECLARE_BITMAP(helper_mask, EVENT_FLAGS_BITS);
    DECLARE_BITMAP(valid_events_mask, EVENT_FLAGS_BITS);
    DECLARE_BITMAP(tests_events_mask, EVENT_FLAGS_BITS);

    bitmap_zero(tests_events_mask, EVENT_FLAGS_BITS);
    bitmap_set(tests_events_mask,
               UvmEventTestTypesFirst,
               UvmEventTestTypesLast - UvmEventTestTypesFirst + 1);

    bitmap_zero(valid_events_mask, EVENT_FLAGS_BITS);
    bitmap_set(valid_events_mask, 1, UvmEventNumTypes - 1);

    if (uvm_enable_builtin_tests)
        bitmap_or(valid_events_mask, valid_events_mask, tests_events_mask, EVENT_FLAGS_BITS);

    // Make sure that test event ids do not overlap with regular events
    BUILD_BUG_ON(UvmEventTestTypesFirst < UvmEventNumTypes);
    BUILD_BUG_ON(UvmEventTestTypesFirst > UvmEventTestTypesLast);
    BUILD_BUG_ON(UvmEventTestTypesLast >= UvmEventNumTypesAll);

    // Make sure that no test event ever changes the size of UvmEventEntry_V2
    BUILD_BUG_ON(sizeof(((UvmEventEntry_V2 *)NULL)->testEventData) >
                 sizeof(((UvmEventEntry_V2 *)NULL)->eventData));
    BUILD_BUG_ON(UvmEventNumTypesAll > EVENT_FLAGS_BITS);

    if (!bitmap_andnot(helper_mask, event_mask, valid_events_mask, EVENT_FLAGS_BITS))
        return false;

    if (!uvm_enable_builtin_tests && bitmap_and(helper_mask, event_mask, tests_events_mask, EVENT_FLAGS_BITS))
        UVM_INFO_PRINT("Event index not found. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");

    return true;
}
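
// Illustrative example (not driver code): how the bitmap algebra above
// classifies a request. valid_events_mask covers event ids in
// [1, UvmEventNumTypes) plus, only when uvm_enable_builtin_tests is set, the
// test event range; any bit left over after the andnot makes the whole mask
// invalid.
//
//     NvU64 flags = 1ULL << UvmEventTypeMigration;
//     // mask_contains_invalid_events(flags) == false: a regular event
//
//     flags |= 1ULL;    // bit 0 is never a valid event id
//     // mask_contains_invalid_events(flags) == true: request rejected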

NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
    NV_STATUS status = NV_OK;
    NvU64 inserted_lists;

    if (!tracker_is_queue(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    if (mask_contains_invalid_events(params->eventTypeFlags))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);

    insert_event_tracker(va_space,
                         event_tracker->queue.queue_nodes,
                         UvmEventNumTypesAll,
                         params->eventTypeFlags,
                         &event_tracker->queue.subscribed_queues,
                         event_tracker->version == UvmToolsEventQueueVersion_V1 ?
                             va_space->tools.queues_v1 : va_space->tools.queues_v2,
                         &inserted_lists);

    // Perform any necessary registration
    status = tools_update_status(va_space);
    if (status != NV_OK) {
        // On error, unregister any newly registered event
        remove_event_tracker(va_space,
                             event_tracker->queue.queue_nodes,
                             UvmEventNumTypes,
                             inserted_lists,
                             &event_tracker->queue.subscribed_queues);
    }

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return status;
}

NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);

    if (!tracker_is_queue(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);
    remove_event_tracker(va_space,
                         event_tracker->queue.queue_nodes,
                         UvmEventNumTypesAll,
                         params->eventTypeFlags,
                         &event_tracker->queue.subscribed_queues);

    // De-registration should not fail
    status = tools_update_status(va_space);
    UVM_ASSERT(status == NV_OK);

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);
    return NV_OK;
}

NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
    NV_STATUS status = NV_OK;
    NvU64 inserted_lists;

    if (!tracker_is_counter(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);

    insert_event_tracker(va_space,
                         event_tracker->counter.counter_nodes,
                         UVM_TOTAL_COUNTERS,
                         params->counterTypeFlags,
                         &event_tracker->counter.subscribed_counters,
                         va_space->tools.counters,
                         &inserted_lists);

    // Perform any necessary registration
    status = tools_update_status(va_space);
    if (status != NV_OK) {
        remove_event_tracker(va_space,
                             event_tracker->counter.counter_nodes,
                             UVM_TOTAL_COUNTERS,
                             inserted_lists,
                             &event_tracker->counter.subscribed_counters);
    }

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return status;
}

NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);

    if (!tracker_is_counter(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);
    remove_event_tracker(va_space,
                         event_tracker->counter.counter_nodes,
                         UVM_TOTAL_COUNTERS,
                         params->counterTypeFlags,
                         &event_tracker->counter.subscribed_counters);

    // De-registration should not fail
    status = tools_update_status(va_space);
    UVM_ASSERT(status == NV_OK);

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return NV_OK;
}

static NV_STATUS tools_access_va_block(uvm_va_block_t *va_block,
                                       uvm_va_block_context_t *block_context,
                                       NvU64 target_va,
                                       NvU64 size,
                                       bool is_write,
                                       uvm_mem_t *stage_mem)
{
    if (is_write) {
        return UVM_VA_BLOCK_LOCK_RETRY(va_block,
                                       NULL,
                                       uvm_va_block_write_from_cpu(va_block, block_context, target_va, stage_mem, size));
    }
    else {
        return UVM_VA_BLOCK_LOCK_RETRY(va_block,
                                       NULL,
                                       uvm_va_block_read_to_cpu(va_block, block_context, stage_mem, target_va, size));
    }
}

static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
                                             NvU64 target_va,
                                             NvU64 size,
                                             NvU64 user_va,
                                             NvU64 *bytes,
                                             bool is_write)
{
    NV_STATUS status;
    uvm_mem_t *stage_mem = NULL;
    void *stage_addr;
    uvm_processor_mask_t *retained_gpus = NULL;
    uvm_va_block_context_t *block_context = NULL;
    struct mm_struct *mm = NULL;

    retained_gpus = uvm_processor_mask_cache_alloc();
    if (!retained_gpus)
        return NV_ERR_NO_MEMORY;

    uvm_processor_mask_zero(retained_gpus);

    mm = uvm_va_space_mm_or_current_retain(va_space);

    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, mm, &stage_mem);
    if (status != NV_OK)
        goto exit;

    block_context = uvm_va_block_context_alloc(mm);
    if (!block_context) {
        status = NV_ERR_NO_MEMORY;
        goto exit;
    }

    stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem);
    *bytes = 0;

    while (*bytes < size) {
        uvm_gpu_t *gpu;
        uvm_va_block_t *block;
        void *user_va_start = (void *)(user_va + *bytes);
        NvU64 target_va_start = target_va + *bytes;
        NvU64 bytes_left = size - *bytes;
        NvU64 page_offset = target_va_start & (PAGE_SIZE - 1);
        NvU64 bytes_now = min(bytes_left, (NvU64)(PAGE_SIZE - page_offset));
        bool map_stage_mem_on_gpus = true;

        if (is_write) {
            NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now);
            if (remaining != 0) {
                status = NV_ERR_INVALID_ARGUMENT;
                goto exit;
            }
        }

        if (mm)
            uvm_down_read_mmap_lock(mm);

        // The RM flavor of the lock is needed to perform ECC checks.
        uvm_va_space_down_read_rm(va_space);
        if (mm)
            status = uvm_va_block_find_create(va_space,
                                              UVM_PAGE_ALIGN_DOWN(target_va_start),
                                              &block_context->hmm.vma,
                                              &block);
        else
            status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);

        if (status != NV_OK)
            goto unlock_and_exit;

        // When CC is enabled, the staging memory cannot be mapped on the GPU
        // (it is protected sysmem), but it is still used to store the
        // unencrypted version of the page contents when the page is resident
        // on vidmem.
        if (g_uvm_global.conf_computing_enabled)
            map_stage_mem_on_gpus = false;

        if (map_stage_mem_on_gpus) {
            for_each_gpu_in_mask(gpu, &va_space->registered_gpus) {
                if (uvm_processor_mask_test_and_set(retained_gpus, gpu->id))
                    continue;

                // The retention of each GPU ensures that the staging memory
                // is freed before the unregistration of any of the GPUs it
                // is mapped on. Each GPU is retained once.
                uvm_gpu_retain(gpu);

                // Accessing the VA block may result in copying data between
                // the CPU and a GPU. Conservatively add virtual mappings to
                // all the GPUs (even if those mappings may never be used) as
                // tools read/write is not on a performance critical path.
                status = uvm_mem_map_gpu_kernel(stage_mem, gpu);
                if (status != NV_OK)
                    goto unlock_and_exit;
            }
        }
        else {
            UVM_ASSERT(uvm_processor_mask_empty(retained_gpus));
        }

        // Make sure a CPU resident page has an up-to-date struct page pointer.
        if (uvm_va_block_is_hmm(block)) {
            status = uvm_hmm_va_block_update_residency_info(block, mm, UVM_PAGE_ALIGN_DOWN(target_va_start), true);
            if (status != NV_OK)
                goto unlock_and_exit;
        }

        status = tools_access_va_block(block, block_context, target_va_start, bytes_now, is_write, stage_mem);

        // For simplicity, check for ECC errors on all GPUs registered in the
        // VA space
        if (status == NV_OK)
            status = uvm_global_gpu_check_ecc_error(&va_space->registered_gpus);

        uvm_va_space_up_read_rm(va_space);
        if (mm)
            uvm_up_read_mmap_lock(mm);

        if (status != NV_OK)
            goto exit;

        if (!is_write) {
            NvU64 remaining;

            // Prevent processor speculation prior to accessing user-mapped
            // memory to avoid leaking information from side-channel attacks.
            // Under speculation, a valid VA range which does not contain
            // target_va could be used, and the block index could run off the
            // end of the array. Information about the state of that kernel
            // memory could be inferred if speculative execution gets to the
            // point where the data is copied out.
            nv_speculation_barrier();

            remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now);
            if (remaining > 0) {
                status = NV_ERR_INVALID_ARGUMENT;
                goto exit;
            }
        }

        *bytes += bytes_now;
    }

unlock_and_exit:
    if (status != NV_OK) {
        uvm_va_space_up_read_rm(va_space);
        if (mm)
            uvm_up_read_mmap_lock(mm);
    }

exit:
    uvm_va_block_context_free(block_context);

    uvm_mem_free(stage_mem);

    uvm_global_gpu_release(retained_gpus);

    uvm_va_space_mm_or_current_release(va_space, mm);

    uvm_processor_mask_cache_free(retained_gpus);

    return status;
}
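
// Illustrative sketch (not driver code): how a user-space tools client might
// drive tools_access_process_memory() through the ioctl wrappers below. It
// assumes an open file descriptor on the UVM device that owns the target VA
// space, and elides all error handling.
//
//     UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS params = {0};
//     unsigned char local_buf[256];
//
//     params.targetVa = (NvU64)target_address;         // VA in the target process
//     params.buffer   = (NvU64)(uintptr_t)local_buf;   // destination in this process
//     params.size     = sizeof(local_buf);
//
//     ioctl(uvm_fd, UVM_TOOLS_READ_PROCESS_MEMORY, &params);
//     // On success, params.bytesRead reports how much was copied.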

NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp)
{
    return tools_access_process_memory(uvm_va_space_get(filp),
                                       params->targetVa,
                                       params->size,
                                       params->buffer,
                                       &params->bytesRead,
                                       false);
}

NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp)
{
    return tools_access_process_memory(uvm_va_space_get(filp),
                                       params->targetVa,
                                       params->size,
                                       params->buffer,
                                       &params->bytesWritten,
                                       true);
}

NV_STATUS uvm_test_inject_tools_event(UVM_TEST_INJECT_TOOLS_EVENT_PARAMS *params, struct file *filp)
{
    NvU32 i;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->version != UvmToolsEventQueueVersion_V1 && params->version != UvmToolsEventQueueVersion_V2)
        return NV_ERR_INVALID_ARGUMENT;

    if (params->entry_v1.eventData.eventType >= UvmEventNumTypesAll)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_down_read(&va_space->tools.lock);
    for (i = 0; i < params->count; i++) {
        if (params->version == UvmToolsEventQueueVersion_V1)
            uvm_tools_record_event_v1(va_space, &params->entry_v1);
        else
            uvm_tools_record_event_v2(va_space, &params->entry_v2);
    }
    uvm_up_read(&va_space->tools.lock);
    return NV_OK;
}

NV_STATUS uvm_test_increment_tools_counter(UVM_TEST_INCREMENT_TOOLS_COUNTER_PARAMS *params, struct file *filp)
{
    NvU32 i;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->counter >= UVM_TOTAL_COUNTERS)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_down_read(&va_space->tools.lock);
    for (i = 0; i < params->count; i++)
        uvm_tools_inc_counter(va_space, params->counter, params->amount, &params->processor);
    uvm_up_read(&va_space->tools.lock);

    return NV_OK;
}

NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS *params, struct file *filp)
{
    NvProcessorUuid *uuids;
    NvU64 remaining;
    uvm_gpu_t *gpu;
    NvU32 count = params->count;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    NvU32 version = UvmToolsEventQueueVersion_V2;

    // Prior to Multi-MIG support, params->count was always zero, meaning the
    // input array was of size UVM_MAX_PROCESSORS_V1 (33 at that time).
    if (count == 0 && params->tablePtr) {
        version = UvmToolsEventQueueVersion_V1;
        count = UVM_MAX_PROCESSORS_V1;
    }
    else if (count == 0 || count > UVM_ID_MAX_PROCESSORS) {
        // Note that we don't rely on the external API definition
        // UVM_MAX_PROCESSORS since the kernel determines the array size needed
        // and reports the number of processors found to the caller.
        count = UVM_ID_MAX_PROCESSORS;
    }

    // Return which version of the table is being returned.
    params->version = version;

    uuids = uvm_kvmalloc_zero(sizeof(NvProcessorUuid) * count);
    if (uuids == NULL)
        return NV_ERR_NO_MEMORY;

    uvm_uuid_copy(&uuids[UVM_ID_CPU_VALUE], &NV_PROCESSOR_UUID_CPU_DEFAULT);
    params->count = 1;

    uvm_va_space_down_read(va_space);
    for_each_va_space_gpu(gpu, va_space) {
        NvU32 id_value;
        const NvProcessorUuid *uuid;

        // Version 1 only supports processors 0..32 and uses the parent
        // GPU UUID.
        if (version == UvmToolsEventQueueVersion_V1) {
            id_value = uvm_parent_id_value(gpu->parent->id);
            uuid = &gpu->parent->uuid;
        }
        else {
            id_value = uvm_id_value(gpu->id);
            uuid = &gpu->uuid;
        }

        if (id_value < count)
            uvm_uuid_copy(&uuids[id_value], uuid);

        // Return the actual count even if the UUID isn't returned due to
        // limited input array size.
        if (id_value + 1 > params->count)
            params->count = id_value + 1;
    }
    uvm_va_space_up_read(va_space);

    if (params->tablePtr)
        remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * count);
    else
        remaining = 0;
    uvm_kvfree(uuids);

    if (remaining != 0)
        return NV_ERR_INVALID_ADDRESS;

    return NV_OK;
}
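
// Illustrative sketch (not driver code): the count negotiation implemented
// above, seen from a hypothetical caller. Passing count == 0 with a non-NULL
// tablePtr selects the legacy V1 table; otherwise the kernel clamps count,
// copies as many UUIDs as fit, and reports the number of processors it found
// in params.count, so a caller can size the table with two calls.
//
//     UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS params = {0};
//
//     params.count = UVM_ID_MAX_PROCESSORS;   // request the V2 layout
//     params.tablePtr = 0;                    // first call: query count only
//     ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params);
//
//     table = malloc(params.count * sizeof(NvProcessorUuid));
//     params.tablePtr = (NvU64)(uintptr_t)table;
//     ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params);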

void uvm_tools_flush_events(void)
{
    tools_schedule_completed_events();

    nv_kthread_q_flush(&g_tools_queue);
}

NV_STATUS uvm_api_tools_flush_events(UVM_TOOLS_FLUSH_EVENTS_PARAMS *params, struct file *filp)
{
    uvm_tools_flush_events();
    return NV_OK;
}

NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpuUuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    // Wait for register-based fault clears to queue the replay event
    if (!gpu->parent->has_clear_faulted_channel_method) {
        uvm_parent_gpu_non_replayable_faults_isr_lock(gpu->parent);
        uvm_parent_gpu_non_replayable_faults_isr_unlock(gpu->parent);
    }

    // Wait for pending fault replay methods to complete (replayable faults on
    // all GPUs, and non-replayable faults on method-based GPUs).
    status = uvm_channel_manager_wait(gpu->channel_manager);

    // Flush any pending events even if (status != NV_OK)
    uvm_tools_flush_events();
    uvm_gpu_release(gpu);

    return status;
}

static const struct file_operations uvm_tools_fops =
{
    .open            = uvm_tools_open_entry,
    .release         = uvm_tools_release_entry,
    .unlocked_ioctl  = uvm_tools_unlocked_ioctl_entry,
#if NVCPU_IS_X86_64
    .compat_ioctl    = uvm_tools_unlocked_ioctl_entry,
#endif
    .poll            = uvm_tools_poll_entry,
    .owner           = THIS_MODULE,
};

static void _uvm_tools_destroy_cache_all(void)
{
    // The pointers are initialized to NULL, so it is safe to call destroy on
    // all of them.
    kmem_cache_destroy_safe(&g_tools_event_tracker_cache);
    kmem_cache_destroy_safe(&g_tools_block_migration_data_cache);
    kmem_cache_destroy_safe(&g_tools_migration_data_cache);
    kmem_cache_destroy_safe(&g_tools_replay_data_cache);
    kmem_cache_destroy_safe(&g_tools_block_map_remote_data_cache);
    kmem_cache_destroy_safe(&g_tools_map_remote_data_cache);
}

int uvm_tools_init(dev_t uvm_base_dev)
{
    dev_t uvm_tools_dev = MKDEV(MAJOR(uvm_base_dev), NVIDIA_UVM_TOOLS_MINOR_NUMBER);
    int ret = -ENOMEM;   // This will be updated later if allocations succeed

    uvm_init_rwsem(&g_tools_va_space_list_lock, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);

    g_tools_event_tracker_cache = NV_KMEM_CACHE_CREATE("uvm_tools_event_tracker_t",
                                                       uvm_tools_event_tracker_t);
    if (!g_tools_event_tracker_cache)
        goto err_cache_destroy;

    g_tools_block_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_migration_data_t",
                                                              block_migration_data_t);
    if (!g_tools_block_migration_data_cache)
        goto err_cache_destroy;

    g_tools_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_migration_data_t",
                                                        migration_data_t);
    if (!g_tools_migration_data_cache)
        goto err_cache_destroy;

    g_tools_replay_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_replay_data_t",
                                                     replay_data_t);
    if (!g_tools_replay_data_cache)
        goto err_cache_destroy;

    g_tools_block_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_map_remote_data_t",
                                                               block_map_remote_data_t);
    if (!g_tools_block_map_remote_data_cache)
        goto err_cache_destroy;

    g_tools_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_map_remote_data_t",
                                                         map_remote_data_t);
    if (!g_tools_map_remote_data_cache)
        goto err_cache_destroy;

    uvm_spin_lock_init(&g_tools_channel_list_lock, UVM_LOCK_ORDER_LEAF);

    ret = nv_kthread_q_init(&g_tools_queue, "UVM Tools Event Queue");
    if (ret < 0)
        goto err_cache_destroy;

    uvm_init_character_device(&g_uvm_tools_cdev, &uvm_tools_fops);
    ret = cdev_add(&g_uvm_tools_cdev, uvm_tools_dev, 1);
    if (ret != 0) {
        UVM_ERR_PRINT("cdev_add (major %u, minor %u) failed: %d\n",
                      MAJOR(uvm_tools_dev),
                      MINOR(uvm_tools_dev),
                      ret);
        goto err_stop_thread;
    }

    return ret;

err_stop_thread:
    nv_kthread_q_stop(&g_tools_queue);

err_cache_destroy:
    _uvm_tools_destroy_cache_all();
    return ret;
}

void uvm_tools_exit(void)
{
    unsigned i;

    cdev_del(&g_uvm_tools_cdev);

    nv_kthread_q_stop(&g_tools_queue);

    for (i = 0; i < UvmEventNumTypesAll; ++i)
        UVM_ASSERT(g_tools_enabled_event_count[i] == 0);

    UVM_ASSERT(list_empty(&g_tools_va_space_list));

    _uvm_tools_destroy_cache_all();
}