1 /*******************************************************************************
2     Copyright (c) 2016-2023 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 #include "uvm_common.h"
24 #include "uvm_ioctl.h"
25 #include "uvm_global.h"
26 #include "uvm_gpu.h"
27 #include "uvm_hal.h"
28 #include "uvm_tools.h"
29 #include "uvm_va_space.h"
30 #include "uvm_api.h"
31 #include "uvm_hal_types.h"
32 #include "uvm_va_block.h"
33 #include "uvm_va_range.h"
34 #include "uvm_push.h"
35 #include "uvm_forward_decl.h"
36 #include "uvm_range_group.h"
37 #include "uvm_mem.h"
38 #include "nv_speculation_barrier.h"
39 
40 // We limit the number of times a page can be retained by the kernel
41 // to prevent the user from maliciously passing UVM tools the same page
42 // over and over again in an attempt to overflow the refcount.
43 #define MAX_PAGE_COUNT (1 << 20)
44 
45 typedef struct
46 {
47     NvU32 get_ahead;
48     NvU32 get_behind;
49     NvU32 put_ahead;
50     NvU32 put_behind;
51 } uvm_tools_queue_snapshot_t;
52 
53 typedef struct
54 {
55     uvm_spinlock_t lock;
56     NvU64 subscribed_queues;
57     struct list_head queue_nodes[UvmEventNumTypesAll];
58 
59     struct page **queue_buffer_pages;
60     UvmEventEntry *queue;
61     NvU32 queue_buffer_count;
62     NvU32 notification_threshold;
63 
64     struct page **control_buffer_pages;
65     UvmToolsEventControlData *control;
66 
67     wait_queue_head_t wait_queue;
68     bool is_wakeup_get_valid;
69     NvU32 wakeup_get;
70 } uvm_tools_queue_t;
71 
72 typedef struct
73 {
74     struct list_head counter_nodes[UVM_TOTAL_COUNTERS];
75     NvU64 subscribed_counters;
76 
77     struct page **counter_buffer_pages;
78     NvU64 *counters;
79 
80     bool all_processors;
81     NvProcessorUuid processor;
82 } uvm_tools_counter_t;
83 
84 // private_data for /dev/nvidia-uvm-tools
85 typedef struct
86 {
87     bool is_queue;
88     struct file *uvm_file;
89     union
90     {
91         uvm_tools_queue_t queue;
92         uvm_tools_counter_t counter;
93     };
94 } uvm_tools_event_tracker_t;
95 
96 // Delayed events
97 //
98 // Events that require gpu timestamps for asynchronous operations use a delayed
99 // notification mechanism. Each event type registers a callback that is invoked
100 // from the update_progress channel routines. The callback then enqueues a
101 // work item that takes care of notifying the events. This module keeps a
102 // global list of channels with pending events. Other modules or user apps (via
103 // ioctl) may call uvm_tools_flush_events to update the progress of the channels
104 // in the list, as needed.
105 //
106 // User apps will need to flush events before removing gpus to avoid getting
107 // events with gpus ids that have been removed.
108 
109 // This object describes the pending migrations operations within a VA block
110 typedef struct
111 {
112     nv_kthread_q_item_t queue_item;
113     uvm_processor_id_t dst;
114     uvm_processor_id_t src;
115     uvm_va_space_t *va_space;
116 
117     uvm_channel_t *channel;
118     struct list_head events;
119     NvU64 start_timestamp_cpu;
120     NvU64 end_timestamp_cpu;
121     NvU64 *start_timestamp_gpu_addr;
122     NvU64 start_timestamp_gpu;
123     NvU64 range_group_id;
124 } block_migration_data_t;
125 
126 // This object represents a specific pending migration within a VA block
127 typedef struct
128 {
129     struct list_head events_node;
130     NvU64 bytes;
131     NvU64 address;
132     NvU64 *end_timestamp_gpu_addr;
133     NvU64 end_timestamp_gpu;
134     UvmEventMigrationCause cause;
135 } migration_data_t;
136 
137 // This object represents a pending gpu faut replay operation
138 typedef struct
139 {
140     nv_kthread_q_item_t queue_item;
141     uvm_channel_t *channel;
142     uvm_gpu_id_t gpu_id;
143     NvU32 batch_id;
144     uvm_fault_client_type_t client_type;
145     NvU64 timestamp;
146     NvU64 timestamp_gpu;
147     NvU64 *timestamp_gpu_addr;
148 } replay_data_t;
149 
150 // This object describes the pending map remote operations within a VA block
151 typedef struct
152 {
153     nv_kthread_q_item_t queue_item;
154     uvm_processor_id_t src;
155     uvm_processor_id_t dst;
156     UvmEventMapRemoteCause cause;
157     NvU64 timestamp;
158     uvm_va_space_t *va_space;
159 
160     uvm_channel_t *channel;
161     struct list_head events;
162 } block_map_remote_data_t;
163 
164 // This object represents a pending map remote operation
165 typedef struct
166 {
167     struct list_head events_node;
168 
169     NvU64 address;
170     NvU64 size;
171     NvU64 timestamp_gpu;
172     NvU64 *timestamp_gpu_addr;
173 } map_remote_data_t;
174 
175 
176 static struct cdev g_uvm_tools_cdev;
177 static LIST_HEAD(g_tools_va_space_list);
178 static NvU32 g_tools_enabled_event_count[UvmEventNumTypesAll];
179 static uvm_rw_semaphore_t g_tools_va_space_list_lock;
180 static struct kmem_cache *g_tools_event_tracker_cache __read_mostly = NULL;
181 static struct kmem_cache *g_tools_block_migration_data_cache __read_mostly = NULL;
182 static struct kmem_cache *g_tools_migration_data_cache __read_mostly = NULL;
183 static struct kmem_cache *g_tools_replay_data_cache __read_mostly = NULL;
184 static struct kmem_cache *g_tools_block_map_remote_data_cache __read_mostly = NULL;
185 static struct kmem_cache *g_tools_map_remote_data_cache __read_mostly = NULL;
186 static uvm_spinlock_t g_tools_channel_list_lock;
187 static LIST_HEAD(g_tools_channel_list);
188 static nv_kthread_q_t g_tools_queue;
189 
190 static NV_STATUS tools_update_status(uvm_va_space_t *va_space);
191 
192 static uvm_tools_event_tracker_t *tools_event_tracker(struct file *filp)
193 {
194     return (uvm_tools_event_tracker_t *)atomic_long_read((atomic_long_t *)&filp->private_data);
195 }
196 
197 static bool tracker_is_queue(uvm_tools_event_tracker_t *event_tracker)
198 {
199     return event_tracker != NULL && event_tracker->is_queue;
200 }
201 
202 static bool tracker_is_counter(uvm_tools_event_tracker_t *event_tracker)
203 {
204     return event_tracker != NULL && !event_tracker->is_queue;
205 }
206 
207 static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *event_tracker)
208 {
209     uvm_va_space_t *va_space;
210     UVM_ASSERT(event_tracker->uvm_file);
211     va_space = uvm_va_space_get(event_tracker->uvm_file);
212     return va_space;
213 }
214 
215 static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)
216 {
217     NvU64 i;
218 
219     for (i = 0; i < page_count; i++) {
220         set_page_dirty(pages[i]);
221         NV_UNPIN_USER_PAGE(pages[i]);
222     }
223 }
224 
225 static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
226 {
227     size = DIV_ROUND_UP(size, PAGE_SIZE);
228     vunmap((NvU8 *)addr);
229     uvm_put_user_pages_dirty(pages, size);
230     uvm_kvfree(pages);
231 }
232 
233 // This must be called with the mmap_lock held in read mode or better.
234 static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
235 {
236     struct vm_area_struct *vma;
237     NvU64 addr = start_va;
238     NvU64 region_end = start_va + size;
239 
240     do {
241         vma = find_vma(mm, addr);
242         if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
243             return NV_ERR_INVALID_ARGUMENT;
244 
245         addr = vma->vm_end;
246     } while (addr < region_end);
247 
248     return NV_OK;
249 }
250 
251 // Map virtual memory of data from [user_va, user_va + size) of current process into kernel.
252 // Sets *addr to kernel mapping and *pages to the array of struct pages that contain the memory.
253 static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
254 {
255     NV_STATUS status = NV_OK;
256     long ret = 0;
257     long num_pages;
258     long i;
259 
260     *addr = NULL;
261     *pages = NULL;
262     num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
263 
264     if (uvm_api_range_invalid(user_va, num_pages * PAGE_SIZE)) {
265         status = NV_ERR_INVALID_ADDRESS;
266         goto fail;
267     }
268 
269     *pages = uvm_kvmalloc(sizeof(struct page *) * num_pages);
270     if (*pages == NULL) {
271         status = NV_ERR_NO_MEMORY;
272         goto fail;
273     }
274 
275     // Although uvm_down_read_mmap_lock() is preferable due to its participation
276     // in the UVM lock dependency tracker, it cannot be used here. That's
277     // because pin_user_pages() may fault in HMM pages which are GPU-resident.
278     // When that happens, the UVM page fault handler would record another
279     // mmap_read_lock() on the same thread as this one, leading to a false
280     // positive lock dependency report.
281     //
282     // Therefore, use the lower level nv_mmap_read_lock() here.
283     nv_mmap_read_lock(current->mm);
284     status = check_vmas(current->mm, user_va, size);
285     if (status != NV_OK) {
286         nv_mmap_read_unlock(current->mm);
287         goto fail;
288     }
289     ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages);
290     nv_mmap_read_unlock(current->mm);
291 
292     if (ret != num_pages) {
293         status = NV_ERR_INVALID_ARGUMENT;
294         goto fail;
295     }
296 
297     for (i = 0; i < num_pages; i++) {
298         if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
299             status = NV_ERR_INVALID_ARGUMENT;
300             goto fail;
301         }
302     }
303 
304     *addr = vmap(*pages, num_pages, VM_MAP, PAGE_KERNEL);
305     if (*addr == NULL)
306         goto fail;
307 
308     return NV_OK;
309 
310 fail:
311     if (*pages == NULL)
312         return status;
313 
314     if (ret > 0)
315         uvm_put_user_pages_dirty(*pages, ret);
316     else if (ret < 0)
317         status = errno_to_nv_status(ret);
318 
319     uvm_kvfree(*pages);
320     *pages = NULL;
321     return status;
322 }
323 
324 static void insert_event_tracker(uvm_va_space_t *va_space,
325                                  struct list_head *node,
326                                  NvU32 list_count,
327                                  NvU64 list_mask,
328                                  NvU64 *subscribed_mask,
329                                  struct list_head *lists,
330                                  NvU64 *inserted_lists)
331 {
332     NvU32 i;
333     NvU64 insertable_lists = list_mask & ~*subscribed_mask;
334 
335     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
336     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
337 
338     for (i = 0; i < list_count; i++) {
339         if (insertable_lists & (1ULL << i)) {
340             ++g_tools_enabled_event_count[i];
341             list_add(node + i, lists + i);
342         }
343     }
344 
345     *subscribed_mask |= list_mask;
346     *inserted_lists = insertable_lists;
347 }
348 
349 static void remove_event_tracker(uvm_va_space_t *va_space,
350                                  struct list_head *node,
351                                  NvU32 list_count,
352                                  NvU64 list_mask,
353                                  NvU64 *subscribed_mask)
354 {
355     NvU32 i;
356     NvU64 removable_lists = list_mask & *subscribed_mask;
357 
358     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
359     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
360 
361     for (i = 0; i < list_count; i++) {
362         if (removable_lists & (1ULL << i)) {
363             UVM_ASSERT(g_tools_enabled_event_count[i] > 0);
364             --g_tools_enabled_event_count[i];
365             list_del(node + i);
366         }
367     }
368 
369     *subscribed_mask &= ~list_mask;
370 }
371 
372 static bool queue_needs_wakeup(uvm_tools_queue_t *queue, uvm_tools_queue_snapshot_t *sn)
373 {
374     NvU32 queue_mask = queue->queue_buffer_count - 1;
375 
376     uvm_assert_spinlock_locked(&queue->lock);
377     return ((queue->queue_buffer_count + sn->put_behind - sn->get_ahead) & queue_mask) >= queue->notification_threshold;
378 }
379 
380 static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
381 {
382     if (event_tracker->uvm_file != NULL) {
383         NV_STATUS status;
384         uvm_va_space_t *va_space = tools_event_tracker_va_space(event_tracker);
385 
386         uvm_down_write(&g_tools_va_space_list_lock);
387         uvm_down_write(&va_space->perf_events.lock);
388         uvm_down_write(&va_space->tools.lock);
389 
390         if (event_tracker->is_queue) {
391             uvm_tools_queue_t *queue = &event_tracker->queue;
392 
393             remove_event_tracker(va_space,
394                                  queue->queue_nodes,
395                                  UvmEventNumTypesAll,
396                                  queue->subscribed_queues,
397                                  &queue->subscribed_queues);
398 
399             if (queue->queue != NULL) {
400                 unmap_user_pages(queue->queue_buffer_pages,
401                                  queue->queue,
402                                  queue->queue_buffer_count * sizeof(UvmEventEntry));
403             }
404 
405             if (queue->control != NULL) {
406                 unmap_user_pages(queue->control_buffer_pages,
407                                  queue->control,
408                                  sizeof(UvmToolsEventControlData));
409             }
410         }
411         else {
412             uvm_tools_counter_t *counters = &event_tracker->counter;
413 
414             remove_event_tracker(va_space,
415                                  counters->counter_nodes,
416                                  UVM_TOTAL_COUNTERS,
417                                  counters->subscribed_counters,
418                                  &counters->subscribed_counters);
419 
420             if (counters->counters != NULL) {
421                 unmap_user_pages(counters->counter_buffer_pages,
422                                  counters->counters,
423                                  UVM_TOTAL_COUNTERS * sizeof(NvU64));
424             }
425         }
426 
427         // de-registration should not fail
428         status = tools_update_status(va_space);
429         UVM_ASSERT(status == NV_OK);
430 
431         uvm_up_write(&va_space->tools.lock);
432         uvm_up_write(&va_space->perf_events.lock);
433         uvm_up_write(&g_tools_va_space_list_lock);
434 
435         fput(event_tracker->uvm_file);
436     }
437     kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
438 }
439 
440 static void enqueue_event(const UvmEventEntry *entry, uvm_tools_queue_t *queue)
441 {
442     UvmToolsEventControlData *ctrl = queue->control;
443     uvm_tools_queue_snapshot_t sn;
444     NvU32 queue_size = queue->queue_buffer_count;
445     NvU32 queue_mask = queue_size - 1;
446 
447     // Prevent processor speculation prior to accessing user-mapped memory to
448     // avoid leaking information from side-channel attacks. There are many
449     // possible paths leading to this point and it would be difficult and error-
450     // prone to audit all of them to determine whether user mode could guide
451     // this access to kernel memory under speculative execution, so to be on the
452     // safe side we'll just always block speculation.
453     nv_speculation_barrier();
454 
455     uvm_spin_lock(&queue->lock);
456 
457     // ctrl is mapped into user space with read and write permissions,
458     // so its values cannot be trusted.
459     sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
460     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
461     sn.put_ahead = (sn.put_behind + 1) & queue_mask;
462 
463     // one free element means that the queue is full
464     if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
465         atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
466         goto unlock;
467     }
468 
469     memcpy(queue->queue + sn.put_behind, entry, sizeof(*entry));
470 
471     sn.put_behind = sn.put_ahead;
472     // put_ahead and put_behind will always be the same outside of queue->lock
473     // this allows the user-space consumer to choose either a 2 or 4 pointer synchronization approach
474     atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
475     atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);
476 
477     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
478     // if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead
479     if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
480         queue->is_wakeup_get_valid = true;
481         queue->wakeup_get = sn.get_ahead;
482         wake_up_all(&queue->wait_queue);
483     }
484 
485 unlock:
486     uvm_spin_unlock(&queue->lock);
487 }
488 
489 static void uvm_tools_record_event(uvm_va_space_t *va_space, const UvmEventEntry *entry)
490 {
491     NvU8 eventType = entry->eventData.eventType;
492     uvm_tools_queue_t *queue;
493 
494     UVM_ASSERT(eventType < UvmEventNumTypesAll);
495 
496     uvm_assert_rwsem_locked(&va_space->tools.lock);
497 
498     list_for_each_entry(queue, va_space->tools.queues + eventType, queue_nodes[eventType])
499         enqueue_event(entry, queue);
500 }
501 
502 static void uvm_tools_broadcast_event(const UvmEventEntry *entry)
503 {
504     uvm_va_space_t *va_space;
505 
506     uvm_down_read(&g_tools_va_space_list_lock);
507     list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
508         uvm_down_read(&va_space->tools.lock);
509         uvm_tools_record_event(va_space, entry);
510         uvm_up_read(&va_space->tools.lock);
511     }
512     uvm_up_read(&g_tools_va_space_list_lock);
513 }
514 
515 static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
516 {
517     // For compatibility with older counters, CPU faults for memory with a preferred location are reported
518     // for their preferred location as well as for the CPU device itself.
519     // This check prevents double counting in the aggregate count.
520     if (counter == UvmCounterNameCpuPageFaultCount)
521         return uvm_uuid_eq(processor, &NV_PROCESSOR_UUID_CPU_DEFAULT);
522     return true;
523 }
524 
525 static void uvm_tools_inc_counter(uvm_va_space_t *va_space,
526                                   UvmCounterName counter,
527                                   NvU64 amount,
528                                   const NvProcessorUuid *processor)
529 {
530     UVM_ASSERT((NvU32)counter < UVM_TOTAL_COUNTERS);
531     uvm_assert_rwsem_locked(&va_space->tools.lock);
532 
533     if (amount > 0) {
534         uvm_tools_counter_t *counters;
535 
536         // Prevent processor speculation prior to accessing user-mapped memory
537         // to avoid leaking information from side-channel attacks. There are
538         // many possible paths leading to this point and it would be difficult
539         // and error-prone to audit all of them to determine whether user mode
540         // could guide this access to kernel memory under speculative execution,
541         // so to be on the safe side we'll just always block speculation.
542         nv_speculation_barrier();
543 
544         list_for_each_entry(counters, va_space->tools.counters + counter, counter_nodes[counter]) {
545             if ((counters->all_processors && counter_matches_processor(counter, processor)) ||
546                 uvm_uuid_eq(&counters->processor, processor)) {
547                 atomic64_add(amount, (atomic64_t *)(counters->counters + counter));
548             }
549         }
550     }
551 }
552 
553 static bool tools_is_counter_enabled(uvm_va_space_t *va_space, UvmCounterName counter)
554 {
555     uvm_assert_rwsem_locked(&va_space->tools.lock);
556 
557     UVM_ASSERT(counter < UVM_TOTAL_COUNTERS);
558     return !list_empty(va_space->tools.counters + counter);
559 }
560 
561 static bool tools_is_event_enabled(uvm_va_space_t *va_space, UvmEventType event)
562 {
563     uvm_assert_rwsem_locked(&va_space->tools.lock);
564 
565     UVM_ASSERT(event < UvmEventNumTypesAll);
566     return !list_empty(va_space->tools.queues + event);
567 }
568 
569 static bool tools_is_event_enabled_in_any_va_space(UvmEventType event)
570 {
571     bool ret = false;
572 
573     uvm_down_read(&g_tools_va_space_list_lock);
574     ret = g_tools_enabled_event_count[event] != 0;
575     uvm_up_read(&g_tools_va_space_list_lock);
576 
577     return ret;
578 }
579 
580 static bool tools_are_enabled(uvm_va_space_t *va_space)
581 {
582     NvU32 i;
583 
584     uvm_assert_rwsem_locked(&va_space->tools.lock);
585 
586     for (i = 0; i < UVM_TOTAL_COUNTERS; i++) {
587         if (tools_is_counter_enabled(va_space, i))
588             return true;
589     }
590     for (i = 0; i < UvmEventNumTypesAll; i++) {
591         if (tools_is_event_enabled(va_space, i))
592             return true;
593     }
594     return false;
595 }
596 
597 static bool tools_is_fault_callback_needed(uvm_va_space_t *va_space)
598 {
599     return tools_is_event_enabled(va_space, UvmEventTypeCpuFault) ||
600            tools_is_event_enabled(va_space, UvmEventTypeGpuFault) ||
601            tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount) ||
602            tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount);
603 }
604 
605 static bool tools_is_migration_callback_needed(uvm_va_space_t *va_space)
606 {
607     return tools_is_event_enabled(va_space, UvmEventTypeMigration) ||
608            tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate) ||
609            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH) ||
610            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD);
611 }
612 
613 static int uvm_tools_open(struct inode *inode, struct file *filp)
614 {
615     filp->private_data = NULL;
616     return -nv_status_to_errno(uvm_global_get_status());
617 }
618 
619 static int uvm_tools_open_entry(struct inode *inode, struct file *filp)
620 {
621     UVM_ENTRY_RET(uvm_tools_open(inode, filp));
622 }
623 
624 static int uvm_tools_release(struct inode *inode, struct file *filp)
625 {
626     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
627     if (event_tracker != NULL) {
628         destroy_event_tracker(event_tracker);
629         filp->private_data = NULL;
630     }
631     return -nv_status_to_errno(uvm_global_get_status());
632 }
633 
634 static int uvm_tools_release_entry(struct inode *inode, struct file *filp)
635 {
636     UVM_ENTRY_RET(uvm_tools_release(inode, filp));
637 }
638 
639 static long uvm_tools_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
640 {
641     switch (cmd) {
642         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_INIT_EVENT_TRACKER,         uvm_api_tools_init_event_tracker);
643         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD, uvm_api_tools_set_notification_threshold);
644         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS,  uvm_api_tools_event_queue_enable_events);
645         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS, uvm_api_tools_event_queue_disable_events);
646         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_ENABLE_COUNTERS,            uvm_api_tools_enable_counters);
647         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_DISABLE_COUNTERS,           uvm_api_tools_disable_counters);
648     }
649 
650     uvm_thread_assert_all_unlocked();
651 
652     return -EINVAL;
653 }
654 
655 static long uvm_tools_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
656 {
657     UVM_ENTRY_RET(uvm_tools_unlocked_ioctl(filp, cmd, arg));
658 }
659 
660 static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
661 {
662     int flags = 0;
663     uvm_tools_queue_snapshot_t sn;
664     uvm_tools_event_tracker_t *event_tracker;
665     UvmToolsEventControlData *ctrl;
666 
667     if (uvm_global_get_status() != NV_OK)
668         return POLLERR;
669 
670     event_tracker = tools_event_tracker(filp);
671     if (!tracker_is_queue(event_tracker))
672         return POLLERR;
673 
674     uvm_spin_lock(&event_tracker->queue.lock);
675 
676     event_tracker->queue.is_wakeup_get_valid = false;
677     ctrl = event_tracker->queue.control;
678     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
679     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
680 
681     if (queue_needs_wakeup(&event_tracker->queue, &sn))
682         flags = POLLIN | POLLRDNORM;
683 
684     uvm_spin_unlock(&event_tracker->queue.lock);
685 
686     poll_wait(filp, &event_tracker->queue.wait_queue, wait);
687     return flags;
688 }
689 
690 static unsigned uvm_tools_poll_entry(struct file *filp, poll_table *wait)
691 {
692     UVM_ENTRY_RET(uvm_tools_poll(filp, wait));
693 }
694 
695 static UvmEventFaultType g_hal_to_tools_fault_type_table[UVM_FAULT_TYPE_COUNT] = {
696     [UVM_FAULT_TYPE_INVALID_PDE]          = UvmFaultTypeInvalidPde,
697     [UVM_FAULT_TYPE_INVALID_PTE]          = UvmFaultTypeInvalidPte,
698     [UVM_FAULT_TYPE_ATOMIC]               = UvmFaultTypeAtomic,
699     [UVM_FAULT_TYPE_WRITE]                = UvmFaultTypeWrite,
700     [UVM_FAULT_TYPE_PDE_SIZE]             = UvmFaultTypeInvalidPdeSize,
701     [UVM_FAULT_TYPE_VA_LIMIT_VIOLATION]   = UvmFaultTypeLimitViolation,
702     [UVM_FAULT_TYPE_UNBOUND_INST_BLOCK]   = UvmFaultTypeUnboundInstBlock,
703     [UVM_FAULT_TYPE_PRIV_VIOLATION]       = UvmFaultTypePrivViolation,
704     [UVM_FAULT_TYPE_PITCH_MASK_VIOLATION] = UvmFaultTypePitchMaskViolation,
705     [UVM_FAULT_TYPE_WORK_CREATION]        = UvmFaultTypeWorkCreation,
706     [UVM_FAULT_TYPE_UNSUPPORTED_APERTURE] = UvmFaultTypeUnsupportedAperture,
707     [UVM_FAULT_TYPE_COMPRESSION_FAILURE]  = UvmFaultTypeCompressionFailure,
708     [UVM_FAULT_TYPE_UNSUPPORTED_KIND]     = UvmFaultTypeUnsupportedKind,
709     [UVM_FAULT_TYPE_REGION_VIOLATION]     = UvmFaultTypeRegionViolation,
710     [UVM_FAULT_TYPE_POISONED]             = UvmFaultTypePoison,
711 };
712 
713 // TODO: add new value for weak atomics in tools
714 static UvmEventMemoryAccessType g_hal_to_tools_fault_access_type_table[UVM_FAULT_ACCESS_TYPE_COUNT] = {
715     [UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG] = UvmEventMemoryAccessTypeAtomic,
716     [UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK]   = UvmEventMemoryAccessTypeAtomic,
717     [UVM_FAULT_ACCESS_TYPE_WRITE]         = UvmEventMemoryAccessTypeWrite,
718     [UVM_FAULT_ACCESS_TYPE_READ]          = UvmEventMemoryAccessTypeRead,
719     [UVM_FAULT_ACCESS_TYPE_PREFETCH]      = UvmEventMemoryAccessTypePrefetch
720 };
721 
722 static UvmEventApertureType g_hal_to_tools_aperture_table[UVM_APERTURE_MAX] = {
723     [UVM_APERTURE_PEER_0] = UvmEventAperturePeer0,
724     [UVM_APERTURE_PEER_1] = UvmEventAperturePeer1,
725     [UVM_APERTURE_PEER_2] = UvmEventAperturePeer2,
726     [UVM_APERTURE_PEER_3] = UvmEventAperturePeer3,
727     [UVM_APERTURE_PEER_4] = UvmEventAperturePeer4,
728     [UVM_APERTURE_PEER_5] = UvmEventAperturePeer5,
729     [UVM_APERTURE_PEER_6] = UvmEventAperturePeer6,
730     [UVM_APERTURE_PEER_7] = UvmEventAperturePeer7,
731     [UVM_APERTURE_SYS]    = UvmEventApertureSys,
732     [UVM_APERTURE_VID]    = UvmEventApertureVid,
733 };
734 
735 static UvmEventFaultClientType g_hal_to_tools_fault_client_type_table[UVM_FAULT_CLIENT_TYPE_COUNT] = {
736     [UVM_FAULT_CLIENT_TYPE_GPC] = UvmEventFaultClientTypeGpc,
737     [UVM_FAULT_CLIENT_TYPE_HUB] = UvmEventFaultClientTypeHub,
738 };
739 
740 static void record_gpu_fault_instance(uvm_gpu_t *gpu,
741                                       uvm_va_space_t *va_space,
742                                       const uvm_fault_buffer_entry_t *fault_entry,
743                                       NvU64 batch_id,
744                                       NvU64 timestamp)
745 {
746     UvmEventEntry entry;
747     UvmEventGpuFaultInfo *info = &entry.eventData.gpuFault;
748     memset(&entry, 0, sizeof(entry));
749 
750     info->eventType     = UvmEventTypeGpuFault;
751     info->gpuIndex      = uvm_parent_id_value_from_processor_id(gpu->id);
752     info->faultType     = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
753     info->accessType    = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
754     info->clientType    = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
755     if (fault_entry->is_replayable)
756         info->gpcId     = fault_entry->fault_source.gpc_id;
757     else
758         info->channelId = fault_entry->fault_source.channel_id;
759     info->clientId      = fault_entry->fault_source.client_id;
760     info->address       = fault_entry->fault_address;
761     info->timeStamp     = timestamp;
762     info->timeStampGpu  = fault_entry->timestamp;
763     info->batchId       = batch_id;
764 
765     uvm_tools_record_event(va_space, &entry);
766 }
767 
768 static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
769 {
770     uvm_va_space_t *va_space = event_data->fault.space;
771 
772     UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT);
773     UVM_ASSERT(event_data->fault.space);
774 
775     uvm_assert_rwsem_locked(&va_space->lock);
776     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
777     UVM_ASSERT(va_space->tools.enabled);
778 
779     uvm_down_read(&va_space->tools.lock);
780     UVM_ASSERT(tools_is_fault_callback_needed(va_space));
781 
782     if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
783         if (tools_is_event_enabled(va_space, UvmEventTypeCpuFault)) {
784             UvmEventEntry entry;
785             UvmEventCpuFaultInfo *info = &entry.eventData.cpuFault;
786             memset(&entry, 0, sizeof(entry));
787 
788             info->eventType = UvmEventTypeCpuFault;
789             if (event_data->fault.cpu.is_write)
790                 info->accessType = UvmEventMemoryAccessTypeWrite;
791             else
792                 info->accessType = UvmEventMemoryAccessTypeRead;
793 
794             info->address = event_data->fault.cpu.fault_va;
795             info->timeStamp = NV_GETTIME();
796             // assume that current owns va_space
797             info->pid = uvm_get_stale_process_id();
798             info->threadId = uvm_get_stale_thread_id();
799             info->pc = event_data->fault.cpu.pc;
800 
801             uvm_tools_record_event(va_space, &entry);
802         }
803         if (tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount)) {
804             uvm_processor_id_t preferred_location;
805 
806             // The UVM Lite tools interface did not represent the CPU as a UVM
807             // device. It reported CPU faults against the corresponding
808             // allocation's 'home location'. Though this driver's tools
809             // interface does include a CPU device, for compatibility, the
810             // driver still reports faults against a buffer's preferred
811             // location, in addition to the CPU.
812             uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &NV_PROCESSOR_UUID_CPU_DEFAULT);
813 
814             preferred_location = event_data->fault.preferred_location;
815             if (UVM_ID_IS_GPU(preferred_location)) {
816                 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, preferred_location);
817                 uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
818             }
819         }
820     }
821     else {
822         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->fault.proc_id);
823         UVM_ASSERT(gpu);
824 
825         if (tools_is_event_enabled(va_space, UvmEventTypeGpuFault)) {
826             NvU64 timestamp = NV_GETTIME();
827             uvm_fault_buffer_entry_t *fault_entry = event_data->fault.gpu.buffer_entry;
828             uvm_fault_buffer_entry_t *fault_instance;
829 
830             record_gpu_fault_instance(gpu, va_space, fault_entry, event_data->fault.gpu.batch_id, timestamp);
831 
832             list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list)
833                 record_gpu_fault_instance(gpu, va_space, fault_instance, event_data->fault.gpu.batch_id, timestamp);
834         }
835 
836         if (tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount))
837             uvm_tools_inc_counter(va_space, UvmCounterNameGpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
838     }
839     uvm_up_read(&va_space->tools.lock);
840 }
841 
842 static void add_pending_event_for_channel(uvm_channel_t *channel)
843 {
844     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
845 
846     if (channel->tools.pending_event_count++ == 0)
847         list_add_tail(&channel->tools.channel_list_node, &g_tools_channel_list);
848 }
849 
850 static void remove_pending_event_for_channel(uvm_channel_t *channel)
851 {
852     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
853     UVM_ASSERT(channel->tools.pending_event_count > 0);
854     if (--channel->tools.pending_event_count == 0)
855         list_del_init(&channel->tools.channel_list_node);
856 }
857 
858 
859 static void record_migration_events(void *args)
860 {
861     block_migration_data_t *block_mig = (block_migration_data_t *)args;
862     migration_data_t *mig;
863     migration_data_t *next;
864     UvmEventEntry entry;
865     UvmEventMigrationInfo *info = &entry.eventData.migration;
866     uvm_va_space_t *va_space = block_mig->va_space;
867 
868     NvU64 gpu_timestamp = block_mig->start_timestamp_gpu;
869 
870     // Initialize fields that are constant throughout the whole block
871     memset(&entry, 0, sizeof(entry));
872     info->eventType      = UvmEventTypeMigration;
873     info->srcIndex       = uvm_parent_id_value_from_processor_id(block_mig->src);
874     info->dstIndex       = uvm_parent_id_value_from_processor_id(block_mig->dst);
875     info->beginTimeStamp = block_mig->start_timestamp_cpu;
876     info->endTimeStamp   = block_mig->end_timestamp_cpu;
877     info->rangeGroupId   = block_mig->range_group_id;
878 
879     uvm_down_read(&va_space->tools.lock);
880     list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
881         UVM_ASSERT(mig->bytes > 0);
882         list_del(&mig->events_node);
883 
884         info->address           = mig->address;
885         info->migratedBytes     = mig->bytes;
886         info->beginTimeStampGpu = gpu_timestamp;
887         info->endTimeStampGpu   = mig->end_timestamp_gpu;
888         info->migrationCause    = mig->cause;
889         gpu_timestamp = mig->end_timestamp_gpu;
890         kmem_cache_free(g_tools_migration_data_cache, mig);
891 
892         uvm_tools_record_event(va_space, &entry);
893     }
894     uvm_up_read(&va_space->tools.lock);
895 
896     UVM_ASSERT(list_empty(&block_mig->events));
897     kmem_cache_free(g_tools_block_migration_data_cache, block_mig);
898 }
899 
900 static void record_migration_events_entry(void *args)
901 {
902     UVM_ENTRY_VOID(record_migration_events(args));
903 }
904 
905 static void on_block_migration_complete(void *ptr)
906 {
907     migration_data_t *mig;
908     block_migration_data_t *block_mig = (block_migration_data_t *)ptr;
909 
910     block_mig->end_timestamp_cpu = NV_GETTIME();
911     block_mig->start_timestamp_gpu = *block_mig->start_timestamp_gpu_addr;
912     list_for_each_entry(mig, &block_mig->events, events_node)
913         mig->end_timestamp_gpu = *mig->end_timestamp_gpu_addr;
914 
915     nv_kthread_q_item_init(&block_mig->queue_item, record_migration_events_entry, block_mig);
916 
917     // The UVM driver may notice that work in a channel is complete in a variety of situations
918     // and the va_space lock is not always held in all of them, nor can it always be taken safely on them.
919     // Dispatching events requires the va_space lock to be held in at least read mode, so
920     // this callback simply enqueues the dispatching onto a queue, where the
921     // va_space lock is always safe to acquire.
922     uvm_spin_lock(&g_tools_channel_list_lock);
923     remove_pending_event_for_channel(block_mig->channel);
924     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_mig->queue_item);
925     uvm_spin_unlock(&g_tools_channel_list_lock);
926 }
927 
928 static void record_replay_event_helper(uvm_gpu_id_t gpu_id,
929                                        NvU32 batch_id,
930                                        uvm_fault_client_type_t client_type,
931                                        NvU64 timestamp,
932                                        NvU64 timestamp_gpu)
933 {
934     UvmEventEntry entry;
935 
936     memset(&entry, 0, sizeof(entry));
937     entry.eventData.gpuFaultReplay.eventType    = UvmEventTypeGpuFaultReplay;
938     entry.eventData.gpuFaultReplay.gpuIndex     = uvm_parent_id_value_from_processor_id(gpu_id);
939     entry.eventData.gpuFaultReplay.batchId      = batch_id;
940     entry.eventData.gpuFaultReplay.clientType   = g_hal_to_tools_fault_client_type_table[client_type];
941     entry.eventData.gpuFaultReplay.timeStamp    = timestamp;
942     entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;
943 
944     uvm_tools_broadcast_event(&entry);
945 }
946 
947 static void record_replay_events(void *args)
948 {
949     replay_data_t *replay = (replay_data_t *)args;
950 
951     record_replay_event_helper(replay->gpu_id,
952                                replay->batch_id,
953                                replay->client_type,
954                                replay->timestamp,
955                                replay->timestamp_gpu);
956 
957     kmem_cache_free(g_tools_replay_data_cache, replay);
958 }
959 
960 static void record_replay_events_entry(void *args)
961 {
962     UVM_ENTRY_VOID(record_replay_events(args));
963 }
964 
965 static void on_replay_complete(void *ptr)
966 {
967     replay_data_t *replay = (replay_data_t *)ptr;
968     replay->timestamp_gpu = *replay->timestamp_gpu_addr;
969 
970     nv_kthread_q_item_init(&replay->queue_item, record_replay_events_entry, ptr);
971 
972     uvm_spin_lock(&g_tools_channel_list_lock);
973     remove_pending_event_for_channel(replay->channel);
974     nv_kthread_q_schedule_q_item(&g_tools_queue, &replay->queue_item);
975     uvm_spin_unlock(&g_tools_channel_list_lock);
976 
977 }
978 
979 static UvmEventMigrationCause g_make_resident_to_tools_migration_cause[UVM_MAKE_RESIDENT_CAUSE_MAX] = {
980     [UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT]     = UvmEventMigrationCauseCoherence,
981     [UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence,
982     [UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER]       = UvmEventMigrationCauseAccessCounters,
983     [UVM_MAKE_RESIDENT_CAUSE_PREFETCH]             = UvmEventMigrationCausePrefetch,
984     [UVM_MAKE_RESIDENT_CAUSE_EVICTION]             = UvmEventMigrationCauseEviction,
985     [UVM_MAKE_RESIDENT_CAUSE_API_TOOLS]            = UvmEventMigrationCauseInvalid,
986     [UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE]          = UvmEventMigrationCauseUser,
987     [UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP]  = UvmEventMigrationCauseCoherence,
988     [UVM_MAKE_RESIDENT_CAUSE_API_HINT]             = UvmEventMigrationCauseUser,
989 };
990 
991 // For non-CPU-to-CPU migrations (or CPU-to-CPU copies using CEs), this event is
992 // notified asynchronously when all the migrations pushed to the same uvm_push_t
993 // object in a call to block_copy_resident_pages_between have finished.
994 // For CPU-to-CPU copies using memcpy, this event is notified when all of the
995 // page copies does by block_copy_resident_pages have finished.
996 static void uvm_tools_record_migration(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
997 {
998     uvm_va_block_t *va_block = event_data->migration.block;
999     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1000 
1001     UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION);
1002 
1003     uvm_assert_mutex_locked(&va_block->lock);
1004     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
1005     UVM_ASSERT(va_space->tools.enabled);
1006 
1007     uvm_down_read(&va_space->tools.lock);
1008     UVM_ASSERT(tools_is_migration_callback_needed(va_space));
1009 
1010     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1011         if (!UVM_ID_IS_CPU(event_data->migration.src) || !UVM_ID_IS_CPU(event_data->migration.dst)) {
1012             migration_data_t *mig;
1013             uvm_push_info_t *push_info = uvm_push_info_from_push(event_data->migration.push);
1014             block_migration_data_t *block_mig = (block_migration_data_t *)push_info->on_complete_data;
1015 
1016             if (push_info->on_complete != NULL) {
1017                 mig = kmem_cache_alloc(g_tools_migration_data_cache, NV_UVM_GFP_FLAGS);
1018                 if (mig == NULL)
1019                     goto done_unlock;
1020 
1021                 mig->address = event_data->migration.address;
1022                 mig->bytes = event_data->migration.bytes;
1023                 mig->end_timestamp_gpu_addr = uvm_push_timestamp(event_data->migration.push);
1024                 mig->cause = g_make_resident_to_tools_migration_cause[event_data->migration.cause];
1025 
1026                 list_add_tail(&mig->events_node, &block_mig->events);
1027             }
1028         }
1029         else {
1030             UvmEventEntry entry;
1031             UvmEventMigrationInfo *info = &entry.eventData.migration;
1032             uvm_va_space_t *va_space = uvm_va_block_get_va_space(event_data->migration.block);
1033 
1034             // CPU-to-CPU migration events can be added directly to the queue.
1035             memset(&entry, 0, sizeof(entry));
1036             info->eventType = UvmEventTypeMigration;
1037             info->srcIndex = uvm_parent_id_value_from_processor_id(event_data->migration.src);
1038             info->dstIndex = uvm_parent_id_value_from_processor_id(event_data->migration.dst);
1039             // TODO: Bug 4232310: Add src and dst NUMA node IDS to event data.
1040             //info->srcNid = event_data->migration.src_nid;
1041             //info->dstNid = event_data->migration.dst_nid;
1042             info->address = event_data->migration.address;
1043             info->migratedBytes = event_data->migration.bytes;
1044             info->beginTimeStamp = event_data->migration.cpu_start_timestamp;
1045             info->endTimeStamp = NV_GETTIME();
1046             info->migrationCause = event_data->migration.cause;
1047             info->rangeGroupId = UVM_RANGE_GROUP_ID_NONE;
1048 
1049             // During evictions, it is not safe to uvm_range_group_range_find() because the va_space lock is not held.
1050             if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1051                 uvm_range_group_range_t *range = uvm_range_group_range_find(va_space, event_data->migration.address);
1052                 if (range != NULL)
1053                     info->rangeGroupId = range->range_group->id;
1054             }
1055 
1056             uvm_tools_record_event(va_space, &entry);
1057         }
1058     }
1059 
1060     // We don't want to increment neither UvmCounterNameBytesXferDtH nor
1061     // UvmCounterNameBytesXferHtD in a CPU-to-CPU migration.
1062     if (UVM_ID_IS_CPU(event_data->migration.src) && UVM_ID_IS_CPU(event_data->migration.dst))
1063         goto done_unlock;
1064 
1065     // Increment counters
1066     if (UVM_ID_IS_CPU(event_data->migration.src) &&
1067         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD)) {
1068         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.dst);
1069         uvm_tools_inc_counter(va_space,
1070                               UvmCounterNameBytesXferHtD,
1071                               event_data->migration.bytes,
1072                               uvm_gpu_uuid(gpu));
1073     }
1074     if (UVM_ID_IS_CPU(event_data->migration.dst) &&
1075         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH)) {
1076         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.src);
1077         uvm_tools_inc_counter(va_space,
1078                               UvmCounterNameBytesXferDtH,
1079                               event_data->migration.bytes,
1080                               uvm_gpu_uuid(gpu));
1081     }
1082 
1083 done_unlock:
1084     uvm_up_read(&va_space->tools.lock);
1085 }
1086 
1087 // This event is notified asynchronously when it is marked as completed in the
1088 // pushbuffer the replay method belongs to.
1089 void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
1090                                 uvm_push_t *push,
1091                                 NvU32 batch_id,
1092                                 uvm_fault_client_type_t client_type)
1093 {
1094     uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1095     replay_data_t *replay;
1096 
1097     // Perform delayed notification only if some VA space has signed up for
1098     // UvmEventTypeGpuFaultReplay
1099     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1100         return;
1101 
1102     replay = kmem_cache_alloc(g_tools_replay_data_cache, NV_UVM_GFP_FLAGS);
1103     if (replay == NULL)
1104         return;
1105 
1106     UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1107 
1108     replay->timestamp_gpu_addr = uvm_push_timestamp(push);
1109     replay->gpu_id             = gpu->id;
1110     replay->batch_id           = batch_id;
1111     replay->client_type        = client_type;
1112     replay->timestamp          = NV_GETTIME();
1113     replay->channel            = push->channel;
1114 
1115     push_info->on_complete_data = replay;
1116     push_info->on_complete = on_replay_complete;
1117 
1118     uvm_spin_lock(&g_tools_channel_list_lock);
1119     add_pending_event_for_channel(replay->channel);
1120     uvm_spin_unlock(&g_tools_channel_list_lock);
1121 }
1122 
1123 
1124 void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type)
1125 {
1126     UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
1127 
1128     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1129         return;
1130 
1131     record_replay_event_helper(gpu->id, batch_id, client_type, NV_GETTIME(), gpu->parent->host_hal->get_time(gpu));
1132 }
1133 
1134 void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
1135                                         const uvm_access_counter_buffer_entry_t *buffer_entry,
1136                                         bool on_managed_phys)
1137 {
1138     UvmEventEntry entry;
1139     UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter;
1140 
1141     // Perform delayed notification only if some VA space has signed up for
1142     // UvmEventTypeAccessCounter
1143     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeTestAccessCounter))
1144         return;
1145 
1146     if (!buffer_entry->address.is_virtual)
1147         UVM_ASSERT(UVM_ID_IS_VALID(buffer_entry->physical_info.resident_id));
1148 
1149     memset(&entry, 0, sizeof(entry));
1150 
1151     info->eventType           = UvmEventTypeTestAccessCounter;
1152     info->srcIndex            = uvm_parent_id_value_from_processor_id(gpu->id);
1153     info->address             = buffer_entry->address.address;
1154     info->isVirtual           = buffer_entry->address.is_virtual? 1: 0;
1155 
1156     if (buffer_entry->address.is_virtual) {
1157         info->instancePtr         = buffer_entry->virtual_info.instance_ptr.address;
1158         info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
1159         info->veId                = buffer_entry->virtual_info.ve_id;
1160     }
1161     else {
1162         info->aperture            = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
1163         info->physOnManaged       = on_managed_phys? 1 : 0;
1164     }
1165 
1166     info->isFromCpu           = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
1167     info->value               = buffer_entry->counter_value;
1168     info->subGranularity      = buffer_entry->sub_granularity;
1169     info->bank                = buffer_entry->bank;
1170     info->tag                 = buffer_entry->tag;
1171 
1172     uvm_tools_broadcast_event(&entry);
1173 }
1174 
1175 void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
1176 {
1177     UvmEventEntry entry;
1178 
1179     if (!va_space->tools.enabled)
1180         return;
1181 
1182     entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
1183     uvm_down_read(&va_space->tools.lock);
1184     uvm_tools_record_event(va_space, &entry);
1185     uvm_up_read(&va_space->tools.lock);
1186 }
1187 
1188 // This function is used as a begin marker to group all migrations within a VA
1189 // block that are performed in the same call to
1190 // block_copy_resident_pages_between. All of these are pushed to the same
1191 // uvm_push_t object, and will be notified in burst when the last one finishes.
1192 void uvm_tools_record_block_migration_begin(uvm_va_block_t *va_block,
1193                                             uvm_push_t *push,
1194                                             uvm_processor_id_t dst_id,
1195                                             uvm_processor_id_t src_id,
1196                                             NvU64 start,
1197                                             uvm_make_resident_cause_t cause)
1198 {
1199     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1200     uvm_range_group_range_t *range;
1201 
1202     // Calls from tools read/write functions to make_resident must not trigger
1203     // any migration
1204     UVM_ASSERT(cause != UVM_MAKE_RESIDENT_CAUSE_API_TOOLS);
1205 
1206     // During evictions the va_space lock is not held.
1207     if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION)
1208         uvm_assert_rwsem_locked(&va_space->lock);
1209 
1210     if (!va_space->tools.enabled)
1211         return;
1212 
1213     uvm_down_read(&va_space->tools.lock);
1214 
1215     // Perform delayed notification only if the VA space has signed up for
1216     // UvmEventTypeMigration
1217     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1218         block_migration_data_t *block_mig;
1219         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1220 
1221         UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1222 
1223         block_mig = kmem_cache_alloc(g_tools_block_migration_data_cache, NV_UVM_GFP_FLAGS);
1224         if (block_mig == NULL)
1225             goto done_unlock;
1226 
1227         block_mig->start_timestamp_gpu_addr = uvm_push_timestamp(push);
1228         block_mig->channel = push->channel;
1229         block_mig->start_timestamp_cpu = NV_GETTIME();
1230         block_mig->dst = dst_id;
1231         block_mig->src = src_id;
1232         block_mig->range_group_id = UVM_RANGE_GROUP_ID_NONE;
1233 
1234         // During evictions, it is not safe to uvm_range_group_range_find() because the va_space lock is not held.
1235         if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1236             range = uvm_range_group_range_find(va_space, start);
1237             if (range != NULL)
1238                 block_mig->range_group_id = range->range_group->id;
1239         }
1240         block_mig->va_space = va_space;
1241 
1242         INIT_LIST_HEAD(&block_mig->events);
1243         push_info->on_complete_data = block_mig;
1244         push_info->on_complete = on_block_migration_complete;
1245 
1246         uvm_spin_lock(&g_tools_channel_list_lock);
1247         add_pending_event_for_channel(block_mig->channel);
1248         uvm_spin_unlock(&g_tools_channel_list_lock);
1249     }
1250 
1251 done_unlock:
1252     uvm_up_read(&va_space->tools.lock);
1253 }
1254 
1255 void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
1256                                      uvm_processor_id_t dst,
1257                                      uvm_va_block_region_t region,
1258                                      const uvm_page_mask_t *page_mask)
1259 {
1260     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1261 
1262     if (!va_space->tools.enabled)
1263         return;
1264 
1265     uvm_down_read(&va_space->tools.lock);
1266     if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate)) {
1267         // Read-duplication events
1268         UvmEventEntry entry;
1269         UvmEventReadDuplicateInfo *info_read_duplicate = &entry.eventData.readDuplicate;
1270         uvm_page_index_t page_index;
1271         memset(&entry, 0, sizeof(entry));
1272 
1273         info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
1274         info_read_duplicate->size      = PAGE_SIZE;
1275         info_read_duplicate->timeStamp = NV_GETTIME();
1276 
1277         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1278             uvm_processor_id_t id;
1279             uvm_processor_mask_t resident_processors;
1280 
1281             info_read_duplicate->address    = uvm_va_block_cpu_page_address(va_block, page_index);
1282             info_read_duplicate->processors = 0;
1283 
1284             uvm_va_block_page_resident_processors(va_block, page_index, &resident_processors);
1285             for_each_id_in_mask(id, &resident_processors)
1286                 info_read_duplicate->processors |= (1 << uvm_parent_id_value_from_processor_id(id));
1287 
1288             uvm_tools_record_event(va_space, &entry);
1289         }
1290     }
1291     uvm_up_read(&va_space->tools.lock);
1292 }
1293 
1294 void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
1295                                                 uvm_processor_id_t dst,
1296                                                 uvm_va_block_region_t region,
1297                                                 const uvm_page_mask_t *page_mask)
1298 {
1299     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1300 
1301     if (!va_space->tools.enabled)
1302         return;
1303 
1304     uvm_down_read(&va_space->tools.lock);
1305     if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicateInvalidate)) {
1306         UvmEventEntry entry;
1307         uvm_page_index_t page_index;
1308         UvmEventReadDuplicateInvalidateInfo *info = &entry.eventData.readDuplicateInvalidate;
1309         memset(&entry, 0, sizeof(entry));
1310 
1311         info->eventType     = UvmEventTypeReadDuplicateInvalidate;
1312         info->residentIndex = uvm_parent_id_value_from_processor_id(dst);
1313         info->size          = PAGE_SIZE;
1314         info->timeStamp     = NV_GETTIME();
1315 
1316         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1317             UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));
1318 
1319             info->address = uvm_va_block_cpu_page_address(va_block, page_index);
1320             uvm_tools_record_event(va_space, &entry);
1321         }
1322     }
1323     uvm_up_read(&va_space->tools.lock);
1324 }
1325 
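// Force completion processing on every channel currently tracked by tools so
// that pending deferred events have their completion callbacks invoked and
// handed to the tools worker thread. Each channel is retained for the duration
// of the unlocked walk and released afterwards.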
1326 static void tools_schedule_completed_events(void)
1327 {
1328     uvm_channel_t *channel;
1329     uvm_channel_t *next_channel;
1330     NvU64 channel_count = 0;
1331     NvU64 i;
1332 
1333     uvm_spin_lock(&g_tools_channel_list_lock);
1334 
    // Retain every channel currently in the list and keep track of the count.
1336     list_for_each_entry(channel, &g_tools_channel_list, tools.channel_list_node) {
1337         ++channel->tools.pending_event_count;
1338         ++channel_count;
1339     }
1340     uvm_spin_unlock(&g_tools_channel_list_lock);
1341 
1342     if (channel_count == 0)
1343         return;
1344 
    // New entries are always added at the tail, and every entry seen in the first
    // loop has been retained, so it is safe to walk those entries without holding
    // the lock.
1347     channel = list_first_entry(&g_tools_channel_list, uvm_channel_t, tools.channel_list_node);
1348     for (i = 0; i < channel_count; i++) {
1349         uvm_channel_update_progress_all(channel);
1350         channel = list_next_entry(channel, tools.channel_list_node);
1351     }
1352 
1353     // now release all the entries we retained in the beginning
1354     i = 0;
1355     uvm_spin_lock(&g_tools_channel_list_lock);
1356     list_for_each_entry_safe(channel, next_channel, &g_tools_channel_list, tools.channel_list_node) {
1357         if (i++ == channel_count)
1358             break;
1359 
1360         remove_pending_event_for_channel(channel);
1361     }
1362     uvm_spin_unlock(&g_tools_channel_list_lock);
1363 }
1364 
1365 void uvm_tools_record_cpu_fatal_fault(uvm_va_space_t *va_space,
1366                                       NvU64 address,
1367                                       bool is_write,
1368                                       UvmEventFatalReason reason)
1369 {
1370     uvm_assert_rwsem_locked(&va_space->lock);
1371 
1372     if (!va_space->tools.enabled)
1373         return;
1374 
1375     uvm_down_read(&va_space->tools.lock);
1376     if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
1377         UvmEventEntry entry;
1378         UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
1379         memset(&entry, 0, sizeof(entry));
1380 
1381         info->eventType      = UvmEventTypeFatalFault;
1382         info->processorIndex = UVM_ID_CPU_VALUE;
1383         info->timeStamp      = NV_GETTIME();
1384         info->address        = address;
        info->accessType     = is_write ? UvmEventMemoryAccessTypeWrite : UvmEventMemoryAccessTypeRead;
        // info->faultType is not valid for CPU faults
1387         info->reason         = reason;
1388 
1389         uvm_tools_record_event(va_space, &entry);
1390     }
1391     uvm_up_read(&va_space->tools.lock);
1392 }
1393 
1394 void uvm_tools_record_gpu_fatal_fault(uvm_gpu_id_t gpu_id,
1395                                       uvm_va_space_t *va_space,
1396                                       const uvm_fault_buffer_entry_t *buffer_entry,
1397                                       UvmEventFatalReason reason)
1398 {
1399     uvm_assert_rwsem_locked(&va_space->lock);
1400 
1401     if (!va_space->tools.enabled)
1402         return;
1403 
1404     uvm_down_read(&va_space->tools.lock);
1405     if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
1406         UvmEventEntry entry;
1407         UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
1408         memset(&entry, 0, sizeof(entry));
1409 
1410         info->eventType      = UvmEventTypeFatalFault;
1411         info->processorIndex = uvm_parent_id_value_from_processor_id(gpu_id);
1412         info->timeStamp      = NV_GETTIME();
1413         info->address        = buffer_entry->fault_address;
1414         info->accessType     = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
1415         info->faultType      = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
1416         info->reason         = reason;
1417 
1418         uvm_tools_record_event(va_space, &entry);
1419     }
1420     uvm_up_read(&va_space->tools.lock);
1421 }
1422 
1423 void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
1424                                 NvU64 address,
1425                                 size_t region_size,
1426                                 const uvm_processor_mask_t *processors)
1427 {
1428     UVM_ASSERT(address);
1429     UVM_ASSERT(PAGE_ALIGNED(address));
1430     UVM_ASSERT(region_size > 0);
1431 
1432     uvm_assert_rwsem_locked(&va_space->lock);
1433 
1434     if (!va_space->tools.enabled)
1435         return;
1436 
1437     uvm_down_read(&va_space->tools.lock);
1438     if (tools_is_event_enabled(va_space, UvmEventTypeThrashingDetected)) {
1439         uvm_processor_id_t id;
1440         UvmEventEntry entry;
1441         UvmEventThrashingDetectedInfo *info = &entry.eventData.thrashing;
1442         memset(&entry, 0, sizeof(entry));
1443 
1444         info->eventType = UvmEventTypeThrashingDetected;
1445         info->address   = address;
1446         info->size      = region_size;
1447         info->timeStamp = NV_GETTIME();
1448 
1449         for_each_id_in_mask(id, processors)
1450             __set_bit(uvm_parent_id_value_from_processor_id(id),
1451                       (unsigned long *)&info->processors);
1452 
1453         uvm_tools_record_event(va_space, &entry);
1454     }
1455     uvm_up_read(&va_space->tools.lock);
1456 }
1457 
1458 void uvm_tools_record_throttling_start(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1459 {
1460     UVM_ASSERT(address);
1461     UVM_ASSERT(PAGE_ALIGNED(address));
1462     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1463 
1464     uvm_assert_rwsem_locked(&va_space->lock);
1465 
1466     if (!va_space->tools.enabled)
1467         return;
1468 
1469     uvm_down_read(&va_space->tools.lock);
1470     if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingStart)) {
1471         UvmEventEntry entry;
1472         UvmEventThrottlingStartInfo *info = &entry.eventData.throttlingStart;
1473         memset(&entry, 0, sizeof(entry));
1474 
1475         info->eventType      = UvmEventTypeThrottlingStart;
1476         info->processorIndex = uvm_parent_id_value_from_processor_id(processor);
1477         info->address        = address;
1478         info->timeStamp      = NV_GETTIME();
1479 
1480         uvm_tools_record_event(va_space, &entry);
1481     }
1482     uvm_up_read(&va_space->tools.lock);
1483 }
1484 
1485 void uvm_tools_record_throttling_end(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1486 {
1487     UVM_ASSERT(address);
1488     UVM_ASSERT(PAGE_ALIGNED(address));
1489     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1490 
1491     uvm_assert_rwsem_locked(&va_space->lock);
1492 
1493     if (!va_space->tools.enabled)
1494         return;
1495 
1496     uvm_down_read(&va_space->tools.lock);
1497     if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingEnd)) {
1498         UvmEventEntry entry;
1499         UvmEventThrottlingEndInfo *info = &entry.eventData.throttlingEnd;
1500         memset(&entry, 0, sizeof(entry));
1501 
1502         info->eventType      = UvmEventTypeThrottlingEnd;
1503         info->processorIndex = uvm_parent_id_value_from_processor_id(processor);
1504         info->address        = address;
1505         info->timeStamp      = NV_GETTIME();
1506 
1507         uvm_tools_record_event(va_space, &entry);
1508     }
1509     uvm_up_read(&va_space->tools.lock);
1510 }
1511 
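// Worker-thread routine that drains the per-push list of deferred MapRemote
// events, emitting one event entry per mapping with the GPU timestamp captured
// when the push completed.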
1512 static void record_map_remote_events(void *args)
1513 {
1514     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)args;
1515     map_remote_data_t *map_remote, *next;
1516     UvmEventEntry entry;
1517     uvm_va_space_t *va_space = block_map_remote->va_space;
1518 
1519     memset(&entry, 0, sizeof(entry));
1520 
1521     entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1522     entry.eventData.mapRemote.srcIndex       = uvm_parent_id_value_from_processor_id(block_map_remote->src);
1523     entry.eventData.mapRemote.dstIndex       = uvm_parent_id_value_from_processor_id(block_map_remote->dst);
1524     entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
1525     entry.eventData.mapRemote.timeStamp      = block_map_remote->timestamp;
1526 
1527     uvm_down_read(&va_space->tools.lock);
1528     list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
1529         list_del(&map_remote->events_node);
1530 
1531         entry.eventData.mapRemote.address      = map_remote->address;
1532         entry.eventData.mapRemote.size         = map_remote->size;
1533         entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
1534         kmem_cache_free(g_tools_map_remote_data_cache, map_remote);
1535 
1536         uvm_tools_record_event(va_space, &entry);
1537     }
1538     uvm_up_read(&va_space->tools.lock);
1539 
1540     UVM_ASSERT(list_empty(&block_map_remote->events));
1541     kmem_cache_free(g_tools_block_map_remote_data_cache, block_map_remote);
1542 }
1543 
1544 static void record_map_remote_events_entry(void *args)
1545 {
1546     UVM_ENTRY_VOID(record_map_remote_events(args));
1547 }
1548 
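// Push completion callback: read back the GPU timestamps written by the push,
// then schedule record_map_remote_events() on the tools worker thread.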
1549 static void on_map_remote_complete(void *ptr)
1550 {
1551     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)ptr;
1552     map_remote_data_t *map_remote;
1553 
1554     // Only GPU mappings use the deferred mechanism
1555     UVM_ASSERT(UVM_ID_IS_GPU(block_map_remote->src));
1556     list_for_each_entry(map_remote, &block_map_remote->events, events_node)
1557         map_remote->timestamp_gpu = *map_remote->timestamp_gpu_addr;
1558 
1559     nv_kthread_q_item_init(&block_map_remote->queue_item, record_map_remote_events_entry, ptr);
1560 
1561     uvm_spin_lock(&g_tools_channel_list_lock);
1562     remove_pending_event_for_channel(block_map_remote->channel);
1563     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_map_remote->queue_item);
1564     uvm_spin_unlock(&g_tools_channel_list_lock);
1565 }
1566 
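// CPU mappings are recorded immediately. GPU mappings are batched per push and
// recorded from a deferred callback once the push completes, so that the event
// can carry a GPU timestamp.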
1567 void uvm_tools_record_map_remote(uvm_va_block_t *va_block,
1568                                  uvm_push_t *push,
1569                                  uvm_processor_id_t processor,
1570                                  uvm_processor_id_t residency,
1571                                  NvU64 address,
1572                                  size_t region_size,
1573                                  UvmEventMapRemoteCause cause)
1574 {
1575     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1576 
1577     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1578     UVM_ASSERT(UVM_ID_IS_VALID(residency));
1579     UVM_ASSERT(cause != UvmEventMapRemoteCauseInvalid);
1580 
1581     uvm_assert_rwsem_locked(&va_space->lock);
1582 
1583     if (!va_space->tools.enabled)
1584         return;
1585 
1586     uvm_down_read(&va_space->tools.lock);
1587     if (!tools_is_event_enabled(va_space, UvmEventTypeMapRemote))
1588         goto done;
1589 
1590     if (UVM_ID_IS_CPU(processor)) {
1591         UvmEventEntry entry;
1592         memset(&entry, 0, sizeof(entry));
1593 
1594         entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1595         entry.eventData.mapRemote.srcIndex       = uvm_parent_id_value_from_processor_id(processor);
1596         entry.eventData.mapRemote.dstIndex       = uvm_parent_id_value_from_processor_id(residency);
1597         entry.eventData.mapRemote.mapRemoteCause = cause;
1598         entry.eventData.mapRemote.timeStamp      = NV_GETTIME();
1599         entry.eventData.mapRemote.address        = address;
1600         entry.eventData.mapRemote.size           = region_size;
1601         entry.eventData.mapRemote.timeStampGpu   = 0;
1602 
1603         UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);
1604 
1605         uvm_tools_record_event(va_space, &entry);
1606     }
1607     else {
1608         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1609         block_map_remote_data_t *block_map_remote;
1610         map_remote_data_t *map_remote;
1611 
1612         // The first call on this pushbuffer creates the per-VA block structure
1613         if (push_info->on_complete == NULL) {
1614             UVM_ASSERT(push_info->on_complete_data == NULL);
1615 
1616             block_map_remote = kmem_cache_alloc(g_tools_block_map_remote_data_cache, NV_UVM_GFP_FLAGS);
1617             if (block_map_remote == NULL)
1618                 goto done;
1619 
1620             block_map_remote->src = processor;
1621             block_map_remote->dst = residency;
1622             block_map_remote->cause = cause;
1623             block_map_remote->timestamp = NV_GETTIME();
1624             block_map_remote->va_space = va_space;
1625             block_map_remote->channel = push->channel;
1626             INIT_LIST_HEAD(&block_map_remote->events);
1627 
1628             push_info->on_complete_data = block_map_remote;
1629             push_info->on_complete = on_map_remote_complete;
1630 
1631             uvm_spin_lock(&g_tools_channel_list_lock);
1632             add_pending_event_for_channel(block_map_remote->channel);
1633             uvm_spin_unlock(&g_tools_channel_list_lock);
1634         }
1635         else {
1636             block_map_remote = push_info->on_complete_data;
1637         }
1638         UVM_ASSERT(block_map_remote);
1639 
1640         map_remote = kmem_cache_alloc(g_tools_map_remote_data_cache, NV_UVM_GFP_FLAGS);
1641         if (map_remote == NULL)
1642             goto done;
1643 
1644         map_remote->address = address;
1645         map_remote->size = region_size;
1646         map_remote->timestamp_gpu_addr = uvm_push_timestamp(push);
1647 
1648         list_add_tail(&map_remote->events_node, &block_map_remote->events);
1649     }
1650 
1651 done:
1652     uvm_up_read(&va_space->tools.lock);
1653 }
1654 
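// Create the event tracker backing a tools file. Depending on the parameters
// it is either an event queue (queueBufferSize != 0) or a set of counters, and
// in both cases the user-supplied buffers are pinned via map_user_pages().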
1655 NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp)
1656 {
1657     NV_STATUS status = NV_OK;
1658     uvm_tools_event_tracker_t *event_tracker;
1659 
1660     event_tracker = nv_kmem_cache_zalloc(g_tools_event_tracker_cache, NV_UVM_GFP_FLAGS);
1661     if (event_tracker == NULL)
1662         return NV_ERR_NO_MEMORY;
1663 
1664     event_tracker->uvm_file = fget(params->uvmFd);
1665     if (event_tracker->uvm_file == NULL) {
1666         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
1667         goto fail;
1668     }
1669 
1670     if (!uvm_file_is_nvidia_uvm(event_tracker->uvm_file)) {
1671         fput(event_tracker->uvm_file);
1672         event_tracker->uvm_file = NULL;
1673         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
1674         goto fail;
1675     }
1676 
1677     // We don't use uvm_fd_va_space() here because tools can work
1678     // without an associated va_space_mm.
1679     if (!uvm_fd_get_type(event_tracker->uvm_file, UVM_FD_VA_SPACE)) {
1680         fput(event_tracker->uvm_file);
1681         event_tracker->uvm_file = NULL;
1682         status = NV_ERR_ILLEGAL_ACTION;
1683         goto fail;
1684     }
1685 
1686     event_tracker->is_queue = params->queueBufferSize != 0;
1687     if (event_tracker->is_queue) {
1688         uvm_tools_queue_t *queue = &event_tracker->queue;
1689         uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
1690         init_waitqueue_head(&queue->wait_queue);
1691 
1692         if (params->queueBufferSize > UINT_MAX) {
1693             status = NV_ERR_INVALID_ARGUMENT;
1694             goto fail;
1695         }
1696 
1697         queue->queue_buffer_count = (NvU32)params->queueBufferSize;
1698         queue->notification_threshold = queue->queue_buffer_count / 2;
1699 
        // queue_buffer_count must be a power of 2 and at least 2
1701         if (!is_power_of_2(queue->queue_buffer_count) || queue->queue_buffer_count < 2) {
1702             status = NV_ERR_INVALID_ARGUMENT;
1703             goto fail;
1704         }
1705 
1706         status = map_user_pages(params->queueBuffer,
1707                                 queue->queue_buffer_count * sizeof(UvmEventEntry),
1708                                 (void **)&queue->queue,
1709                                 &queue->queue_buffer_pages);
1710         if (status != NV_OK)
1711             goto fail;
1712 
1713         status = map_user_pages(params->controlBuffer,
1714                                 sizeof(UvmToolsEventControlData),
1715                                 (void **)&queue->control,
1716                                 &queue->control_buffer_pages);
1717 
1718         if (status != NV_OK)
1719             goto fail;
1720     }
1721     else {
1722         uvm_tools_counter_t *counter = &event_tracker->counter;
1723         counter->all_processors = params->allProcessors;
1724         counter->processor = params->processor;
1725         status = map_user_pages(params->controlBuffer,
1726                                 sizeof(NvU64) * UVM_TOTAL_COUNTERS,
1727                                 (void **)&counter->counters,
1728                                 &counter->counter_buffer_pages);
1729         if (status != NV_OK)
1730             goto fail;
1731     }
1732 
1733     if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
1734         status = NV_ERR_INVALID_ARGUMENT;
1735         goto fail;
1736     }
1737 
1738     return NV_OK;
1739 
1740 fail:
1741     destroy_event_tracker(event_tracker);
1742     return status;
1743 }
1744 
1745 NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp)
1746 {
1747     UvmToolsEventControlData *ctrl;
1748     uvm_tools_queue_snapshot_t sn;
1749     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1750 
1751     if (!tracker_is_queue(event_tracker))
1752         return NV_ERR_INVALID_ARGUMENT;
1753 
1754     uvm_spin_lock(&event_tracker->queue.lock);
1755 
1756     event_tracker->queue.notification_threshold = params->notificationThreshold;
1757 
1758     ctrl = event_tracker->queue.control;
1759     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
1760     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
1761 
1762     if (queue_needs_wakeup(&event_tracker->queue, &sn))
1763         wake_up_all(&event_tracker->queue.wait_queue);
1764 
1765     uvm_spin_unlock(&event_tracker->queue.lock);
1766 
1767     return NV_OK;
1768 }
1769 
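// Register or unregister the fault and migration perf event callbacks so that
// they match the events and counters currently subscribed in this VA space.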
1770 static NV_STATUS tools_update_perf_events_callbacks(uvm_va_space_t *va_space)
1771 {
1772     NV_STATUS status;
1773 
1774     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
1775     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
1776 
1777     if (tools_is_fault_callback_needed(va_space)) {
1778         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
1779             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
1780                                                              UVM_PERF_EVENT_FAULT,
1781                                                              uvm_tools_record_fault);
1782 
1783             if (status != NV_OK)
1784                 return status;
1785         }
1786     }
1787     else {
1788         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
1789             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
1790                                                       UVM_PERF_EVENT_FAULT,
1791                                                       uvm_tools_record_fault);
1792         }
1793     }
1794 
1795     if (tools_is_migration_callback_needed(va_space)) {
1796         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
1797             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
1798                                                              UVM_PERF_EVENT_MIGRATION,
1799                                                              uvm_tools_record_migration);
1800 
1801             if (status != NV_OK)
1802                 return status;
1803         }
1804     }
1805     else {
1806         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
1807             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
1808                                                       UVM_PERF_EVENT_MIGRATION,
1809                                                       uvm_tools_record_migration);
1810         }
1811     }
1812 
1813     return NV_OK;
1814 }
1815 
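// Recompute whether tools are enabled for this VA space after a subscription
// change, updating the perf event callbacks and the global tools VA space list
// accordingly.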
1816 static NV_STATUS tools_update_status(uvm_va_space_t *va_space)
1817 {
1818     NV_STATUS status;
1819     bool should_be_enabled;
1820     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
1821     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
1822     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
1823 
1824     status = tools_update_perf_events_callbacks(va_space);
1825     if (status != NV_OK)
1826         return status;
1827 
1828     should_be_enabled = tools_are_enabled(va_space);
1829     if (should_be_enabled != va_space->tools.enabled) {
1830         if (should_be_enabled)
1831             list_add(&va_space->tools.node, &g_tools_va_space_list);
1832         else
1833             list_del(&va_space->tools.node);
1834 
1835         va_space->tools.enabled = should_be_enabled;
1836     }
1837 
1838     return NV_OK;
1839 }
1840 
1841 #define EVENT_FLAGS_BITS (sizeof(NvU64) * 8)
1842 
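// Returns true if event_flags requests any event ID outside the supported
// range. Test-only event IDs are considered valid only when the module was
// loaded with uvm_enable_builtin_tests.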
1843 static bool mask_contains_invalid_events(NvU64 event_flags)
1844 {
1845     const unsigned long *event_mask = (const unsigned long *)&event_flags;
1846     DECLARE_BITMAP(helper_mask, EVENT_FLAGS_BITS);
1847     DECLARE_BITMAP(valid_events_mask, EVENT_FLAGS_BITS);
1848     DECLARE_BITMAP(tests_events_mask, EVENT_FLAGS_BITS);
1849 
1850     bitmap_zero(tests_events_mask, EVENT_FLAGS_BITS);
1851     bitmap_set(tests_events_mask,
1852                UvmEventTestTypesFirst,
1853                UvmEventTestTypesLast - UvmEventTestTypesFirst + 1);
1854 
1855     bitmap_zero(valid_events_mask, EVENT_FLAGS_BITS);
1856     bitmap_set(valid_events_mask, 1, UvmEventNumTypes - 1);
1857 
1858     if (uvm_enable_builtin_tests)
1859         bitmap_or(valid_events_mask, valid_events_mask, tests_events_mask, EVENT_FLAGS_BITS);
1860 
1861     // Make sure that test event ids do not overlap with regular events
1862     BUILD_BUG_ON(UvmEventTestTypesFirst < UvmEventNumTypes);
1863     BUILD_BUG_ON(UvmEventTestTypesFirst > UvmEventTestTypesLast);
1864     BUILD_BUG_ON(UvmEventTestTypesLast >= UvmEventNumTypesAll);
1865 
1866     // Make sure that no test event ever changes the size of UvmEventEntry
1867     BUILD_BUG_ON(sizeof(((UvmEventEntry *)NULL)->testEventData) >
1868                  sizeof(((UvmEventEntry *)NULL)->eventData));
1869     BUILD_BUG_ON(UvmEventNumTypesAll > EVENT_FLAGS_BITS);
1870 
1871     if (!bitmap_andnot(helper_mask, event_mask, valid_events_mask, EVENT_FLAGS_BITS))
1872         return false;
1873 
1874     if (!uvm_enable_builtin_tests && bitmap_and(helper_mask, event_mask, tests_events_mask, EVENT_FLAGS_BITS))
1875         UVM_INFO_PRINT("Event index not found. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
1876 
1877     return true;
1878 }
1879 
1880 NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp)
1881 {
1882     uvm_va_space_t *va_space;
1883     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1884     NV_STATUS status = NV_OK;
1885     NvU64 inserted_lists;
1886 
1887     if (!tracker_is_queue(event_tracker))
1888         return NV_ERR_INVALID_ARGUMENT;
1889 
1890     if (mask_contains_invalid_events(params->eventTypeFlags))
1891         return NV_ERR_INVALID_ARGUMENT;
1892 
1893     va_space = tools_event_tracker_va_space(event_tracker);
1894 
1895     uvm_down_write(&g_tools_va_space_list_lock);
1896     uvm_down_write(&va_space->perf_events.lock);
1897     uvm_down_write(&va_space->tools.lock);
1898 
1899     insert_event_tracker(va_space,
1900                          event_tracker->queue.queue_nodes,
1901                          UvmEventNumTypesAll,
1902                          params->eventTypeFlags,
1903                          &event_tracker->queue.subscribed_queues,
1904                          va_space->tools.queues,
1905                          &inserted_lists);
1906 
1907     // perform any necessary registration
1908     status = tools_update_status(va_space);
1909     if (status != NV_OK) {
        // on error, unregister any newly registered events
        remove_event_tracker(va_space,
                             event_tracker->queue.queue_nodes,
                             UvmEventNumTypesAll,
                             inserted_lists,
                             &event_tracker->queue.subscribed_queues);
1916     }
1917 
1918     uvm_up_write(&va_space->tools.lock);
1919     uvm_up_write(&va_space->perf_events.lock);
1920     uvm_up_write(&g_tools_va_space_list_lock);
1921 
1922     return status;
1923 }
1924 
1925 NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp)
1926 {
1927     NV_STATUS status;
1928     uvm_va_space_t *va_space;
1929     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1930 
1931     if (!tracker_is_queue(event_tracker))
1932         return NV_ERR_INVALID_ARGUMENT;
1933 
1934     va_space = tools_event_tracker_va_space(event_tracker);
1935 
1936     uvm_down_write(&g_tools_va_space_list_lock);
1937     uvm_down_write(&va_space->perf_events.lock);
1938     uvm_down_write(&va_space->tools.lock);
1939     remove_event_tracker(va_space,
1940                          event_tracker->queue.queue_nodes,
1941                          UvmEventNumTypesAll,
1942                          params->eventTypeFlags,
1943                          &event_tracker->queue.subscribed_queues);
1944 
1945     // de-registration should not fail
1946     status = tools_update_status(va_space);
1947     UVM_ASSERT(status == NV_OK);
1948 
1949     uvm_up_write(&va_space->tools.lock);
1950     uvm_up_write(&va_space->perf_events.lock);
1951     uvm_up_write(&g_tools_va_space_list_lock);
1952     return NV_OK;
1953 }
1954 
1955 NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp)
1956 {
1957     uvm_va_space_t *va_space;
1958     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1959     NV_STATUS status = NV_OK;
1960     NvU64 inserted_lists;
1961 
1962     if (!tracker_is_counter(event_tracker))
1963         return NV_ERR_INVALID_ARGUMENT;
1964 
1965     va_space = tools_event_tracker_va_space(event_tracker);
1966 
1967     uvm_down_write(&g_tools_va_space_list_lock);
1968     uvm_down_write(&va_space->perf_events.lock);
1969     uvm_down_write(&va_space->tools.lock);
1970 
1971     insert_event_tracker(va_space,
1972                          event_tracker->counter.counter_nodes,
1973                          UVM_TOTAL_COUNTERS,
1974                          params->counterTypeFlags,
1975                          &event_tracker->counter.subscribed_counters,
1976                          va_space->tools.counters,
1977                          &inserted_lists);
1978 
1979     // perform any necessary registration
1980     status = tools_update_status(va_space);
1981     if (status != NV_OK) {
1982         remove_event_tracker(va_space,
1983                              event_tracker->counter.counter_nodes,
1984                              UVM_TOTAL_COUNTERS,
1985                              inserted_lists,
1986                              &event_tracker->counter.subscribed_counters);
1987     }
1988 
1989     uvm_up_write(&va_space->tools.lock);
1990     uvm_up_write(&va_space->perf_events.lock);
1991     uvm_up_write(&g_tools_va_space_list_lock);
1992 
1993     return status;
1994 }
1995 
1996 NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp)
1997 {
1998     NV_STATUS status;
1999     uvm_va_space_t *va_space;
2000     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
2001 
2002     if (!tracker_is_counter(event_tracker))
2003         return NV_ERR_INVALID_ARGUMENT;
2004 
2005     va_space = tools_event_tracker_va_space(event_tracker);
2006 
2007     uvm_down_write(&g_tools_va_space_list_lock);
2008     uvm_down_write(&va_space->perf_events.lock);
2009     uvm_down_write(&va_space->tools.lock);
2010     remove_event_tracker(va_space,
2011                          event_tracker->counter.counter_nodes,
2012                          UVM_TOTAL_COUNTERS,
2013                          params->counterTypeFlags,
2014                          &event_tracker->counter.subscribed_counters);
2015 
2016     // de-registration should not fail
2017     status = tools_update_status(va_space);
2018     UVM_ASSERT(status == NV_OK);
2019 
2020     uvm_up_write(&va_space->tools.lock);
2021     uvm_up_write(&va_space->perf_events.lock);
2022     uvm_up_write(&g_tools_va_space_list_lock);
2023 
2024     return NV_OK;
2025 }
2026 
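// Copy a chunk of data between the CPU staging buffer and the target VA inside
// a single VA block, using the UVM_VA_BLOCK_LOCK_RETRY helper for both the
// read and the write paths.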
2027 static NV_STATUS tools_access_va_block(uvm_va_block_t *va_block,
2028                                        uvm_va_block_context_t *block_context,
2029                                        NvU64 target_va,
2030                                        NvU64 size,
2031                                        bool is_write,
2032                                        uvm_mem_t *stage_mem)
2033 {
2034     if (is_write) {
2035         return UVM_VA_BLOCK_LOCK_RETRY(va_block,
2036                                        NULL,
2037                                        uvm_va_block_write_from_cpu(va_block, block_context, target_va, stage_mem, size));
2038     }
2039     else {
2040         return UVM_VA_BLOCK_LOCK_RETRY(va_block,
2041                                        NULL,
                                       uvm_va_block_read_to_cpu(va_block, stage_mem, target_va, size));
    }
2045 }
2046 
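// Copy data between a user buffer and the target VA space's memory, at most
// one page per iteration, through a sysmem staging buffer that is mapped on
// each registered GPU as needed.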
2047 static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
2048                                              NvU64 target_va,
2049                                              NvU64 size,
2050                                              NvU64 user_va,
2051                                              NvU64 *bytes,
2052                                              bool is_write)
2053 {
2054     NV_STATUS status;
2055     uvm_mem_t *stage_mem = NULL;
2056     void *stage_addr;
2057     uvm_processor_mask_t *retained_gpus = NULL;
2058     uvm_va_block_context_t *block_context = NULL;
2059     struct mm_struct *mm = NULL;
2060 
2061     retained_gpus = uvm_processor_mask_cache_alloc();
2062     if (!retained_gpus)
2063         return NV_ERR_NO_MEMORY;
2064 
2065     uvm_processor_mask_zero(retained_gpus);
2066 
2067     mm = uvm_va_space_mm_or_current_retain(va_space);
2068 
2069     status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, mm, &stage_mem);
2070     if (status != NV_OK)
2071         goto exit;
2072 
2073     block_context = uvm_va_block_context_alloc(mm);
2074     if (!block_context) {
2075         status = NV_ERR_NO_MEMORY;
2076         goto exit;
2077     }
2078 
2079     stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem);
2080     *bytes = 0;
2081 
2082     while (*bytes < size) {
2083         uvm_gpu_t *gpu;
2084         uvm_va_block_t *block;
2085         void *user_va_start = (void *) (user_va + *bytes);
2086         NvU64 target_va_start = target_va + *bytes;
2087         NvU64 bytes_left = size - *bytes;
2088         NvU64 page_offset = target_va_start & (PAGE_SIZE - 1);
2089         NvU64 bytes_now = min(bytes_left, (NvU64)(PAGE_SIZE - page_offset));
2090         bool map_stage_mem_on_gpus = true;
2091 
2092         if (is_write) {
2093             NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now);
2094             if (remaining != 0)  {
2095                 status = NV_ERR_INVALID_ARGUMENT;
2096                 goto exit;
2097             }
2098         }
2099 
2100         if (mm)
2101             uvm_down_read_mmap_lock(mm);
2102 
2103         // The RM flavor of the lock is needed to perform ECC checks.
2104         uvm_va_space_down_read_rm(va_space);
2105         if (mm)
            status = uvm_va_block_find_create(va_space,
                                              UVM_PAGE_ALIGN_DOWN(target_va_start),
                                              &block_context->hmm.vma,
                                              &block);
2107         else
2108             status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);
2109 
2110         if (status != NV_OK)
2111             goto unlock_and_exit;
2112 
2113         // When CC is enabled, the staging memory cannot be mapped on the GPU
2114         // (it is protected sysmem), but it is still used to store the
2115         // unencrypted version of the page contents when the page is resident
2116         // on vidmem.
2117         if (g_uvm_global.conf_computing_enabled)
2118             map_stage_mem_on_gpus = false;
2119 
2120         if (map_stage_mem_on_gpus) {
2121             for_each_gpu_in_mask(gpu, &va_space->registered_gpus) {
2122                 if (uvm_processor_mask_test_and_set(retained_gpus, gpu->id))
2123                     continue;
2124 
                // The retention of each GPU ensures that the staging memory is
                // freed before the unregistration of any of the GPUs it is
                // mapped on. Each GPU is retained once.
2128                 uvm_gpu_retain(gpu);
2129 
2130                 // Accessing the VA block may result in copying data between the
2131                 // CPU and a GPU. Conservatively add virtual mappings to all the
2132                 // GPUs (even if those mappings may never be used) as tools
2133                 // read/write is not on a performance critical path.
2134                 status = uvm_mem_map_gpu_kernel(stage_mem, gpu);
2135                 if (status != NV_OK)
2136                     goto unlock_and_exit;
2137             }
2138         }
2139         else {
2140             UVM_ASSERT(uvm_processor_mask_empty(retained_gpus));
2141         }
2142 
        // Make sure a CPU-resident page has an up-to-date struct page pointer.
2144         if (uvm_va_block_is_hmm(block)) {
2145             status = uvm_hmm_va_block_update_residency_info(block, mm, UVM_PAGE_ALIGN_DOWN(target_va_start), true);
2146             if (status != NV_OK)
2147                 goto unlock_and_exit;
2148         }
2149 
2150         status = tools_access_va_block(block, block_context, target_va_start, bytes_now, is_write, stage_mem);
2151 
2152         // For simplicity, check for ECC errors on all GPUs registered in the VA
2153         // space
2154         if (status == NV_OK)
2155             status = uvm_global_gpu_check_ecc_error(&va_space->registered_gpus);
2156 
2157         uvm_va_space_up_read_rm(va_space);
2158         if (mm)
2159             uvm_up_read_mmap_lock(mm);
2160 
2161         if (status != NV_OK)
2162             goto exit;
2163 
2164         if (!is_write) {
2165             NvU64 remaining;
2166 
2167             // Prevent processor speculation prior to accessing user-mapped
2168             // memory to avoid leaking information from side-channel attacks.
2169             // Under speculation, a valid VA range which does not contain
2170             // target_va could be used, and the block index could run off the
2171             // end of the array. Information about the state of that kernel
2172             // memory could be inferred if speculative execution gets to the
2173             // point where the data is copied out.
2174             nv_speculation_barrier();
2175 
2176             remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now);
2177             if (remaining > 0) {
2178                 status = NV_ERR_INVALID_ARGUMENT;
2179                 goto exit;
2180             }
2181         }
2182 
2183         *bytes += bytes_now;
2184     }
2185 
2186 unlock_and_exit:
2187     if (status != NV_OK) {
2188         uvm_va_space_up_read_rm(va_space);
2189         if (mm)
2190             uvm_up_read_mmap_lock(mm);
2191     }
2192 
2193 exit:
2194     uvm_va_block_context_free(block_context);
2195 
2196     uvm_mem_free(stage_mem);
2197 
2198     uvm_global_gpu_release(retained_gpus);
2199 
2200     uvm_va_space_mm_or_current_release(va_space, mm);
2201 
2202     uvm_processor_mask_cache_free(retained_gpus);
2203 
2204     return status;
2205 }
2206 
2207 NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp)
2208 {
2209     return tools_access_process_memory(uvm_va_space_get(filp),
2210                                        params->targetVa,
2211                                        params->size,
2212                                        params->buffer,
2213                                        &params->bytesRead,
2214                                        false);
2215 }
2216 
2217 NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp)
2218 {
2219     return tools_access_process_memory(uvm_va_space_get(filp),
2220                                        params->targetVa,
2221                                        params->size,
2222                                        params->buffer,
2223                                        &params->bytesWritten,
2224                                        true);
2225 }
2226 
2227 NV_STATUS uvm_test_inject_tools_event(UVM_TEST_INJECT_TOOLS_EVENT_PARAMS *params, struct file *filp)
2228 {
2229     NvU32 i;
2230     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2231 
2232     if (params->entry.eventData.eventType >= UvmEventNumTypesAll)
2233         return NV_ERR_INVALID_ARGUMENT;
2234 
2235     uvm_down_read(&va_space->tools.lock);
2236     for (i = 0; i < params->count; i++)
2237         uvm_tools_record_event(va_space, &params->entry);
2238     uvm_up_read(&va_space->tools.lock);
2239     return NV_OK;
2240 }
2241 
2242 NV_STATUS uvm_test_increment_tools_counter(UVM_TEST_INCREMENT_TOOLS_COUNTER_PARAMS *params, struct file *filp)
2243 {
2244     NvU32 i;
2245     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2246 
2247     if (params->counter >= UVM_TOTAL_COUNTERS)
2248         return NV_ERR_INVALID_ARGUMENT;
2249 
2250     uvm_down_read(&va_space->tools.lock);
2251     for (i = 0; i < params->count; i++)
2252         uvm_tools_inc_counter(va_space, params->counter, params->amount, &params->processor);
2253     uvm_up_read(&va_space->tools.lock);
2254 
2255     return NV_OK;
2256 }
2257 
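// Fill the caller's table with the CPU UUID at index UVM_ID_CPU_VALUE and each
// registered GPU's parent UUID at its parent ID value. params->count returns
// the highest index in use plus one, even if the input table was too small to
// hold every entry.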
2258 NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS *params, struct file *filp)
2259 {
2260     NvProcessorUuid *uuids;
2261     NvU64 remaining;
2262     uvm_gpu_t *gpu;
2263     NvU32 count = params->count;
2264     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2265 
    // Prior to Multi-MIG support, params->count was always zero, meaning the
    // input array had a fixed size of UVM_MAX_PROCESSORS (33 at that time).
2268     if (count == 0)
2269         count = 33;
2270     else if (count > UVM_ID_MAX_PROCESSORS)
2271         count = UVM_ID_MAX_PROCESSORS;
2272 
2273     uuids = uvm_kvmalloc_zero(sizeof(NvProcessorUuid) * count);
2274     if (uuids == NULL)
2275         return NV_ERR_NO_MEMORY;
2276 
2277     uvm_uuid_copy(&uuids[UVM_ID_CPU_VALUE], &NV_PROCESSOR_UUID_CPU_DEFAULT);
2278     params->count = 1;
2279 
2280     uvm_va_space_down_read(va_space);
2281     for_each_va_space_gpu(gpu, va_space) {
2282         NvU32 id_value;
2283         const NvProcessorUuid *uuid;
2284 
2285         id_value = uvm_parent_id_value(gpu->parent->id);
2286         uuid = &gpu->parent->uuid;
2287 
2288         if (id_value < count)
2289             uvm_uuid_copy(&uuids[id_value], uuid);
2290 
2291         // Return the actual count even if the UUID isn't returned due to
2292         // limited input array size.
2293         if (id_value + 1 > params->count)
2294             params->count = id_value + 1;
2295     }
2296     uvm_va_space_up_read(va_space);
2297 
2298     remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * count);
2299     uvm_kvfree(uuids);
2300 
2301     if (remaining != 0)
2302         return NV_ERR_INVALID_ADDRESS;
2303 
2304     return NV_OK;
2305 }
2306 
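// Schedule completion processing for all pending channel events and wait for
// the tools worker thread to drain the resulting work items.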
2307 void uvm_tools_flush_events(void)
2308 {
2309     tools_schedule_completed_events();
2310 
2311     nv_kthread_q_flush(&g_tools_queue);
2312 }
2313 
2314 NV_STATUS uvm_api_tools_flush_events(UVM_TOOLS_FLUSH_EVENTS_PARAMS *params, struct file *filp)
2315 {
2316     uvm_tools_flush_events();
2317     return NV_OK;
2318 }
2319 
2320 NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp)
2321 {
2322     NV_STATUS status = NV_OK;
2323     uvm_gpu_t *gpu = NULL;
2324     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2325 
2326     gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpuUuid);
2327     if (!gpu)
2328         return NV_ERR_INVALID_DEVICE;
2329 
2330     // Wait for register-based fault clears to queue the replay event
2331     if (!gpu->parent->has_clear_faulted_channel_method) {
2332         uvm_parent_gpu_non_replayable_faults_isr_lock(gpu->parent);
2333         uvm_parent_gpu_non_replayable_faults_isr_unlock(gpu->parent);
2334     }
2335 
2336     // Wait for pending fault replay methods to complete (replayable faults on
2337     // all GPUs, and non-replayable faults on method-based GPUs).
2338     status = uvm_channel_manager_wait(gpu->channel_manager);
2339 
2340     // Flush any pending events even if (status != NV_OK)
2341     uvm_tools_flush_events();
2342     uvm_gpu_release(gpu);
2343 
2344     return status;
2345 }
2346 
2347 static const struct file_operations uvm_tools_fops =
2348 {
2349     .open            = uvm_tools_open_entry,
2350     .release         = uvm_tools_release_entry,
2351     .unlocked_ioctl  = uvm_tools_unlocked_ioctl_entry,
2352 #if NVCPU_IS_X86_64
2353     .compat_ioctl    = uvm_tools_unlocked_ioctl_entry,
2354 #endif
2355     .poll            = uvm_tools_poll_entry,
2356     .owner           = THIS_MODULE,
2357 };
2358 
2359 static void _uvm_tools_destroy_cache_all(void)
2360 {
    // The cache pointers are initialized to NULL, so it is safe to call destroy
    // on all of them.
2363     kmem_cache_destroy_safe(&g_tools_event_tracker_cache);
2364     kmem_cache_destroy_safe(&g_tools_block_migration_data_cache);
2365     kmem_cache_destroy_safe(&g_tools_migration_data_cache);
2366     kmem_cache_destroy_safe(&g_tools_replay_data_cache);
2367     kmem_cache_destroy_safe(&g_tools_block_map_remote_data_cache);
2368     kmem_cache_destroy_safe(&g_tools_map_remote_data_cache);
2369 }
2370 
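// Module initialization for the tools layer: create the kmem caches used for
// deferred event data, start the tools worker thread and register the
// /dev/nvidia-uvm-tools character device.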
2371 int uvm_tools_init(dev_t uvm_base_dev)
2372 {
2373     dev_t uvm_tools_dev = MKDEV(MAJOR(uvm_base_dev), NVIDIA_UVM_TOOLS_MINOR_NUMBER);
    int ret = -ENOMEM; // Error code returned if any of the allocations below fail
2375 
2376     uvm_init_rwsem(&g_tools_va_space_list_lock, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
2377 
2378     g_tools_event_tracker_cache = NV_KMEM_CACHE_CREATE("uvm_tools_event_tracker_t",
2379                                                         uvm_tools_event_tracker_t);
2380     if (!g_tools_event_tracker_cache)
2381         goto err_cache_destroy;
2382 
2383     g_tools_block_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_migration_data_t",
2384                                                               block_migration_data_t);
2385     if (!g_tools_block_migration_data_cache)
2386         goto err_cache_destroy;
2387 
2388     g_tools_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_migration_data_t",
2389                                                         migration_data_t);
2390     if (!g_tools_migration_data_cache)
2391         goto err_cache_destroy;
2392 
2393     g_tools_replay_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_replay_data_t",
2394                                                      replay_data_t);
2395     if (!g_tools_replay_data_cache)
2396         goto err_cache_destroy;
2397 
2398     g_tools_block_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_map_remote_data_t",
2399                                                                block_map_remote_data_t);
2400     if (!g_tools_block_map_remote_data_cache)
2401         goto err_cache_destroy;
2402 
2403     g_tools_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_map_remote_data_t",
2404                                                          map_remote_data_t);
2405     if (!g_tools_map_remote_data_cache)
2406         goto err_cache_destroy;
2407 
2408     uvm_spin_lock_init(&g_tools_channel_list_lock, UVM_LOCK_ORDER_LEAF);
2409 
2410     ret = nv_kthread_q_init(&g_tools_queue, "UVM Tools Event Queue");
2411     if (ret < 0)
2412         goto err_cache_destroy;
2413 
2414     uvm_init_character_device(&g_uvm_tools_cdev, &uvm_tools_fops);
2415     ret = cdev_add(&g_uvm_tools_cdev, uvm_tools_dev, 1);
2416     if (ret != 0) {
2417         UVM_ERR_PRINT("cdev_add (major %u, minor %u) failed: %d\n", MAJOR(uvm_tools_dev),
2418                       MINOR(uvm_tools_dev), ret);
2419         goto err_stop_thread;
2420     }
2421 
2422     return ret;
2423 
2424 err_stop_thread:
2425     nv_kthread_q_stop(&g_tools_queue);
2426 
2427 err_cache_destroy:
2428     _uvm_tools_destroy_cache_all();
2429     return ret;
2430 }
2431 
2432 void uvm_tools_exit(void)
2433 {
2434     unsigned i;
2435     cdev_del(&g_uvm_tools_cdev);
2436 
2437     nv_kthread_q_stop(&g_tools_queue);
2438 
2439     for (i = 0; i < UvmEventNumTypesAll; ++i)
2440         UVM_ASSERT(g_tools_enabled_event_count[i] == 0);
2441 
2442     UVM_ASSERT(list_empty(&g_tools_va_space_list));
2443 
2444     _uvm_tools_destroy_cache_all();
2445 }
2446