1 /*******************************************************************************
2     Copyright (c) 2016-2023 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 #include "uvm_common.h"
24 #include "uvm_ioctl.h"
25 #include "uvm_gpu.h"
26 #include "uvm_hal.h"
27 #include "uvm_tools.h"
28 #include "uvm_va_space.h"
29 #include "uvm_api.h"
30 #include "uvm_hal_types.h"
31 #include "uvm_va_block.h"
32 #include "uvm_va_range.h"
33 #include "uvm_push.h"
34 #include "uvm_forward_decl.h"
35 #include "uvm_range_group.h"
36 #include "uvm_mem.h"
37 #include "nv_speculation_barrier.h"
38 
39 // We limit the number of times a page can be retained by the kernel
40 // to prevent the user from maliciously passing UVM tools the same page
41 // over and over again in an attempt to overflow the refcount.
42 #define MAX_PAGE_COUNT (1 << 20)
43 
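// Snapshot of an event queue's get/put indices, read from the user-mapped
// control data under the queue lock.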
44 typedef struct
45 {
46     NvU32 get_ahead;
47     NvU32 get_behind;
48     NvU32 put_ahead;
49     NvU32 put_behind;
50 } uvm_tools_queue_snapshot_t;
51 
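// Per-tracker state for an event queue: the pinned and kernel-mapped user
// buffers holding the event entries and the control data, the per-event-type
// list nodes used to subscribe to a va_space, and the wait queue used to
// notify user space once the notification threshold is reached.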
52 typedef struct
53 {
54     uvm_spinlock_t lock;
55     NvU64 subscribed_queues;
56     struct list_head queue_nodes[UvmEventNumTypesAll];
57 
58     struct page **queue_buffer_pages;
59     UvmEventEntry *queue;
60     NvU32 queue_buffer_count;
61     NvU32 notification_threshold;
62 
63     struct page **control_buffer_pages;
64     UvmToolsEventControlData *control;
65 
66     wait_queue_head_t wait_queue;
67     bool is_wakeup_get_valid;
68     NvU32 wakeup_get;
69 } uvm_tools_queue_t;
70 
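// Per-tracker state for counters: the pinned and kernel-mapped user buffer
// holding the counter values, plus the processor (or all processors) the
// counters apply to.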
71 typedef struct
72 {
73     struct list_head counter_nodes[UVM_TOTAL_COUNTERS];
74     NvU64 subscribed_counters;
75 
76     struct page **counter_buffer_pages;
77     NvU64 *counters;
78 
79     bool all_processors;
80     NvProcessorUuid processor;
81 } uvm_tools_counter_t;
82 
83 // private_data for /dev/nvidia-uvm-tools
84 typedef struct
85 {
86     bool is_queue;
87     struct file *uvm_file;
88     union
89     {
90         uvm_tools_queue_t queue;
91         uvm_tools_counter_t counter;
92     };
93 } uvm_tools_event_tracker_t;
94 
95 // Delayed events
96 //
97 // Events that require gpu timestamps for asynchronous operations use a delayed
98 // notification mechanism. Each event type registers a callback that is invoked
99 // from the update_progress channel routines. The callback then enqueues a
100 // work item that takes care of notifying the events. This module keeps a
101 // global list of channels with pending events. Other modules or user apps (via
102 // ioctl) may call uvm_tools_flush_events to update the progress of the channels
103 // in the list, as needed.
104 //
105 // User apps need to flush events before removing gpus to avoid getting
106 // events with gpu ids that have already been removed.
107 
108 // This object describes the pending migration operations within a VA block
109 typedef struct
110 {
111     nv_kthread_q_item_t queue_item;
112     uvm_processor_id_t dst;
113     uvm_processor_id_t src;
114     uvm_va_space_t *va_space;
115 
116     uvm_channel_t *channel;
117     struct list_head events;
118     NvU64 start_timestamp_cpu;
119     NvU64 end_timestamp_cpu;
120     NvU64 *start_timestamp_gpu_addr;
121     NvU64 start_timestamp_gpu;
122     NvU64 range_group_id;
123 } block_migration_data_t;
124 
125 // This object represents a specific pending migration within a VA block
126 typedef struct
127 {
128     struct list_head events_node;
129     NvU64 bytes;
130     NvU64 address;
131     NvU64 *end_timestamp_gpu_addr;
132     NvU64 end_timestamp_gpu;
133     UvmEventMigrationCause cause;
134 } migration_data_t;
135 
136 // This object represents a pending gpu fault replay operation
137 typedef struct
138 {
139     nv_kthread_q_item_t queue_item;
140     uvm_channel_t *channel;
141     uvm_gpu_id_t gpu_id;
142     NvU32 batch_id;
143     uvm_fault_client_type_t client_type;
144     NvU64 timestamp;
145     NvU64 timestamp_gpu;
146     NvU64 *timestamp_gpu_addr;
147 } replay_data_t;
148 
149 // This object describes the pending map remote operations within a VA block
150 typedef struct
151 {
152     nv_kthread_q_item_t queue_item;
153     uvm_processor_id_t src;
154     uvm_processor_id_t dst;
155     UvmEventMapRemoteCause cause;
156     NvU64 timestamp;
157     uvm_va_space_t *va_space;
158 
159     uvm_channel_t *channel;
160     struct list_head events;
161 } block_map_remote_data_t;
162 
163 // This object represents a pending map remote operation
164 typedef struct
165 {
166     struct list_head events_node;
167 
168     NvU64 address;
169     NvU64 size;
170     NvU64 timestamp_gpu;
171     NvU64 *timestamp_gpu_addr;
172 } map_remote_data_t;
173 
174 
175 static struct cdev g_uvm_tools_cdev;
176 static LIST_HEAD(g_tools_va_space_list);
177 static NvU32 g_tools_enabled_event_count[UvmEventNumTypesAll];
178 static uvm_rw_semaphore_t g_tools_va_space_list_lock;
179 static struct kmem_cache *g_tools_event_tracker_cache __read_mostly = NULL;
180 static struct kmem_cache *g_tools_block_migration_data_cache __read_mostly = NULL;
181 static struct kmem_cache *g_tools_migration_data_cache __read_mostly = NULL;
182 static struct kmem_cache *g_tools_replay_data_cache __read_mostly = NULL;
183 static struct kmem_cache *g_tools_block_map_remote_data_cache __read_mostly = NULL;
184 static struct kmem_cache *g_tools_map_remote_data_cache __read_mostly = NULL;
185 static uvm_spinlock_t g_tools_channel_list_lock;
186 static LIST_HEAD(g_tools_channel_list);
187 static nv_kthread_q_t g_tools_queue;
188 
189 static NV_STATUS tools_update_status(uvm_va_space_t *va_space);
190 
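// The event tracker, if any, is stored in filp->private_data. Read it
// atomically since it may be installed concurrently while other file
// operations are in flight.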
191 static uvm_tools_event_tracker_t *tools_event_tracker(struct file *filp)
192 {
193     return (uvm_tools_event_tracker_t *)atomic_long_read((atomic_long_t *)&filp->private_data);
194 }
195 
196 static bool tracker_is_queue(uvm_tools_event_tracker_t *event_tracker)
197 {
198     return event_tracker != NULL && event_tracker->is_queue;
199 }
200 
201 static bool tracker_is_counter(uvm_tools_event_tracker_t *event_tracker)
202 {
203     return event_tracker != NULL && !event_tracker->is_queue;
204 }
205 
206 static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *event_tracker)
207 {
208     uvm_va_space_t *va_space;
209     UVM_ASSERT(event_tracker->uvm_file);
210     va_space = uvm_va_space_get(event_tracker->uvm_file);
211     return va_space;
212 }
213 
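// Mark the pinned user pages dirty and drop the pins taken by
// NV_PIN_USER_PAGES() in map_user_pages().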
214 static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)
215 {
216     NvU64 i;
217 
218     for (i = 0; i < page_count; i++) {
219         set_page_dirty(pages[i]);
220         NV_UNPIN_USER_PAGE(pages[i]);
221     }
222 }
223 
224 static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
225 {
226     size = DIV_ROUND_UP(size, PAGE_SIZE);
227     vunmap((NvU8 *)addr);
228     uvm_put_user_pages_dirty(pages, size);
229     uvm_kvfree(pages);
230 }
231 
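// Checks that the range [start_va, start_va + size) is fully covered by vmas
// and that none of them belongs to the nvidia-uvm file itself.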
232 // This must be called with the mmap_lock held in read mode or better.
233 static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
234 {
235     struct vm_area_struct *vma;
236     NvU64 addr = start_va;
237     NvU64 region_end = start_va + size;
238 
239     do {
240         vma = find_vma(mm, addr);
241         if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
242             return NV_ERR_INVALID_ARGUMENT;
243 
244         addr = vma->vm_end;
245     } while (addr < region_end);
246 
247     return NV_OK;
248 }
249 
250 // Maps the virtual address range [user_va, user_va + size) of the current process into the kernel.
251 // Sets *addr to the kernel mapping and *pages to the array of struct pages backing the memory.
252 static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
253 {
254     NV_STATUS status = NV_OK;
255     long ret = 0;
256     long num_pages;
257     long i;
258 
259     *addr = NULL;
260     *pages = NULL;
261     num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
262 
263     if (uvm_api_range_invalid(user_va, num_pages * PAGE_SIZE)) {
264         status = NV_ERR_INVALID_ADDRESS;
265         goto fail;
266     }
267 
268     *pages = uvm_kvmalloc(sizeof(struct page *) * num_pages);
269     if (*pages == NULL) {
270         status = NV_ERR_NO_MEMORY;
271         goto fail;
272     }
273 
274     // Although uvm_down_read_mmap_lock() is preferable due to its participation
275     // in the UVM lock dependency tracker, it cannot be used here. That's
276     // because pin_user_pages() may fault in HMM pages which are GPU-resident.
277     // When that happens, the UVM page fault handler would record another
278     // mmap_read_lock() on the same thread as this one, leading to a false
279     // positive lock dependency report.
280     //
281     // Therefore, use the lower level nv_mmap_read_lock() here.
282     nv_mmap_read_lock(current->mm);
283     status = check_vmas(current->mm, user_va, size);
284     if (status != NV_OK) {
285         nv_mmap_read_unlock(current->mm);
286         goto fail;
287     }
288     ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, NULL);
289     nv_mmap_read_unlock(current->mm);
290 
291     if (ret != num_pages) {
292         status = NV_ERR_INVALID_ARGUMENT;
293         goto fail;
294     }
295 
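    // Reject pages whose refcount is already very high; see MAX_PAGE_COUNT
    // above for the rationale.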
296     for (i = 0; i < num_pages; i++) {
297         if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
298             status = NV_ERR_INVALID_ARGUMENT;
299             goto fail;
300         }
301     }
302 
303     *addr = vmap(*pages, num_pages, VM_MAP, PAGE_KERNEL);
304     if (*addr == NULL) {
305         status = NV_ERR_NO_MEMORY;
        goto fail;
    }
306 
307     return NV_OK;
308 
309 fail:
310     if (*pages == NULL)
311         return status;
312 
313     if (ret > 0)
314         uvm_put_user_pages_dirty(*pages, ret);
315     else if (ret < 0)
316         status = errno_to_nv_status(ret);
317 
318     uvm_kvfree(*pages);
319     *pages = NULL;
320     return status;
321 }
322 
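// Subscribe the tracker to the event/counter lists selected by list_mask that
// are not already subscribed, bumping the global enabled-event counts, and
// report the newly inserted lists through *inserted_lists.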
323 static void insert_event_tracker(uvm_va_space_t *va_space,
324                                  struct list_head *node,
325                                  NvU32 list_count,
326                                  NvU64 list_mask,
327                                  NvU64 *subscribed_mask,
328                                  struct list_head *lists,
329                                  NvU64 *inserted_lists)
330 {
331     NvU32 i;
332     NvU64 insertable_lists = list_mask & ~*subscribed_mask;
333 
334     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
335     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
336 
337     for (i = 0; i < list_count; i++) {
338         if (insertable_lists & (1ULL << i)) {
339             ++g_tools_enabled_event_count[i];
340             list_add(node + i, lists + i);
341         }
342     }
343 
344     *subscribed_mask |= list_mask;
345     *inserted_lists = insertable_lists;
346 }
347 
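// Undo insert_event_tracker() for the lists selected by list_mask that are
// currently subscribed, dropping the global enabled-event counts.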
348 static void remove_event_tracker(uvm_va_space_t *va_space,
349                                  struct list_head *node,
350                                  NvU32 list_count,
351                                  NvU64 list_mask,
352                                  NvU64 *subscribed_mask)
353 {
354     NvU32 i;
355     NvU64 removable_lists = list_mask & *subscribed_mask;
356 
357     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
358     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
359 
360     for (i = 0; i < list_count; i++) {
361         if (removable_lists & (1ULL << i)) {
362             UVM_ASSERT(g_tools_enabled_event_count[i] > 0);
363             --g_tools_enabled_event_count[i];
364             list_del(node + i);
365         }
366     }
367 
368     *subscribed_mask &= ~list_mask;
369 }
370 
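// Returns true when the number of entries produced but not yet consumed has
// reached the queue's notification threshold.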
371 static bool queue_needs_wakeup(uvm_tools_queue_t *queue, uvm_tools_queue_snapshot_t *sn)
372 {
373     NvU32 queue_mask = queue->queue_buffer_count - 1;
374 
375     uvm_assert_spinlock_locked(&queue->lock);
376     return ((queue->queue_buffer_count + sn->put_behind - sn->get_ahead) & queue_mask) >= queue->notification_threshold;
377 }
378 
379 static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
380 {
381     if (event_tracker->uvm_file != NULL) {
382         NV_STATUS status;
383         uvm_va_space_t *va_space = tools_event_tracker_va_space(event_tracker);
384 
385         uvm_down_write(&g_tools_va_space_list_lock);
386         uvm_down_write(&va_space->perf_events.lock);
387         uvm_down_write(&va_space->tools.lock);
388 
389         if (event_tracker->is_queue) {
390             uvm_tools_queue_t *queue = &event_tracker->queue;
391 
392             remove_event_tracker(va_space,
393                                  queue->queue_nodes,
394                                  UvmEventNumTypesAll,
395                                  queue->subscribed_queues,
396                                  &queue->subscribed_queues);
397 
398             if (queue->queue != NULL) {
399                 unmap_user_pages(queue->queue_buffer_pages,
400                                  queue->queue,
401                                  queue->queue_buffer_count * sizeof(UvmEventEntry));
402             }
403 
404             if (queue->control != NULL) {
405                 unmap_user_pages(queue->control_buffer_pages,
406                                  queue->control,
407                                  sizeof(UvmToolsEventControlData));
408             }
409         }
410         else {
411             uvm_tools_counter_t *counters = &event_tracker->counter;
412 
413             remove_event_tracker(va_space,
414                                  counters->counter_nodes,
415                                  UVM_TOTAL_COUNTERS,
416                                  counters->subscribed_counters,
417                                  &counters->subscribed_counters);
418 
419             if (counters->counters != NULL) {
420                 unmap_user_pages(counters->counter_buffer_pages,
421                                  counters->counters,
422                                  UVM_TOTAL_COUNTERS * sizeof(NvU64));
423             }
424         }
425 
426         // de-registration should not fail
427         status = tools_update_status(va_space);
428         UVM_ASSERT(status == NV_OK);
429 
430         uvm_up_write(&va_space->tools.lock);
431         uvm_up_write(&va_space->perf_events.lock);
432         uvm_up_write(&g_tools_va_space_list_lock);
433 
434         fput(event_tracker->uvm_file);
435     }
436     kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
437 }
438 
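// Copy one event entry into the user-mapped circular queue, counting it as
// dropped if the queue is full. Waiters are woken up once the notification
// threshold is reached, at most once per observed value of get_ahead.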
439 static void enqueue_event(const UvmEventEntry *entry, uvm_tools_queue_t *queue)
440 {
441     UvmToolsEventControlData *ctrl = queue->control;
442     uvm_tools_queue_snapshot_t sn;
443     NvU32 queue_size = queue->queue_buffer_count;
444     NvU32 queue_mask = queue_size - 1;
445 
446     // Prevent processor speculation prior to accessing user-mapped memory to
447     // avoid leaking information from side-channel attacks. There are many
448     // possible paths leading to this point and it would be difficult and error-
449     // prone to audit all of them to determine whether user mode could guide
450     // this access to kernel memory under speculative execution, so to be on the
451     // safe side we'll just always block speculation.
452     nv_speculation_barrier();
453 
454     uvm_spin_lock(&queue->lock);
455 
456     // ctrl is mapped into user space with read and write permissions,
457     // so its values cannot be trusted.
458     sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
459     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
460     sn.put_ahead = (sn.put_behind + 1) & queue_mask;
461 
462     // one free element means that the queue is full
463     if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
464         atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
465         goto unlock;
466     }
467 
468     memcpy(queue->queue + sn.put_behind, entry, sizeof(*entry));
469 
470     sn.put_behind = sn.put_ahead;
471     // put_ahead and put_behind will always be the same outside of queue->lock.
472     // This allows the user-space consumer to choose either a 2- or 4-pointer synchronization approach.
473     atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
474     atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);
475 
476     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
477     // if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead
478     if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
479         queue->is_wakeup_get_valid = true;
480         queue->wakeup_get = sn.get_ahead;
481         wake_up_all(&queue->wait_queue);
482     }
483 
484 unlock:
485     uvm_spin_unlock(&queue->lock);
486 }
487 
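// Dispatch an event to every queue in this va_space subscribed to the entry's
// event type. The va_space tools lock must be held.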
488 static void uvm_tools_record_event(uvm_va_space_t *va_space, const UvmEventEntry *entry)
489 {
490     NvU8 eventType = entry->eventData.eventType;
491     uvm_tools_queue_t *queue;
492 
493     UVM_ASSERT(eventType < UvmEventNumTypesAll);
494 
495     uvm_assert_rwsem_locked(&va_space->tools.lock);
496 
497     list_for_each_entry(queue, va_space->tools.queues + eventType, queue_nodes[eventType])
498         enqueue_event(entry, queue);
499 }
500 
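// Dispatch an event to the subscribed queues of every va_space registered in
// g_tools_va_space_list.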
501 static void uvm_tools_broadcast_event(const UvmEventEntry *entry)
502 {
503     uvm_va_space_t *va_space;
504 
505     uvm_down_read(&g_tools_va_space_list_lock);
506     list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
507         uvm_down_read(&va_space->tools.lock);
508         uvm_tools_record_event(va_space, entry);
509         uvm_up_read(&va_space->tools.lock);
510     }
511     uvm_up_read(&g_tools_va_space_list_lock);
512 }
513 
514 static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
515 {
516     // For compatibility with older counters, CPU faults for memory with a preferred location are reported
517     // for their preferred location as well as for the CPU device itself.
518     // This check prevents double counting in the aggregate count.
519     if (counter == UvmCounterNameCpuPageFaultCount)
520         return uvm_processor_uuid_eq(processor, &NV_PROCESSOR_UUID_CPU_DEFAULT);
521     return true;
522 }
523 
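// Add 'amount' to the given counter for every subscribed tracker that either
// aggregates all processors (subject to counter_matches_processor()) or
// targets the given processor specifically.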
524 static void uvm_tools_inc_counter(uvm_va_space_t *va_space,
525                                   UvmCounterName counter,
526                                   NvU64 amount,
527                                   const NvProcessorUuid *processor)
528 {
529     UVM_ASSERT((NvU32)counter < UVM_TOTAL_COUNTERS);
530     uvm_assert_rwsem_locked(&va_space->tools.lock);
531 
532     if (amount > 0) {
533         uvm_tools_counter_t *counters;
534 
535         // Prevent processor speculation prior to accessing user-mapped memory
536         // to avoid leaking information from side-channel attacks. There are
537         // many possible paths leading to this point and it would be difficult
538         // and error-prone to audit all of them to determine whether user mode
539         // could guide this access to kernel memory under speculative execution,
540         // so to be on the safe side we'll just always block speculation.
541         nv_speculation_barrier();
542 
543         list_for_each_entry(counters, va_space->tools.counters + counter, counter_nodes[counter]) {
544             if ((counters->all_processors && counter_matches_processor(counter, processor)) ||
545                 uvm_processor_uuid_eq(&counters->processor, processor)) {
546                 atomic64_add(amount, (atomic64_t *)(counters->counters + counter));
547             }
548         }
549     }
550 }
551 
552 static bool tools_is_counter_enabled(uvm_va_space_t *va_space, UvmCounterName counter)
553 {
554     uvm_assert_rwsem_locked(&va_space->tools.lock);
555 
556     UVM_ASSERT(counter < UVM_TOTAL_COUNTERS);
557     return !list_empty(va_space->tools.counters + counter);
558 }
559 
560 static bool tools_is_event_enabled(uvm_va_space_t *va_space, UvmEventType event)
561 {
562     uvm_assert_rwsem_locked(&va_space->tools.lock);
563 
564     UVM_ASSERT(event < UvmEventNumTypesAll);
565     return !list_empty(va_space->tools.queues + event);
566 }
567 
568 static bool tools_is_event_enabled_in_any_va_space(UvmEventType event)
569 {
570     bool ret = false;
571 
572     uvm_down_read(&g_tools_va_space_list_lock);
573     ret = g_tools_enabled_event_count[event] != 0;
574     uvm_up_read(&g_tools_va_space_list_lock);
575 
576     return ret;
577 }
578 
579 static bool tools_are_enabled(uvm_va_space_t *va_space)
580 {
581     NvU32 i;
582 
583     uvm_assert_rwsem_locked(&va_space->tools.lock);
584 
585     for (i = 0; i < UVM_TOTAL_COUNTERS; i++) {
586         if (tools_is_counter_enabled(va_space, i))
587             return true;
588     }
589     for (i = 0; i < UvmEventNumTypesAll; i++) {
590         if (tools_is_event_enabled(va_space, i))
591             return true;
592     }
593     return false;
594 }
595 
596 static bool tools_is_fault_callback_needed(uvm_va_space_t *va_space)
597 {
598     return tools_is_event_enabled(va_space, UvmEventTypeCpuFault) ||
599            tools_is_event_enabled(va_space, UvmEventTypeGpuFault) ||
600            tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount) ||
601            tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount);
602 }
603 
604 static bool tools_is_migration_callback_needed(uvm_va_space_t *va_space)
605 {
606     return tools_is_event_enabled(va_space, UvmEventTypeMigration) ||
607            tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate) ||
608            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH) ||
609            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD);
610 }
611 
612 static int uvm_tools_open(struct inode *inode, struct file *filp)
613 {
614     filp->private_data = NULL;
615     return -nv_status_to_errno(uvm_global_get_status());
616 }
617 
618 static int uvm_tools_open_entry(struct inode *inode, struct file *filp)
619 {
620     UVM_ENTRY_RET(uvm_tools_open(inode, filp));
621 }
622 
623 static int uvm_tools_release(struct inode *inode, struct file *filp)
624 {
625     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
626     if (event_tracker != NULL) {
627         destroy_event_tracker(event_tracker);
628         filp->private_data = NULL;
629     }
630     return -nv_status_to_errno(uvm_global_get_status());
631 }
632 
633 static int uvm_tools_release_entry(struct inode *inode, struct file *filp)
634 {
635     UVM_ENTRY_RET(uvm_tools_release(inode, filp));
636 }
637 
638 static long uvm_tools_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
639 {
640     switch (cmd) {
641         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_INIT_EVENT_TRACKER,         uvm_api_tools_init_event_tracker);
642         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD, uvm_api_tools_set_notification_threshold);
643         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS,  uvm_api_tools_event_queue_enable_events);
644         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS, uvm_api_tools_event_queue_disable_events);
645         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_ENABLE_COUNTERS,            uvm_api_tools_enable_counters);
646         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_DISABLE_COUNTERS,           uvm_api_tools_disable_counters);
647     }
648 
649     uvm_thread_assert_all_unlocked();
650 
651     return -EINVAL;
652 }
653 
654 static long uvm_tools_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
655 {
656     UVM_ENTRY_RET(uvm_tools_unlocked_ioctl(filp, cmd, arg));
657 }
658 
659 static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
660 {
661     int flags = 0;
662     uvm_tools_queue_snapshot_t sn;
663     uvm_tools_event_tracker_t *event_tracker;
664     UvmToolsEventControlData *ctrl;
665 
666     if (uvm_global_get_status() != NV_OK)
667         return POLLERR;
668 
669     event_tracker = tools_event_tracker(filp);
670     if (!tracker_is_queue(event_tracker))
671         return POLLERR;
672 
673     uvm_spin_lock(&event_tracker->queue.lock);
674 
675     event_tracker->queue.is_wakeup_get_valid = false;
676     ctrl = event_tracker->queue.control;
677     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
678     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
679 
680     if (queue_needs_wakeup(&event_tracker->queue, &sn))
681         flags = POLLIN | POLLRDNORM;
682 
683     uvm_spin_unlock(&event_tracker->queue.lock);
684 
685     poll_wait(filp, &event_tracker->queue.wait_queue, wait);
686     return flags;
687 }
688 
689 static unsigned uvm_tools_poll_entry(struct file *filp, poll_table *wait)
690 {
691     UVM_ENTRY_RET(uvm_tools_poll(filp, wait));
692 }
693 
694 static UvmEventFaultType g_hal_to_tools_fault_type_table[UVM_FAULT_TYPE_COUNT] = {
695     [UVM_FAULT_TYPE_INVALID_PDE]          = UvmFaultTypeInvalidPde,
696     [UVM_FAULT_TYPE_INVALID_PTE]          = UvmFaultTypeInvalidPte,
697     [UVM_FAULT_TYPE_ATOMIC]               = UvmFaultTypeAtomic,
698     [UVM_FAULT_TYPE_WRITE]                = UvmFaultTypeWrite,
699     [UVM_FAULT_TYPE_PDE_SIZE]             = UvmFaultTypeInvalidPdeSize,
700     [UVM_FAULT_TYPE_VA_LIMIT_VIOLATION]   = UvmFaultTypeLimitViolation,
701     [UVM_FAULT_TYPE_UNBOUND_INST_BLOCK]   = UvmFaultTypeUnboundInstBlock,
702     [UVM_FAULT_TYPE_PRIV_VIOLATION]       = UvmFaultTypePrivViolation,
703     [UVM_FAULT_TYPE_PITCH_MASK_VIOLATION] = UvmFaultTypePitchMaskViolation,
704     [UVM_FAULT_TYPE_WORK_CREATION]        = UvmFaultTypeWorkCreation,
705     [UVM_FAULT_TYPE_UNSUPPORTED_APERTURE] = UvmFaultTypeUnsupportedAperture,
706     [UVM_FAULT_TYPE_COMPRESSION_FAILURE]  = UvmFaultTypeCompressionFailure,
707     [UVM_FAULT_TYPE_UNSUPPORTED_KIND]     = UvmFaultTypeUnsupportedKind,
708     [UVM_FAULT_TYPE_REGION_VIOLATION]     = UvmFaultTypeRegionViolation,
709     [UVM_FAULT_TYPE_POISONED]             = UvmFaultTypePoison,
710 };
711 
712 // TODO: add new value for weak atomics in tools
713 static UvmEventMemoryAccessType g_hal_to_tools_fault_access_type_table[UVM_FAULT_ACCESS_TYPE_COUNT] = {
714     [UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG] = UvmEventMemoryAccessTypeAtomic,
715     [UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK]   = UvmEventMemoryAccessTypeAtomic,
716     [UVM_FAULT_ACCESS_TYPE_WRITE]         = UvmEventMemoryAccessTypeWrite,
717     [UVM_FAULT_ACCESS_TYPE_READ]          = UvmEventMemoryAccessTypeRead,
718     [UVM_FAULT_ACCESS_TYPE_PREFETCH]      = UvmEventMemoryAccessTypePrefetch
719 };
720 
721 static UvmEventApertureType g_hal_to_tools_aperture_table[UVM_APERTURE_MAX] = {
722     [UVM_APERTURE_PEER_0] = UvmEventAperturePeer0,
723     [UVM_APERTURE_PEER_1] = UvmEventAperturePeer1,
724     [UVM_APERTURE_PEER_2] = UvmEventAperturePeer2,
725     [UVM_APERTURE_PEER_3] = UvmEventAperturePeer3,
726     [UVM_APERTURE_PEER_4] = UvmEventAperturePeer4,
727     [UVM_APERTURE_PEER_5] = UvmEventAperturePeer5,
728     [UVM_APERTURE_PEER_6] = UvmEventAperturePeer6,
729     [UVM_APERTURE_PEER_7] = UvmEventAperturePeer7,
730     [UVM_APERTURE_SYS]    = UvmEventApertureSys,
731     [UVM_APERTURE_VID]    = UvmEventApertureVid,
732 };
733 
734 static UvmEventFaultClientType g_hal_to_tools_fault_client_type_table[UVM_FAULT_CLIENT_TYPE_COUNT] = {
735     [UVM_FAULT_CLIENT_TYPE_GPC] = UvmEventFaultClientTypeGpc,
736     [UVM_FAULT_CLIENT_TYPE_HUB] = UvmEventFaultClientTypeHub,
737 };
738 
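// Translate a single fault buffer entry into a UvmEventTypeGpuFault event and
// dispatch it to the subscribed queues.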
739 static void record_gpu_fault_instance(uvm_gpu_t *gpu,
740                                       uvm_va_space_t *va_space,
741                                       const uvm_fault_buffer_entry_t *fault_entry,
742                                       NvU64 batch_id,
743                                       NvU64 timestamp)
744 {
745     UvmEventEntry entry;
746     UvmEventGpuFaultInfo *info = &entry.eventData.gpuFault;
747     memset(&entry, 0, sizeof(entry));
748 
749     info->eventType     = UvmEventTypeGpuFault;
750     info->gpuIndex      = uvm_id_value(gpu->id);
751     info->faultType     = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
752     info->accessType    = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
753     info->clientType    = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
754     if (fault_entry->is_replayable)
755         info->gpcId     = fault_entry->fault_source.gpc_id;
756     else
757         info->channelId = fault_entry->fault_source.channel_id;
758     info->clientId      = fault_entry->fault_source.client_id;
759     info->address       = fault_entry->fault_address;
760     info->timeStamp     = timestamp;
761     info->timeStampGpu  = fault_entry->timestamp;
762     info->batchId       = batch_id;
763 
764     uvm_tools_record_event(va_space, &entry);
765 }
766 
767 static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
768 {
769     uvm_va_space_t *va_space = event_data->fault.space;
770 
771     UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT);
772     UVM_ASSERT(event_data->fault.space);
773 
774     uvm_assert_rwsem_locked(&va_space->lock);
775     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
776     UVM_ASSERT(va_space->tools.enabled);
777 
778     uvm_down_read(&va_space->tools.lock);
779     UVM_ASSERT(tools_is_fault_callback_needed(va_space));
780 
781     if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
782         if (tools_is_event_enabled(va_space, UvmEventTypeCpuFault)) {
783             UvmEventEntry entry;
784             UvmEventCpuFaultInfo *info = &entry.eventData.cpuFault;
785             memset(&entry, 0, sizeof(entry));
786 
787             info->eventType = UvmEventTypeCpuFault;
788             if (event_data->fault.cpu.is_write)
789                 info->accessType = UvmEventMemoryAccessTypeWrite;
790             else
791                 info->accessType = UvmEventMemoryAccessTypeRead;
792 
793             info->address = event_data->fault.cpu.fault_va;
794             info->timeStamp = NV_GETTIME();
795             // assume that current owns va_space
796             info->pid = uvm_get_stale_process_id();
797             info->threadId = uvm_get_stale_thread_id();
798             info->pc = event_data->fault.cpu.pc;
799 
800             uvm_tools_record_event(va_space, &entry);
801         }
802         if (tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount)) {
803             uvm_processor_id_t preferred_location;
804 
805             // The UVM Lite tools interface did not represent the CPU as a UVM
806             // device. It reported CPU faults against the corresponding
807             // allocation's 'home location'. Though this driver's tools
808             // interface does include a CPU device, for compatibility, the
809             // driver still reports faults against a buffer's preferred
810             // location, in addition to the CPU.
811             uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &NV_PROCESSOR_UUID_CPU_DEFAULT);
812 
813             preferred_location = event_data->fault.preferred_location;
814             if (UVM_ID_IS_GPU(preferred_location)) {
815                 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, preferred_location);
816                 uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
817             }
818         }
819     }
820     else {
821         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->fault.proc_id);
822         UVM_ASSERT(gpu);
823 
824         if (tools_is_event_enabled(va_space, UvmEventTypeGpuFault)) {
825             NvU64 timestamp = NV_GETTIME();
826             uvm_fault_buffer_entry_t *fault_entry = event_data->fault.gpu.buffer_entry;
827             uvm_fault_buffer_entry_t *fault_instance;
828 
829             record_gpu_fault_instance(gpu, va_space, fault_entry, event_data->fault.gpu.batch_id, timestamp);
830 
831             list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list)
832                 record_gpu_fault_instance(gpu, va_space, fault_instance, event_data->fault.gpu.batch_id, timestamp);
833         }
834 
835         if (tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount))
836             uvm_tools_inc_counter(va_space, UvmCounterNameGpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
837     }
838     uvm_up_read(&va_space->tools.lock);
839 }
840 
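// Channels with pending delayed events are kept in g_tools_channel_list so
// that tools_schedule_completed_events() can update their progress. The
// per-channel count tracks how many delayed events are still outstanding.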
841 static void add_pending_event_for_channel(uvm_channel_t *channel)
842 {
843     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
844 
845     if (channel->tools.pending_event_count++ == 0)
846         list_add_tail(&channel->tools.channel_list_node, &g_tools_channel_list);
847 }
848 
849 static void remove_pending_event_for_channel(uvm_channel_t *channel)
850 {
851     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
852     UVM_ASSERT(channel->tools.pending_event_count > 0);
853     if (--channel->tools.pending_event_count == 0)
854         list_del_init(&channel->tools.channel_list_node);
855 }
856 
857 
858 static void record_migration_events(void *args)
859 {
860     block_migration_data_t *block_mig = (block_migration_data_t *)args;
861     migration_data_t *mig;
862     migration_data_t *next;
863     UvmEventEntry entry;
864     UvmEventMigrationInfo *info = &entry.eventData.migration;
865     uvm_va_space_t *va_space = block_mig->va_space;
866 
867     NvU64 gpu_timestamp = block_mig->start_timestamp_gpu;
868 
869     // Initialize fields that are constant throughout the whole block
870     memset(&entry, 0, sizeof(entry));
871     info->eventType      = UvmEventTypeMigration;
872     info->srcIndex       = uvm_id_value(block_mig->src);
873     info->dstIndex       = uvm_id_value(block_mig->dst);
874     info->beginTimeStamp = block_mig->start_timestamp_cpu;
875     info->endTimeStamp   = block_mig->end_timestamp_cpu;
876     info->rangeGroupId   = block_mig->range_group_id;
877 
878     uvm_down_read(&va_space->tools.lock);
879     list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
880         UVM_ASSERT(mig->bytes > 0);
881         list_del(&mig->events_node);
882 
883         info->address           = mig->address;
884         info->migratedBytes     = mig->bytes;
885         info->beginTimeStampGpu = gpu_timestamp;
886         info->endTimeStampGpu   = mig->end_timestamp_gpu;
887         info->migrationCause    = mig->cause;
888         gpu_timestamp = mig->end_timestamp_gpu;
889         kmem_cache_free(g_tools_migration_data_cache, mig);
890 
891         uvm_tools_record_event(va_space, &entry);
892     }
893     uvm_up_read(&va_space->tools.lock);
894 
895     UVM_ASSERT(list_empty(&block_mig->events));
896     kmem_cache_free(g_tools_block_migration_data_cache, block_mig);
897 }
898 
899 static void record_migration_events_entry(void *args)
900 {
901     UVM_ENTRY_VOID(record_migration_events(args));
902 }
903 
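// push_info->on_complete callback for block migrations: capture the CPU end
// timestamp and the GPU timestamps, then hand the event list off to the tools
// queue for delayed notification.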
904 static void on_block_migration_complete(void *ptr)
905 {
906     migration_data_t *mig;
907     block_migration_data_t *block_mig = (block_migration_data_t *)ptr;
908 
909     block_mig->end_timestamp_cpu = NV_GETTIME();
910     block_mig->start_timestamp_gpu = *block_mig->start_timestamp_gpu_addr;
911     list_for_each_entry(mig, &block_mig->events, events_node)
912         mig->end_timestamp_gpu = *mig->end_timestamp_gpu_addr;
913 
914     nv_kthread_q_item_init(&block_mig->queue_item, record_migration_events_entry, block_mig);
915 
916     // The UVM driver may notice that work in a channel is complete in a variety of situations,
917     // and the va_space lock is not always held in all of them, nor can it always be acquired safely
918     // from them. Dispatching events requires the va_space lock to be held in at least read mode,
919     // so this callback simply enqueues the dispatching onto a work queue, from whose context the
920     // va_space lock is always safe to acquire.
921     uvm_spin_lock(&g_tools_channel_list_lock);
922     remove_pending_event_for_channel(block_mig->channel);
923     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_mig->queue_item);
924     uvm_spin_unlock(&g_tools_channel_list_lock);
925 }
926 
927 static void record_replay_event_helper(uvm_gpu_id_t gpu_id,
928                                        NvU32 batch_id,
929                                        uvm_fault_client_type_t client_type,
930                                        NvU64 timestamp,
931                                        NvU64 timestamp_gpu)
932 {
933     UvmEventEntry entry;
934 
935     memset(&entry, 0, sizeof(entry));
936     entry.eventData.gpuFaultReplay.eventType    = UvmEventTypeGpuFaultReplay;
937     entry.eventData.gpuFaultReplay.gpuIndex     = uvm_id_value(gpu_id);
938     entry.eventData.gpuFaultReplay.batchId      = batch_id;
939     entry.eventData.gpuFaultReplay.clientType   = g_hal_to_tools_fault_client_type_table[client_type];
940     entry.eventData.gpuFaultReplay.timeStamp    = timestamp;
941     entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;
942 
943     uvm_tools_broadcast_event(&entry);
944 }
945 
946 static void record_replay_events(void *args)
947 {
948     replay_data_t *replay = (replay_data_t *)args;
949 
950     record_replay_event_helper(replay->gpu_id,
951                                replay->batch_id,
952                                replay->client_type,
953                                replay->timestamp,
954                                replay->timestamp_gpu);
955 
956     kmem_cache_free(g_tools_replay_data_cache, replay);
957 }
958 
959 static void record_replay_events_entry(void *args)
960 {
961     UVM_ENTRY_VOID(record_replay_events(args));
962 }
963 
964 static void on_replay_complete(void *ptr)
965 {
966     replay_data_t *replay = (replay_data_t *)ptr;
967     replay->timestamp_gpu = *replay->timestamp_gpu_addr;
968 
969     nv_kthread_q_item_init(&replay->queue_item, record_replay_events_entry, ptr);
970 
971     uvm_spin_lock(&g_tools_channel_list_lock);
972     remove_pending_event_for_channel(replay->channel);
973     nv_kthread_q_schedule_q_item(&g_tools_queue, &replay->queue_item);
974     uvm_spin_unlock(&g_tools_channel_list_lock);
975 
976 }
977 
978 static UvmEventMigrationCause g_make_resident_to_tools_migration_cause[UVM_MAKE_RESIDENT_CAUSE_MAX] = {
979     [UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT]     = UvmEventMigrationCauseCoherence,
980     [UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence,
981     [UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER]       = UvmEventMigrationCauseAccessCounters,
982     [UVM_MAKE_RESIDENT_CAUSE_PREFETCH]             = UvmEventMigrationCausePrefetch,
983     [UVM_MAKE_RESIDENT_CAUSE_EVICTION]             = UvmEventMigrationCauseEviction,
984     [UVM_MAKE_RESIDENT_CAUSE_API_TOOLS]            = UvmEventMigrationCauseInvalid,
985     [UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE]          = UvmEventMigrationCauseUser,
986     [UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP]  = UvmEventMigrationCauseCoherence,
987     [UVM_MAKE_RESIDENT_CAUSE_API_HINT]             = UvmEventMigrationCauseUser,
988 };
989 
990 // This event is notified asynchronously when all the migrations pushed to the
991 // same uvm_push_t object in a call to block_copy_resident_pages_between have
992 // finished
993 static void uvm_tools_record_migration(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
994 {
995     uvm_va_block_t *va_block = event_data->migration.block;
996     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
997 
998     UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION);
999 
1000     uvm_assert_mutex_locked(&va_block->lock);
1001     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
1002     UVM_ASSERT(va_space->tools.enabled);
1003 
1004     uvm_down_read(&va_space->tools.lock);
1005     UVM_ASSERT(tools_is_migration_callback_needed(va_space));
1006 
1007     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1008         migration_data_t *mig;
1009         uvm_push_info_t *push_info = uvm_push_info_from_push(event_data->migration.push);
1010         block_migration_data_t *block_mig = (block_migration_data_t *)push_info->on_complete_data;
1011 
1012         if (push_info->on_complete != NULL) {
1013             mig = kmem_cache_alloc(g_tools_migration_data_cache, NV_UVM_GFP_FLAGS);
1014             if (mig == NULL)
1015                 goto done_unlock;
1016 
1017             mig->address = event_data->migration.address;
1018             mig->bytes = event_data->migration.bytes;
1019             mig->end_timestamp_gpu_addr = uvm_push_timestamp(event_data->migration.push);
1020             mig->cause = g_make_resident_to_tools_migration_cause[event_data->migration.cause];
1021 
1022             list_add_tail(&mig->events_node, &block_mig->events);
1023         }
1024     }
1025 
1026     // Increment counters
1027     if (UVM_ID_IS_CPU(event_data->migration.src) &&
1028         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD)) {
1029         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.dst);
1030         uvm_tools_inc_counter(va_space,
1031                               UvmCounterNameBytesXferHtD,
1032                               event_data->migration.bytes,
1033                               uvm_gpu_uuid(gpu));
1034     }
1035     if (UVM_ID_IS_CPU(event_data->migration.dst) &&
1036         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH)) {
1037         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.src);
1038         uvm_tools_inc_counter(va_space,
1039                               UvmCounterNameBytesXferDtH,
1040                               event_data->migration.bytes,
1041                               uvm_gpu_uuid(gpu));
1042     }
1043 
1044 done_unlock:
1045     uvm_up_read(&va_space->tools.lock);
1046 }
1047 
1048 // This event is notified asynchronously when it is marked as completed in the
1049 // pushbuffer the replay method belongs to.
1050 void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
1051                                 uvm_push_t *push,
1052                                 NvU32 batch_id,
1053                                 uvm_fault_client_type_t client_type)
1054 {
1055     uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1056     replay_data_t *replay;
1057 
1058     // Perform delayed notification only if some VA space has signed up for
1059     // UvmEventTypeGpuFaultReplay
1060     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1061         return;
1062 
1063     replay = kmem_cache_alloc(g_tools_replay_data_cache, NV_UVM_GFP_FLAGS);
1064     if (replay == NULL)
1065         return;
1066 
1067     UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1068 
1069     replay->timestamp_gpu_addr = uvm_push_timestamp(push);
1070     replay->gpu_id             = gpu->id;
1071     replay->batch_id           = batch_id;
1072     replay->client_type        = client_type;
1073     replay->timestamp          = NV_GETTIME();
1074     replay->channel            = push->channel;
1075 
1076     push_info->on_complete_data = replay;
1077     push_info->on_complete = on_replay_complete;
1078 
1079     uvm_spin_lock(&g_tools_channel_list_lock);
1080     add_pending_event_for_channel(replay->channel);
1081     uvm_spin_unlock(&g_tools_channel_list_lock);
1082 }
1083 
1084 
1085 void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
1086                                      NvU32 batch_id,
1087                                      uvm_fault_client_type_t client_type)
1088 {
1089     UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
1090 
1091     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1092         return;
1093 
1094     record_replay_event_helper(gpu->id,
1095                                batch_id,
1096                                client_type,
1097                                NV_GETTIME(),
1098                                gpu->parent->host_hal->get_time(gpu));
1099 }
1100 
1101 void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
1102                                         const uvm_access_counter_buffer_entry_t *buffer_entry,
1103                                         bool on_managed)
1104 {
1105     UvmEventEntry entry;
1106     UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter;
1107 
1108     // Perform the notification only if some VA space has signed up for
1109     // UvmEventTypeTestAccessCounter
1110     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeTestAccessCounter))
1111         return;
1112 
1113     if (!buffer_entry->address.is_virtual)
1114         UVM_ASSERT(UVM_ID_IS_VALID(buffer_entry->physical_info.resident_id));
1115 
1116     memset(&entry, 0, sizeof(entry));
1117 
1118     info->eventType           = UvmEventTypeTestAccessCounter;
1119     info->srcIndex            = uvm_id_value(gpu->id);
1120     info->address             = buffer_entry->address.address;
1121     info->isVirtual           = buffer_entry->address.is_virtual? 1: 0;
1122     if (buffer_entry->address.is_virtual) {
1123         info->instancePtr         = buffer_entry->virtual_info.instance_ptr.address;
1124         info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
1125         info->veId                = buffer_entry->virtual_info.ve_id;
1126     }
1127     else {
1128         info->aperture            = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
1129     }
1130     info->isFromCpu           = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
1131     info->onManaged           = on_managed? 1 : 0;
1132     info->value               = buffer_entry->counter_value;
1133     info->subGranularity      = buffer_entry->sub_granularity;
1134     info->bank                = buffer_entry->bank;
1135     info->tag                 = buffer_entry->tag;
1136 
1137     uvm_tools_broadcast_event(&entry);
1138 }
1139 
1140 void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
1141 {
1142     UvmEventEntry entry;
1143 
1144     if (!va_space->tools.enabled)
1145         return;
1146 
    // Zero the entry so that no uninitialized stack data reaches the user-mapped queue.
    memset(&entry, 0, sizeof(entry));
1147     entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
1148     uvm_down_read(&va_space->tools.lock);
1149     uvm_tools_record_event(va_space, &entry);
1150     uvm_up_read(&va_space->tools.lock);
1151 }
1152 
1153 // This function is used as a begin marker to group all migrations within a VA
1154 // block that are performed in the same call to
1155 // block_copy_resident_pages_between. All of these are pushed to the same
1156 // uvm_push_t object, and will be notified in a burst when the last one finishes.
1157 void uvm_tools_record_block_migration_begin(uvm_va_block_t *va_block,
1158                                             uvm_push_t *push,
1159                                             uvm_processor_id_t dst_id,
1160                                             uvm_processor_id_t src_id,
1161                                             NvU64 start,
1162                                             uvm_make_resident_cause_t cause)
1163 {
1164     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1165     uvm_range_group_range_t *range;
1166 
1167     // Calls from tools read/write functions to make_resident must not trigger
1168     // any migration
1169     UVM_ASSERT(cause != UVM_MAKE_RESIDENT_CAUSE_API_TOOLS);
1170 
1171     // During evictions the va_space lock is not held.
1172     if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION)
1173         uvm_assert_rwsem_locked(&va_space->lock);
1174 
1175     if (!va_space->tools.enabled)
1176         return;
1177 
1178     uvm_down_read(&va_space->tools.lock);
1179 
1180     // Perform delayed notification only if the VA space has signed up for
1181     // UvmEventTypeMigration
1182     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1183         block_migration_data_t *block_mig;
1184         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1185 
1186         UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1187 
1188         block_mig = kmem_cache_alloc(g_tools_block_migration_data_cache, NV_UVM_GFP_FLAGS);
1189         if (block_mig == NULL)
1190             goto done_unlock;
1191 
1192         block_mig->start_timestamp_gpu_addr = uvm_push_timestamp(push);
1193         block_mig->channel = push->channel;
1194         block_mig->start_timestamp_cpu = NV_GETTIME();
1195         block_mig->dst = dst_id;
1196         block_mig->src = src_id;
1197         block_mig->range_group_id = UVM_RANGE_GROUP_ID_NONE;
1198 
1199         // During evictions, it is not safe to call uvm_range_group_range_find() because the va_space lock is not held.
1200         if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1201             range = uvm_range_group_range_find(va_space, start);
1202             if (range != NULL)
1203                 block_mig->range_group_id = range->range_group->id;
1204         }
1205         block_mig->va_space = va_space;
1206 
1207         INIT_LIST_HEAD(&block_mig->events);
1208         push_info->on_complete_data = block_mig;
1209         push_info->on_complete = on_block_migration_complete;
1210 
1211         uvm_spin_lock(&g_tools_channel_list_lock);
1212         add_pending_event_for_channel(block_mig->channel);
1213         uvm_spin_unlock(&g_tools_channel_list_lock);
1214     }
1215 
1216 done_unlock:
1217     uvm_up_read(&va_space->tools.lock);
1218 }
1219 
1220 void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
1221                                      uvm_processor_id_t dst,
1222                                      uvm_va_block_region_t region,
1223                                      const uvm_page_mask_t *page_mask)
1224 {
1225     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1226 
1227     if (!va_space->tools.enabled)
1228         return;
1229 
1230     uvm_down_read(&va_space->tools.lock);
1231     if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate)) {
1232         // Read-duplication events
1233         UvmEventEntry entry;
1234         UvmEventReadDuplicateInfo *info_read_duplicate = &entry.eventData.readDuplicate;
1235         uvm_page_index_t page_index;
1236         memset(&entry, 0, sizeof(entry));
1237 
1238         info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
1239         info_read_duplicate->size      = PAGE_SIZE;
1240         info_read_duplicate->timeStamp = NV_GETTIME();
1241 
1242         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1243             uvm_processor_id_t id;
1244             uvm_processor_mask_t resident_processors;
1245 
1246             info_read_duplicate->address    = uvm_va_block_cpu_page_address(va_block, page_index);
1247             info_read_duplicate->processors = 0;
1248 
1249             uvm_va_block_page_resident_processors(va_block, page_index, &resident_processors);
1250             for_each_id_in_mask(id, &resident_processors)
1251                 info_read_duplicate->processors |= (1 << uvm_id_value(id));
1252 
1253             uvm_tools_record_event(va_space, &entry);
1254         }
1255     }
1256     uvm_up_read(&va_space->tools.lock);
1257 }
1258 
1259 void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
1260                                                 uvm_processor_id_t dst,
1261                                                 uvm_va_block_region_t region,
1262                                                 const uvm_page_mask_t *page_mask)
1263 {
1264     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1265 
1266     if (!va_space->tools.enabled)
1267         return;
1268 
1269     uvm_down_read(&va_space->tools.lock);
1270     if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicateInvalidate)) {
1271         UvmEventEntry entry;
1272         uvm_page_index_t page_index;
1273         UvmEventReadDuplicateInvalidateInfo *info = &entry.eventData.readDuplicateInvalidate;
1274         memset(&entry, 0, sizeof(entry));
1275 
1276         info->eventType     = UvmEventTypeReadDuplicateInvalidate;
1277         info->residentIndex = uvm_id_value(dst);
1278         info->size          = PAGE_SIZE;
1279         info->timeStamp     = NV_GETTIME();
1280 
1281         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1282             UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));
1283 
1284             info->address = uvm_va_block_cpu_page_address(va_block, page_index);
1285             uvm_tools_record_event(va_space, &entry);
1286         }
1287     }
1288     uvm_up_read(&va_space->tools.lock);
1289 }
1290 
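// Update the progress of every channel that currently has pending delayed
// events so that their push completion callbacks run and the corresponding
// event notifications get scheduled.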
1291 static void tools_schedule_completed_events(void)
1292 {
1293     uvm_channel_t *channel;
1294     uvm_channel_t *next_channel;
1295     NvU64 channel_count = 0;
1296     NvU64 i;
1297 
1298     uvm_spin_lock(&g_tools_channel_list_lock);
1299 
1300     // retain every channel list entry currently in the list and keep track of their count.
1301     list_for_each_entry(channel, &g_tools_channel_list, tools.channel_list_node) {
1302         ++channel->tools.pending_event_count;
1303         ++channel_count;
1304     }
1305     uvm_spin_unlock(&g_tools_channel_list_lock);
1306 
1307     if (channel_count == 0)
1308         return;
1309 
1310     // new entries always appear at the end, and all the entries seen in the first loop have been
1311     // retained, so it is safe to iterate over them
1312     channel = list_first_entry(&g_tools_channel_list, uvm_channel_t, tools.channel_list_node);
1313     for (i = 0; i < channel_count; i++) {
1314         uvm_channel_update_progress_all(channel);
1315         channel = list_next_entry(channel, tools.channel_list_node);
1316     }
1317 
1318     // now release all the entries we retained in the beginning
1319     i = 0;
1320     uvm_spin_lock(&g_tools_channel_list_lock);
1321     list_for_each_entry_safe(channel, next_channel, &g_tools_channel_list, tools.channel_list_node) {
1322         if (i++ == channel_count)
1323             break;
1324 
1325         remove_pending_event_for_channel(channel);
1326     }
1327     uvm_spin_unlock(&g_tools_channel_list_lock);
1328 }
1329 
1330 void uvm_tools_record_cpu_fatal_fault(uvm_va_space_t *va_space,
1331                                       NvU64 address,
1332                                       bool is_write,
1333                                       UvmEventFatalReason reason)
1334 {
1335     uvm_assert_rwsem_locked(&va_space->lock);
1336 
1337     if (!va_space->tools.enabled)
1338         return;
1339 
1340     uvm_down_read(&va_space->tools.lock);
1341     if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
1342         UvmEventEntry entry;
1343         UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
1344         memset(&entry, 0, sizeof(entry));
1345 
1346         info->eventType      = UvmEventTypeFatalFault;
1347         info->processorIndex = UVM_ID_CPU_VALUE;
1348         info->timeStamp      = NV_GETTIME();
1349         info->address        = address;
        info->accessType     = is_write ? UvmEventMemoryAccessTypeWrite : UvmEventMemoryAccessTypeRead;
        // info->faultType is not valid for CPU faults
1352         info->reason         = reason;
1353 
1354         uvm_tools_record_event(va_space, &entry);
1355     }
1356     uvm_up_read(&va_space->tools.lock);
1357 }
1358 
1359 void uvm_tools_record_gpu_fatal_fault(uvm_gpu_id_t gpu_id,
1360                                       uvm_va_space_t *va_space,
1361                                       const uvm_fault_buffer_entry_t *buffer_entry,
1362                                       UvmEventFatalReason reason)
1363 {
1364     uvm_assert_rwsem_locked(&va_space->lock);
1365 
1366     if (!va_space->tools.enabled)
1367         return;
1368 
1369     uvm_down_read(&va_space->tools.lock);
1370     if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
1371         UvmEventEntry entry;
1372         UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
1373         memset(&entry, 0, sizeof(entry));
1374 
1375         info->eventType      = UvmEventTypeFatalFault;
1376         info->processorIndex = uvm_id_value(gpu_id);
1377         info->timeStamp      = NV_GETTIME();
1378         info->address        = buffer_entry->fault_address;
1379         info->accessType     = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
1380         info->faultType      = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
1381         info->reason         = reason;
1382 
1383         uvm_tools_record_event(va_space, &entry);
1384     }
1385     uvm_up_read(&va_space->tools.lock);
1386 }
1387 
1388 void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
1389                                 NvU64 address,
1390                                 size_t region_size,
1391                                 const uvm_processor_mask_t *processors)
1392 {
1393     UVM_ASSERT(address);
1394     UVM_ASSERT(PAGE_ALIGNED(address));
1395     UVM_ASSERT(region_size > 0);
1396 
1397     uvm_assert_rwsem_locked(&va_space->lock);
1398 
1399     if (!va_space->tools.enabled)
1400         return;
1401 
1402     uvm_down_read(&va_space->tools.lock);
1403     if (tools_is_event_enabled(va_space, UvmEventTypeThrashingDetected)) {
1404         UvmEventEntry entry;
1405         UvmEventThrashingDetectedInfo *info = &entry.eventData.thrashing;
1406         memset(&entry, 0, sizeof(entry));
1407 
1408         info->eventType = UvmEventTypeThrashingDetected;
1409         info->address   = address;
1410         info->size      = region_size;
1411         info->timeStamp = NV_GETTIME();
        bitmap_copy((unsigned long *)&info->processors, processors->bitmap, UVM_ID_MAX_PROCESSORS);
1413 
1414         uvm_tools_record_event(va_space, &entry);
1415     }
1416     uvm_up_read(&va_space->tools.lock);
1417 }
1418 
1419 void uvm_tools_record_throttling_start(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1420 {
1421     UVM_ASSERT(address);
1422     UVM_ASSERT(PAGE_ALIGNED(address));
1423     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1424 
1425     uvm_assert_rwsem_locked(&va_space->lock);
1426 
1427     if (!va_space->tools.enabled)
1428         return;
1429 
1430     uvm_down_read(&va_space->tools.lock);
1431     if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingStart)) {
1432         UvmEventEntry entry;
1433         UvmEventThrottlingStartInfo *info = &entry.eventData.throttlingStart;
1434         memset(&entry, 0, sizeof(entry));
1435 
1436         info->eventType      = UvmEventTypeThrottlingStart;
1437         info->processorIndex = uvm_id_value(processor);
1438         info->address        = address;
1439         info->timeStamp      = NV_GETTIME();
1440 
1441         uvm_tools_record_event(va_space, &entry);
1442     }
1443     uvm_up_read(&va_space->tools.lock);
1444 }
1445 
1446 void uvm_tools_record_throttling_end(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1447 {
1448     UVM_ASSERT(address);
1449     UVM_ASSERT(PAGE_ALIGNED(address));
1450     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1451 
1452     uvm_assert_rwsem_locked(&va_space->lock);
1453 
1454     if (!va_space->tools.enabled)
1455         return;
1456 
1457     uvm_down_read(&va_space->tools.lock);
1458     if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingEnd)) {
1459         UvmEventEntry entry;
1460         UvmEventThrottlingEndInfo *info = &entry.eventData.throttlingEnd;
1461         memset(&entry, 0, sizeof(entry));
1462 
1463         info->eventType      = UvmEventTypeThrottlingEnd;
1464         info->processorIndex = uvm_id_value(processor);
1465         info->address        = address;
1466         info->timeStamp      = NV_GETTIME();
1467 
1468         uvm_tools_record_event(va_space, &entry);
1469     }
1470     uvm_up_read(&va_space->tools.lock);
1471 }
1472 
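// Deferred notification for MapRemote events of GPU mappings. Runs on the
// tools event queue once the originating push has completed: it emits one
// event per recorded mapping, then frees the per-mapping and per-push
// bookkeeping.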
1473 static void record_map_remote_events(void *args)
1474 {
1475     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)args;
1476     map_remote_data_t *map_remote, *next;
1477     UvmEventEntry entry;
1478     uvm_va_space_t *va_space = block_map_remote->va_space;
1479 
1480     memset(&entry, 0, sizeof(entry));
1481 
1482     entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1483     entry.eventData.mapRemote.srcIndex       = uvm_id_value(block_map_remote->src);
1484     entry.eventData.mapRemote.dstIndex       = uvm_id_value(block_map_remote->dst);
1485     entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
1486     entry.eventData.mapRemote.timeStamp      = block_map_remote->timestamp;
1487 
1488     uvm_down_read(&va_space->tools.lock);
1489     list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
1490         list_del(&map_remote->events_node);
1491 
1492         entry.eventData.mapRemote.address      = map_remote->address;
1493         entry.eventData.mapRemote.size         = map_remote->size;
1494         entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
1495         kmem_cache_free(g_tools_map_remote_data_cache, map_remote);
1496 
1497         uvm_tools_record_event(va_space, &entry);
1498     }
1499     uvm_up_read(&va_space->tools.lock);
1500 
1501     UVM_ASSERT(list_empty(&block_map_remote->events));
1502     kmem_cache_free(g_tools_block_map_remote_data_cache, block_map_remote);
1503 }
1504 
1505 static void record_map_remote_events_entry(void *args)
1506 {
1507     UVM_ENTRY_VOID(record_map_remote_events(args));
1508 }
1509 
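// Push on_complete callback for deferred MapRemote events. Reads back the GPU
// timestamps written by the push, drops the channel's pending event reference,
// and schedules record_map_remote_events() on the tools event queue.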
1510 static void on_map_remote_complete(void *ptr)
1511 {
1512     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)ptr;
1513     map_remote_data_t *map_remote;
1514 
1515     // Only GPU mappings use the deferred mechanism
1516     UVM_ASSERT(UVM_ID_IS_GPU(block_map_remote->src));
1517     list_for_each_entry(map_remote, &block_map_remote->events, events_node)
1518         map_remote->timestamp_gpu = *map_remote->timestamp_gpu_addr;
1519 
1520     nv_kthread_q_item_init(&block_map_remote->queue_item, record_map_remote_events_entry, ptr);
1521 
1522     uvm_spin_lock(&g_tools_channel_list_lock);
1523     remove_pending_event_for_channel(block_map_remote->channel);
1524     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_map_remote->queue_item);
1525     uvm_spin_unlock(&g_tools_channel_list_lock);
1526 }
1527 
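// MapRemote events for CPU mappings are recorded immediately. GPU mappings are
// instead batched per push and recorded from on_map_remote_complete() once the
// push finishes, so that each event can carry a GPU timestamp.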
1528 void uvm_tools_record_map_remote(uvm_va_block_t *va_block,
1529                                  uvm_push_t *push,
1530                                  uvm_processor_id_t processor,
1531                                  uvm_processor_id_t residency,
1532                                  NvU64 address,
1533                                  size_t region_size,
1534                                  UvmEventMapRemoteCause cause)
1535 {
1536     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1537 
1538     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1539     UVM_ASSERT(UVM_ID_IS_VALID(residency));
1540     UVM_ASSERT(cause != UvmEventMapRemoteCauseInvalid);
1541 
1542     uvm_assert_rwsem_locked(&va_space->lock);
1543 
1544     if (!va_space->tools.enabled)
1545         return;
1546 
1547     uvm_down_read(&va_space->tools.lock);
1548     if (!tools_is_event_enabled(va_space, UvmEventTypeMapRemote))
1549         goto done;
1550 
1551     if (UVM_ID_IS_CPU(processor)) {
1552         UvmEventEntry entry;
1553         memset(&entry, 0, sizeof(entry));
1554 
1555         entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1556         entry.eventData.mapRemote.srcIndex       = uvm_id_value(processor);
1557         entry.eventData.mapRemote.dstIndex       = uvm_id_value(residency);
1558         entry.eventData.mapRemote.mapRemoteCause = cause;
1559         entry.eventData.mapRemote.timeStamp      = NV_GETTIME();
1560         entry.eventData.mapRemote.address        = address;
1561         entry.eventData.mapRemote.size           = region_size;
1562         entry.eventData.mapRemote.timeStampGpu   = 0;
1563 
1564         UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);
1565 
1566         uvm_tools_record_event(va_space, &entry);
1567     }
1568     else {
1569         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1570         block_map_remote_data_t *block_map_remote;
1571         map_remote_data_t *map_remote;
1572 
1573         // The first call on this pushbuffer creates the per-VA block structure
1574         if (push_info->on_complete == NULL) {
1575             UVM_ASSERT(push_info->on_complete_data == NULL);
1576 
1577             block_map_remote = kmem_cache_alloc(g_tools_block_map_remote_data_cache, NV_UVM_GFP_FLAGS);
1578             if (block_map_remote == NULL)
1579                 goto done;
1580 
1581             block_map_remote->src = processor;
1582             block_map_remote->dst = residency;
1583             block_map_remote->cause = cause;
1584             block_map_remote->timestamp = NV_GETTIME();
1585             block_map_remote->va_space = va_space;
1586             block_map_remote->channel = push->channel;
1587             INIT_LIST_HEAD(&block_map_remote->events);
1588 
1589             push_info->on_complete_data = block_map_remote;
1590             push_info->on_complete = on_map_remote_complete;
1591 
1592             uvm_spin_lock(&g_tools_channel_list_lock);
1593             add_pending_event_for_channel(block_map_remote->channel);
1594             uvm_spin_unlock(&g_tools_channel_list_lock);
1595         }
1596         else {
1597             block_map_remote = push_info->on_complete_data;
1598         }
1599         UVM_ASSERT(block_map_remote);
1600 
1601         map_remote = kmem_cache_alloc(g_tools_map_remote_data_cache, NV_UVM_GFP_FLAGS);
1602         if (map_remote == NULL)
1603             goto done;
1604 
1605         map_remote->address = address;
1606         map_remote->size = region_size;
1607         map_remote->timestamp_gpu_addr = uvm_push_timestamp(push);
1608 
1609         list_add_tail(&map_remote->events_node, &block_map_remote->events);
1610     }
1611 
1612 done:
1613     uvm_up_read(&va_space->tools.lock);
1614 }
1615 
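// Handler for the UVM_TOOLS_INIT_EVENT_TRACKER ioctl. Validates the supplied
// UVM file descriptor, pins the user-provided queue and control buffers (or
// the counter buffer), and publishes the tracker through filp->private_data.
// A non-zero queueBufferSize selects an event queue tracker; zero selects a
// counter tracker.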
1616 NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp)
1617 {
1618     NV_STATUS status = NV_OK;
1619     uvm_tools_event_tracker_t *event_tracker;
1620 
1621     event_tracker = nv_kmem_cache_zalloc(g_tools_event_tracker_cache, NV_UVM_GFP_FLAGS);
1622     if (event_tracker == NULL)
1623         return NV_ERR_NO_MEMORY;
1624 
1625     event_tracker->uvm_file = fget(params->uvmFd);
1626     if (event_tracker->uvm_file == NULL) {
1627         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
1628         goto fail;
1629     }
1630 
1631     if (!uvm_file_is_nvidia_uvm(event_tracker->uvm_file)) {
1632         fput(event_tracker->uvm_file);
1633         event_tracker->uvm_file = NULL;
1634         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
1635         goto fail;
1636     }
1637 
1638     // We don't use uvm_fd_va_space() here because tools can work
1639     // without an associated va_space_mm.
1640     if (!uvm_fd_get_type(event_tracker->uvm_file, UVM_FD_VA_SPACE)) {
1641         fput(event_tracker->uvm_file);
1642         event_tracker->uvm_file = NULL;
1643         status = NV_ERR_ILLEGAL_ACTION;
1644         goto fail;
1645     }
1646 
1647     event_tracker->is_queue = params->queueBufferSize != 0;
1648     if (event_tracker->is_queue) {
1649         uvm_tools_queue_t *queue = &event_tracker->queue;
1650         uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
1651         init_waitqueue_head(&queue->wait_queue);
1652 
1653         if (params->queueBufferSize > UINT_MAX) {
1654             status = NV_ERR_INVALID_ARGUMENT;
1655             goto fail;
1656         }
1657 
1658         queue->queue_buffer_count = (NvU32)params->queueBufferSize;
1659         queue->notification_threshold = queue->queue_buffer_count / 2;
1660 
        // queue_buffer_count must be a power of 2 and at least 2
1662         if (!is_power_of_2(queue->queue_buffer_count) || queue->queue_buffer_count < 2) {
1663             status = NV_ERR_INVALID_ARGUMENT;
1664             goto fail;
1665         }
1666 
1667         status = map_user_pages(params->queueBuffer,
1668                                 queue->queue_buffer_count * sizeof(UvmEventEntry),
1669                                 (void **)&queue->queue,
1670                                 &queue->queue_buffer_pages);
1671         if (status != NV_OK)
1672             goto fail;
1673 
1674         status = map_user_pages(params->controlBuffer,
1675                                 sizeof(UvmToolsEventControlData),
1676                                 (void **)&queue->control,
1677                                 &queue->control_buffer_pages);
1678 
1679         if (status != NV_OK)
1680             goto fail;
1681     }
1682     else {
1683         uvm_tools_counter_t *counter = &event_tracker->counter;
1684         counter->all_processors = params->allProcessors;
1685         counter->processor = params->processor;
1686         status = map_user_pages(params->controlBuffer,
1687                                 sizeof(NvU64) * UVM_TOTAL_COUNTERS,
1688                                 (void **)&counter->counters,
1689                                 &counter->counter_buffer_pages);
1690         if (status != NV_OK)
1691             goto fail;
1692     }
1693 
1694     if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
1695         status = NV_ERR_INVALID_ARGUMENT;
1696         goto fail;
1697     }
1698 
1699     return NV_OK;
1700 
1701 fail:
1702     destroy_event_tracker(event_tracker);
1703     return status;
1704 }
1705 
1706 NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp)
1707 {
1708     UvmToolsEventControlData *ctrl;
1709     uvm_tools_queue_snapshot_t sn;
1710     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1711 
1712     if (!tracker_is_queue(event_tracker))
1713         return NV_ERR_INVALID_ARGUMENT;
1714 
1715     uvm_spin_lock(&event_tracker->queue.lock);
1716 
1717     event_tracker->queue.notification_threshold = params->notificationThreshold;
1718 
1719     ctrl = event_tracker->queue.control;
1720     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
1721     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
1722 
1723     if (queue_needs_wakeup(&event_tracker->queue, &sn))
1724         wake_up_all(&event_tracker->queue.wait_queue);
1725 
1726     uvm_spin_unlock(&event_tracker->queue.lock);
1727 
1728     return NV_OK;
1729 }
1730 
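// Register or unregister the fault and migration perf event callbacks so that
// they match the events and counters currently subscribed in this VA space.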
1731 static NV_STATUS tools_update_perf_events_callbacks(uvm_va_space_t *va_space)
1732 {
1733     NV_STATUS status;
1734 
1735     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
1736     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
1737 
1738     if (tools_is_fault_callback_needed(va_space)) {
1739         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
1740             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
1741                                                              UVM_PERF_EVENT_FAULT,
1742                                                              uvm_tools_record_fault);
1743 
1744             if (status != NV_OK)
1745                 return status;
1746         }
1747     }
1748     else {
1749         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
1750             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
1751                                                       UVM_PERF_EVENT_FAULT,
1752                                                       uvm_tools_record_fault);
1753         }
1754     }
1755 
1756     if (tools_is_migration_callback_needed(va_space)) {
1757         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
1758             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
1759                                                              UVM_PERF_EVENT_MIGRATION,
1760                                                              uvm_tools_record_migration);
1761 
1762             if (status != NV_OK)
1763                 return status;
1764         }
1765     }
1766     else {
1767         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
1768             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
1769                                                       UVM_PERF_EVENT_MIGRATION,
1770                                                       uvm_tools_record_migration);
1771         }
1772     }
1773 
1774     return NV_OK;
1775 }
1776 
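// Recompute whether tools are enabled for this VA space after a subscription
// change: update the perf event callbacks and add or remove the VA space from
// the global list of VA spaces with tools enabled.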
1777 static NV_STATUS tools_update_status(uvm_va_space_t *va_space)
1778 {
1779     NV_STATUS status;
1780     bool should_be_enabled;
1781     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
1782     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
1783     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
1784 
1785     status = tools_update_perf_events_callbacks(va_space);
1786     if (status != NV_OK)
1787         return status;
1788 
1789     should_be_enabled = tools_are_enabled(va_space);
1790     if (should_be_enabled != va_space->tools.enabled) {
1791         if (should_be_enabled)
1792             list_add(&va_space->tools.node, &g_tools_va_space_list);
1793         else
1794             list_del(&va_space->tools.node);
1795 
1796         va_space->tools.enabled = should_be_enabled;
1797     }
1798 
1799     return NV_OK;
1800 }
1801 
1802 #define EVENT_FLAGS_BITS (sizeof(NvU64) * 8)
1803 
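// Returns true if event_flags requests any event id outside the valid range.
// Test-only event ids are only accepted when the module was loaded with
// uvm_enable_builtin_tests=1.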
1804 static bool mask_contains_invalid_events(NvU64 event_flags)
1805 {
1806     const unsigned long *event_mask = (const unsigned long *)&event_flags;
1807     DECLARE_BITMAP(helper_mask, EVENT_FLAGS_BITS);
1808     DECLARE_BITMAP(valid_events_mask, EVENT_FLAGS_BITS);
1809     DECLARE_BITMAP(tests_events_mask, EVENT_FLAGS_BITS);
1810 
1811     bitmap_zero(tests_events_mask, EVENT_FLAGS_BITS);
1812     bitmap_set(tests_events_mask,
1813                UvmEventTestTypesFirst,
1814                UvmEventTestTypesLast - UvmEventTestTypesFirst + 1);
1815 
1816     bitmap_zero(valid_events_mask, EVENT_FLAGS_BITS);
1817     bitmap_set(valid_events_mask, 1, UvmEventNumTypes - 1);
1818 
1819     if (uvm_enable_builtin_tests)
1820         bitmap_or(valid_events_mask, valid_events_mask, tests_events_mask, EVENT_FLAGS_BITS);
1821 
1822     // Make sure that test event ids do not overlap with regular events
1823     BUILD_BUG_ON(UvmEventTestTypesFirst < UvmEventNumTypes);
1824     BUILD_BUG_ON(UvmEventTestTypesFirst > UvmEventTestTypesLast);
1825     BUILD_BUG_ON(UvmEventTestTypesLast >= UvmEventNumTypesAll);
1826 
1827     // Make sure that no test event ever changes the size of UvmEventEntry
1828     BUILD_BUG_ON(sizeof(((UvmEventEntry *)NULL)->testEventData) >
1829                  sizeof(((UvmEventEntry *)NULL)->eventData));
1830     BUILD_BUG_ON(UvmEventNumTypesAll > EVENT_FLAGS_BITS);
1831 
1832     if (!bitmap_andnot(helper_mask, event_mask, valid_events_mask, EVENT_FLAGS_BITS))
1833         return false;
1834 
1835     if (!uvm_enable_builtin_tests && bitmap_and(helper_mask, event_mask, tests_events_mask, EVENT_FLAGS_BITS))
1836         UVM_INFO_PRINT("Event index not found. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
1837 
1838     return true;
1839 }
1840 
1841 NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp)
1842 {
1843     uvm_va_space_t *va_space;
1844     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1845     NV_STATUS status = NV_OK;
1846     NvU64 inserted_lists;
1847 
1848     if (!tracker_is_queue(event_tracker))
1849         return NV_ERR_INVALID_ARGUMENT;
1850 
1851     if (mask_contains_invalid_events(params->eventTypeFlags))
1852         return NV_ERR_INVALID_ARGUMENT;
1853 
1854     va_space = tools_event_tracker_va_space(event_tracker);
1855 
1856     uvm_down_write(&g_tools_va_space_list_lock);
1857     uvm_down_write(&va_space->perf_events.lock);
1858     uvm_down_write(&va_space->tools.lock);
1859 
1860     insert_event_tracker(va_space,
1861                          event_tracker->queue.queue_nodes,
1862                          UvmEventNumTypesAll,
1863                          params->eventTypeFlags,
1864                          &event_tracker->queue.subscribed_queues,
1865                          va_space->tools.queues,
1866                          &inserted_lists);
1867 
1868     // perform any necessary registration
1869     status = tools_update_status(va_space);
1870     if (status != NV_OK) {
1871         // on error, unregister any newly registered event
1872         remove_event_tracker(va_space,
1873                              event_tracker->queue.queue_nodes,
                             UvmEventNumTypesAll,
1875                              inserted_lists,
1876                              &event_tracker->queue.subscribed_queues);
1877     }
1878 
1879     uvm_up_write(&va_space->tools.lock);
1880     uvm_up_write(&va_space->perf_events.lock);
1881     uvm_up_write(&g_tools_va_space_list_lock);
1882 
1883     return status;
1884 }
1885 
1886 NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp)
1887 {
1888     NV_STATUS status;
1889     uvm_va_space_t *va_space;
1890     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1891 
1892     if (!tracker_is_queue(event_tracker))
1893         return NV_ERR_INVALID_ARGUMENT;
1894 
1895     va_space = tools_event_tracker_va_space(event_tracker);
1896 
1897     uvm_down_write(&g_tools_va_space_list_lock);
1898     uvm_down_write(&va_space->perf_events.lock);
1899     uvm_down_write(&va_space->tools.lock);
1900     remove_event_tracker(va_space,
1901                          event_tracker->queue.queue_nodes,
1902                          UvmEventNumTypesAll,
1903                          params->eventTypeFlags,
1904                          &event_tracker->queue.subscribed_queues);
1905 
1906     // de-registration should not fail
1907     status = tools_update_status(va_space);
1908     UVM_ASSERT(status == NV_OK);
1909 
1910     uvm_up_write(&va_space->tools.lock);
1911     uvm_up_write(&va_space->perf_events.lock);
1912     uvm_up_write(&g_tools_va_space_list_lock);
1913     return NV_OK;
1914 }
1915 
1916 NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp)
1917 {
1918     uvm_va_space_t *va_space;
1919     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1920     NV_STATUS status = NV_OK;
1921     NvU64 inserted_lists;
1922 
1923     if (!tracker_is_counter(event_tracker))
1924         return NV_ERR_INVALID_ARGUMENT;
1925 
1926     va_space = tools_event_tracker_va_space(event_tracker);
1927 
1928     uvm_down_write(&g_tools_va_space_list_lock);
1929     uvm_down_write(&va_space->perf_events.lock);
1930     uvm_down_write(&va_space->tools.lock);
1931 
1932     insert_event_tracker(va_space,
1933                          event_tracker->counter.counter_nodes,
1934                          UVM_TOTAL_COUNTERS,
1935                          params->counterTypeFlags,
1936                          &event_tracker->counter.subscribed_counters,
1937                          va_space->tools.counters,
1938                          &inserted_lists);
1939 
1940     // perform any necessary registration
1941     status = tools_update_status(va_space);
1942     if (status != NV_OK) {
1943         remove_event_tracker(va_space,
1944                              event_tracker->counter.counter_nodes,
1945                              UVM_TOTAL_COUNTERS,
1946                              inserted_lists,
1947                              &event_tracker->counter.subscribed_counters);
1948     }
1949 
1950     uvm_up_write(&va_space->tools.lock);
1951     uvm_up_write(&va_space->perf_events.lock);
1952     uvm_up_write(&g_tools_va_space_list_lock);
1953 
1954     return status;
1955 }
1956 
1957 NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp)
1958 {
1959     NV_STATUS status;
1960     uvm_va_space_t *va_space;
1961     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1962 
1963     if (!tracker_is_counter(event_tracker))
1964         return NV_ERR_INVALID_ARGUMENT;
1965 
1966     va_space = tools_event_tracker_va_space(event_tracker);
1967 
1968     uvm_down_write(&g_tools_va_space_list_lock);
1969     uvm_down_write(&va_space->perf_events.lock);
1970     uvm_down_write(&va_space->tools.lock);
1971     remove_event_tracker(va_space,
1972                          event_tracker->counter.counter_nodes,
1973                          UVM_TOTAL_COUNTERS,
1974                          params->counterTypeFlags,
1975                          &event_tracker->counter.subscribed_counters);
1976 
1977     // de-registration should not fail
1978     status = tools_update_status(va_space);
1979     UVM_ASSERT(status == NV_OK);
1980 
1981     uvm_up_write(&va_space->tools.lock);
1982     uvm_up_write(&va_space->perf_events.lock);
1983     uvm_up_write(&g_tools_va_space_list_lock);
1984 
1985     return NV_OK;
1986 }
1987 
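// Copy a single chunk between the staging buffer and the VA block, retrying
// the read or write as required while the block lock is held.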
1988 static NV_STATUS tools_access_va_block(uvm_va_block_t *va_block,
1989                                        uvm_va_block_context_t *block_context,
1990                                        NvU64 target_va,
1991                                        NvU64 size,
1992                                        bool is_write,
1993                                        uvm_mem_t *stage_mem)
1994 {
1995     if (is_write) {
1996         return UVM_VA_BLOCK_LOCK_RETRY(va_block,
1997                                        NULL,
1998                                        uvm_va_block_write_from_cpu(va_block, block_context, target_va, stage_mem, size));
1999     }
2000     else {
2001         return UVM_VA_BLOCK_LOCK_RETRY(va_block,
2002                                        NULL,
                                       uvm_va_block_read_to_cpu(va_block, stage_mem, target_va, size));
    }
2006 }
2007 
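// Common implementation of the tools read/write process memory ioctls. Data is
// copied between the user buffer and the target VA space one page at a time
// through a CPU-mapped sysmem staging allocation. Unless Confidential
// Computing is enabled, each GPU in the VA space is retained and gets the
// staging memory mapped so the block copy can be serviced from any residency,
// and ECC errors are checked on all GPUs after each copy.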
2008 static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
2009                                              NvU64 target_va,
2010                                              NvU64 size,
2011                                              NvU64 user_va,
2012                                              NvU64 *bytes,
2013                                              bool is_write)
2014 {
2015     NV_STATUS status;
2016     uvm_mem_t *stage_mem = NULL;
2017     void *stage_addr;
2018     uvm_global_processor_mask_t *retained_global_gpus = NULL;
2019     uvm_global_processor_mask_t *global_gpus = NULL;
2020     uvm_va_block_context_t *block_context = NULL;
2021     struct mm_struct *mm = NULL;
2022 
2023     retained_global_gpus = uvm_kvmalloc(sizeof(*retained_global_gpus));
2024     if (retained_global_gpus == NULL)
2025         return NV_ERR_NO_MEMORY;
2026 
2027     uvm_global_processor_mask_zero(retained_global_gpus);
2028 
2029     global_gpus = uvm_kvmalloc(sizeof(*global_gpus));
2030     if (global_gpus == NULL) {
2031         status = NV_ERR_NO_MEMORY;
2032         goto exit;
2033     }
2034 
2035     mm = uvm_va_space_mm_or_current_retain(va_space);
2036 
2037     status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, mm, &stage_mem);
2038     if (status != NV_OK)
2039         goto exit;
2040 
2041     block_context = uvm_va_block_context_alloc(mm);
2042     if (!block_context) {
2043         status = NV_ERR_NO_MEMORY;
2044         goto exit;
2045     }
2046 
2047     stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem);
2048     *bytes = 0;
2049 
2050     while (*bytes < size) {
2051         uvm_gpu_t *gpu;
2052         uvm_va_block_t *block;
2053         void *user_va_start = (void *) (user_va + *bytes);
2054         NvU64 target_va_start = target_va + *bytes;
2055         NvU64 bytes_left = size - *bytes;
2056         NvU64 page_offset = target_va_start & (PAGE_SIZE - 1);
2057         NvU64 bytes_now = min(bytes_left, (NvU64)(PAGE_SIZE - page_offset));
2058 
2059         if (is_write) {
2060             NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now);
2061             if (remaining != 0)  {
2062                 status = NV_ERR_INVALID_ARGUMENT;
2063                 goto exit;
2064             }
2065         }
2066 
2067         if (mm)
2068             uvm_down_read_mmap_lock(mm);
2069 
2070         // The RM flavor of the lock is needed to perform ECC checks.
2071         uvm_va_space_down_read_rm(va_space);
2072         if (mm)
2073             status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block_context->hmm.vma, &block);
2074         else
2075             status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);
2076 
2077         if (status != NV_OK)
2078             goto unlock_and_exit;
2079 
2080         uvm_va_space_global_gpus(va_space, global_gpus);
2081 
2082         for_each_global_gpu_in_mask(gpu, global_gpus) {
2083 
2084             // When CC is enabled, the staging memory cannot be mapped on the
2085             // GPU (it is protected sysmem), but it is still used to store the
2086             // unencrypted version of the page contents when the page is
2087             // resident on vidmem.
2088             if (uvm_conf_computing_mode_enabled(gpu)) {
2089                 UVM_ASSERT(uvm_global_processor_mask_empty(retained_global_gpus));
2090 
2091                 break;
2092             }
2093             if (uvm_global_processor_mask_test_and_set(retained_global_gpus, gpu->global_id))
2094                 continue;
2095 
            // Retaining each GPU ensures that the staging memory is freed
            // before any of the GPUs it is mapped on is unregistered. Each
            // GPU is retained once.
2099             uvm_gpu_retain(gpu);
2100 
2101             // Accessing the VA block may result in copying data between the CPU
2102             // and a GPU. Conservatively add virtual mappings to all the GPUs
2103             // (even if those mappings may never be used) as tools read/write is
2104             // not on a performance critical path.
2105             status = uvm_mem_map_gpu_kernel(stage_mem, gpu);
2106             if (status != NV_OK)
2107                 goto unlock_and_exit;
2108         }
2109 
        // Make sure a CPU-resident page has an up-to-date struct page pointer.
2111         if (uvm_va_block_is_hmm(block)) {
2112             status = uvm_hmm_va_block_update_residency_info(block, mm, UVM_PAGE_ALIGN_DOWN(target_va_start), true);
2113             if (status != NV_OK)
2114                 goto unlock_and_exit;
2115         }
2116 
2117         status = tools_access_va_block(block, block_context, target_va_start, bytes_now, is_write, stage_mem);
2118 
2119         // For simplicity, check for ECC errors on all GPUs registered in the VA
2120         // space
2121         if (status == NV_OK)
2122             status = uvm_global_mask_check_ecc_error(global_gpus);
2123 
2124         uvm_va_space_up_read_rm(va_space);
2125         if (mm)
2126             uvm_up_read_mmap_lock(mm);
2127 
2128         if (status != NV_OK)
2129             goto exit;
2130 
2131         if (!is_write) {
2132             NvU64 remaining;
2133 
2134             // Prevent processor speculation prior to accessing user-mapped
2135             // memory to avoid leaking information from side-channel attacks.
2136             // Under speculation, a valid VA range which does not contain
2137             // target_va could be used, and the block index could run off the
2138             // end of the array. Information about the state of that kernel
2139             // memory could be inferred if speculative execution gets to the
2140             // point where the data is copied out.
2141             nv_speculation_barrier();
2142 
2143             remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now);
2144             if (remaining > 0) {
2145                 status = NV_ERR_INVALID_ARGUMENT;
2146                 goto exit;
2147             }
2148         }
2149 
2150         *bytes += bytes_now;
2151     }
2152 
2153 unlock_and_exit:
2154     if (status != NV_OK) {
2155         uvm_va_space_up_read_rm(va_space);
2156         if (mm)
2157             uvm_up_read_mmap_lock(mm);
2158     }
2159 
2160 exit:
2161     uvm_va_block_context_free(block_context);
2162 
2163     uvm_mem_free(stage_mem);
2164 
2165     uvm_global_mask_release(retained_global_gpus);
2166 
2167     uvm_va_space_mm_or_current_release(va_space, mm);
2168 
2169     uvm_kvfree(global_gpus);
2170     uvm_kvfree(retained_global_gpus);
2171 
2172     return status;
2173 }
2174 
2175 NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp)
2176 {
2177     return tools_access_process_memory(uvm_va_space_get(filp),
2178                                        params->targetVa,
2179                                        params->size,
2180                                        params->buffer,
2181                                        &params->bytesRead,
2182                                        false);
2183 }
2184 
2185 NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp)
2186 {
2187     return tools_access_process_memory(uvm_va_space_get(filp),
2188                                        params->targetVa,
2189                                        params->size,
2190                                        params->buffer,
2191                                        &params->bytesWritten,
2192                                        true);
2193 }
2194 
2195 NV_STATUS uvm_test_inject_tools_event(UVM_TEST_INJECT_TOOLS_EVENT_PARAMS *params, struct file *filp)
2196 {
2197     NvU32 i;
2198     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2199 
2200     if (params->entry.eventData.eventType >= UvmEventNumTypesAll)
2201         return NV_ERR_INVALID_ARGUMENT;
2202 
2203     uvm_down_read(&va_space->tools.lock);
2204     for (i = 0; i < params->count; i++)
2205         uvm_tools_record_event(va_space, &params->entry);
2206     uvm_up_read(&va_space->tools.lock);
2207     return NV_OK;
2208 }
2209 
2210 NV_STATUS uvm_test_increment_tools_counter(UVM_TEST_INCREMENT_TOOLS_COUNTER_PARAMS *params, struct file *filp)
2211 {
2212     NvU32 i;
2213     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2214 
2215     if (params->counter >= UVM_TOTAL_COUNTERS)
2216         return NV_ERR_INVALID_ARGUMENT;
2217 
2218     uvm_down_read(&va_space->tools.lock);
2219     for (i = 0; i < params->count; i++)
2220         uvm_tools_inc_counter(va_space, params->counter, params->amount, &params->processor);
2221     uvm_up_read(&va_space->tools.lock);
2222 
2223     return NV_OK;
2224 }
2225 
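// Build a table of processor UUIDs indexed by UVM processor id: the CPU at
// index UVM_ID_CPU_VALUE followed by the registered GPUs. params->count is set
// to the highest used index plus one.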
2226 NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS *params, struct file *filp)
2227 {
2228     NvProcessorUuid *uuids;
2229     NvU64 remaining;
2230     uvm_gpu_t *gpu;
2231     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2232 
2233     uuids = uvm_kvmalloc_zero(sizeof(NvProcessorUuid) * UVM_ID_MAX_PROCESSORS);
2234     if (uuids == NULL)
2235         return NV_ERR_NO_MEMORY;
2236 
2237     uvm_processor_uuid_copy(&uuids[UVM_ID_CPU_VALUE], &NV_PROCESSOR_UUID_CPU_DEFAULT);
2238     params->count = 1;
2239 
2240     uvm_va_space_down_read(va_space);
2241     for_each_va_space_gpu(gpu, va_space) {
2242         uvm_processor_uuid_copy(&uuids[uvm_id_value(gpu->id)], uvm_gpu_uuid(gpu));
2243         if (uvm_id_value(gpu->id) + 1 > params->count)
2244             params->count = uvm_id_value(gpu->id) + 1;
2245     }
2246     uvm_va_space_up_read(va_space);
2247 
2248     remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * params->count);
2249     uvm_kvfree(uuids);
2250 
2251     if (remaining != 0)
2252         return NV_ERR_INVALID_ADDRESS;
2253 
2254     return NV_OK;
2255 }
2256 
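// Flush all pending tools events: schedule the completion callbacks for every
// channel tracked by tools and wait for the resulting items on the tools event
// queue to finish.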
2257 void uvm_tools_flush_events(void)
2258 {
2259     tools_schedule_completed_events();
2260 
2261     nv_kthread_q_flush(&g_tools_queue);
2262 }
2263 
2264 NV_STATUS uvm_api_tools_flush_events(UVM_TOOLS_FLUSH_EVENTS_PARAMS *params, struct file *filp)
2265 {
2266     uvm_tools_flush_events();
2267     return NV_OK;
2268 }
2269 
2270 NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp)
2271 {
2272     NV_STATUS status = NV_OK;
2273     uvm_gpu_t *gpu = NULL;
2274     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2275 
2276     gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpuUuid);
2277     if (!gpu)
2278         return NV_ERR_INVALID_DEVICE;
2279 
2280     // Wait for register-based fault clears to queue the replay event
2281     if (!gpu->parent->has_clear_faulted_channel_method) {
2282         uvm_gpu_non_replayable_faults_isr_lock(gpu->parent);
2283         uvm_gpu_non_replayable_faults_isr_unlock(gpu->parent);
2284     }
2285 
2286     // Wait for pending fault replay methods to complete (replayable faults on
2287     // all GPUs, and non-replayable faults on method-based GPUs).
2288     status = uvm_channel_manager_wait(gpu->channel_manager);
2289 
2290     // Flush any pending events even if (status != NV_OK)
2291     uvm_tools_flush_events();
2292     uvm_gpu_release(gpu);
2293 
2294     return status;
2295 }
2296 
2297 static const struct file_operations uvm_tools_fops =
2298 {
2299     .open            = uvm_tools_open_entry,
2300     .release         = uvm_tools_release_entry,
2301     .unlocked_ioctl  = uvm_tools_unlocked_ioctl_entry,
2302 #if NVCPU_IS_X86_64
2303     .compat_ioctl    = uvm_tools_unlocked_ioctl_entry,
2304 #endif
2305     .poll            = uvm_tools_poll_entry,
2306     .owner           = THIS_MODULE,
2307 };
2308 
2309 static void _uvm_tools_destroy_cache_all(void)
2310 {
    // The pointers are initialized to NULL, so it is safe to call
    // kmem_cache_destroy_safe() on all of them.
2313     kmem_cache_destroy_safe(&g_tools_event_tracker_cache);
2314     kmem_cache_destroy_safe(&g_tools_block_migration_data_cache);
2315     kmem_cache_destroy_safe(&g_tools_migration_data_cache);
2316     kmem_cache_destroy_safe(&g_tools_replay_data_cache);
2317     kmem_cache_destroy_safe(&g_tools_block_map_remote_data_cache);
2318     kmem_cache_destroy_safe(&g_tools_map_remote_data_cache);
2319 }
2320 
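// Create the tools kmem caches and the event kthread queue, then register the
// UVM tools character device on its reserved minor number.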
2321 int uvm_tools_init(dev_t uvm_base_dev)
2322 {
2323     dev_t uvm_tools_dev = MKDEV(MAJOR(uvm_base_dev), NVIDIA_UVM_TOOLS_MINOR_NUMBER);
2324     int ret = -ENOMEM; // This will be updated later if allocations succeed
2325 
2326     uvm_init_rwsem(&g_tools_va_space_list_lock, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
2327 
2328     g_tools_event_tracker_cache = NV_KMEM_CACHE_CREATE("uvm_tools_event_tracker_t",
2329                                                         uvm_tools_event_tracker_t);
2330     if (!g_tools_event_tracker_cache)
2331         goto err_cache_destroy;
2332 
2333     g_tools_block_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_migration_data_t",
2334                                                               block_migration_data_t);
2335     if (!g_tools_block_migration_data_cache)
2336         goto err_cache_destroy;
2337 
2338     g_tools_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_migration_data_t",
2339                                                         migration_data_t);
2340     if (!g_tools_migration_data_cache)
2341         goto err_cache_destroy;
2342 
2343     g_tools_replay_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_replay_data_t",
2344                                                      replay_data_t);
2345     if (!g_tools_replay_data_cache)
2346         goto err_cache_destroy;
2347 
2348     g_tools_block_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_map_remote_data_t",
2349                                                                block_map_remote_data_t);
2350     if (!g_tools_block_map_remote_data_cache)
2351         goto err_cache_destroy;
2352 
2353     g_tools_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_map_remote_data_t",
2354                                                          map_remote_data_t);
2355     if (!g_tools_map_remote_data_cache)
2356         goto err_cache_destroy;
2357 
2358     uvm_spin_lock_init(&g_tools_channel_list_lock, UVM_LOCK_ORDER_LEAF);
2359 
2360     ret = nv_kthread_q_init(&g_tools_queue, "UVM Tools Event Queue");
2361     if (ret < 0)
2362         goto err_cache_destroy;
2363 
2364     uvm_init_character_device(&g_uvm_tools_cdev, &uvm_tools_fops);
2365     ret = cdev_add(&g_uvm_tools_cdev, uvm_tools_dev, 1);
2366     if (ret != 0) {
2367         UVM_ERR_PRINT("cdev_add (major %u, minor %u) failed: %d\n", MAJOR(uvm_tools_dev),
2368                       MINOR(uvm_tools_dev), ret);
2369         goto err_stop_thread;
2370     }
2371 
2372     return ret;
2373 
2374 err_stop_thread:
2375     nv_kthread_q_stop(&g_tools_queue);
2376 
2377 err_cache_destroy:
2378     _uvm_tools_destroy_cache_all();
2379     return ret;
2380 }
2381 
2382 void uvm_tools_exit(void)
2383 {
2384     unsigned i;
2385     cdev_del(&g_uvm_tools_cdev);
2386 
2387     nv_kthread_q_stop(&g_tools_queue);
2388 
2389     for (i = 0; i < UvmEventNumTypesAll; ++i)
2390         UVM_ASSERT(g_tools_enabled_event_count[i] == 0);
2391 
2392     UVM_ASSERT(list_empty(&g_tools_va_space_list));
2393 
2394     _uvm_tools_destroy_cache_all();
2395 }
2396