1 /*******************************************************************************
2     Copyright (c) 2016-2022 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 #include "uvm_common.h"
24 #include "uvm_ioctl.h"
25 #include "uvm_gpu.h"
26 #include "uvm_hal.h"
27 #include "uvm_tools.h"
28 #include "uvm_va_space.h"
29 #include "uvm_api.h"
30 #include "uvm_hal_types.h"
31 #include "uvm_va_block.h"
32 #include "uvm_va_range.h"
33 #include "uvm_push.h"
34 #include "uvm_forward_decl.h"
35 #include "uvm_range_group.h"
36 #include "uvm_mem.h"
37 #include "nv_speculation_barrier.h"
38 
39 // We limit the number of times a page can be retained by the kernel
40 // to prevent the user from maliciously passing UVM tools the same page
41 // over and over again in an attempt to overflow the refcount.
42 #define MAX_PAGE_COUNT (1 << 20)
43 
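// Snapshot of the ring buffer cursors shared with user space through
// UvmToolsEventControlData. The event queue is a power-of-two ring buffer: the
// driver advances the put cursors as it produces entries and the user-space
// consumer advances the get cursors as it drains them (see enqueue_event() and
// queue_needs_wakeup() below).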
44 typedef struct
45 {
46     NvU32 get_ahead;
47     NvU32 get_behind;
48     NvU32 put_ahead;
49     NvU32 put_behind;
50 } uvm_tools_queue_snapshot_t;
51 
52 typedef struct
53 {
54     uvm_spinlock_t lock;
55     NvU64 subscribed_queues;
56     struct list_head queue_nodes[UvmEventNumTypesAll];
57 
58     struct page **queue_buffer_pages;
59     UvmEventEntry *queue;
60     NvU32 queue_buffer_count;
61     NvU32 notification_threshold;
62 
63     struct page **control_buffer_pages;
64     UvmToolsEventControlData *control;
65 
66     wait_queue_head_t wait_queue;
67     bool is_wakeup_get_valid;
68     NvU32 wakeup_get;
69 } uvm_tools_queue_t;
70 
71 typedef struct
72 {
73     struct list_head counter_nodes[UVM_TOTAL_COUNTERS];
74     NvU64 subscribed_counters;
75 
76     struct page **counter_buffer_pages;
77     NvU64 *counters;
78 
79     bool all_processors;
80     NvProcessorUuid processor;
81 } uvm_tools_counter_t;
82 
83 // private_data for /dev/nvidia-uvm-tools
84 typedef struct
85 {
86     bool is_queue;
87     struct file *uvm_file;
88     union
89     {
90         uvm_tools_queue_t queue;
91         uvm_tools_counter_t counter;
92     };
93 } uvm_tools_event_tracker_t;
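
// Note: filp->private_data is read with atomic_long_read() (see
// tools_event_tracker() below), so a reader either observes NULL or a pointer
// to a tracker. The is_queue flag selects which member of the union above is
// valid.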
94 
95 // Delayed events
96 //
97 // Events that require gpu timestamps for asynchronous operations use a delayed
98 // notification mechanism. Each event type registers a callback that is invoked
99 // from the update_progress channel routines. The callback then enqueues a
100 // work item that takes care of notifying the events. This module keeps a
101 // global list of channels with pending events. Other modules or user apps (via
102 // ioctl) may call uvm_tools_flush_events to update the progress of the channels
103 // in the list, as needed.
104 //
// User apps will need to flush events before removing gpus to avoid getting
// events with ids of gpus that have already been removed.
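//
// Rough sketch of the flow implemented below (illustrative only), using block
// migrations as the example:
//
//   uvm_tools_record_block_migration_begin()  - registers push_info->on_complete
//   on_block_migration_complete()             - runs when the push completes; reads the
//                                               GPU timestamps and schedules a work item
//                                               on g_tools_queue
//   record_migration_events()                 - runs in g_tools_queue; takes
//                                               va_space->tools.lock in read mode and
//                                               calls uvm_tools_record_event()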
107 
// This object describes the pending migration operations within a VA block
109 typedef struct
110 {
111     nv_kthread_q_item_t queue_item;
112     uvm_processor_id_t dst;
113     uvm_processor_id_t src;
114     uvm_va_space_t *va_space;
115 
116     uvm_channel_t *channel;
117     struct list_head events;
118     NvU64 start_timestamp_cpu;
119     NvU64 end_timestamp_cpu;
120     NvU64 *start_timestamp_gpu_addr;
121     NvU64 start_timestamp_gpu;
122     NvU64 range_group_id;
123 } block_migration_data_t;
124 
125 // This object represents a specific pending migration within a VA block
126 typedef struct
127 {
128     struct list_head events_node;
129     NvU64 bytes;
130     NvU64 address;
131     NvU64 *end_timestamp_gpu_addr;
132     NvU64 end_timestamp_gpu;
133     UvmEventMigrationCause cause;
134 } migration_data_t;
135 
// This object represents a pending gpu fault replay operation
137 typedef struct
138 {
139     nv_kthread_q_item_t queue_item;
140     uvm_channel_t *channel;
141     uvm_gpu_id_t gpu_id;
142     NvU32 batch_id;
143     uvm_fault_client_type_t client_type;
144     NvU64 timestamp;
145     NvU64 timestamp_gpu;
146     NvU64 *timestamp_gpu_addr;
147 } replay_data_t;
148 
149 // This object describes the pending map remote operations within a VA block
150 typedef struct
151 {
152     nv_kthread_q_item_t queue_item;
153     uvm_processor_id_t src;
154     uvm_processor_id_t dst;
155     UvmEventMapRemoteCause cause;
156     NvU64 timestamp;
157     uvm_va_space_t *va_space;
158 
159     uvm_channel_t *channel;
160     struct list_head events;
161 } block_map_remote_data_t;
162 
163 // This object represents a pending map remote operation
164 typedef struct
165 {
166     struct list_head events_node;
167 
168     NvU64 address;
169     NvU64 size;
170     NvU64 timestamp_gpu;
171     NvU64 *timestamp_gpu_addr;
172 } map_remote_data_t;
173 
174 
175 static struct cdev g_uvm_tools_cdev;
176 static LIST_HEAD(g_tools_va_space_list);
177 static NvU32 g_tools_enabled_event_count[UvmEventNumTypesAll];
178 static uvm_rw_semaphore_t g_tools_va_space_list_lock;
179 static struct kmem_cache *g_tools_event_tracker_cache __read_mostly = NULL;
180 static struct kmem_cache *g_tools_block_migration_data_cache __read_mostly = NULL;
181 static struct kmem_cache *g_tools_migration_data_cache __read_mostly = NULL;
182 static struct kmem_cache *g_tools_replay_data_cache __read_mostly = NULL;
183 static struct kmem_cache *g_tools_block_map_remote_data_cache __read_mostly = NULL;
184 static struct kmem_cache *g_tools_map_remote_data_cache __read_mostly = NULL;
185 static uvm_spinlock_t g_tools_channel_list_lock;
186 static LIST_HEAD(g_tools_channel_list);
187 static nv_kthread_q_t g_tools_queue;
188 
189 static NV_STATUS tools_update_status(uvm_va_space_t *va_space);
190 
191 static uvm_tools_event_tracker_t *tools_event_tracker(struct file *filp)
192 {
193     return (uvm_tools_event_tracker_t *)atomic_long_read((atomic_long_t *)&filp->private_data);
194 }
195 
196 static bool tracker_is_queue(uvm_tools_event_tracker_t *event_tracker)
197 {
198     return event_tracker != NULL && event_tracker->is_queue;
199 }
200 
201 static bool tracker_is_counter(uvm_tools_event_tracker_t *event_tracker)
202 {
203     return event_tracker != NULL && !event_tracker->is_queue;
204 }
205 
206 static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *event_tracker)
207 {
208     uvm_va_space_t *va_space;
209     UVM_ASSERT(event_tracker->uvm_file);
210     va_space = uvm_va_space_get(event_tracker->uvm_file);
211     return va_space;
212 }
213 
214 static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)
215 {
216     NvU64 i;
217 
218     for (i = 0; i < page_count; i++) {
219         set_page_dirty(pages[i]);
220         NV_UNPIN_USER_PAGE(pages[i]);
221     }
222 }
223 
static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
{
    NvU64 page_count = DIV_ROUND_UP(size, PAGE_SIZE);

    vunmap((NvU8 *)addr);
    uvm_put_user_pages_dirty(pages, page_count);
    uvm_kvfree(pages);
}
231 
// Map the current process's virtual memory range [user_va, user_va + size) into the kernel.
// Sets *addr to the kernel mapping and *pages to the array of struct pages backing the memory.
234 static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
235 {
236     NV_STATUS status = NV_OK;
237     long ret = 0;
238     long num_pages;
239     long i;
240     struct vm_area_struct **vmas = NULL;
241 
242     *addr = NULL;
243     *pages = NULL;
244     num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
245 
246     if (uvm_api_range_invalid(user_va, num_pages * PAGE_SIZE)) {
247         status = NV_ERR_INVALID_ADDRESS;
248         goto fail;
249     }
250 
251     *pages = uvm_kvmalloc(sizeof(struct page *) * num_pages);
252     if (*pages == NULL) {
253         status = NV_ERR_NO_MEMORY;
254         goto fail;
255     }
256 
257     vmas = uvm_kvmalloc(sizeof(struct vm_area_struct *) * num_pages);
258     if (vmas == NULL) {
259         status = NV_ERR_NO_MEMORY;
260         goto fail;
261     }
262 
263     nv_mmap_read_lock(current->mm);
264     ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, vmas);
265     nv_mmap_read_unlock(current->mm);
266     if (ret != num_pages) {
267         status = NV_ERR_INVALID_ARGUMENT;
268         goto fail;
269     }
270 
271     for (i = 0; i < num_pages; i++) {
272         if (page_count((*pages)[i]) > MAX_PAGE_COUNT || uvm_file_is_nvidia_uvm(vmas[i]->vm_file)) {
273             status = NV_ERR_INVALID_ARGUMENT;
274             goto fail;
275         }
276     }
277 
    *addr = vmap(*pages, num_pages, VM_MAP, PAGE_KERNEL);
    if (*addr == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }
281 
282     uvm_kvfree(vmas);
283     return NV_OK;
284 
285 fail:
286     if (*pages == NULL)
287         return status;
288 
289     uvm_kvfree(vmas);
290 
291     if (ret > 0)
292         uvm_put_user_pages_dirty(*pages, ret);
293     else if (ret < 0)
294         status = errno_to_nv_status(ret);
295 
296     uvm_kvfree(*pages);
297     *pages = NULL;
298     return status;
299 }
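
// Illustrative pairing of map_user_pages()/unmap_user_pages() (sketch only;
// user_control_va is a hypothetical variable for this example, not an actual
// ioctl parameter name):
//
//     UvmToolsEventControlData *ctrl;
//     struct page **ctrl_pages;
//     NV_STATUS status;
//
//     status = map_user_pages(user_control_va, sizeof(*ctrl), (void **)&ctrl, &ctrl_pages);
//     if (status != NV_OK)
//         return status;
//     // ... access ctrl from kernel space ...
//     unmap_user_pages(ctrl_pages, ctrl, sizeof(*ctrl));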
300 
301 static void insert_event_tracker(uvm_va_space_t *va_space,
302                                  struct list_head *node,
303                                  NvU32 list_count,
304                                  NvU64 list_mask,
305                                  NvU64 *subscribed_mask,
306                                  struct list_head *lists,
307                                  NvU64 *inserted_lists)
308 {
309     NvU32 i;
310     NvU64 insertable_lists = list_mask & ~*subscribed_mask;
311 
312     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
313     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
314 
315     for (i = 0; i < list_count; i++) {
316         if (insertable_lists & (1ULL << i)) {
317             ++g_tools_enabled_event_count[i];
318             list_add(node + i, lists + i);
319         }
320     }
321 
322     *subscribed_mask |= list_mask;
323     *inserted_lists = insertable_lists;
324 }
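
// Each event tracker owns one list node per event type (or per counter); bit i
// of list_mask/subscribed_mask corresponds to lists[i]. The global
// g_tools_enabled_event_count[] array counts, across all VA spaces, how many
// trackers are currently subscribed at each list index.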
325 
326 static void remove_event_tracker(uvm_va_space_t *va_space,
327                                  struct list_head *node,
328                                  NvU32 list_count,
329                                  NvU64 list_mask,
330                                  NvU64 *subscribed_mask)
331 {
332     NvU32 i;
333     NvU64 removable_lists = list_mask & *subscribed_mask;
334 
335     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
336     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
337 
338     for (i = 0; i < list_count; i++) {
339         if (removable_lists & (1ULL << i)) {
340             UVM_ASSERT(g_tools_enabled_event_count[i] > 0);
341             --g_tools_enabled_event_count[i];
342             list_del(node + i);
343         }
344     }
345 
346     *subscribed_mask &= ~list_mask;
347 }
348 
349 static bool queue_needs_wakeup(uvm_tools_queue_t *queue, uvm_tools_queue_snapshot_t *sn)
350 {
351     NvU32 queue_mask = queue->queue_buffer_count - 1;
352 
353     uvm_assert_spinlock_locked(&queue->lock);
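    // (queue_buffer_count + put_behind - get_ahead) & queue_mask is the number
    // of entries produced but not yet claimed by the consumer, computed modulo
    // the power-of-two queue size. Wake the consumer once it reaches the
    // notification threshold.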
354     return ((queue->queue_buffer_count + sn->put_behind - sn->get_ahead) & queue_mask) >= queue->notification_threshold;
355 }
356 
357 static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
358 {
359     if (event_tracker->uvm_file != NULL) {
360         NV_STATUS status;
361         uvm_va_space_t *va_space = tools_event_tracker_va_space(event_tracker);
362 
363         uvm_down_write(&g_tools_va_space_list_lock);
364         uvm_down_write(&va_space->perf_events.lock);
365         uvm_down_write(&va_space->tools.lock);
366 
367         if (event_tracker->is_queue) {
368             uvm_tools_queue_t *queue = &event_tracker->queue;
369 
370             remove_event_tracker(va_space,
371                                  queue->queue_nodes,
372                                  UvmEventNumTypesAll,
373                                  queue->subscribed_queues,
374                                  &queue->subscribed_queues);
375 
376             if (queue->queue != NULL) {
377                 unmap_user_pages(queue->queue_buffer_pages,
378                                  queue->queue,
379                                  queue->queue_buffer_count * sizeof(UvmEventEntry));
380             }
381 
382             if (queue->control != NULL) {
383                 unmap_user_pages(queue->control_buffer_pages,
384                                  queue->control,
385                                  sizeof(UvmToolsEventControlData));
386             }
387         }
388         else {
389             uvm_tools_counter_t *counters = &event_tracker->counter;
390 
391             remove_event_tracker(va_space,
392                                  counters->counter_nodes,
393                                  UVM_TOTAL_COUNTERS,
394                                  counters->subscribed_counters,
395                                  &counters->subscribed_counters);
396 
397             if (counters->counters != NULL) {
398                 unmap_user_pages(counters->counter_buffer_pages,
399                                  counters->counters,
400                                  UVM_TOTAL_COUNTERS * sizeof(NvU64));
401             }
402         }
403 
404         // de-registration should not fail
405         status = tools_update_status(va_space);
406         UVM_ASSERT(status == NV_OK);
407 
408         uvm_up_write(&va_space->tools.lock);
409         uvm_up_write(&va_space->perf_events.lock);
410         uvm_up_write(&g_tools_va_space_list_lock);
411 
412         fput(event_tracker->uvm_file);
413     }
414     kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
415 }
416 
417 static void enqueue_event(const UvmEventEntry *entry, uvm_tools_queue_t *queue)
418 {
419     UvmToolsEventControlData *ctrl = queue->control;
420     uvm_tools_queue_snapshot_t sn;
421     NvU32 queue_size = queue->queue_buffer_count;
422     NvU32 queue_mask = queue_size - 1;
423 
424     // Prevent processor speculation prior to accessing user-mapped memory to
425     // avoid leaking information from side-channel attacks. There are many
426     // possible paths leading to this point and it would be difficult and error-
427     // prone to audit all of them to determine whether user mode could guide
428     // this access to kernel memory under speculative execution, so to be on the
429     // safe side we'll just always block speculation.
430     nv_speculation_barrier();
431 
432     uvm_spin_lock(&queue->lock);
433 
434     // ctrl is mapped into user space with read and write permissions,
435     // so its values cannot be trusted.
436     sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
437     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
438     sn.put_ahead = (sn.put_behind + 1) & queue_mask;
439 
    // One free element left means that the queue is full; a slot is kept empty
    // to distinguish a full queue from an empty one
441     if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
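        // ctrl->dropped is treated as an array of per-event-type counters in
        // the user-mapped control page; bump the entry for this event type.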
442         atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
443         goto unlock;
444     }
445 
446     memcpy(queue->queue + sn.put_behind, entry, sizeof(*entry));
447 
448     sn.put_behind = sn.put_ahead;
    // put_ahead and put_behind will always be the same outside of queue->lock.
    // This allows the user-space consumer to choose either a 2- or 4-pointer
    // synchronization approach.
451     atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
452     atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);
453 
454     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
455     // if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead
456     if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
457         queue->is_wakeup_get_valid = true;
458         queue->wakeup_get = sn.get_ahead;
459         wake_up_all(&queue->wait_queue);
460     }
461 
462 unlock:
463     uvm_spin_unlock(&queue->lock);
464 }
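
// For reference, a minimal user-space consumer matching the producer above
// could drain the queue roughly as follows (sketch only; queue, ctrl and mask
// stand for the user-side mappings of the event buffer, the control buffer and
// queue_size - 1, consume() is hypothetical, and memory-ordering details are
// omitted):
//
//     NvU32 get = ctrl->get_behind & mask;
//     NvU32 put = ctrl->put_behind & mask;       // snapshot the producer cursor
//     while (get != put) {
//         consume(&queue[get]);
//         get = (get + 1) & mask;
//     }
//     ctrl->get_ahead = ctrl->get_behind = get;  // 2-pointer scheme: keep both equal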
465 
466 static void uvm_tools_record_event(uvm_va_space_t *va_space, const UvmEventEntry *entry)
467 {
468     NvU8 eventType = entry->eventData.eventType;
469     uvm_tools_queue_t *queue;
470 
471     UVM_ASSERT(eventType < UvmEventNumTypesAll);
472 
473     uvm_assert_rwsem_locked(&va_space->tools.lock);
474 
475     list_for_each_entry(queue, va_space->tools.queues + eventType, queue_nodes[eventType])
476         enqueue_event(entry, queue);
477 }
478 
479 static void uvm_tools_broadcast_event(const UvmEventEntry *entry)
480 {
481     uvm_va_space_t *va_space;
482 
483     uvm_down_read(&g_tools_va_space_list_lock);
484     list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
485         uvm_down_read(&va_space->tools.lock);
486         uvm_tools_record_event(va_space, entry);
487         uvm_up_read(&va_space->tools.lock);
488     }
489     uvm_up_read(&g_tools_va_space_list_lock);
490 }
491 
492 static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
493 {
494     // For compatibility with older counters, CPU faults for memory with a preferred location are reported
495     // for their preferred location as well as for the CPU device itself.
496     // This check prevents double counting in the aggregate count.
497     if (counter == UvmCounterNameCpuPageFaultCount)
498         return uvm_processor_uuid_eq(processor, &NV_PROCESSOR_UUID_CPU_DEFAULT);
499     return true;
500 }
501 
502 static void uvm_tools_inc_counter(uvm_va_space_t *va_space,
503                                   UvmCounterName counter,
504                                   NvU64 amount,
505                                   const NvProcessorUuid *processor)
506 {
507     UVM_ASSERT((NvU32)counter < UVM_TOTAL_COUNTERS);
508     uvm_assert_rwsem_locked(&va_space->tools.lock);
509 
510     if (amount > 0) {
511         uvm_tools_counter_t *counters;
512 
513         // Prevent processor speculation prior to accessing user-mapped memory
514         // to avoid leaking information from side-channel attacks. There are
515         // many possible paths leading to this point and it would be difficult
516         // and error-prone to audit all of them to determine whether user mode
517         // could guide this access to kernel memory under speculative execution,
518         // so to be on the safe side we'll just always block speculation.
519         nv_speculation_barrier();
520 
521         list_for_each_entry(counters, va_space->tools.counters + counter, counter_nodes[counter]) {
522             if ((counters->all_processors && counter_matches_processor(counter, processor)) ||
523                 uvm_processor_uuid_eq(&counters->processor, processor)) {
524                 atomic64_add(amount, (atomic64_t *)(counters->counters + counter));
525             }
526         }
527     }
528 }
529 
530 static bool tools_is_counter_enabled(uvm_va_space_t *va_space, UvmCounterName counter)
531 {
532     uvm_assert_rwsem_locked(&va_space->tools.lock);
533 
534     UVM_ASSERT(counter < UVM_TOTAL_COUNTERS);
535     return !list_empty(va_space->tools.counters + counter);
536 }
537 
538 static bool tools_is_event_enabled(uvm_va_space_t *va_space, UvmEventType event)
539 {
540     uvm_assert_rwsem_locked(&va_space->tools.lock);
541 
542     UVM_ASSERT(event < UvmEventNumTypesAll);
543     return !list_empty(va_space->tools.queues + event);
544 }
545 
546 static bool tools_is_event_enabled_in_any_va_space(UvmEventType event)
547 {
548     bool ret = false;
549 
550     uvm_down_read(&g_tools_va_space_list_lock);
551     ret = g_tools_enabled_event_count[event] != 0;
552     uvm_up_read(&g_tools_va_space_list_lock);
553 
554     return ret;
555 }
556 
557 static bool tools_are_enabled(uvm_va_space_t *va_space)
558 {
559     NvU32 i;
560 
561     uvm_assert_rwsem_locked(&va_space->tools.lock);
562 
563     for (i = 0; i < UVM_TOTAL_COUNTERS; i++) {
564         if (tools_is_counter_enabled(va_space, i))
565             return true;
566     }
567     for (i = 0; i < UvmEventNumTypesAll; i++) {
568         if (tools_is_event_enabled(va_space, i))
569             return true;
570     }
571     return false;
572 }
573 
574 static bool tools_is_fault_callback_needed(uvm_va_space_t *va_space)
575 {
576     return tools_is_event_enabled(va_space, UvmEventTypeCpuFault) ||
577            tools_is_event_enabled(va_space, UvmEventTypeGpuFault) ||
578            tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount) ||
579            tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount);
580 }
581 
582 static bool tools_is_migration_callback_needed(uvm_va_space_t *va_space)
583 {
584     return tools_is_event_enabled(va_space, UvmEventTypeMigration) ||
585            tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate) ||
586            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH) ||
587            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD);
588 }
589 
590 static int uvm_tools_open(struct inode *inode, struct file *filp)
591 {
592     filp->private_data = NULL;
593     return -nv_status_to_errno(uvm_global_get_status());
594 }
595 
596 static int uvm_tools_open_entry(struct inode *inode, struct file *filp)
597 {
598     UVM_ENTRY_RET(uvm_tools_open(inode, filp));
599 }
600 
601 static int uvm_tools_release(struct inode *inode, struct file *filp)
602 {
603     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
604     if (event_tracker != NULL) {
605         destroy_event_tracker(event_tracker);
606         filp->private_data = NULL;
607     }
608     return -nv_status_to_errno(uvm_global_get_status());
609 }
610 
611 static int uvm_tools_release_entry(struct inode *inode, struct file *filp)
612 {
613     UVM_ENTRY_RET(uvm_tools_release(inode, filp));
614 }
615 
616 static long uvm_tools_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
617 {
618     switch (cmd) {
619         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_INIT_EVENT_TRACKER,         uvm_api_tools_init_event_tracker);
620         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD, uvm_api_tools_set_notification_threshold);
621         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS,  uvm_api_tools_event_queue_enable_events);
622         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS, uvm_api_tools_event_queue_disable_events);
623         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_ENABLE_COUNTERS,            uvm_api_tools_enable_counters);
624         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_DISABLE_COUNTERS,           uvm_api_tools_disable_counters);
625     }
626 
627     uvm_thread_assert_all_unlocked();
628 
629     return -EINVAL;
630 }
631 
632 static long uvm_tools_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
633 {
634     UVM_ENTRY_RET(uvm_tools_unlocked_ioctl(filp, cmd, arg));
635 }
636 
637 static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
638 {
639     int flags = 0;
640     uvm_tools_queue_snapshot_t sn;
641     uvm_tools_event_tracker_t *event_tracker;
642     UvmToolsEventControlData *ctrl;
643 
644     if (uvm_global_get_status() != NV_OK)
645         return POLLERR;
646 
647     event_tracker = tools_event_tracker(filp);
648     if (!tracker_is_queue(event_tracker))
649         return POLLERR;
650 
651     uvm_spin_lock(&event_tracker->queue.lock);
652 
653     event_tracker->queue.is_wakeup_get_valid = false;
654     ctrl = event_tracker->queue.control;
655     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
656     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
657 
658     if (queue_needs_wakeup(&event_tracker->queue, &sn))
659         flags = POLLIN | POLLRDNORM;
660 
661     uvm_spin_unlock(&event_tracker->queue.lock);
662 
663     poll_wait(filp, &event_tracker->queue.wait_queue, wait);
664     return flags;
665 }
666 
667 static unsigned uvm_tools_poll_entry(struct file *filp, poll_table *wait)
668 {
669     UVM_ENTRY_RET(uvm_tools_poll(filp, wait));
670 }
671 
672 static UvmEventFaultType g_hal_to_tools_fault_type_table[UVM_FAULT_TYPE_COUNT] = {
673     [UVM_FAULT_TYPE_INVALID_PDE]          = UvmFaultTypeInvalidPde,
674     [UVM_FAULT_TYPE_INVALID_PTE]          = UvmFaultTypeInvalidPte,
675     [UVM_FAULT_TYPE_ATOMIC]               = UvmFaultTypeAtomic,
676     [UVM_FAULT_TYPE_WRITE]                = UvmFaultTypeWrite,
677     [UVM_FAULT_TYPE_PDE_SIZE]             = UvmFaultTypeInvalidPdeSize,
678     [UVM_FAULT_TYPE_VA_LIMIT_VIOLATION]   = UvmFaultTypeLimitViolation,
679     [UVM_FAULT_TYPE_UNBOUND_INST_BLOCK]   = UvmFaultTypeUnboundInstBlock,
680     [UVM_FAULT_TYPE_PRIV_VIOLATION]       = UvmFaultTypePrivViolation,
681     [UVM_FAULT_TYPE_PITCH_MASK_VIOLATION] = UvmFaultTypePitchMaskViolation,
682     [UVM_FAULT_TYPE_WORK_CREATION]        = UvmFaultTypeWorkCreation,
683     [UVM_FAULT_TYPE_UNSUPPORTED_APERTURE] = UvmFaultTypeUnsupportedAperture,
684     [UVM_FAULT_TYPE_COMPRESSION_FAILURE]  = UvmFaultTypeCompressionFailure,
685     [UVM_FAULT_TYPE_UNSUPPORTED_KIND]     = UvmFaultTypeUnsupportedKind,
686     [UVM_FAULT_TYPE_REGION_VIOLATION]     = UvmFaultTypeRegionViolation,
687     [UVM_FAULT_TYPE_POISONED]             = UvmFaultTypePoison,
688 };
689 
690 // TODO: add new value for weak atomics in tools
691 static UvmEventMemoryAccessType g_hal_to_tools_fault_access_type_table[UVM_FAULT_ACCESS_TYPE_COUNT] = {
692     [UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG] = UvmEventMemoryAccessTypeAtomic,
693     [UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK]   = UvmEventMemoryAccessTypeAtomic,
694     [UVM_FAULT_ACCESS_TYPE_WRITE]         = UvmEventMemoryAccessTypeWrite,
695     [UVM_FAULT_ACCESS_TYPE_READ]          = UvmEventMemoryAccessTypeRead,
696     [UVM_FAULT_ACCESS_TYPE_PREFETCH]      = UvmEventMemoryAccessTypePrefetch
697 };
698 
699 static UvmEventApertureType g_hal_to_tools_aperture_table[UVM_APERTURE_MAX] = {
700     [UVM_APERTURE_PEER_0] = UvmEventAperturePeer0,
701     [UVM_APERTURE_PEER_1] = UvmEventAperturePeer1,
702     [UVM_APERTURE_PEER_2] = UvmEventAperturePeer2,
703     [UVM_APERTURE_PEER_3] = UvmEventAperturePeer3,
704     [UVM_APERTURE_PEER_4] = UvmEventAperturePeer4,
705     [UVM_APERTURE_PEER_5] = UvmEventAperturePeer5,
706     [UVM_APERTURE_PEER_6] = UvmEventAperturePeer6,
707     [UVM_APERTURE_PEER_7] = UvmEventAperturePeer7,
708     [UVM_APERTURE_SYS]    = UvmEventApertureSys,
709     [UVM_APERTURE_VID]    = UvmEventApertureVid,
710 };
711 
712 static UvmEventFaultClientType g_hal_to_tools_fault_client_type_table[UVM_FAULT_CLIENT_TYPE_COUNT] = {
713     [UVM_FAULT_CLIENT_TYPE_GPC] = UvmEventFaultClientTypeGpc,
714     [UVM_FAULT_CLIENT_TYPE_HUB] = UvmEventFaultClientTypeHub,
715 };
716 
717 static void record_gpu_fault_instance(uvm_gpu_t *gpu,
718                                       uvm_va_space_t *va_space,
719                                       const uvm_fault_buffer_entry_t *fault_entry,
720                                       NvU64 batch_id,
721                                       NvU64 timestamp)
722 {
723     UvmEventEntry entry;
724     UvmEventGpuFaultInfo *info = &entry.eventData.gpuFault;
725     memset(&entry, 0, sizeof(entry));
726 
727     info->eventType     = UvmEventTypeGpuFault;
728     info->gpuIndex      = uvm_id_value(gpu->id);
729     info->faultType     = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
730     info->accessType    = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
731     info->clientType    = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
732     if (fault_entry->is_replayable)
733         info->gpcId     = fault_entry->fault_source.gpc_id;
734     else
735         info->channelId = fault_entry->fault_source.channel_id;
736     info->clientId      = fault_entry->fault_source.client_id;
737     info->address       = fault_entry->fault_address;
738     info->timeStamp     = timestamp;
739     info->timeStampGpu  = fault_entry->timestamp;
740     info->batchId       = batch_id;
741 
742     uvm_tools_record_event(va_space, &entry);
743 }
744 
745 static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
746 {
747     uvm_va_space_t *va_space = event_data->fault.space;
748 
749     UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT);
750     UVM_ASSERT(event_data->fault.space);
751 
752     uvm_assert_rwsem_locked(&va_space->lock);
753     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
754     UVM_ASSERT(va_space->tools.enabled);
755 
756     uvm_down_read(&va_space->tools.lock);
757     UVM_ASSERT(tools_is_fault_callback_needed(va_space));
758 
759     if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
760         if (tools_is_event_enabled(va_space, UvmEventTypeCpuFault)) {
761             UvmEventEntry entry;
762             UvmEventCpuFaultInfo *info = &entry.eventData.cpuFault;
763             memset(&entry, 0, sizeof(entry));
764 
765             info->eventType = UvmEventTypeCpuFault;
766             if (event_data->fault.cpu.is_write)
767                 info->accessType = UvmEventMemoryAccessTypeWrite;
768             else
769                 info->accessType = UvmEventMemoryAccessTypeRead;
770 
771             info->address = event_data->fault.cpu.fault_va;
772             info->timeStamp = NV_GETTIME();
773             // assume that current owns va_space
774             info->pid = uvm_get_stale_process_id();
775             info->threadId = uvm_get_stale_thread_id();
776             info->pc = event_data->fault.cpu.pc;
777 
778             uvm_tools_record_event(va_space, &entry);
779         }
780         if (tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount)) {
781             uvm_processor_id_t preferred_location;
782 
783             // The UVM Lite tools interface did not represent the CPU as a UVM
784             // device. It reported CPU faults against the corresponding
785             // allocation's 'home location'. Though this driver's tools
786             // interface does include a CPU device, for compatibility, the
787             // driver still reports faults against a buffer's preferred
788             // location, in addition to the CPU.
789             uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &NV_PROCESSOR_UUID_CPU_DEFAULT);
790 
791             preferred_location = event_data->fault.preferred_location;
792             if (UVM_ID_IS_GPU(preferred_location)) {
793                 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, preferred_location);
794                 uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
795             }
796         }
797     }
798     else {
799         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->fault.proc_id);
800         UVM_ASSERT(gpu);
801 
802         if (tools_is_event_enabled(va_space, UvmEventTypeGpuFault)) {
803             NvU64 timestamp = NV_GETTIME();
804             uvm_fault_buffer_entry_t *fault_entry = event_data->fault.gpu.buffer_entry;
805             uvm_fault_buffer_entry_t *fault_instance;
806 
807             record_gpu_fault_instance(gpu, va_space, fault_entry, event_data->fault.gpu.batch_id, timestamp);
808 
809             list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list)
810                 record_gpu_fault_instance(gpu, va_space, fault_instance, event_data->fault.gpu.batch_id, timestamp);
811         }
812 
813         if (tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount))
814             uvm_tools_inc_counter(va_space, UvmCounterNameGpuPageFaultCount, 1, uvm_gpu_uuid(gpu));
815     }
816     uvm_up_read(&va_space->tools.lock);
817 }
818 
819 static void add_pending_event_for_channel(uvm_channel_t *channel)
820 {
821     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
822 
823     if (channel->tools.pending_event_count++ == 0)
824         list_add_tail(&channel->tools.channel_list_node, &g_tools_channel_list);
825 }
826 
827 static void remove_pending_event_for_channel(uvm_channel_t *channel)
828 {
829     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
830     UVM_ASSERT(channel->tools.pending_event_count > 0);
831     if (--channel->tools.pending_event_count == 0)
832         list_del_init(&channel->tools.channel_list_node);
833 }
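
// channel->tools.pending_event_count acts as a refcount of outstanding
// completion callbacks: a channel stays on g_tools_channel_list while it has
// pending tools events, so tools_schedule_completed_events() below knows which
// channels still need their progress updated.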
834 
835 
836 static void record_migration_events(void *args)
837 {
838     block_migration_data_t *block_mig = (block_migration_data_t *)args;
839     migration_data_t *mig;
840     migration_data_t *next;
841     UvmEventEntry entry;
842     UvmEventMigrationInfo *info = &entry.eventData.migration;
843     uvm_va_space_t *va_space = block_mig->va_space;
844 
845     NvU64 gpu_timestamp = block_mig->start_timestamp_gpu;
846 
847     // Initialize fields that are constant throughout the whole block
848     memset(&entry, 0, sizeof(entry));
849     info->eventType      = UvmEventTypeMigration;
850     info->srcIndex       = uvm_id_value(block_mig->src);
851     info->dstIndex       = uvm_id_value(block_mig->dst);
852     info->beginTimeStamp = block_mig->start_timestamp_cpu;
853     info->endTimeStamp   = block_mig->end_timestamp_cpu;
854     info->rangeGroupId   = block_mig->range_group_id;
855 
856     uvm_down_read(&va_space->tools.lock);
857     list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
858         UVM_ASSERT(mig->bytes > 0);
859         list_del(&mig->events_node);
860 
861         info->address           = mig->address;
862         info->migratedBytes     = mig->bytes;
863         info->beginTimeStampGpu = gpu_timestamp;
864         info->endTimeStampGpu   = mig->end_timestamp_gpu;
865         info->migrationCause    = mig->cause;
866         gpu_timestamp = mig->end_timestamp_gpu;
867         kmem_cache_free(g_tools_migration_data_cache, mig);
868 
869         uvm_tools_record_event(va_space, &entry);
870     }
871     uvm_up_read(&va_space->tools.lock);
872 
873     UVM_ASSERT(list_empty(&block_mig->events));
874     kmem_cache_free(g_tools_block_migration_data_cache, block_mig);
875 }
876 
877 static void record_migration_events_entry(void *args)
878 {
879     UVM_ENTRY_VOID(record_migration_events(args));
880 }
881 
882 static void on_block_migration_complete(void *ptr)
883 {
884     migration_data_t *mig;
885     block_migration_data_t *block_mig = (block_migration_data_t *)ptr;
886 
887     block_mig->end_timestamp_cpu = NV_GETTIME();
888     block_mig->start_timestamp_gpu = *block_mig->start_timestamp_gpu_addr;
889     list_for_each_entry(mig, &block_mig->events, events_node)
890         mig->end_timestamp_gpu = *mig->end_timestamp_gpu_addr;
891 
892     nv_kthread_q_item_init(&block_mig->queue_item, record_migration_events_entry, block_mig);
893 
    // The UVM driver may notice that work in a channel is complete in a variety
    // of situations, and the va_space lock is not always held in all of them,
    // nor can it always be taken safely in them. Dispatching events requires
    // the va_space lock to be held in at least read mode, so this callback
    // simply enqueues the dispatching onto a work queue, where the va_space
    // lock is always safe to acquire.
899     uvm_spin_lock(&g_tools_channel_list_lock);
900     remove_pending_event_for_channel(block_mig->channel);
901     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_mig->queue_item);
902     uvm_spin_unlock(&g_tools_channel_list_lock);
903 }
904 
905 static void record_replay_event_helper(uvm_gpu_id_t gpu_id,
906                                        NvU32 batch_id,
907                                        uvm_fault_client_type_t client_type,
908                                        NvU64 timestamp,
909                                        NvU64 timestamp_gpu)
910 {
911     UvmEventEntry entry;
912 
913     memset(&entry, 0, sizeof(entry));
914     entry.eventData.gpuFaultReplay.eventType    = UvmEventTypeGpuFaultReplay;
915     entry.eventData.gpuFaultReplay.gpuIndex     = uvm_id_value(gpu_id);
916     entry.eventData.gpuFaultReplay.batchId      = batch_id;
917     entry.eventData.gpuFaultReplay.clientType   = g_hal_to_tools_fault_client_type_table[client_type];
918     entry.eventData.gpuFaultReplay.timeStamp    = timestamp;
919     entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;
920 
921     uvm_tools_broadcast_event(&entry);
922 }
923 
924 static void record_replay_events(void *args)
925 {
926     replay_data_t *replay = (replay_data_t *)args;
927 
928     record_replay_event_helper(replay->gpu_id,
929                                replay->batch_id,
930                                replay->client_type,
931                                replay->timestamp,
932                                replay->timestamp_gpu);
933 
934     kmem_cache_free(g_tools_replay_data_cache, replay);
935 }
936 
937 static void record_replay_events_entry(void *args)
938 {
939     UVM_ENTRY_VOID(record_replay_events(args));
940 }
941 
942 static void on_replay_complete(void *ptr)
943 {
944     replay_data_t *replay = (replay_data_t *)ptr;
945     replay->timestamp_gpu = *replay->timestamp_gpu_addr;
946 
947     nv_kthread_q_item_init(&replay->queue_item, record_replay_events_entry, ptr);
948 
949     uvm_spin_lock(&g_tools_channel_list_lock);
950     remove_pending_event_for_channel(replay->channel);
951     nv_kthread_q_schedule_q_item(&g_tools_queue, &replay->queue_item);
952     uvm_spin_unlock(&g_tools_channel_list_lock);
953 
954 }
955 
956 static UvmEventMigrationCause g_make_resident_to_tools_migration_cause[UVM_MAKE_RESIDENT_CAUSE_MAX] = {
957     [UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT]     = UvmEventMigrationCauseCoherence,
958     [UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence,
959     [UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER]       = UvmEventMigrationCauseAccessCounters,
960     [UVM_MAKE_RESIDENT_CAUSE_PREFETCH]             = UvmEventMigrationCausePrefetch,
961     [UVM_MAKE_RESIDENT_CAUSE_EVICTION]             = UvmEventMigrationCauseEviction,
962     [UVM_MAKE_RESIDENT_CAUSE_API_TOOLS]            = UvmEventMigrationCauseInvalid,
963     [UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE]          = UvmEventMigrationCauseUser,
964     [UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP]  = UvmEventMigrationCauseCoherence,
965     [UVM_MAKE_RESIDENT_CAUSE_API_HINT]             = UvmEventMigrationCauseUser,
966 };
967 
968 // This event is notified asynchronously when all the migrations pushed to the
969 // same uvm_push_t object in a call to block_copy_resident_pages_between have
970 // finished
971 static void uvm_tools_record_migration(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
972 {
973     uvm_va_block_t *va_block = event_data->migration.block;
974     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
975 
976     UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION);
977 
978     uvm_assert_mutex_locked(&va_block->lock);
979     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
980     UVM_ASSERT(va_space->tools.enabled);
981 
982     uvm_down_read(&va_space->tools.lock);
983     UVM_ASSERT(tools_is_migration_callback_needed(va_space));
984 
985     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
986         migration_data_t *mig;
987         uvm_push_info_t *push_info = uvm_push_info_from_push(event_data->migration.push);
988         block_migration_data_t *block_mig = (block_migration_data_t *)push_info->on_complete_data;
989 
990         if (push_info->on_complete != NULL) {
991             mig = kmem_cache_alloc(g_tools_migration_data_cache, NV_UVM_GFP_FLAGS);
992             if (mig == NULL)
993                 goto done_unlock;
994 
995             mig->address = event_data->migration.address;
996             mig->bytes = event_data->migration.bytes;
997             mig->end_timestamp_gpu_addr = uvm_push_timestamp(event_data->migration.push);
998             mig->cause = g_make_resident_to_tools_migration_cause[event_data->migration.cause];
999 
1000             list_add_tail(&mig->events_node, &block_mig->events);
1001         }
1002     }
1003 
1004     // Increment counters
1005     if (UVM_ID_IS_CPU(event_data->migration.src) &&
1006         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD)) {
1007         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.dst);
1008         uvm_tools_inc_counter(va_space,
1009                               UvmCounterNameBytesXferHtD,
1010                               event_data->migration.bytes,
1011                               uvm_gpu_uuid(gpu));
1012     }
1013     if (UVM_ID_IS_CPU(event_data->migration.dst) &&
1014         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH)) {
1015         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.src);
1016         uvm_tools_inc_counter(va_space,
1017                               UvmCounterNameBytesXferDtH,
1018                               event_data->migration.bytes,
1019                               uvm_gpu_uuid(gpu));
1020     }
1021 
1022 done_unlock:
1023     uvm_up_read(&va_space->tools.lock);
1024 }
1025 
// This event is notified asynchronously when the push that the replay method
// belongs to is marked as completed in the pushbuffer.
1028 void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
1029                                 uvm_push_t *push,
1030                                 NvU32 batch_id,
1031                                 uvm_fault_client_type_t client_type)
1032 {
1033     uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1034     replay_data_t *replay;
1035 
1036     // Perform delayed notification only if some VA space has signed up for
1037     // UvmEventTypeGpuFaultReplay
1038     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1039         return;
1040 
1041     replay = kmem_cache_alloc(g_tools_replay_data_cache, NV_UVM_GFP_FLAGS);
1042     if (replay == NULL)
1043         return;
1044 
1045     UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1046 
1047     replay->timestamp_gpu_addr = uvm_push_timestamp(push);
1048     replay->gpu_id             = gpu->id;
1049     replay->batch_id           = batch_id;
1050     replay->client_type        = client_type;
1051     replay->timestamp          = NV_GETTIME();
1052     replay->channel            = push->channel;
1053 
1054     push_info->on_complete_data = replay;
1055     push_info->on_complete = on_replay_complete;
1056 
1057     uvm_spin_lock(&g_tools_channel_list_lock);
1058     add_pending_event_for_channel(replay->channel);
1059     uvm_spin_unlock(&g_tools_channel_list_lock);
1060 }
1061 
1062 
1063 void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
1064                                      NvU32 batch_id,
1065                                      uvm_fault_client_type_t client_type)
1066 {
1067     UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
1068 
1069     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1070         return;
1071 
1072     record_replay_event_helper(gpu->id,
1073                                batch_id,
1074                                client_type,
1075                                NV_GETTIME(),
1076                                gpu->parent->host_hal->get_time(gpu));
1077 }
1078 
1079 void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
1080                                         const uvm_access_counter_buffer_entry_t *buffer_entry,
1081                                         bool on_managed)
1082 {
1083     UvmEventEntry entry;
1084     UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter;
1085 
    // Notify only if some VA space has signed up for
    // UvmEventTypeTestAccessCounter
1088     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeTestAccessCounter))
1089         return;
1090 
1091     if (!buffer_entry->address.is_virtual)
1092         UVM_ASSERT(UVM_ID_IS_VALID(buffer_entry->physical_info.resident_id));
1093 
1094     memset(&entry, 0, sizeof(entry));
1095 
1096     info->eventType           = UvmEventTypeTestAccessCounter;
1097     info->srcIndex            = uvm_id_value(gpu->id);
1098     info->address             = buffer_entry->address.address;
1099     info->isVirtual           = buffer_entry->address.is_virtual? 1: 0;
1100     if (buffer_entry->address.is_virtual) {
1101         info->instancePtr         = buffer_entry->virtual_info.instance_ptr.address;
1102         info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
1103         info->veId                = buffer_entry->virtual_info.ve_id;
1104     }
1105     else {
1106         info->aperture            = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
1107     }
1108     info->isFromCpu           = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
1109     info->onManaged           = on_managed? 1 : 0;
1110     info->value               = buffer_entry->counter_value;
1111     info->subGranularity      = buffer_entry->sub_granularity;
1112     info->bank                = buffer_entry->bank;
1113     info->tag                 = buffer_entry->tag;
1114 
1115     uvm_tools_broadcast_event(&entry);
1116 }
1117 
1118 void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
1119 {
1120     UvmEventEntry entry;
1121 
1122     if (!va_space->tools.enabled)
1123         return;
1124 
    memset(&entry, 0, sizeof(entry));
    entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
1126     uvm_down_read(&va_space->tools.lock);
1127     uvm_tools_record_event(va_space, &entry);
1128     uvm_up_read(&va_space->tools.lock);
1129 }
1130 
1131 // This function is used as a begin marker to group all migrations within a VA
1132 // block that are performed in the same call to
1133 // block_copy_resident_pages_between. All of these are pushed to the same
// uvm_push_t object, and will be notified in a burst when the last one finishes.
1135 void uvm_tools_record_block_migration_begin(uvm_va_block_t *va_block,
1136                                             uvm_push_t *push,
1137                                             uvm_processor_id_t dst_id,
1138                                             uvm_processor_id_t src_id,
1139                                             NvU64 start,
1140                                             uvm_make_resident_cause_t cause)
1141 {
1142     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1143     uvm_range_group_range_t *range;
1144 
1145     // Calls from tools read/write functions to make_resident must not trigger
1146     // any migration
1147     UVM_ASSERT(cause != UVM_MAKE_RESIDENT_CAUSE_API_TOOLS);
1148 
1149     // During evictions the va_space lock is not held.
1150     if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION)
1151         uvm_assert_rwsem_locked(&va_space->lock);
1152 
1153     if (!va_space->tools.enabled)
1154         return;
1155 
1156     uvm_down_read(&va_space->tools.lock);
1157 
1158     // Perform delayed notification only if the VA space has signed up for
1159     // UvmEventTypeMigration
1160     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1161         block_migration_data_t *block_mig;
1162         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1163 
1164         UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1165 
1166         block_mig = kmem_cache_alloc(g_tools_block_migration_data_cache, NV_UVM_GFP_FLAGS);
1167         if (block_mig == NULL)
1168             goto done_unlock;
1169 
1170         block_mig->start_timestamp_gpu_addr = uvm_push_timestamp(push);
1171         block_mig->channel = push->channel;
1172         block_mig->start_timestamp_cpu = NV_GETTIME();
1173         block_mig->dst = dst_id;
1174         block_mig->src = src_id;
1175         block_mig->range_group_id = UVM_RANGE_GROUP_ID_NONE;
1176 
        // During evictions it is not safe to call uvm_range_group_range_find()
        // because the va_space lock is not held.
1178         if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1179             range = uvm_range_group_range_find(va_space, start);
1180             if (range != NULL)
1181                 block_mig->range_group_id = range->range_group->id;
1182         }
1183         block_mig->va_space = va_space;
1184 
1185         INIT_LIST_HEAD(&block_mig->events);
1186         push_info->on_complete_data = block_mig;
1187         push_info->on_complete = on_block_migration_complete;
1188 
1189         uvm_spin_lock(&g_tools_channel_list_lock);
1190         add_pending_event_for_channel(block_mig->channel);
1191         uvm_spin_unlock(&g_tools_channel_list_lock);
1192     }
1193 
1194 done_unlock:
1195     uvm_up_read(&va_space->tools.lock);
1196 }
1197 
1198 void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
1199                                      uvm_processor_id_t dst,
1200                                      uvm_va_block_region_t region,
1201                                      const uvm_page_mask_t *page_mask)
1202 {
1203     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1204 
1205     if (!va_space->tools.enabled)
1206         return;
1207 
1208     uvm_down_read(&va_space->tools.lock);
1209     if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate)) {
1210         // Read-duplication events
1211         UvmEventEntry entry;
1212         UvmEventReadDuplicateInfo *info_read_duplicate = &entry.eventData.readDuplicate;
1213         uvm_page_index_t page_index;
1214         memset(&entry, 0, sizeof(entry));
1215 
1216         info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
1217         info_read_duplicate->size      = PAGE_SIZE;
1218         info_read_duplicate->timeStamp = NV_GETTIME();
1219 
1220         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1221             uvm_processor_id_t id;
1222             uvm_processor_mask_t resident_processors;
1223 
1224             info_read_duplicate->address    = uvm_va_block_cpu_page_address(va_block, page_index);
1225             info_read_duplicate->processors = 0;
1226 
1227             uvm_va_block_page_resident_processors(va_block, page_index, &resident_processors);
1228             for_each_id_in_mask(id, &resident_processors)
                info_read_duplicate->processors |= (1ULL << uvm_id_value(id));
1230 
1231             uvm_tools_record_event(va_space, &entry);
1232         }
1233     }
1234     uvm_up_read(&va_space->tools.lock);
1235 }
1236 
1237 void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
1238                                                 uvm_processor_id_t dst,
1239                                                 uvm_va_block_region_t region,
1240                                                 const uvm_page_mask_t *page_mask)
1241 {
1242     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1243 
1244     if (!va_space->tools.enabled)
1245         return;
1246 
1247     uvm_down_read(&va_space->tools.lock);
1248     if (tools_is_event_enabled(va_space, UvmEventTypeReadDuplicateInvalidate)) {
1249         UvmEventEntry entry;
1250         uvm_page_index_t page_index;
1251         UvmEventReadDuplicateInvalidateInfo *info = &entry.eventData.readDuplicateInvalidate;
1252         memset(&entry, 0, sizeof(entry));
1253 
1254         info->eventType     = UvmEventTypeReadDuplicateInvalidate;
1255         info->residentIndex = uvm_id_value(dst);
1256         info->size          = PAGE_SIZE;
1257         info->timeStamp     = NV_GETTIME();
1258 
1259         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1260             UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));
1261 
1262             info->address = uvm_va_block_cpu_page_address(va_block, page_index);
1263             uvm_tools_record_event(va_space, &entry);
1264         }
1265     }
1266     uvm_up_read(&va_space->tools.lock);
1267 }
1268 
1269 static void tools_schedule_completed_events(void)
1270 {
1271     uvm_channel_t *channel;
1272     uvm_channel_t *next_channel;
1273     NvU64 channel_count = 0;
1274     NvU64 i;
1275 
1276     uvm_spin_lock(&g_tools_channel_list_lock);
1277 
    // retain every channel currently in the list and keep track of the count.
1279     list_for_each_entry(channel, &g_tools_channel_list, tools.channel_list_node) {
1280         ++channel->tools.pending_event_count;
1281         ++channel_count;
1282     }
1283     uvm_spin_unlock(&g_tools_channel_list_lock);
1284 
1285     if (channel_count == 0)
1286         return;
1287 
    // new entries always appear at the end, and all the entries seen in the
    // first loop have been retained, so it is safe to go through them
1290     channel = list_first_entry(&g_tools_channel_list, uvm_channel_t, tools.channel_list_node);
1291     for (i = 0; i < channel_count; i++) {
1292         uvm_channel_update_progress_all(channel);
1293         channel = list_next_entry(channel, tools.channel_list_node);
1294     }
1295 
1296     // now release all the entries we retained in the beginning
1297     i = 0;
1298     uvm_spin_lock(&g_tools_channel_list_lock);
1299     list_for_each_entry_safe(channel, next_channel, &g_tools_channel_list, tools.channel_list_node) {
1300         if (i++ == channel_count)
1301             break;
1302 
1303         remove_pending_event_for_channel(channel);
1304     }
1305     uvm_spin_unlock(&g_tools_channel_list_lock);
1306 }
1307 
1308 void uvm_tools_record_cpu_fatal_fault(uvm_va_space_t *va_space,
1309                                       NvU64 address,
1310                                       bool is_write,
1311                                       UvmEventFatalReason reason)
1312 {
1313     uvm_assert_rwsem_locked(&va_space->lock);
1314 
1315     if (!va_space->tools.enabled)
1316         return;
1317 
1318     uvm_down_read(&va_space->tools.lock);
1319     if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
1320         UvmEventEntry entry;
1321         UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
1322         memset(&entry, 0, sizeof(entry));
1323 
1324         info->eventType      = UvmEventTypeFatalFault;
1325         info->processorIndex = UVM_ID_CPU_VALUE;
1326         info->timeStamp      = NV_GETTIME();
1327         info->address        = address;
1328         info->accessType     = is_write? UvmEventMemoryAccessTypeWrite: UvmEventMemoryAccessTypeRead;
1329         // info->faultType is not valid for cpu faults
1330         info->reason         = reason;
1331 
1332         uvm_tools_record_event(va_space, &entry);
1333     }
1334     uvm_up_read(&va_space->tools.lock);
1335 }
1336 
1337 void uvm_tools_record_gpu_fatal_fault(uvm_gpu_id_t gpu_id,
1338                                       uvm_va_space_t *va_space,
1339                                       const uvm_fault_buffer_entry_t *buffer_entry,
1340                                       UvmEventFatalReason reason)
1341 {
1342     uvm_assert_rwsem_locked(&va_space->lock);
1343 
1344     if (!va_space->tools.enabled)
1345         return;
1346 
1347     uvm_down_read(&va_space->tools.lock);
1348     if (tools_is_event_enabled(va_space, UvmEventTypeFatalFault)) {
1349         UvmEventEntry entry;
1350         UvmEventFatalFaultInfo *info = &entry.eventData.fatalFault;
1351         memset(&entry, 0, sizeof(entry));
1352 
1353         info->eventType      = UvmEventTypeFatalFault;
1354         info->processorIndex = uvm_id_value(gpu_id);
1355         info->timeStamp      = NV_GETTIME();
1356         info->address        = buffer_entry->fault_address;
1357         info->accessType     = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
1358         info->faultType      = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
1359         info->reason         = reason;
1360 
1361         uvm_tools_record_event(va_space, &entry);
1362     }
1363     uvm_up_read(&va_space->tools.lock);
1364 }
1365 
1366 void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
1367                                 NvU64 address,
1368                                 size_t region_size,
1369                                 const uvm_processor_mask_t *processors)
1370 {
1371     UVM_ASSERT(address);
1372     UVM_ASSERT(PAGE_ALIGNED(address));
1373     UVM_ASSERT(region_size > 0);
1374 
1375     uvm_assert_rwsem_locked(&va_space->lock);
1376 
1377     if (!va_space->tools.enabled)
1378         return;
1379 
1380     uvm_down_read(&va_space->tools.lock);
1381     if (tools_is_event_enabled(va_space, UvmEventTypeThrashingDetected)) {
1382         UvmEventEntry entry;
1383         UvmEventThrashingDetectedInfo *info = &entry.eventData.thrashing;
1384         memset(&entry, 0, sizeof(entry));
1385 
1386         info->eventType = UvmEventTypeThrashingDetected;
1387         info->address   = address;
1388         info->size      = region_size;
1389         info->timeStamp = NV_GETTIME();
        bitmap_copy((unsigned long *)&info->processors, processors->bitmap, UVM_ID_MAX_PROCESSORS);
1391 
1392         uvm_tools_record_event(va_space, &entry);
1393     }
1394     uvm_up_read(&va_space->tools.lock);
1395 }
1396 
1397 void uvm_tools_record_throttling_start(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1398 {
1399     UVM_ASSERT(address);
1400     UVM_ASSERT(PAGE_ALIGNED(address));
1401     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1402 
1403     uvm_assert_rwsem_locked(&va_space->lock);
1404 
1405     if (!va_space->tools.enabled)
1406         return;
1407 
1408     uvm_down_read(&va_space->tools.lock);
1409     if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingStart)) {
1410         UvmEventEntry entry;
1411         UvmEventThrottlingStartInfo *info = &entry.eventData.throttlingStart;
1412         memset(&entry, 0, sizeof(entry));
1413 
1414         info->eventType      = UvmEventTypeThrottlingStart;
1415         info->processorIndex = uvm_id_value(processor);
1416         info->address        = address;
1417         info->timeStamp      = NV_GETTIME();
1418 
1419         uvm_tools_record_event(va_space, &entry);
1420     }
1421     uvm_up_read(&va_space->tools.lock);
1422 }
1423 
1424 void uvm_tools_record_throttling_end(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1425 {
1426     UVM_ASSERT(address);
1427     UVM_ASSERT(PAGE_ALIGNED(address));
1428     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1429 
1430     uvm_assert_rwsem_locked(&va_space->lock);
1431 
1432     if (!va_space->tools.enabled)
1433         return;
1434 
1435     uvm_down_read(&va_space->tools.lock);
1436     if (tools_is_event_enabled(va_space, UvmEventTypeThrottlingEnd)) {
1437         UvmEventEntry entry;
1438         UvmEventThrottlingEndInfo *info = &entry.eventData.throttlingEnd;
1439         memset(&entry, 0, sizeof(entry));
1440 
1441         info->eventType      = UvmEventTypeThrottlingEnd;
1442         info->processorIndex = uvm_id_value(processor);
1443         info->address        = address;
1444         info->timeStamp      = NV_GETTIME();
1445 
1446         uvm_tools_record_event(va_space, &entry);
1447     }
1448     uvm_up_read(&va_space->tools.lock);
1449 }
1450 
1451 static void record_map_remote_events(void *args)
1452 {
1453     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)args;
1454     map_remote_data_t *map_remote, *next;
1455     UvmEventEntry entry;
1456     uvm_va_space_t *va_space = block_map_remote->va_space;
1457 
1458     memset(&entry, 0, sizeof(entry));
1459 
1460     entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1461     entry.eventData.mapRemote.srcIndex       = uvm_id_value(block_map_remote->src);
1462     entry.eventData.mapRemote.dstIndex       = uvm_id_value(block_map_remote->dst);
1463     entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
1464     entry.eventData.mapRemote.timeStamp      = block_map_remote->timestamp;
1465 
1466     uvm_down_read(&va_space->tools.lock);
1467     list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
1468         list_del(&map_remote->events_node);
1469 
1470         entry.eventData.mapRemote.address      = map_remote->address;
1471         entry.eventData.mapRemote.size         = map_remote->size;
1472         entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
1473         kmem_cache_free(g_tools_map_remote_data_cache, map_remote);
1474 
1475         uvm_tools_record_event(va_space, &entry);
1476     }
1477     uvm_up_read(&va_space->tools.lock);
1478 
1479     UVM_ASSERT(list_empty(&block_map_remote->events));
1480     kmem_cache_free(g_tools_block_map_remote_data_cache, block_map_remote);
1481 }
1482 
1483 static void record_map_remote_events_entry(void *args)
1484 {
1485     UVM_ENTRY_VOID(record_map_remote_events(args));
1486 }
1487 
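// Push completion callback installed by uvm_tools_record_map_remote() for GPU
// mappings. It snapshots the GPU timestamps written by the push, drops the
// channel's pending-event reference, and defers the actual event notification
// to the g_tools_queue worker thread.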
1488 static void on_map_remote_complete(void *ptr)
1489 {
1490     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)ptr;
1491     map_remote_data_t *map_remote;
1492 
1493     // Only GPU mappings use the deferred mechanism
1494     UVM_ASSERT(UVM_ID_IS_GPU(block_map_remote->src));
1495     list_for_each_entry(map_remote, &block_map_remote->events, events_node)
1496         map_remote->timestamp_gpu = *map_remote->timestamp_gpu_addr;
1497 
1498     nv_kthread_q_item_init(&block_map_remote->queue_item, record_map_remote_events_entry, ptr);
1499 
1500     uvm_spin_lock(&g_tools_channel_list_lock);
1501     remove_pending_event_for_channel(block_map_remote->channel);
1502     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_map_remote->queue_item);
1503     uvm_spin_unlock(&g_tools_channel_list_lock);
1504 }
1505 
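// CPU mappings are reported immediately with a CPU timestamp. GPU mappings
// are accumulated per push: each call appends a map_remote_data_t entry with
// a location for a GPU timestamp, and the events are reported from
// on_map_remote_complete() once the push finishes.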
1506 void uvm_tools_record_map_remote(uvm_va_block_t *va_block,
1507                                  uvm_push_t *push,
1508                                  uvm_processor_id_t processor,
1509                                  uvm_processor_id_t residency,
1510                                  NvU64 address,
1511                                  size_t region_size,
1512                                  UvmEventMapRemoteCause cause)
1513 {
1514     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1515 
1516     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1517     UVM_ASSERT(UVM_ID_IS_VALID(residency));
1518     UVM_ASSERT(cause != UvmEventMapRemoteCauseInvalid);
1519 
1520     uvm_assert_rwsem_locked(&va_space->lock);
1521 
1522     if (!va_space->tools.enabled)
1523         return;
1524 
1525     uvm_down_read(&va_space->tools.lock);
1526     if (!tools_is_event_enabled(va_space, UvmEventTypeMapRemote))
1527         goto done;
1528 
1529     if (UVM_ID_IS_CPU(processor)) {
1530         UvmEventEntry entry;
1531         memset(&entry, 0, sizeof(entry));
1532 
1533         entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1534         entry.eventData.mapRemote.srcIndex       = uvm_id_value(processor);
1535         entry.eventData.mapRemote.dstIndex       = uvm_id_value(residency);
1536         entry.eventData.mapRemote.mapRemoteCause = cause;
1537         entry.eventData.mapRemote.timeStamp      = NV_GETTIME();
1538         entry.eventData.mapRemote.address        = address;
1539         entry.eventData.mapRemote.size           = region_size;
1540         entry.eventData.mapRemote.timeStampGpu   = 0;
1541 
1542         UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);
1543 
1544         uvm_tools_record_event(va_space, &entry);
1545     }
1546     else {
1547         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1548         block_map_remote_data_t *block_map_remote;
1549         map_remote_data_t *map_remote;
1550 
1551         // The first call on this pushbuffer creates the per-VA block structure
1552         if (push_info->on_complete == NULL) {
1553             UVM_ASSERT(push_info->on_complete_data == NULL);
1554 
1555             block_map_remote = kmem_cache_alloc(g_tools_block_map_remote_data_cache, NV_UVM_GFP_FLAGS);
1556             if (block_map_remote == NULL)
1557                 goto done;
1558 
1559             block_map_remote->src = processor;
1560             block_map_remote->dst = residency;
1561             block_map_remote->cause = cause;
1562             block_map_remote->timestamp = NV_GETTIME();
1563             block_map_remote->va_space = va_space;
1564             block_map_remote->channel = push->channel;
1565             INIT_LIST_HEAD(&block_map_remote->events);
1566 
1567             push_info->on_complete_data = block_map_remote;
1568             push_info->on_complete = on_map_remote_complete;
1569 
1570             uvm_spin_lock(&g_tools_channel_list_lock);
1571             add_pending_event_for_channel(block_map_remote->channel);
1572             uvm_spin_unlock(&g_tools_channel_list_lock);
1573         }
1574         else {
1575             block_map_remote = push_info->on_complete_data;
1576         }
1577         UVM_ASSERT(block_map_remote);
1578 
1579         map_remote = kmem_cache_alloc(g_tools_map_remote_data_cache, NV_UVM_GFP_FLAGS);
1580         if (map_remote == NULL)
1581             goto done;
1582 
1583         map_remote->address = address;
1584         map_remote->size = region_size;
1585         map_remote->timestamp_gpu_addr = uvm_push_timestamp(push);
1586 
1587         list_add_tail(&map_remote->events_node, &block_map_remote->events);
1588     }
1589 
1590 done:
1591     uvm_up_read(&va_space->tools.lock);
1592 }
1593 
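// Create the event tracker backing a /dev/nvidia-uvm-tools file descriptor.
// The tracker maps the user-supplied queue/control (or counter) buffers into
// kernel space and is installed atomically as filp->private_data, so a second
// initialization attempt on the same fd fails with NV_ERR_INVALID_ARGUMENT.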
1594 NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp)
1595 {
1596     NV_STATUS status = NV_OK;
1597     uvm_tools_event_tracker_t *event_tracker;
1598 
1599     event_tracker = nv_kmem_cache_zalloc(g_tools_event_tracker_cache, NV_UVM_GFP_FLAGS);
1600     if (event_tracker == NULL)
1601         return NV_ERR_NO_MEMORY;
1602 
1603     event_tracker->uvm_file = fget(params->uvmFd);
1604     if (event_tracker->uvm_file == NULL) {
1605         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
1606         goto fail;
1607     }
1608 
1609     if (!uvm_file_is_nvidia_uvm(event_tracker->uvm_file)) {
1610         fput(event_tracker->uvm_file);
1611         event_tracker->uvm_file = NULL;
1612         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
1613         goto fail;
1614     }
1615 
1616     if (!uvm_fd_va_space(event_tracker->uvm_file)) {
1617         fput(event_tracker->uvm_file);
1618         event_tracker->uvm_file = NULL;
1619         status = NV_ERR_ILLEGAL_ACTION;
1620         goto fail;
1621     }
1622 
1623     event_tracker->is_queue = params->queueBufferSize != 0;
1624     if (event_tracker->is_queue) {
1625         uvm_tools_queue_t *queue = &event_tracker->queue;
1626         uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
1627         init_waitqueue_head(&queue->wait_queue);
1628 
1629         if (params->queueBufferSize > UINT_MAX) {
1630             status = NV_ERR_INVALID_ARGUMENT;
1631             goto fail;
1632         }
1633 
1634         queue->queue_buffer_count = (NvU32)params->queueBufferSize;
1635         queue->notification_threshold = queue->queue_buffer_count / 2;
1636 
        // queue_buffer_count must be a power of 2 and at least 2
1638         if (!is_power_of_2(queue->queue_buffer_count) || queue->queue_buffer_count < 2) {
1639             status = NV_ERR_INVALID_ARGUMENT;
1640             goto fail;
1641         }
1642 
1643         status = map_user_pages(params->queueBuffer,
1644                                 queue->queue_buffer_count * sizeof(UvmEventEntry),
1645                                 (void **)&queue->queue,
1646                                 &queue->queue_buffer_pages);
1647         if (status != NV_OK)
1648             goto fail;
1649 
1650         status = map_user_pages(params->controlBuffer,
1651                                 sizeof(UvmToolsEventControlData),
1652                                 (void **)&queue->control,
1653                                 &queue->control_buffer_pages);
1654 
1655         if (status != NV_OK)
1656             goto fail;
1657     }
1658     else {
1659         uvm_tools_counter_t *counter = &event_tracker->counter;
1660         counter->all_processors = params->allProcessors;
1661         counter->processor = params->processor;
1662         status = map_user_pages(params->controlBuffer,
1663                                 sizeof(NvU64) * UVM_TOTAL_COUNTERS,
1664                                 (void **)&counter->counters,
1665                                 &counter->counter_buffer_pages);
1666         if (status != NV_OK)
1667             goto fail;
1668     }
1669 
1670     if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
1671         status = NV_ERR_INVALID_ARGUMENT;
1672         goto fail;
1673     }
1674 
1675     return NV_OK;
1676 
1677 fail:
1678     destroy_event_tracker(event_tracker);
1679     return status;
1680 }
1681 
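// Changing the threshold can immediately satisfy the wakeup condition for
// clients waiting on the queue's wait_queue (e.g. via poll), so the queue
// indices are re-sampled and waiters are woken if needed while the queue lock
// is held.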
1682 NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp)
1683 {
1684     UvmToolsEventControlData *ctrl;
1685     uvm_tools_queue_snapshot_t sn;
1686     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1687 
1688     if (!tracker_is_queue(event_tracker))
1689         return NV_ERR_INVALID_ARGUMENT;
1690 
1691     uvm_spin_lock(&event_tracker->queue.lock);
1692 
1693     event_tracker->queue.notification_threshold = params->notificationThreshold;
1694 
1695     ctrl = event_tracker->queue.control;
1696     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
1697     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
1698 
1699     if (queue_needs_wakeup(&event_tracker->queue, &sn))
1700         wake_up_all(&event_tracker->queue.wait_queue);
1701 
1702     uvm_spin_unlock(&event_tracker->queue.lock);
1703 
1704     return NV_OK;
1705 }
1706 
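// Register or unregister the fault and migration perf event callbacks
// depending on whether tools currently need them (see
// tools_is_fault_callback_needed and tools_is_migration_callback_needed).
// Called with the perf_events and tools locks held in write mode.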
1707 static NV_STATUS tools_update_perf_events_callbacks(uvm_va_space_t *va_space)
1708 {
1709     NV_STATUS status;
1710 
1711     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
1712     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
1713 
1714     if (tools_is_fault_callback_needed(va_space)) {
1715         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
1716             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
1717                                                              UVM_PERF_EVENT_FAULT,
1718                                                              uvm_tools_record_fault);
1719 
1720             if (status != NV_OK)
1721                 return status;
1722         }
1723     }
1724     else {
1725         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
1726             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
1727                                                       UVM_PERF_EVENT_FAULT,
1728                                                       uvm_tools_record_fault);
1729         }
1730     }
1731 
1732     if (tools_is_migration_callback_needed(va_space)) {
1733         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
1734             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
1735                                                              UVM_PERF_EVENT_MIGRATION,
1736                                                              uvm_tools_record_migration);
1737 
1738             if (status != NV_OK)
1739                 return status;
1740         }
1741     }
1742     else {
1743         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
1744             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
1745                                                       UVM_PERF_EVENT_MIGRATION,
1746                                                       uvm_tools_record_migration);
1747         }
1748     }
1749 
1750     return NV_OK;
1751 }
1752 
1753 static NV_STATUS tools_update_status(uvm_va_space_t *va_space)
1754 {
1755     NV_STATUS status;
1756     bool should_be_enabled;
1757     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
1758     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
1759     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
1760 
1761     status = tools_update_perf_events_callbacks(va_space);
1762     if (status != NV_OK)
1763         return status;
1764 
1765     should_be_enabled = tools_are_enabled(va_space);
1766     if (should_be_enabled != va_space->tools.enabled) {
1767         if (should_be_enabled)
1768             list_add(&va_space->tools.node, &g_tools_va_space_list);
1769         else
1770             list_del(&va_space->tools.node);
1771 
1772         va_space->tools.enabled = should_be_enabled;
1773     }
1774 
1775     return NV_OK;
1776 }
1777 
1778 #define EVENT_FLAGS_BITS (sizeof(NvU64) * 8)
1779 
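// Validate an event type mask supplied from user space. Bits 1 through
// UvmEventNumTypes-1 are always valid; the test-only event bits are accepted
// only when the module is loaded with uvm_enable_builtin_tests=1.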
1780 static bool mask_contains_invalid_events(NvU64 event_flags)
1781 {
1782     const unsigned long *event_mask = (const unsigned long *)&event_flags;
1783     DECLARE_BITMAP(helper_mask, EVENT_FLAGS_BITS);
1784     DECLARE_BITMAP(valid_events_mask, EVENT_FLAGS_BITS);
1785     DECLARE_BITMAP(tests_events_mask, EVENT_FLAGS_BITS);
1786 
1787     bitmap_zero(tests_events_mask, EVENT_FLAGS_BITS);
1788     bitmap_set(tests_events_mask,
1789                UvmEventTestTypesFirst,
1790                UvmEventTestTypesLast - UvmEventTestTypesFirst + 1);
1791 
1792     bitmap_zero(valid_events_mask, EVENT_FLAGS_BITS);
1793     bitmap_set(valid_events_mask, 1, UvmEventNumTypes - 1);
1794 
1795     if (uvm_enable_builtin_tests)
1796         bitmap_or(valid_events_mask, valid_events_mask, tests_events_mask, EVENT_FLAGS_BITS);
1797 
    // Make sure that test event IDs do not overlap with regular events
1799     BUILD_BUG_ON(UvmEventTestTypesFirst < UvmEventNumTypes);
1800     BUILD_BUG_ON(UvmEventTestTypesFirst > UvmEventTestTypesLast);
1801     BUILD_BUG_ON(UvmEventTestTypesLast >= UvmEventNumTypesAll);
1802 
1803     // Make sure that no test event ever changes the size of UvmEventEntry
1804     BUILD_BUG_ON(sizeof(((UvmEventEntry *)NULL)->testEventData) >
1805                  sizeof(((UvmEventEntry *)NULL)->eventData));
1806     BUILD_BUG_ON(UvmEventNumTypesAll > EVENT_FLAGS_BITS);
1807 
1808     if (!bitmap_andnot(helper_mask, event_mask, valid_events_mask, EVENT_FLAGS_BITS))
1809         return false;
1810 
1811     if (!uvm_enable_builtin_tests && bitmap_and(helper_mask, event_mask, tests_events_mask, EVENT_FLAGS_BITS))
1812         UVM_INFO_PRINT("Event index not found. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
1813 
1814     return true;
1815 }
1816 
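// Subscribing and unsubscribing always take g_tools_va_space_list_lock, the
// VA space perf_events lock and the tools lock, in that order and in write
// mode, so that tools_update_status() can safely update the callback
// registrations and the global VA space list.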
1817 NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp)
1818 {
1819     uvm_va_space_t *va_space;
1820     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1821     NV_STATUS status = NV_OK;
1822     NvU64 inserted_lists;
1823 
1824     if (!tracker_is_queue(event_tracker))
1825         return NV_ERR_INVALID_ARGUMENT;
1826 
1827     if (mask_contains_invalid_events(params->eventTypeFlags))
1828         return NV_ERR_INVALID_ARGUMENT;
1829 
1830     va_space = tools_event_tracker_va_space(event_tracker);
1831 
1832     uvm_down_write(&g_tools_va_space_list_lock);
1833     uvm_down_write(&va_space->perf_events.lock);
1834     uvm_down_write(&va_space->tools.lock);
1835 
1836     insert_event_tracker(va_space,
1837                          event_tracker->queue.queue_nodes,
1838                          UvmEventNumTypesAll,
1839                          params->eventTypeFlags,
1840                          &event_tracker->queue.subscribed_queues,
1841                          va_space->tools.queues,
1842                          &inserted_lists);
1843 
1844     // perform any necessary registration
1845     status = tools_update_status(va_space);
1846     if (status != NV_OK) {
        // On error, remove the queue nodes that were just inserted
        remove_event_tracker(va_space,
                             event_tracker->queue.queue_nodes,
                             UvmEventNumTypesAll,
                             inserted_lists,
                             &event_tracker->queue.subscribed_queues);
1853     }
1854 
1855     uvm_up_write(&va_space->tools.lock);
1856     uvm_up_write(&va_space->perf_events.lock);
1857     uvm_up_write(&g_tools_va_space_list_lock);
1858 
1859     return status;
1860 }
1861 
1862 NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp)
1863 {
1864     NV_STATUS status;
1865     uvm_va_space_t *va_space;
1866     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1867 
1868     if (!tracker_is_queue(event_tracker))
1869         return NV_ERR_INVALID_ARGUMENT;
1870 
1871     va_space = tools_event_tracker_va_space(event_tracker);
1872 
1873     uvm_down_write(&g_tools_va_space_list_lock);
1874     uvm_down_write(&va_space->perf_events.lock);
1875     uvm_down_write(&va_space->tools.lock);
1876     remove_event_tracker(va_space,
1877                          event_tracker->queue.queue_nodes,
1878                          UvmEventNumTypesAll,
1879                          params->eventTypeFlags,
1880                          &event_tracker->queue.subscribed_queues);
1881 
1882     // de-registration should not fail
1883     status = tools_update_status(va_space);
1884     UVM_ASSERT(status == NV_OK);
1885 
1886     uvm_up_write(&va_space->tools.lock);
1887     uvm_up_write(&va_space->perf_events.lock);
1888     uvm_up_write(&g_tools_va_space_list_lock);
1889     return NV_OK;
1890 }
1891 
1892 NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp)
1893 {
1894     uvm_va_space_t *va_space;
1895     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1896     NV_STATUS status = NV_OK;
1897     NvU64 inserted_lists;
1898 
1899     if (!tracker_is_counter(event_tracker))
1900         return NV_ERR_INVALID_ARGUMENT;
1901 
1902     va_space = tools_event_tracker_va_space(event_tracker);
1903 
1904     uvm_down_write(&g_tools_va_space_list_lock);
1905     uvm_down_write(&va_space->perf_events.lock);
1906     uvm_down_write(&va_space->tools.lock);
1907 
1908     insert_event_tracker(va_space,
1909                          event_tracker->counter.counter_nodes,
1910                          UVM_TOTAL_COUNTERS,
1911                          params->counterTypeFlags,
1912                          &event_tracker->counter.subscribed_counters,
1913                          va_space->tools.counters,
1914                          &inserted_lists);
1915 
1916     // perform any necessary registration
1917     status = tools_update_status(va_space);
1918     if (status != NV_OK) {
1919         remove_event_tracker(va_space,
1920                              event_tracker->counter.counter_nodes,
1921                              UVM_TOTAL_COUNTERS,
1922                              inserted_lists,
1923                              &event_tracker->counter.subscribed_counters);
1924     }
1925 
1926     uvm_up_write(&va_space->tools.lock);
1927     uvm_up_write(&va_space->perf_events.lock);
1928     uvm_up_write(&g_tools_va_space_list_lock);
1929 
1930     return status;
1931 }
1932 
1933 NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp)
1934 {
1935     NV_STATUS status;
1936     uvm_va_space_t *va_space;
1937     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
1938 
1939     if (!tracker_is_counter(event_tracker))
1940         return NV_ERR_INVALID_ARGUMENT;
1941 
1942     va_space = tools_event_tracker_va_space(event_tracker);
1943 
1944     uvm_down_write(&g_tools_va_space_list_lock);
1945     uvm_down_write(&va_space->perf_events.lock);
1946     uvm_down_write(&va_space->tools.lock);
1947     remove_event_tracker(va_space,
1948                          event_tracker->counter.counter_nodes,
1949                          UVM_TOTAL_COUNTERS,
1950                          params->counterTypeFlags,
1951                          &event_tracker->counter.subscribed_counters);
1952 
1953     // de-registration should not fail
1954     status = tools_update_status(va_space);
1955     UVM_ASSERT(status == NV_OK);
1956 
1957     uvm_up_write(&va_space->tools.lock);
1958     uvm_up_write(&va_space->perf_events.lock);
1959     uvm_up_write(&g_tools_va_space_list_lock);
1960 
1961     return NV_OK;
1962 }
1963 
1964 static NV_STATUS tools_access_va_block(uvm_va_block_t *va_block,
1965                                        uvm_va_block_context_t *block_context,
1966                                        NvU64 target_va,
1967                                        NvU64 size,
1968                                        bool is_write,
1969                                        uvm_mem_t *stage_mem)
1970 {
1971     if (is_write) {
1972         return UVM_VA_BLOCK_LOCK_RETRY(va_block,
1973                                        NULL,
1974                                        uvm_va_block_write_from_cpu(va_block, block_context, target_va, stage_mem, size));
1975     }
1976     else {
1977         return UVM_VA_BLOCK_LOCK_RETRY(va_block,
1978                                        NULL,
1979                                        uvm_va_block_read_to_cpu(va_block, stage_mem, target_va, size));
1980 
1981     }
1982 }
1983 
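// Copy data between a user buffer and managed memory in the given VA space
// through a single staging page. Each loop iteration handles at most one
// page, maps the staging memory on every GPU registered in the VA space,
// performs the block access, and checks for ECC errors before moving on.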
1984 static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
1985                                              NvU64 target_va,
1986                                              NvU64 size,
1987                                              NvU64 user_va,
1988                                              NvU64 *bytes,
1989                                              bool is_write)
1990 {
1991     NV_STATUS status;
1992     uvm_mem_t *stage_mem = NULL;
1993     void *stage_addr;
1994     uvm_global_processor_mask_t *retained_global_gpus = NULL;
1995     uvm_global_processor_mask_t *global_gpus = NULL;
1996     uvm_va_block_context_t *block_context = NULL;
1997     struct mm_struct *mm = NULL;
1998 
1999     retained_global_gpus = uvm_kvmalloc(sizeof(*retained_global_gpus));
2000     if (retained_global_gpus == NULL)
2001         return NV_ERR_NO_MEMORY;
2002 
2003     uvm_global_processor_mask_zero(retained_global_gpus);
2004 
2005     global_gpus = uvm_kvmalloc(sizeof(*global_gpus));
2006     if (global_gpus == NULL) {
2007         status = NV_ERR_NO_MEMORY;
2008         goto exit;
2009     }
2010 
2011     mm = uvm_va_space_mm_or_current_retain(va_space);
2012 
2013     status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, mm, &stage_mem);
2014     if (status != NV_OK)
2015         goto exit;
2016 
2017     block_context = uvm_va_block_context_alloc(mm);
2018     if (!block_context) {
2019         status = NV_ERR_NO_MEMORY;
2020         goto exit;
2021     }
2022 
2023     stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem);
2024     *bytes = 0;
2025 
2026     while (*bytes < size) {
2027         uvm_gpu_t *gpu;
2028         uvm_va_block_t *block;
2029         void *user_va_start = (void *) (user_va + *bytes);
2030         NvU64 target_va_start = target_va + *bytes;
2031         NvU64 bytes_left = size - *bytes;
2032         NvU64 page_offset = target_va_start & (PAGE_SIZE - 1);
2033         NvU64 bytes_now = min(bytes_left, (NvU64)(PAGE_SIZE - page_offset));
2034 
2035         if (is_write) {
2036             NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now);
            if (remaining != 0) {
2038                 status = NV_ERR_INVALID_ARGUMENT;
2039                 goto exit;
2040             }
2041         }
2042 
2043         if (mm)
2044             uvm_down_read_mmap_lock(mm);
2045 
2046         // The RM flavor of the lock is needed to perform ECC checks.
2047         uvm_va_space_down_read_rm(va_space);
2048         status = uvm_va_block_find_create(va_space, UVM_ALIGN_DOWN(target_va_start, PAGE_SIZE), block_context, &block);
2049         if (status != NV_OK) {
2050             uvm_va_space_up_read_rm(va_space);
2051             if (mm)
2052                 uvm_up_read_mmap_lock(mm);
2053             goto exit;
2054         }
2055 
2056         uvm_va_space_global_gpus(va_space, global_gpus);
2057 
2058         for_each_global_gpu_in_mask(gpu, global_gpus) {
2059             if (uvm_global_processor_mask_test_and_set(retained_global_gpus, gpu->global_id))
2060                 continue;
2061 
            // Retaining each GPU ensures that it cannot be unregistered
            // before the staging memory mapped on it is freed. Each GPU is
            // retained only once.
2065             uvm_gpu_retain(gpu);
2066 
2067             // Accessing the VA block may result in copying data between the CPU
2068             // and a GPU. Conservatively add virtual mappings to all the GPUs
2069             // (even if those mappings may never be used) as tools read/write is
2070             // not on a performance critical path.
2071             status = uvm_mem_map_gpu_kernel(stage_mem, gpu);
2072             if (status != NV_OK) {
2073                 uvm_va_space_up_read_rm(va_space);
2074                 if (mm)
2075                     uvm_up_read_mmap_lock(mm);
2076                 goto exit;
2077             }
2078         }
2079 
        // Make sure a CPU-resident page has an up-to-date struct page pointer.
2081         if (uvm_va_block_is_hmm(block)) {
2082             status = uvm_hmm_va_block_update_residency_info(block,
2083                                                             mm,
2084                                                             UVM_ALIGN_DOWN(target_va_start, PAGE_SIZE),
2085                                                             true);
2086             if (status != NV_OK) {
2087                 uvm_va_space_up_read_rm(va_space);
2088                 if (mm)
2089                     uvm_up_read_mmap_lock(mm);
2090                 goto exit;
2091             }
2092         }
2093 
2094         status = tools_access_va_block(block, block_context, target_va_start, bytes_now, is_write, stage_mem);
2095 
2096         // For simplicity, check for ECC errors on all GPUs registered in the VA
2097         // space
2098         if (status == NV_OK)
2099             status = uvm_global_mask_check_ecc_error(global_gpus);
2100 
2101         uvm_va_space_up_read_rm(va_space);
2102         if (mm)
2103             uvm_up_read_mmap_lock(mm);
2104 
2105         if (status != NV_OK)
2106             goto exit;
2107 
2108         if (!is_write) {
2109             NvU64 remaining;
2110 
2111             // Prevent processor speculation prior to accessing user-mapped
2112             // memory to avoid leaking information from side-channel attacks.
2113             // Under speculation, a valid VA range which does not contain
2114             // target_va could be used, and the block index could run off the
2115             // end of the array. Information about the state of that kernel
2116             // memory could be inferred if speculative execution gets to the
2117             // point where the data is copied out.
2118             nv_speculation_barrier();
2119 
2120             remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now);
2121             if (remaining > 0) {
2122                 status = NV_ERR_INVALID_ARGUMENT;
2123                 goto exit;
2124             }
2125         }
2126 
2127         *bytes += bytes_now;
2128     }
2129 
2130 exit:
2131     uvm_va_block_context_free(block_context);
2132 
2133     uvm_mem_free(stage_mem);
2134 
2135     uvm_global_mask_release(retained_global_gpus);
2136 
2137     uvm_va_space_mm_or_current_release(va_space, mm);
2138 
2139     uvm_kvfree(global_gpus);
2140     uvm_kvfree(retained_global_gpus);
2141 
2142     return status;
2143 }
2144 
2145 NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp)
2146 {
2147     return tools_access_process_memory(uvm_va_space_get(filp),
2148                                        params->targetVa,
2149                                        params->size,
2150                                        params->buffer,
2151                                        &params->bytesRead,
2152                                        false);
2153 }
2154 
2155 NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp)
2156 {
2157     return tools_access_process_memory(uvm_va_space_get(filp),
2158                                        params->targetVa,
2159                                        params->size,
2160                                        params->buffer,
2161                                        &params->bytesWritten,
2162                                        true);
2163 }
2164 
2165 NV_STATUS uvm_test_inject_tools_event(UVM_TEST_INJECT_TOOLS_EVENT_PARAMS *params, struct file *filp)
2166 {
2167     NvU32 i;
2168     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2169 
2170     if (params->entry.eventData.eventType >= UvmEventNumTypesAll)
2171         return NV_ERR_INVALID_ARGUMENT;
2172 
2173     uvm_down_read(&va_space->tools.lock);
2174     for (i = 0; i < params->count; i++)
2175         uvm_tools_record_event(va_space, &params->entry);
2176     uvm_up_read(&va_space->tools.lock);
2177     return NV_OK;
2178 }
2179 
2180 NV_STATUS uvm_test_increment_tools_counter(UVM_TEST_INCREMENT_TOOLS_COUNTER_PARAMS *params, struct file *filp)
2181 {
2182     NvU32 i;
2183     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2184 
2185     if (params->counter >= UVM_TOTAL_COUNTERS)
2186         return NV_ERR_INVALID_ARGUMENT;
2187 
2188     uvm_down_read(&va_space->tools.lock);
2189     for (i = 0; i < params->count; i++)
2190         uvm_tools_inc_counter(va_space, params->counter, params->amount, &params->processor);
2191     uvm_up_read(&va_space->tools.lock);
2192 
2193     return NV_OK;
2194 }
2195 
2196 NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS *params, struct file *filp)
2197 {
2198     NvProcessorUuid *uuids;
2199     NvU64 remaining;
2200     uvm_gpu_t *gpu;
2201     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2202 
2203     uuids = uvm_kvmalloc_zero(sizeof(NvProcessorUuid) * UVM_ID_MAX_PROCESSORS);
2204     if (uuids == NULL)
2205         return NV_ERR_NO_MEMORY;
2206 
2207     uvm_processor_uuid_copy(&uuids[UVM_ID_CPU_VALUE], &NV_PROCESSOR_UUID_CPU_DEFAULT);
2208     params->count = 1;
2209 
2210     uvm_va_space_down_read(va_space);
2211     for_each_va_space_gpu(gpu, va_space) {
2212         uvm_processor_uuid_copy(&uuids[uvm_id_value(gpu->id)], uvm_gpu_uuid(gpu));
2213         if (uvm_id_value(gpu->id) + 1 > params->count)
2214             params->count = uvm_id_value(gpu->id) + 1;
2215     }
2216     uvm_va_space_up_read(va_space);
2217 
2218     remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * params->count);
2219     uvm_kvfree(uuids);
2220 
2221     if (remaining != 0)
2222         return NV_ERR_INVALID_ADDRESS;
2223 
2224     return NV_OK;
2225 }
2226 
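// Flush all pending event notifications: force completion processing on every
// tracked channel and then drain the g_tools_queue worker so that any queued
// notification work items have finished running.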
2227 void uvm_tools_flush_events(void)
2228 {
2229     tools_schedule_completed_events();
2230 
2231     nv_kthread_q_flush(&g_tools_queue);
2232 }
2233 
2234 NV_STATUS uvm_api_tools_flush_events(UVM_TOOLS_FLUSH_EVENTS_PARAMS *params, struct file *filp)
2235 {
2236     uvm_tools_flush_events();
2237     return NV_OK;
2238 }
2239 
2240 NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp)
2241 {
2242     NV_STATUS status = NV_OK;
2243     uvm_gpu_t *gpu = NULL;
2244     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2245 
2246     gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpuUuid);
2247     if (!gpu)
2248         return NV_ERR_INVALID_DEVICE;
2249 
2250     // Wait for register-based fault clears to queue the replay event
2251     if (!gpu->parent->has_clear_faulted_channel_method) {
2252         uvm_gpu_non_replayable_faults_isr_lock(gpu->parent);
2253         uvm_gpu_non_replayable_faults_isr_unlock(gpu->parent);
2254     }
2255 
2256     // Wait for pending fault replay methods to complete (replayable faults on
2257     // all GPUs, and non-replayable faults on method-based GPUs).
2258     status = uvm_channel_manager_wait(gpu->channel_manager);
2259 
2260     // Flush any pending events even if (status != NV_OK)
2261     uvm_tools_flush_events();
2262     uvm_gpu_release(gpu);
2263 
2264     return status;
2265 }
2266 
2267 static const struct file_operations uvm_tools_fops =
2268 {
2269     .open            = uvm_tools_open_entry,
2270     .release         = uvm_tools_release_entry,
2271     .unlocked_ioctl  = uvm_tools_unlocked_ioctl_entry,
2272 #if NVCPU_IS_X86_64
2273     .compat_ioctl    = uvm_tools_unlocked_ioctl_entry,
2274 #endif
2275     .poll            = uvm_tools_poll_entry,
2276     .owner           = THIS_MODULE,
2277 };
2278 
2279 static void _uvm_tools_destroy_cache_all(void)
2280 {
    // The cache pointers are initialized to NULL, so it is safe to call
    // destroy on all of them.
2283     kmem_cache_destroy_safe(&g_tools_event_tracker_cache);
2284     kmem_cache_destroy_safe(&g_tools_block_migration_data_cache);
2285     kmem_cache_destroy_safe(&g_tools_migration_data_cache);
2286     kmem_cache_destroy_safe(&g_tools_replay_data_cache);
2287     kmem_cache_destroy_safe(&g_tools_block_map_remote_data_cache);
2288     kmem_cache_destroy_safe(&g_tools_map_remote_data_cache);
2289 }
2290 
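// Module initialization: create the kmem caches used by the deferred event
// paths, start the g_tools_queue worker thread and register the
// nvidia-uvm-tools character device. Any failure unwinds in reverse order.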
2291 int uvm_tools_init(dev_t uvm_base_dev)
2292 {
2293     dev_t uvm_tools_dev = MKDEV(MAJOR(uvm_base_dev), NVIDIA_UVM_TOOLS_MINOR_NUMBER);
2294     int ret = -ENOMEM; // This will be updated later if allocations succeed
2295 
2296     uvm_init_rwsem(&g_tools_va_space_list_lock, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
2297 
2298     g_tools_event_tracker_cache = NV_KMEM_CACHE_CREATE("uvm_tools_event_tracker_t",
2299                                                         uvm_tools_event_tracker_t);
2300     if (!g_tools_event_tracker_cache)
2301         goto err_cache_destroy;
2302 
2303     g_tools_block_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_migration_data_t",
2304                                                               block_migration_data_t);
2305     if (!g_tools_block_migration_data_cache)
2306         goto err_cache_destroy;
2307 
2308     g_tools_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_migration_data_t",
2309                                                         migration_data_t);
2310     if (!g_tools_migration_data_cache)
2311         goto err_cache_destroy;
2312 
2313     g_tools_replay_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_replay_data_t",
2314                                                      replay_data_t);
2315     if (!g_tools_replay_data_cache)
2316         goto err_cache_destroy;
2317 
2318     g_tools_block_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_map_remote_data_t",
2319                                                                block_map_remote_data_t);
2320     if (!g_tools_block_map_remote_data_cache)
2321         goto err_cache_destroy;
2322 
2323     g_tools_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_map_remote_data_t",
2324                                                          map_remote_data_t);
2325     if (!g_tools_map_remote_data_cache)
2326         goto err_cache_destroy;
2327 
2328     uvm_spin_lock_init(&g_tools_channel_list_lock, UVM_LOCK_ORDER_LEAF);
2329 
2330     ret = nv_kthread_q_init(&g_tools_queue, "UVM Tools Event Queue");
2331     if (ret < 0)
2332         goto err_cache_destroy;
2333 
2334     uvm_init_character_device(&g_uvm_tools_cdev, &uvm_tools_fops);
2335     ret = cdev_add(&g_uvm_tools_cdev, uvm_tools_dev, 1);
2336     if (ret != 0) {
2337         UVM_ERR_PRINT("cdev_add (major %u, minor %u) failed: %d\n", MAJOR(uvm_tools_dev),
2338                       MINOR(uvm_tools_dev), ret);
2339         goto err_stop_thread;
2340     }
2341 
2342     return ret;
2343 
2344 err_stop_thread:
2345     nv_kthread_q_stop(&g_tools_queue);
2346 
2347 err_cache_destroy:
2348     _uvm_tools_destroy_cache_all();
2349     return ret;
2350 }
2351 
2352 void uvm_tools_exit(void)
2353 {
2354     unsigned i;
2355     cdev_del(&g_uvm_tools_cdev);
2356 
2357     nv_kthread_q_stop(&g_tools_queue);
2358 
2359     for (i = 0; i < UvmEventNumTypesAll; ++i)
2360         UVM_ASSERT(g_tools_enabled_event_count[i] == 0);
2361 
2362     UVM_ASSERT(list_empty(&g_tools_va_space_list));
2363 
2364     _uvm_tools_destroy_cache_all();
2365 }
2366