1 /*******************************************************************************
2     Copyright (c) 2016-2023 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 #include "uvm_common.h"
24 #include "uvm_ioctl.h"
25 #include "uvm_global.h"
26 #include "uvm_gpu.h"
27 #include "uvm_hal.h"
28 #include "uvm_tools.h"
29 #include "uvm_va_space.h"
30 #include "uvm_api.h"
31 #include "uvm_hal_types.h"
32 #include "uvm_va_block.h"
33 #include "uvm_va_range.h"
34 #include "uvm_push.h"
35 #include "uvm_forward_decl.h"
36 #include "uvm_range_group.h"
37 #include "uvm_mem.h"
38 #include "nv_speculation_barrier.h"
39 
40 // We limit the number of times a page can be retained by the kernel
41 // to prevent the user from maliciously passing UVM tools the same page
42 // over and over again in an attempt to overflow the refcount.
43 #define MAX_PAGE_COUNT (1 << 20)
44 
45 typedef struct
46 {
47     NvU32 get_ahead;
48     NvU32 get_behind;
49     NvU32 put_ahead;
50     NvU32 put_behind;
51 } uvm_tools_queue_snapshot_t;
52 
53 typedef struct
54 {
55     uvm_spinlock_t lock;
56     NvU64 subscribed_queues;
57     struct list_head queue_nodes[UvmEventNumTypesAll];
58 
59     struct page **queue_buffer_pages;
60     union
61     {
62         UvmEventEntry_V1 *queue_v1;
63         UvmEventEntry_V2 *queue_v2;
64     };
65     NvU32 queue_buffer_count;
66     NvU32 notification_threshold;
67 
68     struct page **control_buffer_pages;
69     union
70     {
71         UvmToolsEventControlData_V1 *control_v1;
72         UvmToolsEventControlData_V2 *control_v2;
73     };
74 
75     wait_queue_head_t wait_queue;
76     bool is_wakeup_get_valid;
77     NvU32 wakeup_get;
78 } uvm_tools_queue_t;
79 
80 typedef struct
81 {
82     struct list_head counter_nodes[UVM_TOTAL_COUNTERS];
83     NvU64 subscribed_counters;
84 
85     struct page **counter_buffer_pages;
86     NvU64 *counters;
87 
88     bool all_processors;
89     NvProcessorUuid processor;
90 } uvm_tools_counter_t;
91 
92 // private_data for /dev/nvidia-uvm-tools
93 typedef struct
94 {
95     NvU32 version;
96     bool is_queue;
97     struct file *uvm_file;
98     union
99     {
100         uvm_tools_queue_t queue;
101         uvm_tools_counter_t counter;
102     };
103 } uvm_tools_event_tracker_t;
104 
105 // Delayed events
106 //
107 // Events that require gpu timestamps for asynchronous operations use a delayed
108 // notification mechanism. Each event type registers a callback that is invoked
109 // from the update_progress channel routines. The callback then enqueues a
110 // work item that takes care of notifying the events. This module keeps a
111 // global list of channels with pending events. Other modules or user apps (via
112 // ioctl) may call uvm_tools_flush_events to update the progress of the channels
113 // in the list, as needed.
114 //
115 // User apps will need to flush events before removing gpus to avoid getting
116 // events with gpu ids that have been removed.
117 
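// Illustrative sketch (not driver code): the shape of a delayed-event
// completion callback as described above. Names prefixed with example_ are
// hypothetical; the real callbacks below (e.g. on_replay_complete) follow the
// same pattern: read the GPU-written timestamps, then defer dispatch to
// g_tools_queue because the va_space lock may not be safe to take in the
// completion path.
#if 0
static void example_on_work_complete(void *ptr)
{
    example_delayed_event_t *data = (example_delayed_event_t *)ptr;

    // The work is complete, so the GPU-written timestamp is now valid.
    data->timestamp_gpu = *data->timestamp_gpu_addr;

    // Defer the actual event dispatch to the tools worker queue.
    nv_kthread_q_item_init(&data->queue_item, example_record_events_entry, data);

    uvm_spin_lock(&g_tools_channel_list_lock);
    remove_pending_event_for_channel(data->channel);
    nv_kthread_q_schedule_q_item(&g_tools_queue, &data->queue_item);
    uvm_spin_unlock(&g_tools_channel_list_lock);
}
#endif
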
118 // This object describes the pending migration operations within a VA block
119 typedef struct
120 {
121     nv_kthread_q_item_t queue_item;
122     uvm_processor_id_t dst;
123     uvm_processor_id_t src;
124     uvm_va_space_t *va_space;
125 
126     uvm_channel_t *channel;
127     struct list_head events;
128     NvU64 start_timestamp_cpu;
129     NvU64 end_timestamp_cpu;
130     NvU64 *start_timestamp_gpu_addr;
131     NvU64 start_timestamp_gpu;
132     NvU64 range_group_id;
133 } block_migration_data_t;
134 
135 // This object represents a specific pending migration within a VA block
136 typedef struct
137 {
138     struct list_head events_node;
139     NvU64 bytes;
140     NvU64 address;
141     NvU64 *end_timestamp_gpu_addr;
142     NvU64 end_timestamp_gpu;
143     UvmEventMigrationCause cause;
144 } migration_data_t;
145 
146 // This object represents a pending gpu fault replay operation
147 typedef struct
148 {
149     nv_kthread_q_item_t queue_item;
150     uvm_channel_t *channel;
151     uvm_gpu_id_t gpu_id;
152     NvU32 batch_id;
153     uvm_fault_client_type_t client_type;
154     NvU64 timestamp;
155     NvU64 timestamp_gpu;
156     NvU64 *timestamp_gpu_addr;
157 } replay_data_t;
158 
159 // This object describes the pending map remote operations within a VA block
160 typedef struct
161 {
162     nv_kthread_q_item_t queue_item;
163     uvm_processor_id_t src;
164     uvm_processor_id_t dst;
165     UvmEventMapRemoteCause cause;
166     NvU64 timestamp;
167     uvm_va_space_t *va_space;
168 
169     uvm_channel_t *channel;
170     struct list_head events;
171 } block_map_remote_data_t;
172 
173 // This object represents a pending map remote operation
174 typedef struct
175 {
176     struct list_head events_node;
177 
178     NvU64 address;
179     NvU64 size;
180     NvU64 timestamp_gpu;
181     NvU64 *timestamp_gpu_addr;
182 } map_remote_data_t;
183 
184 
185 static struct cdev g_uvm_tools_cdev;
186 static LIST_HEAD(g_tools_va_space_list);
187 static NvU32 g_tools_enabled_event_count[UvmEventNumTypesAll];
188 static uvm_rw_semaphore_t g_tools_va_space_list_lock;
189 static struct kmem_cache *g_tools_event_tracker_cache __read_mostly = NULL;
190 static struct kmem_cache *g_tools_block_migration_data_cache __read_mostly = NULL;
191 static struct kmem_cache *g_tools_migration_data_cache __read_mostly = NULL;
192 static struct kmem_cache *g_tools_replay_data_cache __read_mostly = NULL;
193 static struct kmem_cache *g_tools_block_map_remote_data_cache __read_mostly = NULL;
194 static struct kmem_cache *g_tools_map_remote_data_cache __read_mostly = NULL;
195 static uvm_spinlock_t g_tools_channel_list_lock;
196 static LIST_HEAD(g_tools_channel_list);
197 static nv_kthread_q_t g_tools_queue;
198 
199 static NV_STATUS tools_update_status(uvm_va_space_t *va_space);
200 
201 static uvm_tools_event_tracker_t *tools_event_tracker(struct file *filp)
202 {
203     return (uvm_tools_event_tracker_t *)atomic_long_read((atomic_long_t *)&filp->private_data);
204 }
205 
206 static bool tracker_is_queue(uvm_tools_event_tracker_t *event_tracker)
207 {
208     return event_tracker != NULL && event_tracker->is_queue;
209 }
210 
211 static bool tracker_is_counter(uvm_tools_event_tracker_t *event_tracker)
212 {
213     return event_tracker != NULL && !event_tracker->is_queue;
214 }
215 
216 static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *event_tracker)
217 {
218     uvm_va_space_t *va_space;
219     UVM_ASSERT(event_tracker->uvm_file);
220     va_space = uvm_va_space_get(event_tracker->uvm_file);
221     return va_space;
222 }
223 
224 static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)
225 {
226     NvU64 i;
227 
228     for (i = 0; i < page_count; i++) {
229         set_page_dirty(pages[i]);
230         NV_UNPIN_USER_PAGE(pages[i]);
231     }
232 }
233 
234 static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
235 {
236     size = DIV_ROUND_UP(size, PAGE_SIZE);
237     vunmap((NvU8 *)addr);
238     uvm_put_user_pages_dirty(pages, size);
239     uvm_kvfree(pages);
240 }
241 
242 // This must be called with the mmap_lock held in read mode or better.
243 static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
244 {
245     struct vm_area_struct *vma;
246     NvU64 addr = start_va;
247     NvU64 region_end = start_va + size;
248 
249     do {
250         vma = find_vma(mm, addr);
251         if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
252             return NV_ERR_INVALID_ARGUMENT;
253 
254         addr = vma->vm_end;
255     } while (addr < region_end);
256 
257     return NV_OK;
258 }
259 
260 // Maps the virtual memory in [user_va, user_va + size) of the current process into the kernel.
261 // Sets *addr to the kernel mapping and *pages to the array of struct pages that contain the memory.
262 static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
263 {
264     NV_STATUS status = NV_OK;
265     long ret = 0;
266     long num_pages;
267     long i;
268 
269     *addr = NULL;
270     *pages = NULL;
271     num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
272 
273     if (uvm_api_range_invalid(user_va, num_pages * PAGE_SIZE)) {
274         status = NV_ERR_INVALID_ADDRESS;
275         goto fail;
276     }
277 
278     *pages = uvm_kvmalloc(sizeof(struct page *) * num_pages);
279     if (*pages == NULL) {
280         status = NV_ERR_NO_MEMORY;
281         goto fail;
282     }
283 
284     // Although uvm_down_read_mmap_lock() is preferable due to its participation
285     // in the UVM lock dependency tracker, it cannot be used here. That's
286     // because pin_user_pages() may fault in HMM pages which are GPU-resident.
287     // When that happens, the UVM page fault handler would record another
288     // mmap_read_lock() on the same thread as this one, leading to a false
289     // positive lock dependency report.
290     //
291     // Therefore, use the lower level nv_mmap_read_lock() here.
292     nv_mmap_read_lock(current->mm);
293     status = check_vmas(current->mm, user_va, size);
294     if (status != NV_OK) {
295         nv_mmap_read_unlock(current->mm);
296         goto fail;
297     }
298     ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages);
299     nv_mmap_read_unlock(current->mm);
300 
301     if (ret != num_pages) {
302         status = NV_ERR_INVALID_ARGUMENT;
303         goto fail;
304     }
305 
306     for (i = 0; i < num_pages; i++) {
307         if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
308             status = NV_ERR_INVALID_ARGUMENT;
309             goto fail;
310         }
311     }
312 
313     *addr = vmap(*pages, num_pages, VM_MAP, PAGE_KERNEL);
314     // Don't reach the fail path with status still NV_OK if vmap() fails.
315     if (*addr == NULL) { status = NV_ERR_NO_MEMORY; goto fail; }
316 
317     return NV_OK;
318 
319 fail:
320     if (*pages == NULL)
321         return status;
322 
323     if (ret > 0)
324         uvm_put_user_pages_dirty(*pages, ret);
325     else if (ret < 0)
326         status = errno_to_nv_status(ret);
327 
328     uvm_kvfree(*pages);
329     *pages = NULL;
330     return status;
331 }
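
// Usage sketch (an assumption, not driver code): how map_user_pages() and
// unmap_user_pages() above are intended to pair up for a user-supplied
// buffer. example_use_user_buffer is a hypothetical caller.
#if 0
static NV_STATUS example_use_user_buffer(NvU64 user_va, NvU64 size)
{
    void *addr;
    struct page **pages;
    NV_STATUS status = map_user_pages(user_va, size, &addr, &pages);

    if (status != NV_OK)
        return status;

    // ... access the pinned user buffer through the kernel mapping 'addr' ...

    // Marks the pages dirty, unpins them and frees the page array.
    unmap_user_pages(pages, addr, size);
    return NV_OK;
}
#endif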
332 
333 static void insert_event_tracker(uvm_va_space_t *va_space,
334                                  struct list_head *node,
335                                  NvU32 list_count,
336                                  NvU64 list_mask,
337                                  NvU64 *subscribed_mask,
338                                  struct list_head *lists,
339                                  NvU64 *inserted_lists)
340 {
341     NvU32 i;
342     NvU64 insertable_lists = list_mask & ~*subscribed_mask;
343 
344     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
345     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
346 
347     for (i = 0; i < list_count; i++) {
348         if (insertable_lists & (1ULL << i)) {
349             ++g_tools_enabled_event_count[i];
350             list_add(node + i, lists + i);
351         }
352     }
353 
354     *subscribed_mask |= list_mask;
355     *inserted_lists = insertable_lists;
356 }
357 
358 static void remove_event_tracker(uvm_va_space_t *va_space,
359                                  struct list_head *node,
360                                  NvU32 list_count,
361                                  NvU64 list_mask,
362                                  NvU64 *subscribed_mask)
363 {
364     NvU32 i;
365     NvU64 removable_lists = list_mask & *subscribed_mask;
366 
367     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
368     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
369 
370     for (i = 0; i < list_count; i++) {
371         if (removable_lists & (1ULL << i)) {
372             UVM_ASSERT(g_tools_enabled_event_count[i] > 0);
373             --g_tools_enabled_event_count[i];
374             list_del(node + i);
375         }
376     }
377 
378     *subscribed_mask &= ~list_mask;
379 }
380 
381 static bool queue_needs_wakeup(uvm_tools_queue_t *queue, uvm_tools_queue_snapshot_t *sn)
382 {
383     NvU32 queue_mask = queue->queue_buffer_count - 1;
384 
385     uvm_assert_spinlock_locked(&queue->lock);
386     return ((queue->queue_buffer_count + sn->put_behind - sn->get_ahead) & queue_mask) >= queue->notification_threshold;
387 }
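
// Worked example of the ring-buffer arithmetic (illustrative numbers): with
// queue_buffer_count = 8 (queue_mask = 7), get_ahead = 2 and put_behind = 5,
// the number of entries waiting to be consumed is (8 + 5 - 2) & 7 = 3, so a
// notification_threshold of 3 or less requests a wakeup. In the enqueue paths
// below, (queue_size + get_behind - put_behind) & queue_mask is the number of
// free slots modulo the queue size; an entry is dropped when only one free
// slot remains (value 1), keeping one slot empty so that a full queue can be
// distinguished from an empty one.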
388 
389 static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
390 {
391     if (event_tracker->uvm_file != NULL) {
392         NV_STATUS status;
393         uvm_va_space_t *va_space = tools_event_tracker_va_space(event_tracker);
394 
395         uvm_down_write(&g_tools_va_space_list_lock);
396         uvm_down_write(&va_space->perf_events.lock);
397         uvm_down_write(&va_space->tools.lock);
398 
399         if (event_tracker->is_queue) {
400             uvm_tools_queue_t *queue = &event_tracker->queue;
401             NvU64 buffer_size, control_size;
402 
403             if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
404                 buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
405                 control_size = sizeof(UvmToolsEventControlData_V1);
406             }
407             else {
408                 buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
409                 control_size = sizeof(UvmToolsEventControlData_V2);
410             }
411 
412             remove_event_tracker(va_space,
413                                  queue->queue_nodes,
414                                  UvmEventNumTypesAll,
415                                  queue->subscribed_queues,
416                                  &queue->subscribed_queues);
417 
418             if (queue->queue_v2 != NULL) {
419                 unmap_user_pages(queue->queue_buffer_pages,
420                                  queue->queue_v2,
421                                  buffer_size);
422             }
423 
424             if (queue->control_v2 != NULL) {
425                 unmap_user_pages(queue->control_buffer_pages,
426                                  queue->control_v2,
427                                  control_size);
428             }
429         }
430         else {
431             uvm_tools_counter_t *counters = &event_tracker->counter;
432 
433             remove_event_tracker(va_space,
434                                  counters->counter_nodes,
435                                  UVM_TOTAL_COUNTERS,
436                                  counters->subscribed_counters,
437                                  &counters->subscribed_counters);
438 
439             if (counters->counters != NULL) {
440                 unmap_user_pages(counters->counter_buffer_pages,
441                                  counters->counters,
442                                  UVM_TOTAL_COUNTERS * sizeof(NvU64));
443             }
444         }
445 
446         // de-registration should not fail
447         status = tools_update_status(va_space);
448         UVM_ASSERT(status == NV_OK);
449 
450         uvm_up_write(&va_space->tools.lock);
451         uvm_up_write(&va_space->perf_events.lock);
452         uvm_up_write(&g_tools_va_space_list_lock);
453 
454         fput(event_tracker->uvm_file);
455     }
456     kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
457 }
458 
459 static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *queue)
460 {
461     UvmToolsEventControlData_V1 *ctrl = queue->control_v1;
462     uvm_tools_queue_snapshot_t sn;
463     NvU32 queue_size = queue->queue_buffer_count;
464     NvU32 queue_mask = queue_size - 1;
465 
466     // Prevent processor speculation prior to accessing user-mapped memory to
467     // avoid leaking information from side-channel attacks. There are many
468     // possible paths leading to this point and it would be difficult and error-
469     // prone to audit all of them to determine whether user mode could guide
470     // this access to kernel memory under speculative execution, so to be on the
471     // safe side we'll just always block speculation.
472     nv_speculation_barrier();
473 
474     uvm_spin_lock(&queue->lock);
475 
476     // ctrl is mapped into user space with read and write permissions,
477     // so its values cannot be trusted.
478     sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
479     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
480     sn.put_ahead = (sn.put_behind + 1) & queue_mask;
481 
482     // one free element means that the queue is full
483     if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
484         atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
485         goto unlock;
486     }
487 
488     memcpy(queue->queue_v1 + sn.put_behind, entry, sizeof(*entry));
489 
490     sn.put_behind = sn.put_ahead;
491 
492     // put_ahead and put_behind will always be the same outside of queue->lock;
493     // this allows the user-space consumer to choose either a 2 or 4 pointer
494     // synchronization approach.
495     atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
496     atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);
497 
498     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
499 
500     // if the queue needs to be woken up, only signal if we haven't signaled
501     // before for this value of get_ahead.
502     if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
503         queue->is_wakeup_get_valid = true;
504         queue->wakeup_get = sn.get_ahead;
505         wake_up_all(&queue->wait_queue);
506     }
507 
508 unlock:
509     uvm_spin_unlock(&queue->lock);
510 }
511 
512 static void enqueue_event_v2(const UvmEventEntry_V2 *entry, uvm_tools_queue_t *queue)
513 {
514     UvmToolsEventControlData_V2 *ctrl = queue->control_v2;
515     uvm_tools_queue_snapshot_t sn;
516     NvU32 queue_size = queue->queue_buffer_count;
517     NvU32 queue_mask = queue_size - 1;
518 
519     // Prevent processor speculation prior to accessing user-mapped memory to
520     // avoid leaking information from side-channel attacks. There are many
521     // possible paths leading to this point and it would be difficult and error-
522     // prone to audit all of them to determine whether user mode could guide
523     // this access to kernel memory under speculative execution, so to be on the
524     // safe side we'll just always block speculation.
525     nv_speculation_barrier();
526 
527     uvm_spin_lock(&queue->lock);
528 
529     // ctrl is mapped into user space with read and write permissions,
530     // so its values cannot be trusted.
531     sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
532     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
533     sn.put_ahead = (sn.put_behind + 1) & queue_mask;
534 
535     // one free element means that the queue is full
536     if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
537         atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
538         goto unlock;
539     }
540 
541     memcpy(queue->queue_v2 + sn.put_behind, entry, sizeof(*entry));
542 
543     sn.put_behind = sn.put_ahead;
544     // put_ahead and put_behind will always be the same outside of queue->lock;
545     // this allows the user-space consumer to choose either a 2 or 4 pointer synchronization approach.
546     atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
547     atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);
548 
549     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
550     // if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead
551     if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
552         queue->is_wakeup_get_valid = true;
553         queue->wakeup_get = sn.get_ahead;
554         wake_up_all(&queue->wait_queue);
555     }
556 
557 unlock:
558     uvm_spin_unlock(&queue->lock);
559 }
560 
561 static void uvm_tools_record_event_v1(uvm_va_space_t *va_space, const UvmEventEntry_V1 *entry)
562 {
563     NvU8 eventType = entry->eventData.eventType;
564     uvm_tools_queue_t *queue;
565 
566     UVM_ASSERT(eventType < UvmEventNumTypesAll);
567 
568     uvm_assert_rwsem_locked(&va_space->tools.lock);
569 
570     list_for_each_entry(queue, va_space->tools.queues_v1 + eventType, queue_nodes[eventType])
571         enqueue_event_v1(entry, queue);
572 }
573 
574 static void uvm_tools_record_event_v2(uvm_va_space_t *va_space, const UvmEventEntry_V2 *entry)
575 {
576     NvU8 eventType = entry->eventData.eventType;
577     uvm_tools_queue_t *queue;
578 
579     UVM_ASSERT(eventType < UvmEventNumTypesAll);
580 
581     uvm_assert_rwsem_locked(&va_space->tools.lock);
582 
583     list_for_each_entry(queue, va_space->tools.queues_v2 + eventType, queue_nodes[eventType])
584         enqueue_event_v2(entry, queue);
585 }
586 
587 static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
588 {
589     // For compatibility with older counters, CPU faults for memory with a
590     // preferred location are reported for their preferred location as well as
591     // for the CPU device itself.
592     // This check prevents double counting in the aggregate count.
593     if (counter == UvmCounterNameCpuPageFaultCount)
594         return uvm_uuid_eq(processor, &NV_PROCESSOR_UUID_CPU_DEFAULT);
595     return true;
596 }
597 
598 static void uvm_tools_inc_counter(uvm_va_space_t *va_space,
599                                   UvmCounterName counter,
600                                   NvU64 amount,
601                                   const NvProcessorUuid *processor)
602 {
603     UVM_ASSERT((NvU32)counter < UVM_TOTAL_COUNTERS);
604     uvm_assert_rwsem_locked(&va_space->tools.lock);
605 
606     if (amount > 0) {
607         uvm_tools_counter_t *counters;
608 
609         // Prevent processor speculation prior to accessing user-mapped memory
610         // to avoid leaking information from side-channel attacks. There are
611         // many possible paths leading to this point and it would be difficult
612         // and error-prone to audit all of them to determine whether user mode
613         // could guide this access to kernel memory under speculative execution,
614         // so to be on the safe side we'll just always block speculation.
615         nv_speculation_barrier();
616 
617         list_for_each_entry(counters, va_space->tools.counters + counter, counter_nodes[counter]) {
618             if ((counters->all_processors && counter_matches_processor(counter, processor)) ||
619                 uvm_uuid_eq(&counters->processor, processor)) {
620                 atomic64_add(amount, (atomic64_t *)(counters->counters + counter));
621             }
622         }
623     }
624 }
625 
626 static bool tools_is_counter_enabled(uvm_va_space_t *va_space, UvmCounterName counter)
627 {
628     uvm_assert_rwsem_locked(&va_space->tools.lock);
629 
630     UVM_ASSERT(counter < UVM_TOTAL_COUNTERS);
631     return !list_empty(va_space->tools.counters + counter);
632 }
633 
634 static bool tools_is_event_enabled_version(uvm_va_space_t *va_space,
635                                            UvmEventType event,
636                                            UvmToolsEventQueueVersion version)
637 {
638     uvm_assert_rwsem_locked(&va_space->tools.lock);
639 
640     UVM_ASSERT(event < UvmEventNumTypesAll);
641 
642     if (version == UvmToolsEventQueueVersion_V1)
643         return !list_empty(va_space->tools.queues_v1 + event);
644     else
645         return !list_empty(va_space->tools.queues_v2 + event);
646 }
647 
648 static bool tools_is_event_enabled(uvm_va_space_t *va_space, UvmEventType event)
649 {
650     uvm_assert_rwsem_locked(&va_space->tools.lock);
651 
652     UVM_ASSERT(event < UvmEventNumTypesAll);
653 
654     return !list_empty(va_space->tools.queues_v1 + event) ||
655            !list_empty(va_space->tools.queues_v2 + event);
656 }
657 
658 static bool tools_is_event_enabled_in_any_va_space(UvmEventType event)
659 {
660     bool ret = false;
661 
662     uvm_down_read(&g_tools_va_space_list_lock);
663     ret = g_tools_enabled_event_count[event] != 0;
664     uvm_up_read(&g_tools_va_space_list_lock);
665 
666     return ret;
667 }
668 
669 static bool tools_are_enabled(uvm_va_space_t *va_space)
670 {
671     NvU32 i;
672 
673     uvm_assert_rwsem_locked(&va_space->tools.lock);
674 
675     for (i = 0; i < UVM_TOTAL_COUNTERS; i++) {
676         if (tools_is_counter_enabled(va_space, i))
677             return true;
678     }
679     for (i = 0; i < UvmEventNumTypesAll; i++) {
680         if (tools_is_event_enabled(va_space, i))
681             return true;
682     }
683     return false;
684 }
685 
686 static bool tools_is_fault_callback_needed(uvm_va_space_t *va_space)
687 {
688     return tools_is_event_enabled(va_space, UvmEventTypeCpuFault) ||
689            tools_is_event_enabled(va_space, UvmEventTypeGpuFault) ||
690            tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount) ||
691            tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount);
692 }
693 
694 static bool tools_is_migration_callback_needed(uvm_va_space_t *va_space)
695 {
696     return tools_is_event_enabled(va_space, UvmEventTypeMigration) ||
697            tools_is_event_enabled(va_space, UvmEventTypeReadDuplicate) ||
698            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH) ||
699            tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD);
700 }
701 
702 static int uvm_tools_open(struct inode *inode, struct file *filp)
703 {
704     filp->private_data = NULL;
705     return -nv_status_to_errno(uvm_global_get_status());
706 }
707 
708 static int uvm_tools_open_entry(struct inode *inode, struct file *filp)
709 {
710     UVM_ENTRY_RET(uvm_tools_open(inode, filp));
711 }
712 
713 static int uvm_tools_release(struct inode *inode, struct file *filp)
714 {
715     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
716     if (event_tracker != NULL) {
717         destroy_event_tracker(event_tracker);
718         filp->private_data = NULL;
719     }
720     return -nv_status_to_errno(uvm_global_get_status());
721 }
722 
723 static int uvm_tools_release_entry(struct inode *inode, struct file *filp)
724 {
725     UVM_ENTRY_RET(uvm_tools_release(inode, filp));
726 }
727 
728 static long uvm_tools_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
729 {
730     switch (cmd) {
731         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_INIT_EVENT_TRACKER,         uvm_api_tools_init_event_tracker);
732         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD, uvm_api_tools_set_notification_threshold);
733         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS,  uvm_api_tools_event_queue_enable_events);
734         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS, uvm_api_tools_event_queue_disable_events);
735         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_ENABLE_COUNTERS,            uvm_api_tools_enable_counters);
736         UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TOOLS_DISABLE_COUNTERS,           uvm_api_tools_disable_counters);
737     }
738 
739     uvm_thread_assert_all_unlocked();
740 
741     return -EINVAL;
742 }
743 
744 static long uvm_tools_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
745 {
746     UVM_ENTRY_RET(uvm_tools_unlocked_ioctl(filp, cmd, arg));
747 }
748 
749 static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
750 {
751     int flags = 0;
752     uvm_tools_queue_snapshot_t sn;
753     uvm_tools_event_tracker_t *event_tracker;
754     UvmToolsEventControlData_V2 *ctrl;
755 
756     if (uvm_global_get_status() != NV_OK)
757         return POLLERR;
758 
759     event_tracker = tools_event_tracker(filp);
760     if (!tracker_is_queue(event_tracker))
761         return POLLERR;
762 
763     uvm_spin_lock(&event_tracker->queue.lock);
764 
765     event_tracker->queue.is_wakeup_get_valid = false;
766     ctrl = event_tracker->queue.control_v2;
767     sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
768     sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
769 
770     if (queue_needs_wakeup(&event_tracker->queue, &sn))
771         flags = POLLIN | POLLRDNORM;
772 
773     uvm_spin_unlock(&event_tracker->queue.lock);
774 
775     poll_wait(filp, &event_tracker->queue.wait_queue, wait);
776     return flags;
777 }
778 
779 static unsigned uvm_tools_poll_entry(struct file *filp, poll_table *wait)
780 {
781     UVM_ENTRY_RET(uvm_tools_poll(filp, wait));
782 }
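
// Sketch (an assumption, not shown in this excerpt): how the entry points
// above are typically wired into a struct file_operations table for the
// g_uvm_tools_cdev character device. The driver's real table may differ, for
// example in fields such as .compat_ioctl or .mmap.
#if 0
static const struct file_operations example_uvm_tools_fops =
{
    .open           = uvm_tools_open_entry,
    .release        = uvm_tools_release_entry,
    .unlocked_ioctl = uvm_tools_unlocked_ioctl_entry,
    .poll           = uvm_tools_poll_entry,
    .owner          = THIS_MODULE,
};
#endif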
783 
784 static UvmEventFaultType g_hal_to_tools_fault_type_table[UVM_FAULT_TYPE_COUNT] = {
785     [UVM_FAULT_TYPE_INVALID_PDE]          = UvmFaultTypeInvalidPde,
786     [UVM_FAULT_TYPE_INVALID_PTE]          = UvmFaultTypeInvalidPte,
787     [UVM_FAULT_TYPE_ATOMIC]               = UvmFaultTypeAtomic,
788     [UVM_FAULT_TYPE_WRITE]                = UvmFaultTypeWrite,
789     [UVM_FAULT_TYPE_PDE_SIZE]             = UvmFaultTypeInvalidPdeSize,
790     [UVM_FAULT_TYPE_VA_LIMIT_VIOLATION]   = UvmFaultTypeLimitViolation,
791     [UVM_FAULT_TYPE_UNBOUND_INST_BLOCK]   = UvmFaultTypeUnboundInstBlock,
792     [UVM_FAULT_TYPE_PRIV_VIOLATION]       = UvmFaultTypePrivViolation,
793     [UVM_FAULT_TYPE_PITCH_MASK_VIOLATION] = UvmFaultTypePitchMaskViolation,
794     [UVM_FAULT_TYPE_WORK_CREATION]        = UvmFaultTypeWorkCreation,
795     [UVM_FAULT_TYPE_UNSUPPORTED_APERTURE] = UvmFaultTypeUnsupportedAperture,
796     [UVM_FAULT_TYPE_COMPRESSION_FAILURE]  = UvmFaultTypeCompressionFailure,
797     [UVM_FAULT_TYPE_UNSUPPORTED_KIND]     = UvmFaultTypeUnsupportedKind,
798     [UVM_FAULT_TYPE_REGION_VIOLATION]     = UvmFaultTypeRegionViolation,
799     [UVM_FAULT_TYPE_POISONED]             = UvmFaultTypePoison,
800 };
801 
802 // TODO: add new value for weak atomics in tools
803 static UvmEventMemoryAccessType g_hal_to_tools_fault_access_type_table[UVM_FAULT_ACCESS_TYPE_COUNT] = {
804     [UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG] = UvmEventMemoryAccessTypeAtomic,
805     [UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK]   = UvmEventMemoryAccessTypeAtomic,
806     [UVM_FAULT_ACCESS_TYPE_WRITE]         = UvmEventMemoryAccessTypeWrite,
807     [UVM_FAULT_ACCESS_TYPE_READ]          = UvmEventMemoryAccessTypeRead,
808     [UVM_FAULT_ACCESS_TYPE_PREFETCH]      = UvmEventMemoryAccessTypePrefetch
809 };
810 
811 static UvmEventApertureType g_hal_to_tools_aperture_table[UVM_APERTURE_MAX] = {
812     [UVM_APERTURE_PEER_0] = UvmEventAperturePeer0,
813     [UVM_APERTURE_PEER_1] = UvmEventAperturePeer1,
814     [UVM_APERTURE_PEER_2] = UvmEventAperturePeer2,
815     [UVM_APERTURE_PEER_3] = UvmEventAperturePeer3,
816     [UVM_APERTURE_PEER_4] = UvmEventAperturePeer4,
817     [UVM_APERTURE_PEER_5] = UvmEventAperturePeer5,
818     [UVM_APERTURE_PEER_6] = UvmEventAperturePeer6,
819     [UVM_APERTURE_PEER_7] = UvmEventAperturePeer7,
820     [UVM_APERTURE_SYS]    = UvmEventApertureSys,
821     [UVM_APERTURE_VID]    = UvmEventApertureVid,
822 };
823 
824 static UvmEventFaultClientType g_hal_to_tools_fault_client_type_table[UVM_FAULT_CLIENT_TYPE_COUNT] = {
825     [UVM_FAULT_CLIENT_TYPE_GPC] = UvmEventFaultClientTypeGpc,
826     [UVM_FAULT_CLIENT_TYPE_HUB] = UvmEventFaultClientTypeHub,
827 };
828 
829 static void record_gpu_fault_instance(uvm_gpu_t *gpu,
830                                       uvm_va_space_t *va_space,
831                                       const uvm_fault_buffer_entry_t *fault_entry,
832                                       NvU64 batch_id,
833                                       NvU64 timestamp)
834 {
835     if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFault, UvmToolsEventQueueVersion_V1)) {
836         UvmEventEntry_V1 entry;
837         UvmEventGpuFaultInfo_V1 *info = &entry.eventData.gpuFault;
838         memset(&entry, 0, sizeof(entry));
839 
840         info->eventType     = UvmEventTypeGpuFault;
841         info->gpuIndex      = uvm_parent_id_value_from_processor_id(gpu->id);
842         info->faultType     = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
843         info->accessType    = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
844         info->clientType    = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
845         if (fault_entry->is_replayable)
846             info->gpcId     = fault_entry->fault_source.gpc_id;
847         else
848             info->channelId = fault_entry->fault_source.channel_id;
849         info->clientId      = fault_entry->fault_source.client_id;
850         info->address       = fault_entry->fault_address;
851         info->timeStamp     = timestamp;
852         info->timeStampGpu  = fault_entry->timestamp;
853         info->batchId       = batch_id;
854 
855         uvm_tools_record_event_v1(va_space, &entry);
856     }
857     if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFault, UvmToolsEventQueueVersion_V2)) {
858         UvmEventEntry_V2 entry;
859         UvmEventGpuFaultInfo_V2 *info = &entry.eventData.gpuFault;
860         memset(&entry, 0, sizeof(entry));
861 
862         info->eventType     = UvmEventTypeGpuFault;
863         info->gpuIndex      = uvm_id_value(gpu->id);
864         info->faultType     = g_hal_to_tools_fault_type_table[fault_entry->fault_type];
865         info->accessType    = g_hal_to_tools_fault_access_type_table[fault_entry->fault_access_type];
866         info->clientType    = g_hal_to_tools_fault_client_type_table[fault_entry->fault_source.client_type];
867         if (fault_entry->is_replayable)
868             info->gpcId     = fault_entry->fault_source.gpc_id;
869         else
870             info->channelId = fault_entry->fault_source.channel_id;
871         info->clientId      = fault_entry->fault_source.client_id;
872         info->address       = fault_entry->fault_address;
873         info->timeStamp     = timestamp;
874         info->timeStampGpu  = fault_entry->timestamp;
875         info->batchId       = batch_id;
876 
877         uvm_tools_record_event_v2(va_space, &entry);
878     }
879 }
880 
881 static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
882 {
883     uvm_va_space_t *va_space = event_data->fault.space;
884 
885     UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT);
886     UVM_ASSERT(event_data->fault.space);
887 
888     uvm_assert_rwsem_locked(&va_space->lock);
889     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
890     UVM_ASSERT(va_space->tools.enabled);
891 
892     uvm_down_read(&va_space->tools.lock);
893     UVM_ASSERT(tools_is_fault_callback_needed(va_space));
894 
895     if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
896         if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V1)) {
897             UvmEventEntry_V1 entry;
898             UvmEventCpuFaultInfo_V1 *info = &entry.eventData.cpuFault;
899             memset(&entry, 0, sizeof(entry));
900 
901             info->eventType = UvmEventTypeCpuFault;
902             if (event_data->fault.cpu.is_write)
903                 info->accessType = UvmEventMemoryAccessTypeWrite;
904             else
905                 info->accessType = UvmEventMemoryAccessTypeRead;
906 
907             info->address = event_data->fault.cpu.fault_va;
908             info->timeStamp = NV_GETTIME();
909             // assume that current owns va_space
910             info->pid = uvm_get_stale_process_id();
911             info->threadId = uvm_get_stale_thread_id();
912             info->pc = event_data->fault.cpu.pc;
913 
914             uvm_tools_record_event_v1(va_space, &entry);
915         }
916         if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V2)) {
917             UvmEventEntry_V2 entry;
918             UvmEventCpuFaultInfo_V2 *info = &entry.eventData.cpuFault;
919             memset(&entry, 0, sizeof(entry));
920 
921             info->eventType = UvmEventTypeCpuFault;
922             if (event_data->fault.cpu.is_write)
923                 info->accessType = UvmEventMemoryAccessTypeWrite;
924             else
925                 info->accessType = UvmEventMemoryAccessTypeRead;
926 
927             info->address = event_data->fault.cpu.fault_va;
928             info->timeStamp = NV_GETTIME();
929             // assume that current owns va_space
930             info->pid = uvm_get_stale_process_id();
931             info->threadId = uvm_get_stale_thread_id();
932             info->pc = event_data->fault.cpu.pc;
933 
934             uvm_tools_record_event_v2(va_space, &entry);
935         }
936         if (tools_is_counter_enabled(va_space, UvmCounterNameCpuPageFaultCount)) {
937             uvm_processor_id_t preferred_location;
938 
939             // The UVM Lite tools interface did not represent the CPU as a UVM
940             // device. It reported CPU faults against the corresponding
941             // allocation's 'home location'. Though this driver's tools
942             // interface does include a CPU device, for compatibility, the
943             // driver still reports faults against a buffer's preferred
944             // location, in addition to the CPU.
945             uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &NV_PROCESSOR_UUID_CPU_DEFAULT);
946 
947             preferred_location = event_data->fault.preferred_location;
948             if (UVM_ID_IS_GPU(preferred_location)) {
949                 uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, preferred_location);
950                 uvm_tools_inc_counter(va_space, UvmCounterNameCpuPageFaultCount, 1, &gpu->uuid);
951             }
952         }
953     }
954     else {
955         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->fault.proc_id);
956         UVM_ASSERT(gpu);
957 
958         if (tools_is_event_enabled(va_space, UvmEventTypeGpuFault)) {
959             NvU64 timestamp = NV_GETTIME();
960             uvm_fault_buffer_entry_t *fault_entry = event_data->fault.gpu.buffer_entry;
961             uvm_fault_buffer_entry_t *fault_instance;
962 
963             record_gpu_fault_instance(gpu, va_space, fault_entry, event_data->fault.gpu.batch_id, timestamp);
964 
965             list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list)
966                 record_gpu_fault_instance(gpu, va_space, fault_instance, event_data->fault.gpu.batch_id, timestamp);
967         }
968 
969         if (tools_is_counter_enabled(va_space, UvmCounterNameGpuPageFaultCount))
970             uvm_tools_inc_counter(va_space, UvmCounterNameGpuPageFaultCount, 1, &gpu->uuid);
971     }
972     uvm_up_read(&va_space->tools.lock);
973 }
974 
975 static void add_pending_event_for_channel(uvm_channel_t *channel)
976 {
977     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
978 
979     if (channel->tools.pending_event_count++ == 0)
980         list_add_tail(&channel->tools.channel_list_node, &g_tools_channel_list);
981 }
982 
983 static void remove_pending_event_for_channel(uvm_channel_t *channel)
984 {
985     uvm_assert_spinlock_locked(&g_tools_channel_list_lock);
986     UVM_ASSERT(channel->tools.pending_event_count > 0);
987     if (--channel->tools.pending_event_count == 0)
988         list_del_init(&channel->tools.channel_list_node);
989 }
990 
991 static void record_migration_events(void *args)
992 {
993     block_migration_data_t *block_mig = (block_migration_data_t *)args;
994     migration_data_t *mig;
995     migration_data_t *next;
996     uvm_va_space_t *va_space = block_mig->va_space;
997 
998     NvU64 gpu_timestamp = block_mig->start_timestamp_gpu;
999 
1000     uvm_down_read(&va_space->tools.lock);
1001     if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V1)) {
1002         UvmEventEntry_V1 entry;
1003         UvmEventMigrationInfo_V1 *info = &entry.eventData.migration;
1004 
1005         // Initialize fields that are constant throughout the whole block
1006         memset(&entry, 0, sizeof(entry));
1007         info->eventType      = UvmEventTypeMigration;
1008         info->srcIndex       = uvm_parent_id_value_from_processor_id(block_mig->src);
1009         info->dstIndex       = uvm_parent_id_value_from_processor_id(block_mig->dst);
1010         info->beginTimeStamp = block_mig->start_timestamp_cpu;
1011         info->endTimeStamp   = block_mig->end_timestamp_cpu;
1012         info->rangeGroupId   = block_mig->range_group_id;
1013 
1014         list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
1015             UVM_ASSERT(mig->bytes > 0);
1016             list_del(&mig->events_node);
1017 
1018             info->address           = mig->address;
1019             info->migratedBytes     = mig->bytes;
1020             info->beginTimeStampGpu = gpu_timestamp;
1021             info->endTimeStampGpu   = mig->end_timestamp_gpu;
1022             info->migrationCause    = mig->cause;
1023             gpu_timestamp = mig->end_timestamp_gpu;
1024             kmem_cache_free(g_tools_migration_data_cache, mig);
1025 
1026             uvm_tools_record_event_v1(va_space, &entry);
1027         }
1028     }
1029     if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V2)) {
1030         UvmEventEntry_V2 entry;
1031         UvmEventMigrationInfo_V2 *info = &entry.eventData.migration;
1032 
1033         // Initialize fields that are constant throughout the whole block
1034         memset(&entry, 0, sizeof(entry));
1035         info->eventType      = UvmEventTypeMigration;
1036         info->srcIndex       = uvm_id_value(block_mig->src);
1037         info->dstIndex       = uvm_id_value(block_mig->dst);
1038         info->beginTimeStamp = block_mig->start_timestamp_cpu;
1039         info->endTimeStamp   = block_mig->end_timestamp_cpu;
1040         info->rangeGroupId   = block_mig->range_group_id;
1041 
1042         list_for_each_entry_safe(mig, next, &block_mig->events, events_node) {
1043             UVM_ASSERT(mig->bytes > 0);
1044             list_del(&mig->events_node);
1045 
1046             info->address           = mig->address;
1047             info->migratedBytes     = mig->bytes;
1048             info->beginTimeStampGpu = gpu_timestamp;
1049             info->endTimeStampGpu   = mig->end_timestamp_gpu;
1050             info->migrationCause    = mig->cause;
1051             gpu_timestamp = mig->end_timestamp_gpu;
1052             kmem_cache_free(g_tools_migration_data_cache, mig);
1053 
1054             uvm_tools_record_event_v2(va_space, &entry);
1055         }
1056     }
1057     uvm_up_read(&va_space->tools.lock);
1058 
1059     UVM_ASSERT(list_empty(&block_mig->events));
1060     kmem_cache_free(g_tools_block_migration_data_cache, block_mig);
1061 }
1062 
1063 static void record_migration_events_entry(void *args)
1064 {
1065     UVM_ENTRY_VOID(record_migration_events(args));
1066 }
1067 
1068 static void on_block_migration_complete(void *ptr)
1069 {
1070     migration_data_t *mig;
1071     block_migration_data_t *block_mig = (block_migration_data_t *)ptr;
1072 
1073     block_mig->end_timestamp_cpu = NV_GETTIME();
1074     block_mig->start_timestamp_gpu = *block_mig->start_timestamp_gpu_addr;
1075     list_for_each_entry(mig, &block_mig->events, events_node)
1076         mig->end_timestamp_gpu = *mig->end_timestamp_gpu_addr;
1077 
1078     nv_kthread_q_item_init(&block_mig->queue_item, record_migration_events_entry, block_mig);
1079 
1080     // The UVM driver may notice that work in a channel is complete in a variety of
1081     // situations, and the va_space lock is not always held (nor can it always be taken
1082     // safely) at those points. Dispatching events requires the va_space lock to be held
1083     // in at least read mode, so this callback simply enqueues the dispatch onto a work
1084     // queue, from which the va_space lock is always safe to acquire.
1085     uvm_spin_lock(&g_tools_channel_list_lock);
1086     remove_pending_event_for_channel(block_mig->channel);
1087     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_mig->queue_item);
1088     uvm_spin_unlock(&g_tools_channel_list_lock);
1089 }
1090 
1091 static void record_replay_event_helper(uvm_va_space_t *va_space,
1092                                        uvm_gpu_id_t gpu_id,
1093                                        NvU32 batch_id,
1094                                        uvm_fault_client_type_t client_type,
1095                                        NvU64 timestamp,
1096                                        NvU64 timestamp_gpu)
1097 {
1098     uvm_down_read(&va_space->tools.lock);
1099 
1100     if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFaultReplay, UvmToolsEventQueueVersion_V1)) {
1101         UvmEventEntry_V1 entry;
1102 
1103         memset(&entry, 0, sizeof(entry));
1104         entry.eventData.gpuFaultReplay.eventType    = UvmEventTypeGpuFaultReplay;
1105         entry.eventData.gpuFaultReplay.gpuIndex     = uvm_parent_id_value_from_processor_id(gpu_id);
1106         entry.eventData.gpuFaultReplay.batchId      = batch_id;
1107         entry.eventData.gpuFaultReplay.clientType   = g_hal_to_tools_fault_client_type_table[client_type];
1108         entry.eventData.gpuFaultReplay.timeStamp    = timestamp;
1109         entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;
1110 
1111         uvm_tools_record_event_v1(va_space, &entry);
1112     }
1113     if (tools_is_event_enabled_version(va_space, UvmEventTypeGpuFaultReplay, UvmToolsEventQueueVersion_V2)) {
1114         UvmEventEntry_V2 entry;
1115 
1116         memset(&entry, 0, sizeof(entry));
1117         entry.eventData.gpuFaultReplay.eventType    = UvmEventTypeGpuFaultReplay;
1118         entry.eventData.gpuFaultReplay.gpuIndex     = uvm_id_value(gpu_id);
1119         entry.eventData.gpuFaultReplay.batchId      = batch_id;
1120         entry.eventData.gpuFaultReplay.clientType   = g_hal_to_tools_fault_client_type_table[client_type];
1121         entry.eventData.gpuFaultReplay.timeStamp    = timestamp;
1122         entry.eventData.gpuFaultReplay.timeStampGpu = timestamp_gpu;
1123 
1124         uvm_tools_record_event_v2(va_space, &entry);
1125     }
1126 
1127     uvm_up_read(&va_space->tools.lock);
1128 }
1129 
1130 static void record_replay_event_broadcast(uvm_gpu_id_t gpu_id,
1131                                           NvU32 batch_id,
1132                                           uvm_fault_client_type_t client_type,
1133                                           NvU64 timestamp,
1134                                           NvU64 timestamp_gpu)
1135 {
1136     uvm_va_space_t *va_space;
1137 
1138     uvm_down_read(&g_tools_va_space_list_lock);
1139 
1140     list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
1141         record_replay_event_helper(va_space,
1142                                    gpu_id,
1143                                    batch_id,
1144                                    client_type,
1145                                    timestamp,
1146                                    timestamp_gpu);
1147     }
1148 
1149     uvm_up_read(&g_tools_va_space_list_lock);
1150 }
1151 
1152 static void record_replay_events(void *args)
1153 {
1154     replay_data_t *replay = (replay_data_t *)args;
1155 
1156     record_replay_event_broadcast(replay->gpu_id,
1157                                   replay->batch_id,
1158                                   replay->client_type,
1159                                   replay->timestamp,
1160                                   replay->timestamp_gpu);
1161 
1162     kmem_cache_free(g_tools_replay_data_cache, replay);
1163 }
1164 
1165 static void record_replay_events_entry(void *args)
1166 {
1167     UVM_ENTRY_VOID(record_replay_events(args));
1168 }
1169 
1170 static void on_replay_complete(void *ptr)
1171 {
1172     replay_data_t *replay = (replay_data_t *)ptr;
1173     replay->timestamp_gpu = *replay->timestamp_gpu_addr;
1174 
1175     nv_kthread_q_item_init(&replay->queue_item, record_replay_events_entry, ptr);
1176 
1177     uvm_spin_lock(&g_tools_channel_list_lock);
1178     remove_pending_event_for_channel(replay->channel);
1179     nv_kthread_q_schedule_q_item(&g_tools_queue, &replay->queue_item);
1180     uvm_spin_unlock(&g_tools_channel_list_lock);
1181 
1182 }
1183 
1184 static UvmEventMigrationCause g_make_resident_to_tools_migration_cause[UVM_MAKE_RESIDENT_CAUSE_MAX] = {
1185     [UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT]     = UvmEventMigrationCauseCoherence,
1186     [UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT] = UvmEventMigrationCauseCoherence,
1187     [UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER]       = UvmEventMigrationCauseAccessCounters,
1188     [UVM_MAKE_RESIDENT_CAUSE_PREFETCH]             = UvmEventMigrationCausePrefetch,
1189     [UVM_MAKE_RESIDENT_CAUSE_EVICTION]             = UvmEventMigrationCauseEviction,
1190     [UVM_MAKE_RESIDENT_CAUSE_API_TOOLS]            = UvmEventMigrationCauseInvalid,
1191     [UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE]          = UvmEventMigrationCauseUser,
1192     [UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP]  = UvmEventMigrationCauseCoherence,
1193     [UVM_MAKE_RESIDENT_CAUSE_API_HINT]             = UvmEventMigrationCauseUser,
1194 };
1195 
1196 static void uvm_tools_record_migration_cpu_to_cpu(uvm_va_space_t *va_space,
1197                                                   uvm_perf_event_data_t *event_data)
1198 {
1199     if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V1)) {
1200         UvmEventEntry_V1 entry;
1201         UvmEventMigrationInfo_V1 *info = &entry.eventData.migration;
1202 
1203         // CPU-to-CPU migration events can be added directly to the queue.
1204         memset(&entry, 0, sizeof(entry));
1205         info->eventType = UvmEventTypeMigration;
1206         info->srcIndex = uvm_parent_id_value_from_processor_id(event_data->migration.src);
1207         info->dstIndex = uvm_parent_id_value_from_processor_id(event_data->migration.dst);
1208         info->address = event_data->migration.address;
1209         info->migratedBytes = event_data->migration.bytes;
1210         info->beginTimeStamp = event_data->migration.cpu_start_timestamp;
1211         info->endTimeStamp = NV_GETTIME();
1212         info->migrationCause = event_data->migration.cause;
1213         info->rangeGroupId = UVM_RANGE_GROUP_ID_NONE;
1214 
1215         // During evictions, it is not safe to call uvm_range_group_range_find()
1216         // because the va_space lock is not held.
1217         if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1218             uvm_range_group_range_t *range = uvm_range_group_range_find(va_space, event_data->migration.address);
1219             if (range != NULL)
1220                 info->rangeGroupId = range->range_group->id;
1221         }
1222 
1223         uvm_tools_record_event_v1(va_space, &entry);
1224     }
1225     if (tools_is_event_enabled_version(va_space, UvmEventTypeMigration, UvmToolsEventQueueVersion_V2)) {
1226         UvmEventEntry_V2 entry;
1227         UvmEventMigrationInfo_V2 *info = &entry.eventData.migration;
1228 
1229         // CPU-to-CPU migration events can be added directly to the queue.
1230         memset(&entry, 0, sizeof(entry));
1231         info->eventType = UvmEventTypeMigration;
1232         info->srcIndex = uvm_id_value(event_data->migration.src);
1233         info->dstIndex = uvm_id_value(event_data->migration.dst);
1234         info->srcNid = event_data->migration.src_nid;
1235         info->dstNid = event_data->migration.dst_nid;
1236         info->address = event_data->migration.address;
1237         info->migratedBytes = event_data->migration.bytes;
1238         info->beginTimeStamp = event_data->migration.cpu_start_timestamp;
1239         info->endTimeStamp = NV_GETTIME();
1240         info->migrationCause = event_data->migration.cause;
1241         info->rangeGroupId = UVM_RANGE_GROUP_ID_NONE;
1242 
1243         // During evictions, it is not safe to uvm_range_group_range_find()
1244         // because the va_space lock is not held.
1245         if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1246             uvm_range_group_range_t *range = uvm_range_group_range_find(va_space, event_data->migration.address);
1247             if (range != NULL)
1248                 info->rangeGroupId = range->range_group->id;
1249         }
1250 
1251         uvm_tools_record_event_v2(va_space, &entry);
1252     }
1253 }
1254 
1255 // For non-CPU-to-CPU migrations (or CPU-to-CPU copies using CEs), this event is
1256 // notified asynchronously when all the migrations pushed to the same uvm_push_t
1257 // object in a call to block_copy_resident_pages_between have finished.
1258 // For CPU-to-CPU copies using memcpy, this event is notified when all of the
1259 // page copies done by block_copy_resident_pages have finished.
1260 static void uvm_tools_record_migration(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
1261 {
1262     uvm_va_block_t *va_block = event_data->migration.block;
1263     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1264 
1265     UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION);
1266 
1267     uvm_assert_mutex_locked(&va_block->lock);
1268     uvm_assert_rwsem_locked(&va_space->perf_events.lock);
1269     UVM_ASSERT(va_space->tools.enabled);
1270 
1271     uvm_down_read(&va_space->tools.lock);
1272     UVM_ASSERT(tools_is_migration_callback_needed(va_space));
1273 
1274     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1275         if (!UVM_ID_IS_CPU(event_data->migration.src) || !UVM_ID_IS_CPU(event_data->migration.dst)) {
1276             migration_data_t *mig;
1277             uvm_push_info_t *push_info = uvm_push_info_from_push(event_data->migration.push);
1278             block_migration_data_t *block_mig = (block_migration_data_t *)push_info->on_complete_data;
1279 
1280             if (push_info->on_complete != NULL) {
1281                 mig = kmem_cache_alloc(g_tools_migration_data_cache, NV_UVM_GFP_FLAGS);
1282                 if (mig == NULL)
1283                     goto done_unlock;
1284 
1285                 mig->address = event_data->migration.address;
1286                 mig->bytes = event_data->migration.bytes;
1287                 mig->end_timestamp_gpu_addr = uvm_push_timestamp(event_data->migration.push);
1288                 mig->cause = g_make_resident_to_tools_migration_cause[event_data->migration.cause];
1289 
1290                 list_add_tail(&mig->events_node, &block_mig->events);
1291             }
1292         }
1293         else {
1294             uvm_tools_record_migration_cpu_to_cpu(va_space, event_data);
1295         }
1296     }
1297 
1298     // Don't increment either UvmCounterNameBytesXferDtH or
1299     // UvmCounterNameBytesXferHtD for a CPU-to-CPU migration.
1300     if (UVM_ID_IS_CPU(event_data->migration.src) && UVM_ID_IS_CPU(event_data->migration.dst))
1301         goto done_unlock;
1302 
1303     // Increment counters
1304     if (UVM_ID_IS_CPU(event_data->migration.src) &&
1305         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferHtD)) {
1306         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.dst);
1307         uvm_tools_inc_counter(va_space,
1308                               UvmCounterNameBytesXferHtD,
1309                               event_data->migration.bytes,
1310                               &gpu->uuid);
1311     }
1312     if (UVM_ID_IS_CPU(event_data->migration.dst) &&
1313         tools_is_counter_enabled(va_space, UvmCounterNameBytesXferDtH)) {
1314         uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, event_data->migration.src);
1315         uvm_tools_inc_counter(va_space,
1316                               UvmCounterNameBytesXferDtH,
1317                               event_data->migration.bytes,
1318                               &gpu->uuid);
1319     }
1320 
1321 done_unlock:
1322     uvm_up_read(&va_space->tools.lock);
1323 }
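
// The counter updates above classify a migration as host-to-device when only
// the source is the CPU and device-to-host when only the destination is the
// CPU; CPU-to-CPU copies update neither counter. The standalone sketch below
// (disabled, with hypothetical names not taken from this driver) restates that
// rule in isolation.
#if 0
typedef enum
{
    XFER_NONE,  // CPU to CPU: neither HtD nor DtH counter is incremented
    XFER_HTD,   // CPU source, GPU destination: host-to-device bytes counter
    XFER_DTH,   // GPU source, CPU destination: device-to-host bytes counter
    XFER_DTD,   // GPU to GPU: neither counter applies
} xfer_direction_t;

static xfer_direction_t classify_direction(bool src_is_cpu, bool dst_is_cpu)
{
    if (src_is_cpu && dst_is_cpu)
        return XFER_NONE;
    if (src_is_cpu)
        return XFER_HTD;
    if (dst_is_cpu)
        return XFER_DTH;
    return XFER_DTD;
}
#endif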
1324 
1325 // This event is notified asynchronously when the pushbuffer containing the
1326 // replay method is marked as completed.
1327 void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
1328                                 uvm_push_t *push,
1329                                 NvU32 batch_id,
1330                                 uvm_fault_client_type_t client_type)
1331 {
1332     uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1333     replay_data_t *replay;
1334 
1335     // Perform delayed notification only if some VA space has signed up for
1336     // UvmEventTypeGpuFaultReplay
1337     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1338         return;
1339 
1340     replay = kmem_cache_alloc(g_tools_replay_data_cache, NV_UVM_GFP_FLAGS);
1341     if (replay == NULL)
1342         return;
1343 
1344     UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1345 
1346     replay->timestamp_gpu_addr = uvm_push_timestamp(push);
1347     replay->gpu_id             = gpu->id;
1348     replay->batch_id           = batch_id;
1349     replay->client_type        = client_type;
1350     replay->timestamp          = NV_GETTIME();
1351     replay->channel            = push->channel;
1352 
1353     push_info->on_complete_data = replay;
1354     push_info->on_complete = on_replay_complete;
1355 
1356     uvm_spin_lock(&g_tools_channel_list_lock);
1357     add_pending_event_for_channel(replay->channel);
1358     uvm_spin_unlock(&g_tools_channel_list_lock);
1359 }
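
// Rough shape of the delayed-event wiring used above: allocate a record, have
// the push write a GPU timestamp into it, and register an on-complete callback
// that emits the event once the push has finished executing. This is an
// illustrative, disabled sketch; every type and helper named below is a
// hypothetical stand-in, not an actual driver interface.
#if 0
struct example_delayed_event
{
    NvU64 *timestamp_gpu_addr;  // written by the GPU at the end of the push
    NvU64  timestamp_cpu;       // CPU timestamp taken at submission time
};

// Runs once the push (and therefore the GPU timestamp write) has completed.
static void example_on_complete(void *data)
{
    struct example_delayed_event *ev = data;

    example_emit_event(ev->timestamp_cpu, *ev->timestamp_gpu_addr);  // hypothetical
    example_free(ev);                                                // hypothetical
}

static void example_submit(example_push_t *push)  // hypothetical push type
{
    struct example_delayed_event *ev = example_alloc();              // hypothetical

    ev->timestamp_cpu      = example_time_ns();                      // hypothetical
    ev->timestamp_gpu_addr = example_push_timestamp(push);           // hypothetical

    example_push_set_on_complete(push, example_on_complete, ev);     // hypothetical
}
#endif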
1360 
1361 void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type)
1362 {
1363     UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
1364 
1365     if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
1366         return;
1367 
1368     record_replay_event_broadcast(gpu->id,
1369                                   batch_id,
1370                                   client_type,
1371                                   NV_GETTIME(),
1372                                   gpu->parent->host_hal->get_time(gpu));
1373 }
1374 
1375 void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
1376                                      uvm_gpu_id_t gpu_id,
1377                                      const uvm_access_counter_buffer_entry_t *buffer_entry,
1378                                      bool on_managed_phys)
1379 {
1380     uvm_down_read(&va_space->tools.lock);
1381 
1382     if (tools_is_event_enabled_version(va_space, UvmEventTypeTestAccessCounter, UvmToolsEventQueueVersion_V1)) {
1383         UvmEventEntry_V1 entry;
1384         UvmEventTestAccessCounterInfo_V1 *info = &entry.testEventData.accessCounter;
1385 
1386         memset(&entry, 0, sizeof(entry));
1387 
1388         info->eventType           = UvmEventTypeTestAccessCounter;
1389         info->srcIndex            = uvm_parent_id_value_from_processor_id(gpu_id);
1390         info->address             = buffer_entry->address.address;
1391         info->isVirtual           = buffer_entry->address.is_virtual? 1: 0;
1392         if (buffer_entry->address.is_virtual) {
1393             info->instancePtr         = buffer_entry->virtual_info.instance_ptr.address;
1394             info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
1395             info->veId                = buffer_entry->virtual_info.ve_id;
1396         }
1397         else {
1398             info->aperture            = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
1399         }
1400         info->isFromCpu           = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
1401         info->physOnManaged       = on_managed_phys? 1 : 0;
1402         info->value               = buffer_entry->counter_value;
1403         info->subGranularity      = buffer_entry->sub_granularity;
1404         info->bank                = buffer_entry->bank;
1405         info->tag                 = buffer_entry->tag;
1406 
1407         uvm_tools_record_event_v1(va_space, &entry);
1408     }
1409     if (tools_is_event_enabled_version(va_space, UvmEventTypeTestAccessCounter, UvmToolsEventQueueVersion_V2)) {
1410         UvmEventEntry_V2 entry;
1411         UvmEventTestAccessCounterInfo_V2 *info = &entry.testEventData.accessCounter;
1412 
1413         memset(&entry, 0, sizeof(entry));
1414 
1415         info->eventType           = UvmEventTypeTestAccessCounter;
1416         info->srcIndex            = uvm_id_value(gpu_id);
1417         info->address             = buffer_entry->address.address;
1418         info->isVirtual           = buffer_entry->address.is_virtual? 1: 0;
1419         if (buffer_entry->address.is_virtual) {
1420             info->instancePtr         = buffer_entry->virtual_info.instance_ptr.address;
1421             info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
1422             info->veId                = buffer_entry->virtual_info.ve_id;
1423         }
1424         else {
1425             info->aperture            = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
1426         }
1427         info->isFromCpu           = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
1428         info->physOnManaged       = on_managed_phys? 1 : 0;
1429         info->value               = buffer_entry->counter_value;
1430         info->subGranularity      = buffer_entry->sub_granularity;
1431         info->bank                = buffer_entry->bank;
1432         info->tag                 = buffer_entry->tag;
1433 
1434         uvm_tools_record_event_v2(va_space, &entry);
1435     }
1436 
1437     uvm_up_read(&va_space->tools.lock);
1438 }
1439 
1440 void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
1441                                         const uvm_access_counter_buffer_entry_t *buffer_entry,
1442                                         bool on_managed_phys)
1443 {
1444     uvm_va_space_t *va_space;
1445 
1446     uvm_down_read(&g_tools_va_space_list_lock);
1447     list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
1448         uvm_tools_record_access_counter(va_space,
1449                                         gpu->id,
1450                                         buffer_entry,
1451                                         on_managed_phys);
1452     }
1453     uvm_up_read(&g_tools_va_space_list_lock);
1454 }
1455 
1456 void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
1457 {
1458     UvmEventEntry_V2 entry;
1459 
1460     if (!va_space->tools.enabled)
1461         return;
1462 
1463     entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
1464     uvm_down_read(&va_space->tools.lock);
1465     uvm_tools_record_event_v2(va_space, &entry);
1466     uvm_up_read(&va_space->tools.lock);
1467 }
1468 
1469 // This function is used as a begin marker to group all migrations within a VA
1470 // block that are performed in the same call to
1471 // block_copy_resident_pages_between. All of these are pushed to the same
1472 // uvm_push_t object, and will be notified in burst when the last one finishes.
1473 void uvm_tools_record_block_migration_begin(uvm_va_block_t *va_block,
1474                                             uvm_push_t *push,
1475                                             uvm_processor_id_t dst_id,
1476                                             uvm_processor_id_t src_id,
1477                                             NvU64 start,
1478                                             uvm_make_resident_cause_t cause)
1479 {
1480     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1481     uvm_range_group_range_t *range;
1482 
1483     // Calls from tools read/write functions to make_resident must not trigger
1484     // any migration
1485     UVM_ASSERT(cause != UVM_MAKE_RESIDENT_CAUSE_API_TOOLS);
1486 
1487     // During evictions the va_space lock is not held.
1488     if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION)
1489         uvm_assert_rwsem_locked(&va_space->lock);
1490 
1491     if (!va_space->tools.enabled)
1492         return;
1493 
1494     uvm_down_read(&va_space->tools.lock);
1495 
1496     // Perform delayed notification only if the VA space has signed up for
1497     // UvmEventTypeMigration
1498     if (tools_is_event_enabled(va_space, UvmEventTypeMigration)) {
1499         block_migration_data_t *block_mig;
1500         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
1501 
1502         UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
1503 
1504         block_mig = kmem_cache_alloc(g_tools_block_migration_data_cache, NV_UVM_GFP_FLAGS);
1505         if (block_mig == NULL)
1506             goto done_unlock;
1507 
1508         block_mig->start_timestamp_gpu_addr = uvm_push_timestamp(push);
1509         block_mig->channel = push->channel;
1510         block_mig->start_timestamp_cpu = NV_GETTIME();
1511         block_mig->dst = dst_id;
1512         block_mig->src = src_id;
1513         block_mig->range_group_id = UVM_RANGE_GROUP_ID_NONE;
1514 
1515         // During evictions, it is not safe to uvm_range_group_range_find() because the va_space lock is not held.
1516         if (cause != UVM_MAKE_RESIDENT_CAUSE_EVICTION) {
1517             range = uvm_range_group_range_find(va_space, start);
1518             if (range != NULL)
1519                 block_mig->range_group_id = range->range_group->id;
1520         }
1521         block_mig->va_space = va_space;
1522 
1523         INIT_LIST_HEAD(&block_mig->events);
1524         push_info->on_complete_data = block_mig;
1525         push_info->on_complete = on_block_migration_complete;
1526 
1527         uvm_spin_lock(&g_tools_channel_list_lock);
1528         add_pending_event_for_channel(block_mig->channel);
1529         uvm_spin_unlock(&g_tools_channel_list_lock);
1530     }
1531 
1532 done_unlock:
1533     uvm_up_read(&va_space->tools.lock);
1534 }
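
// The begin marker above creates one aggregate record per push; the individual
// migrations recorded later by uvm_tools_record_migration are linked onto its
// events list and emitted together once the push completes. A schematic view
// of that two-level layout (disabled sketch, hypothetical field names inferred
// from the usage above):
#if 0
struct example_block_migration          // one per push (the begin marker)
{
    NvU64            start_cpu_ts;      // taken when the push is submitted
    NvU64           *start_gpu_ts_addr; // written by the GPU
    struct list_head events;            // list of example_migration entries
};

struct example_migration                // one per copy pushed for the block
{
    struct list_head events_node;       // links into example_block_migration.events
    NvU64            address;
    NvU64            bytes;
    NvU64           *end_gpu_ts_addr;   // per-copy GPU end timestamp
};
#endif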
1535 
1536 void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
1537                                      uvm_processor_id_t dst,
1538                                      uvm_va_block_region_t region,
1539                                      const uvm_page_mask_t *page_mask)
1540 {
1541     uvm_processor_mask_t *resident_processors;
1542     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1543 
1544     if (!va_space->tools.enabled)
1545         return;
1546 
1547     resident_processors = uvm_processor_mask_cache_alloc();
1548     if (!resident_processors)
1549         return;
1550 
1551     uvm_down_read(&va_space->tools.lock);
1552 
1553     if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicate, UvmToolsEventQueueVersion_V1)) {
1554         UvmEventEntry_V1 entry;
1555         UvmEventReadDuplicateInfo_V1 *info_read_duplicate = &entry.eventData.readDuplicate;
1556         uvm_page_index_t page_index;
1557 
1558         memset(&entry, 0, sizeof(entry));
1559 
1560         info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
1561         info_read_duplicate->size      = PAGE_SIZE;
1562         info_read_duplicate->timeStamp = NV_GETTIME();
1563 
1564         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1565             uvm_processor_id_t id;
1566 
1567             info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index);
1568             info_read_duplicate->processors = 0;
1569 
1570             uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
1571 
1572             for_each_id_in_mask(id, resident_processors)
1573                 __set_bit(uvm_parent_id_value_from_processor_id(id), (unsigned long *)&info_read_duplicate->processors);
1574 
1575             uvm_tools_record_event_v1(va_space, &entry);
1576         }
1577     }
1578 
1579     if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicate, UvmToolsEventQueueVersion_V2)) {
1580         UvmEventEntry_V2 entry;
1581         UvmEventReadDuplicateInfo_V2 *info_read_duplicate = &entry.eventData.readDuplicate;
1582         uvm_page_index_t page_index;
1583 
1584         memset(&entry, 0, sizeof(entry));
1585 
1586         info_read_duplicate->eventType = UvmEventTypeReadDuplicate;
1587         info_read_duplicate->size      = PAGE_SIZE;
1588         info_read_duplicate->timeStamp = NV_GETTIME();
1589 
1590         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1591             uvm_processor_id_t id;
1592 
1593             info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index);
1594             memset(info_read_duplicate->processors, 0, sizeof(info_read_duplicate->processors));
1595 
1596             uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
1597 
1598             for_each_id_in_mask(id, resident_processors)
1599                 __set_bit(uvm_id_value(id), (unsigned long *)info_read_duplicate->processors);
1600 
1601             uvm_tools_record_event_v2(va_space, &entry);
1602         }
1603     }
1604 
1605     uvm_up_read(&va_space->tools.lock);
1606 
1607     uvm_processor_mask_cache_free(resident_processors);
1608 }
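
// Note how the two queue versions encode the set of resident processors above:
// V1 sets bits in a single 64-bit word indexed by the parent processor id,
// while V2 sets bits in an array of words indexed by the full processor id.
// A minimal, disabled sketch of the two encodings (illustrative only):
#if 0
#include <limits.h>

// V1-style: one 64-bit mask, one bit per parent processor id (must be < 64).
static void example_set_processor_v1(unsigned long long *mask, unsigned parent_id)
{
    *mask |= 1ULL << parent_id;
}

// V2-style: a multi-word bitmap, one bit per (possibly larger) processor id.
static void example_set_processor_v2(unsigned long *bitmap, unsigned id)
{
    const unsigned bits_per_word = sizeof(unsigned long) * CHAR_BIT;

    bitmap[id / bits_per_word] |= 1UL << (id % bits_per_word);
}
#endif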
1609 
1610 void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
1611                                                 uvm_processor_id_t dst,
1612                                                 uvm_va_block_region_t region,
1613                                                 const uvm_page_mask_t *page_mask)
1614 {
1615     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
1616 
1617     if (!va_space->tools.enabled)
1618         return;
1619 
1620     uvm_down_read(&va_space->tools.lock);
1621     if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicateInvalidate, UvmToolsEventQueueVersion_V1)) {
1622         UvmEventEntry_V1 entry;
1623         uvm_page_index_t page_index;
1624         UvmEventReadDuplicateInvalidateInfo_V1 *info = &entry.eventData.readDuplicateInvalidate;
1625 
1626         memset(&entry, 0, sizeof(entry));
1627 
1628         info->eventType     = UvmEventTypeReadDuplicateInvalidate;
1629         info->residentIndex = uvm_parent_id_value_from_processor_id(dst);
1630         info->size          = PAGE_SIZE;
1631         info->timeStamp     = NV_GETTIME();
1632 
1633         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1634             UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));
1635 
1636             info->address = uvm_va_block_cpu_page_address(va_block, page_index);
1637             uvm_tools_record_event_v1(va_space, &entry);
1638         }
1639     }
1640     if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicateInvalidate, UvmToolsEventQueueVersion_V2)) {
1641         UvmEventEntry_V2 entry;
1642         uvm_page_index_t page_index;
1643         UvmEventReadDuplicateInvalidateInfo_V2 *info = &entry.eventData.readDuplicateInvalidate;
1644 
1645         memset(&entry, 0, sizeof(entry));
1646 
1647         info->eventType     = UvmEventTypeReadDuplicateInvalidate;
1648         info->residentIndex = uvm_id_value(dst);
1649         info->size          = PAGE_SIZE;
1650         info->timeStamp     = NV_GETTIME();
1651 
1652         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
1653             UVM_ASSERT(uvm_page_mask_test(&va_block->read_duplicated_pages, page_index));
1654 
1655             info->address = uvm_va_block_cpu_page_address(va_block, page_index);
1656             uvm_tools_record_event_v2(va_space, &entry);
1657         }
1658     }
1659     uvm_up_read(&va_space->tools.lock);
1660 }
1661 
1662 static void tools_schedule_completed_events(void)
1663 {
1664     uvm_channel_t *channel;
1665     uvm_channel_t *next_channel;
1666     NvU64 channel_count = 0;
1667     NvU64 i;
1668 
1669     uvm_spin_lock(&g_tools_channel_list_lock);
1670 
1671     // Retain every channel list entry currently in the list and keep track of the count.
1672     list_for_each_entry(channel, &g_tools_channel_list, tools.channel_list_node) {
1673         ++channel->tools.pending_event_count;
1674         ++channel_count;
1675     }
1676     uvm_spin_unlock(&g_tools_channel_list_lock);
1677 
1678     if (channel_count == 0)
1679         return;
1680 
1681     // New entries always appear at the end, and all the entries seen in the first
1682     // loop have been retained, so it is safe to walk through them.
1683     channel = list_first_entry(&g_tools_channel_list, uvm_channel_t, tools.channel_list_node);
1684     for (i = 0; i < channel_count; i++) {
1685         uvm_channel_update_progress_all(channel);
1686         channel = list_next_entry(channel, tools.channel_list_node);
1687     }
1688 
1689     // Now release all the entries retained at the beginning.
1690     i = 0;
1691     uvm_spin_lock(&g_tools_channel_list_lock);
1692     list_for_each_entry_safe(channel, next_channel, &g_tools_channel_list, tools.channel_list_node) {
1693         if (i++ == channel_count)
1694             break;
1695 
1696         remove_pending_event_for_channel(channel);
1697     }
1698     uvm_spin_unlock(&g_tools_channel_list_lock);
1699 }
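
// The traversal above pins every entry currently on the list while holding the
// lock, walks exactly that many entries with the lock dropped, and then
// releases the references it took. Entries appended during the walk are simply
// not visited because the walk is bounded by the count taken up front. The
// same idea in a generic, disabled sketch with hypothetical list and refcount
// helpers:
#if 0
static void example_walk_snapshot(struct example_list *list)
{
    struct example_node *node;
    struct example_node *next;
    unsigned long pinned = 0;
    unsigned long i;

    example_lock(&list->lock);
    for (node = list->head; node != NULL; node = node->next) {
        node->refcount++;              // keep the node alive once the lock drops
        pinned++;
    }
    example_unlock(&list->lock);

    // Visit exactly 'pinned' nodes; later additions are ignored on this pass.
    node = list->head;
    for (i = 0; i < pinned; i++) {
        example_process(node);         // hypothetical per-node work, lock not held
        node = node->next;
    }

    example_lock(&list->lock);
    node = list->head;
    for (i = 0; i < pinned; i++) {
        next = node->next;             // grab next before the node may go away
        node->refcount--;              // drop the reference taken above
        node = next;
    }
    example_unlock(&list->lock);
}
#endif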
1700 
1701 void uvm_tools_record_cpu_fatal_fault(uvm_va_space_t *va_space,
1702                                       NvU64 address,
1703                                       bool is_write,
1704                                       UvmEventFatalReason reason)
1705 {
1706     uvm_assert_rwsem_locked(&va_space->lock);
1707 
1708     if (!va_space->tools.enabled)
1709         return;
1710 
1711     uvm_down_read(&va_space->tools.lock);
1712     if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V1)) {
1713         UvmEventEntry_V1 entry;
1714         UvmEventFatalFaultInfo_V1 *info = &entry.eventData.fatalFault;
1715 
1716         memset(&entry, 0, sizeof(entry));
1717 
1718         info->eventType      = UvmEventTypeFatalFault;
1719         info->processorIndex = UVM_ID_CPU_VALUE;
1720         info->timeStamp      = NV_GETTIME();
1721         info->address        = address;
1722         info->accessType     = is_write? UvmEventMemoryAccessTypeWrite: UvmEventMemoryAccessTypeRead;
1723         // info->faultType is not valid for cpu faults
1724         info->reason         = reason;
1725 
1726         uvm_tools_record_event_v1(va_space, &entry);
1727     }
1728     if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V2)) {
1729         UvmEventEntry_V2 entry;
1730         UvmEventFatalFaultInfo_V2 *info = &entry.eventData.fatalFault;
1731 
1732         memset(&entry, 0, sizeof(entry));
1733 
1734         info->eventType      = UvmEventTypeFatalFault;
1735         info->processorIndex = UVM_ID_CPU_VALUE;
1736         info->timeStamp      = NV_GETTIME();
1737         info->address        = address;
1738         info->accessType     = is_write? UvmEventMemoryAccessTypeWrite: UvmEventMemoryAccessTypeRead;
1739         // info->faultType is not valid for cpu faults
1740         info->reason         = reason;
1741 
1742         uvm_tools_record_event_v2(va_space, &entry);
1743     }
1744     uvm_up_read(&va_space->tools.lock);
1745 }
1746 
1747 void uvm_tools_record_gpu_fatal_fault(uvm_gpu_id_t gpu_id,
1748                                       uvm_va_space_t *va_space,
1749                                       const uvm_fault_buffer_entry_t *buffer_entry,
1750                                       UvmEventFatalReason reason)
1751 {
1752     uvm_assert_rwsem_locked(&va_space->lock);
1753 
1754     if (!va_space->tools.enabled)
1755         return;
1756 
1757     uvm_down_read(&va_space->tools.lock);
1758     if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V1)) {
1759         UvmEventEntry_V1 entry;
1760         UvmEventFatalFaultInfo_V1 *info = &entry.eventData.fatalFault;
1761 
1762         memset(&entry, 0, sizeof(entry));
1763 
1764         info->eventType      = UvmEventTypeFatalFault;
1765         info->processorIndex = uvm_parent_id_value_from_processor_id(gpu_id);
1766         info->timeStamp      = NV_GETTIME();
1767         info->address        = buffer_entry->fault_address;
1768         info->accessType     = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
1769         info->faultType      = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
1770         info->reason         = reason;
1771 
1772         uvm_tools_record_event_v1(va_space, &entry);
1773     }
1774     if (tools_is_event_enabled_version(va_space, UvmEventTypeFatalFault, UvmToolsEventQueueVersion_V2)) {
1775         UvmEventEntry_V2 entry;
1776         UvmEventFatalFaultInfo_V2 *info = &entry.eventData.fatalFault;
1777 
1778         memset(&entry, 0, sizeof(entry));
1779 
1780         info->eventType      = UvmEventTypeFatalFault;
1781         info->processorIndex = uvm_id_value(gpu_id);
1782         info->timeStamp      = NV_GETTIME();
1783         info->address        = buffer_entry->fault_address;
1784         info->accessType     = g_hal_to_tools_fault_access_type_table[buffer_entry->fault_access_type];
1785         info->faultType      = g_hal_to_tools_fault_type_table[buffer_entry->fault_type];
1786         info->reason         = reason;
1787 
1788         uvm_tools_record_event_v2(va_space, &entry);
1789     }
1790     uvm_up_read(&va_space->tools.lock);
1791 }
1792 
1793 void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
1794                                 NvU64 address,
1795                                 size_t region_size,
1796                                 const uvm_processor_mask_t *processors)
1797 {
1798     UVM_ASSERT(address);
1799     UVM_ASSERT(PAGE_ALIGNED(address));
1800     UVM_ASSERT(region_size > 0);
1801 
1802     uvm_assert_rwsem_locked(&va_space->lock);
1803 
1804     if (!va_space->tools.enabled)
1805         return;
1806 
1807     uvm_down_read(&va_space->tools.lock);
1808     if (tools_is_event_enabled_version(va_space, UvmEventTypeThrashingDetected, UvmToolsEventQueueVersion_V1)) {
1809         UvmEventEntry_V1 entry;
1810         UvmEventThrashingDetectedInfo_V1 *info = &entry.eventData.thrashing;
1811         uvm_processor_id_t id;
1812 
1813         memset(&entry, 0, sizeof(entry));
1814 
1815         info->eventType = UvmEventTypeThrashingDetected;
1816         info->address   = address;
1817         info->size      = region_size;
1818         info->timeStamp = NV_GETTIME();
1819 
1820         for_each_id_in_mask(id, processors)
1821             __set_bit(uvm_parent_id_value_from_processor_id(id),
1822                       (unsigned long *)&info->processors);
1823 
1824         uvm_tools_record_event_v1(va_space, &entry);
1825     }
1826     if (tools_is_event_enabled_version(va_space, UvmEventTypeThrashingDetected, UvmToolsEventQueueVersion_V2)) {
1827         UvmEventEntry_V2 entry;
1828         UvmEventThrashingDetectedInfo_V2 *info = &entry.eventData.thrashing;
1829 
1830         memset(&entry, 0, sizeof(entry));
1831 
1832         info->eventType = UvmEventTypeThrashingDetected;
1833         info->address   = address;
1834         info->size      = region_size;
1835         info->timeStamp = NV_GETTIME();
1836 
1837         BUILD_BUG_ON(UVM_MAX_PROCESSORS_V2 < UVM_ID_MAX_PROCESSORS);
1838         bitmap_copy((long unsigned *)&info->processors, processors->bitmap, UVM_ID_MAX_PROCESSORS);
1839 
1840         uvm_tools_record_event_v2(va_space, &entry);
1841     }
1842     uvm_up_read(&va_space->tools.lock);
1843 }
1844 
1845 void uvm_tools_record_throttling_start(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1846 {
1847     UVM_ASSERT(address);
1848     UVM_ASSERT(PAGE_ALIGNED(address));
1849     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1850 
1851     uvm_assert_rwsem_locked(&va_space->lock);
1852 
1853     if (!va_space->tools.enabled)
1854         return;
1855 
1856     uvm_down_read(&va_space->tools.lock);
1857     if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingStart, UvmToolsEventQueueVersion_V1)) {
1858         UvmEventEntry_V1 entry;
1859         UvmEventThrottlingStartInfo_V1 *info = &entry.eventData.throttlingStart;
1860 
1861         memset(&entry, 0, sizeof(entry));
1862 
1863         info->eventType      = UvmEventTypeThrottlingStart;
1864         info->processorIndex = uvm_parent_id_value_from_processor_id(processor);
1865         info->address        = address;
1866         info->timeStamp      = NV_GETTIME();
1867 
1868         uvm_tools_record_event_v1(va_space, &entry);
1869     }
1870     if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingStart, UvmToolsEventQueueVersion_V2)) {
1871         UvmEventEntry_V2 entry;
1872         UvmEventThrottlingStartInfo_V2 *info = &entry.eventData.throttlingStart;
1873 
1874         memset(&entry, 0, sizeof(entry));
1875 
1876         info->eventType      = UvmEventTypeThrottlingStart;
1877         info->processorIndex = uvm_id_value(processor);
1878         info->address        = address;
1879         info->timeStamp      = NV_GETTIME();
1880 
1881         uvm_tools_record_event_v2(va_space, &entry);
1882     }
1883     uvm_up_read(&va_space->tools.lock);
1884 }
1885 
1886 void uvm_tools_record_throttling_end(uvm_va_space_t *va_space, NvU64 address, uvm_processor_id_t processor)
1887 {
1888     UVM_ASSERT(address);
1889     UVM_ASSERT(PAGE_ALIGNED(address));
1890     UVM_ASSERT(UVM_ID_IS_VALID(processor));
1891 
1892     uvm_assert_rwsem_locked(&va_space->lock);
1893 
1894     if (!va_space->tools.enabled)
1895         return;
1896 
1897     uvm_down_read(&va_space->tools.lock);
1898     if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingEnd, UvmToolsEventQueueVersion_V1)) {
1899         UvmEventEntry_V1 entry;
1900         UvmEventThrottlingEndInfo_V1 *info = &entry.eventData.throttlingEnd;
1901 
1902         memset(&entry, 0, sizeof(entry));
1903 
1904         info->eventType      = UvmEventTypeThrottlingEnd;
1905         info->processorIndex = uvm_parent_id_value_from_processor_id(processor);
1906         info->address        = address;
1907         info->timeStamp      = NV_GETTIME();
1908 
1909         uvm_tools_record_event_v1(va_space, &entry);
1910     }
1911     if (tools_is_event_enabled_version(va_space, UvmEventTypeThrottlingEnd, UvmToolsEventQueueVersion_V2)) {
1912         UvmEventEntry_V2 entry;
1913         UvmEventThrottlingEndInfo_V2 *info = &entry.eventData.throttlingEnd;
1914 
1915         memset(&entry, 0, sizeof(entry));
1916 
1917         info->eventType      = UvmEventTypeThrottlingEnd;
1918         info->processorIndex = uvm_id_value(processor);
1919         info->address        = address;
1920         info->timeStamp      = NV_GETTIME();
1921 
1922         uvm_tools_record_event_v2(va_space, &entry);
1923     }
1924     uvm_up_read(&va_space->tools.lock);
1925 }
1926 
1927 static void record_map_remote_events(void *args)
1928 {
1929     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)args;
1930     map_remote_data_t *map_remote, *next;
1931     uvm_va_space_t *va_space = block_map_remote->va_space;
1932 
1933     uvm_down_read(&va_space->tools.lock);
1934     if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V1)) {
1935         UvmEventEntry_V1 entry;
1936 
1937         memset(&entry, 0, sizeof(entry));
1938 
1939         entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1940         entry.eventData.mapRemote.srcIndex       = uvm_parent_id_value_from_processor_id(block_map_remote->src);
1941         entry.eventData.mapRemote.dstIndex       = uvm_parent_id_value_from_processor_id(block_map_remote->dst);
1942         entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
1943         entry.eventData.mapRemote.timeStamp      = block_map_remote->timestamp;
1944 
1945         list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
1946             list_del(&map_remote->events_node);
1947 
1948             entry.eventData.mapRemote.address      = map_remote->address;
1949             entry.eventData.mapRemote.size         = map_remote->size;
1950             entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
1951             kmem_cache_free(g_tools_map_remote_data_cache, map_remote);
1952 
1953             uvm_tools_record_event_v1(va_space, &entry);
1954         }
1955     }
1956     if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V2)) {
1957         UvmEventEntry_V2 entry;
1958 
1959         memset(&entry, 0, sizeof(entry));
1960 
1961         entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
1962         entry.eventData.mapRemote.srcIndex       = uvm_id_value(block_map_remote->src);
1963         entry.eventData.mapRemote.dstIndex       = uvm_id_value(block_map_remote->dst);
1964         entry.eventData.mapRemote.mapRemoteCause = block_map_remote->cause;
1965         entry.eventData.mapRemote.timeStamp      = block_map_remote->timestamp;
1966 
1967         list_for_each_entry_safe(map_remote, next, &block_map_remote->events, events_node) {
1968             list_del(&map_remote->events_node);
1969 
1970             entry.eventData.mapRemote.address      = map_remote->address;
1971             entry.eventData.mapRemote.size         = map_remote->size;
1972             entry.eventData.mapRemote.timeStampGpu = map_remote->timestamp_gpu;
1973             kmem_cache_free(g_tools_map_remote_data_cache, map_remote);
1974 
1975             uvm_tools_record_event_v2(va_space, &entry);
1976         }
1977     }
1978     uvm_up_read(&va_space->tools.lock);
1979 
1980     UVM_ASSERT(list_empty(&block_map_remote->events));
1981     kmem_cache_free(g_tools_block_map_remote_data_cache, block_map_remote);
1982 }
1983 
1984 static void record_map_remote_events_entry(void *args)
1985 {
1986     UVM_ENTRY_VOID(record_map_remote_events(args));
1987 }
1988 
1989 static void on_map_remote_complete(void *ptr)
1990 {
1991     block_map_remote_data_t *block_map_remote = (block_map_remote_data_t *)ptr;
1992     map_remote_data_t *map_remote;
1993 
1994     // Only GPU mappings use the deferred mechanism
1995     UVM_ASSERT(UVM_ID_IS_GPU(block_map_remote->src));
1996     list_for_each_entry(map_remote, &block_map_remote->events, events_node)
1997         map_remote->timestamp_gpu = *map_remote->timestamp_gpu_addr;
1998 
1999     nv_kthread_q_item_init(&block_map_remote->queue_item, record_map_remote_events_entry, ptr);
2000 
2001     uvm_spin_lock(&g_tools_channel_list_lock);
2002     remove_pending_event_for_channel(block_map_remote->channel);
2003     nv_kthread_q_schedule_q_item(&g_tools_queue, &block_map_remote->queue_item);
2004     uvm_spin_unlock(&g_tools_channel_list_lock);
2005 }
2006 
2007 void uvm_tools_record_map_remote(uvm_va_block_t *va_block,
2008                                  uvm_push_t *push,
2009                                  uvm_processor_id_t processor,
2010                                  uvm_processor_id_t residency,
2011                                  NvU64 address,
2012                                  size_t region_size,
2013                                  UvmEventMapRemoteCause cause)
2014 {
2015     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
2016 
2017     UVM_ASSERT(UVM_ID_IS_VALID(processor));
2018     UVM_ASSERT(UVM_ID_IS_VALID(residency));
2019     UVM_ASSERT(cause != UvmEventMapRemoteCauseInvalid);
2020 
2021     uvm_assert_rwsem_locked(&va_space->lock);
2022 
2023     if (!va_space->tools.enabled)
2024         return;
2025 
2026     uvm_down_read(&va_space->tools.lock);
2027 
2028     if (UVM_ID_IS_CPU(processor)) {
2029         if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V1)) {
2030             UvmEventEntry_V1 entry;
2031 
2032             memset(&entry, 0, sizeof(entry));
2033 
2034             entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
2035             entry.eventData.mapRemote.srcIndex       = uvm_parent_id_value_from_processor_id(processor);
2036             entry.eventData.mapRemote.dstIndex       = uvm_parent_id_value_from_processor_id(residency);
2037             entry.eventData.mapRemote.mapRemoteCause = cause;
2038             entry.eventData.mapRemote.timeStamp      = NV_GETTIME();
2039             entry.eventData.mapRemote.address        = address;
2040             entry.eventData.mapRemote.size           = region_size;
2041             entry.eventData.mapRemote.timeStampGpu   = 0;
2042 
2043             UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);
2044 
2045             uvm_tools_record_event_v1(va_space, &entry);
2046         }
2047         if (tools_is_event_enabled_version(va_space, UvmEventTypeMapRemote, UvmToolsEventQueueVersion_V2)) {
2048             UvmEventEntry_V2 entry;
2049 
2050             memset(&entry, 0, sizeof(entry));
2051 
2052             entry.eventData.mapRemote.eventType      = UvmEventTypeMapRemote;
2053             entry.eventData.mapRemote.srcIndex       = uvm_id_value(processor);
2054             entry.eventData.mapRemote.dstIndex       = uvm_id_value(residency);
2055             entry.eventData.mapRemote.mapRemoteCause = cause;
2056             entry.eventData.mapRemote.timeStamp      = NV_GETTIME();
2057             entry.eventData.mapRemote.address        = address;
2058             entry.eventData.mapRemote.size           = region_size;
2059             entry.eventData.mapRemote.timeStampGpu   = 0;
2060 
2061             UVM_ASSERT(entry.eventData.mapRemote.mapRemoteCause != UvmEventMapRemoteCauseInvalid);
2062 
2063             uvm_tools_record_event_v2(va_space, &entry);
2064         }
2065     }
2066     else if (tools_is_event_enabled(va_space, UvmEventTypeMapRemote)) {
2067         uvm_push_info_t *push_info = uvm_push_info_from_push(push);
2068         block_map_remote_data_t *block_map_remote;
2069         map_remote_data_t *map_remote;
2070 
2071         // The first call on this pushbuffer creates the per-VA block structure
2072         if (push_info->on_complete == NULL) {
2073             UVM_ASSERT(push_info->on_complete_data == NULL);
2074 
2075             block_map_remote = kmem_cache_alloc(g_tools_block_map_remote_data_cache, NV_UVM_GFP_FLAGS);
2076             if (block_map_remote == NULL)
2077                 goto done;
2078 
2079             block_map_remote->src = processor;
2080             block_map_remote->dst = residency;
2081             block_map_remote->cause = cause;
2082             block_map_remote->timestamp = NV_GETTIME();
2083             block_map_remote->va_space = va_space;
2084             block_map_remote->channel = push->channel;
2085             INIT_LIST_HEAD(&block_map_remote->events);
2086 
2087             push_info->on_complete_data = block_map_remote;
2088             push_info->on_complete = on_map_remote_complete;
2089 
2090             uvm_spin_lock(&g_tools_channel_list_lock);
2091             add_pending_event_for_channel(block_map_remote->channel);
2092             uvm_spin_unlock(&g_tools_channel_list_lock);
2093         }
2094         else {
2095             block_map_remote = push_info->on_complete_data;
2096         }
2097         UVM_ASSERT(block_map_remote);
2098 
2099         map_remote = kmem_cache_alloc(g_tools_map_remote_data_cache, NV_UVM_GFP_FLAGS);
2100         if (map_remote == NULL)
2101             goto done;
2102 
2103         map_remote->address = address;
2104         map_remote->size = region_size;
2105         map_remote->timestamp_gpu_addr = uvm_push_timestamp(push);
2106 
2107         list_add_tail(&map_remote->events_node, &block_map_remote->events);
2108     }
2109 
2110 done:
2111     uvm_up_read(&va_space->tools.lock);
2112 }
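
// For GPU-side mappings the function above lazily creates one aggregate per
// push on the first call (hooking the completion callback) and appends one
// entry per mapped region on every call; the callback then emits all of them
// at once. The same create-on-first-use pattern in miniature (disabled sketch,
// hypothetical types and helpers throughout):
#if 0
static void example_record_deferred(example_push_t *push, NvU64 address, NvU64 size)
{
    struct example_aggregate *agg = example_push_get_data(push);    // hypothetical
    struct example_entry *entry;

    if (agg == NULL) {
        // First event for this push: create the aggregate and hook completion.
        agg = example_alloc_aggregate();                            // hypothetical
        example_list_init(&agg->entries);
        example_push_set_on_complete(push, example_flush, agg);     // hypothetical
    }

    // Every call appends one entry; example_flush() emits them all when the
    // push completes.
    entry = example_alloc_entry();                                  // hypothetical
    entry->address = address;
    entry->size    = size;
    example_list_append(&agg->entries, &entry->node);               // hypothetical
}
#endif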
2113 
2114 NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp)
2115 {
2116     NV_STATUS status = NV_OK;
2117     uvm_tools_event_tracker_t *event_tracker;
2118 
2119     if (params->requestedVersion != UvmToolsEventQueueVersion_V1 &&
2120         params->requestedVersion != UvmToolsEventQueueVersion_V2)
2121         return NV_ERR_INVALID_ARGUMENT;
2122 
2123     event_tracker = nv_kmem_cache_zalloc(g_tools_event_tracker_cache, NV_UVM_GFP_FLAGS);
2124     if (event_tracker == NULL)
2125         return NV_ERR_NO_MEMORY;
2126 
2127     event_tracker->version = params->requestedVersion;
2128 
2129     event_tracker->uvm_file = fget(params->uvmFd);
2130     if (event_tracker->uvm_file == NULL) {
2131         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
2132         goto fail;
2133     }
2134 
2135     if (!uvm_file_is_nvidia_uvm(event_tracker->uvm_file)) {
2136         fput(event_tracker->uvm_file);
2137         event_tracker->uvm_file = NULL;
2138         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
2139         goto fail;
2140     }
2141 
2142     // We don't use uvm_fd_va_space() here because tools can work
2143     // without an associated va_space_mm.
2144     if (!uvm_fd_get_type(event_tracker->uvm_file, UVM_FD_VA_SPACE)) {
2145         fput(event_tracker->uvm_file);
2146         event_tracker->uvm_file = NULL;
2147         status = NV_ERR_ILLEGAL_ACTION;
2148         goto fail;
2149     }
2150 
2151     event_tracker->is_queue = params->queueBufferSize != 0;
2152     if (event_tracker->is_queue) {
2153         uvm_tools_queue_t *queue = &event_tracker->queue;
2154         NvU64 buffer_size, control_size;
2155 
2156         uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
2157         init_waitqueue_head(&queue->wait_queue);
2158 
2159         if (params->queueBufferSize > UINT_MAX) {
2160             status = NV_ERR_INVALID_ARGUMENT;
2161             goto fail;
2162         }
2163 
2164         queue->queue_buffer_count = (NvU32)params->queueBufferSize;
2165         queue->notification_threshold = queue->queue_buffer_count / 2;
2166 
2167         // queue_buffer_count must be a power of 2 and at least 2
2168         if (!is_power_of_2(queue->queue_buffer_count) || queue->queue_buffer_count < 2) {
2169             status = NV_ERR_INVALID_ARGUMENT;
2170             goto fail;
2171         }
2172 
2173         if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
2174             buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
2175             control_size = sizeof(UvmToolsEventControlData_V1);
2176         }
2177         else {
2178             buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
2179             control_size = sizeof(UvmToolsEventControlData_V2);
2180         }
2181 
2182         status = map_user_pages(params->queueBuffer,
2183                                 buffer_size,
2184                                 (void **)&queue->queue_v2,
2185                                 &queue->queue_buffer_pages);
2186         if (status != NV_OK)
2187             goto fail;
2188 
2189         status = map_user_pages(params->controlBuffer,
2190                                 control_size,
2191                                 (void **)&queue->control_v2,
2192                                 &queue->control_buffer_pages);
2193 
2194         if (status != NV_OK)
2195             goto fail;
2196     }
2197     else {
2198         uvm_tools_counter_t *counter = &event_tracker->counter;
2199         counter->all_processors = params->allProcessors;
2200         counter->processor = params->processor;
2201         status = map_user_pages(params->controlBuffer,
2202                                 sizeof(NvU64) * UVM_TOTAL_COUNTERS,
2203                                 (void **)&counter->counters,
2204                                 &counter->counter_buffer_pages);
2205         if (status != NV_OK)
2206             goto fail;
2207     }
2208 
2209     if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
2210         status = NV_ERR_INVALID_ARGUMENT;
2211         goto fail;
2212     }
2213 
2214     params->grantedVersion = params->requestedVersion;
2215 
2216     return NV_OK;
2217 
2218 fail:
2219     destroy_event_tracker(event_tracker);
2220     return status;
2221 }
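
// The queue buffer count is required above to be a power of two (and at least
// 2), which lets free-running put/get counters be reduced to buffer slots with
// a mask instead of a division. The standalone sketch below shows only that
// arithmetic; how the driver's queue helpers actually consume the indices is
// defined elsewhere, so treat this purely as an illustration of why the
// power-of-two restriction is convenient.
#if 0
#include <stdbool.h>
#include <stdint.h>

struct example_ring
{
    uint32_t count;  // must be a power of two, >= 2
    uint32_t put;    // free-running producer counter
    uint32_t get;    // free-running consumer counter
};

static uint32_t example_ring_slot(const struct example_ring *r, uint32_t counter)
{
    return counter & (r->count - 1);  // cheap modulo for power-of-two sizes
}

static uint32_t example_ring_pending(const struct example_ring *r)
{
    return r->put - r->get;           // unsigned wrap-around keeps this correct
}

static bool example_ring_should_notify(const struct example_ring *r, uint32_t threshold)
{
    return example_ring_pending(r) >= threshold;
}
#endif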
2222 
2223 NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp)
2224 {
2225     uvm_tools_queue_snapshot_t sn;
2226     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
2227 
2228     if (!tracker_is_queue(event_tracker))
2229         return NV_ERR_INVALID_ARGUMENT;
2230 
2231     uvm_spin_lock(&event_tracker->queue.lock);
2232 
2233     event_tracker->queue.notification_threshold = params->notificationThreshold;
2234 
2235     if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
2236         UvmToolsEventControlData_V1 *ctrl = event_tracker->queue.control_v1;
2237 
2238         sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
2239         sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
2240     }
2241     else {
2242         UvmToolsEventControlData_V2 *ctrl = event_tracker->queue.control_v2;
2243 
2244         sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
2245         sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
2246     }
2247 
2248     if (queue_needs_wakeup(&event_tracker->queue, &sn))
2249         wake_up_all(&event_tracker->queue.wait_queue);
2250 
2251     uvm_spin_unlock(&event_tracker->queue.lock);
2252 
2253     return NV_OK;
2254 }
2255 
2256 static NV_STATUS tools_update_perf_events_callbacks(uvm_va_space_t *va_space)
2257 {
2258     NV_STATUS status;
2259 
2260     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
2261     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
2262 
2263     if (tools_is_fault_callback_needed(va_space)) {
2264         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
2265             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
2266                                                              UVM_PERF_EVENT_FAULT,
2267                                                              uvm_tools_record_fault);
2268 
2269             if (status != NV_OK)
2270                 return status;
2271         }
2272     }
2273     else {
2274         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_FAULT, uvm_tools_record_fault)) {
2275             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
2276                                                       UVM_PERF_EVENT_FAULT,
2277                                                       uvm_tools_record_fault);
2278         }
2279     }
2280 
2281     if (tools_is_migration_callback_needed(va_space)) {
2282         if (!uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
2283             status = uvm_perf_register_event_callback_locked(&va_space->perf_events,
2284                                                              UVM_PERF_EVENT_MIGRATION,
2285                                                              uvm_tools_record_migration);
2286 
2287             if (status != NV_OK)
2288                 return status;
2289         }
2290     }
2291     else {
2292         if (uvm_perf_is_event_callback_registered(&va_space->perf_events, UVM_PERF_EVENT_MIGRATION, uvm_tools_record_migration)) {
2293             uvm_perf_unregister_event_callback_locked(&va_space->perf_events,
2294                                                       UVM_PERF_EVENT_MIGRATION,
2295                                                       uvm_tools_record_migration);
2296         }
2297     }
2298 
2299     return NV_OK;
2300 }
2301 
2302 static NV_STATUS tools_update_status(uvm_va_space_t *va_space)
2303 {
2304     NV_STATUS status;
2305     bool should_be_enabled;
2306     uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
2307     uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
2308     uvm_assert_rwsem_locked_write(&va_space->tools.lock);
2309 
2310     status = tools_update_perf_events_callbacks(va_space);
2311     if (status != NV_OK)
2312         return status;
2313 
2314     should_be_enabled = tools_are_enabled(va_space);
2315     if (should_be_enabled != va_space->tools.enabled) {
2316         if (should_be_enabled)
2317             list_add(&va_space->tools.node, &g_tools_va_space_list);
2318         else
2319             list_del(&va_space->tools.node);
2320 
2321         va_space->tools.enabled = should_be_enabled;
2322     }
2323 
2324     return NV_OK;
2325 }
2326 
2327 #define EVENT_FLAGS_BITS (sizeof(NvU64) * 8)
2328 
2329 static bool mask_contains_invalid_events(NvU64 event_flags)
2330 {
2331     const unsigned long *event_mask = (const unsigned long *)&event_flags;
2332     DECLARE_BITMAP(helper_mask, EVENT_FLAGS_BITS);
2333     DECLARE_BITMAP(valid_events_mask, EVENT_FLAGS_BITS);
2334     DECLARE_BITMAP(tests_events_mask, EVENT_FLAGS_BITS);
2335 
2336     bitmap_zero(tests_events_mask, EVENT_FLAGS_BITS);
2337     bitmap_set(tests_events_mask,
2338                UvmEventTestTypesFirst,
2339                UvmEventTestTypesLast - UvmEventTestTypesFirst + 1);
2340 
2341     bitmap_zero(valid_events_mask, EVENT_FLAGS_BITS);
2342     bitmap_set(valid_events_mask, 1, UvmEventNumTypes - 1);
2343 
2344     if (uvm_enable_builtin_tests)
2345         bitmap_or(valid_events_mask, valid_events_mask, tests_events_mask, EVENT_FLAGS_BITS);
2346 
2347     // Make sure that test event ids do not overlap with regular events
2348     BUILD_BUG_ON(UvmEventTestTypesFirst < UvmEventNumTypes);
2349     BUILD_BUG_ON(UvmEventTestTypesFirst > UvmEventTestTypesLast);
2350     BUILD_BUG_ON(UvmEventTestTypesLast >= UvmEventNumTypesAll);
2351 
2352     // Make sure that no test event ever changes the size of UvmEventEntry_V2
2353     BUILD_BUG_ON(sizeof(((UvmEventEntry_V2 *)NULL)->testEventData) >
2354                  sizeof(((UvmEventEntry_V2 *)NULL)->eventData));
2355     BUILD_BUG_ON(UvmEventNumTypesAll > EVENT_FLAGS_BITS);
2356 
2357     if (!bitmap_andnot(helper_mask, event_mask, valid_events_mask, EVENT_FLAGS_BITS))
2358         return false;
2359 
2360     if (!uvm_enable_builtin_tests && bitmap_and(helper_mask, event_mask, tests_events_mask, EVENT_FLAGS_BITS))
2361         UVM_INFO_PRINT("Event index not found. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
2362 
2363     return true;
2364 }
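
// A small worked example of the validation above, with hypothetical bit
// positions: regular events occupy a contiguous range of low bits (bit 0 is
// unused), and the test-event range is accepted only when built-in tests are
// enabled; any other set bit makes the whole flags word invalid.
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool example_flags_are_invalid(uint64_t flags,
                                      unsigned num_regular,  // assumed < 64
                                      unsigned test_first,
                                      unsigned test_last,    // assumed < 64
                                      bool tests_enabled)
{
    // Bits 1..num_regular-1 are always valid; bit 0 is never used.
    uint64_t valid = ((1ULL << num_regular) - 1) & ~1ULL;
    unsigned i;

    // The test-event range is valid only when built-in tests are enabled.
    if (tests_enabled) {
        for (i = test_first; i <= test_last; i++)
            valid |= 1ULL << i;
    }

    return (flags & ~valid) != 0;
}
#endif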
2365 
2366 NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp)
2367 {
2368     uvm_va_space_t *va_space;
2369     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
2370     NV_STATUS status = NV_OK;
2371     NvU64 inserted_lists;
2372 
2373     if (!tracker_is_queue(event_tracker))
2374         return NV_ERR_INVALID_ARGUMENT;
2375 
2376     if (mask_contains_invalid_events(params->eventTypeFlags))
2377         return NV_ERR_INVALID_ARGUMENT;
2378 
2379     va_space = tools_event_tracker_va_space(event_tracker);
2380 
2381     uvm_down_write(&g_tools_va_space_list_lock);
2382     uvm_down_write(&va_space->perf_events.lock);
2383     uvm_down_write(&va_space->tools.lock);
2384 
2385     insert_event_tracker(va_space,
2386                          event_tracker->queue.queue_nodes,
2387                          UvmEventNumTypesAll,
2388                          params->eventTypeFlags,
2389                          &event_tracker->queue.subscribed_queues,
2390                          event_tracker->version == UvmToolsEventQueueVersion_V1 ?
2391                              va_space->tools.queues_v1 : va_space->tools.queues_v2,
2392                          &inserted_lists);
2393 
2394     // perform any necessary registration
2395     status = tools_update_status(va_space);
2396     if (status != NV_OK) {
2397         // on error, unregister any newly registered event
2398         remove_event_tracker(va_space,
2399                              event_tracker->queue.queue_nodes,
2400                              UvmEventNumTypes,
2401                              inserted_lists,
2402                              &event_tracker->queue.subscribed_queues);
2403     }
2404 
2405     uvm_up_write(&va_space->tools.lock);
2406     uvm_up_write(&va_space->perf_events.lock);
2407     uvm_up_write(&g_tools_va_space_list_lock);
2408 
2409     return status;
2410 }
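
// The enable path above follows an insert-then-validate pattern: the requested
// subscriptions are inserted first, callback registration is attempted, and
// only the subscriptions newly added by this call (tracked in inserted_lists)
// are rolled back if registration fails. A condensed, disabled sketch of the
// pattern with hypothetical helpers:
#if 0
#include <stdint.h>

static int example_subscribe(uint64_t requested)
{
    uint64_t inserted = 0;
    int status;

    // Records in 'inserted' only the subscriptions this call actually added.
    example_insert_subscriptions(requested, &inserted);  // hypothetical

    status = example_update_registrations();             // hypothetical; may fail
    if (status != 0)
        example_remove_subscriptions(inserted);          // roll back this call only

    return status;
}
#endif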
2411 
2412 NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp)
2413 {
2414     NV_STATUS status;
2415     uvm_va_space_t *va_space;
2416     uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
2417 
2418     if (!tracker_is_queue(event_tracker))
2419         return NV_ERR_INVALID_ARGUMENT;
2420 
2421     va_space = tools_event_tracker_va_space(event_tracker);
2422 
2423     uvm_down_write(&g_tools_va_space_list_lock);
2424     uvm_down_write(&va_space->perf_events.lock);
2425     uvm_down_write(&va_space->tools.lock);
2426     remove_event_tracker(va_space,
2427                          event_tracker->queue.queue_nodes,
2428                          UvmEventNumTypesAll,
2429                          params->eventTypeFlags,
2430                          &event_tracker->queue.subscribed_queues);
2431 
2432     // de-registration should not fail
2433     status = tools_update_status(va_space);
2434     UVM_ASSERT(status == NV_OK);
2435 
2436     uvm_up_write(&va_space->tools.lock);
2437     uvm_up_write(&va_space->perf_events.lock);
2438     uvm_up_write(&g_tools_va_space_list_lock);
2439     return NV_OK;
2440 }
2441 
NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
    NV_STATUS status = NV_OK;
    NvU64 inserted_lists;

    if (!tracker_is_counter(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);

    insert_event_tracker(va_space,
                         event_tracker->counter.counter_nodes,
                         UVM_TOTAL_COUNTERS,
                         params->counterTypeFlags,
                         &event_tracker->counter.subscribed_counters,
                         va_space->tools.counters,
                         &inserted_lists);

    // perform any necessary registration
    status = tools_update_status(va_space);
    if (status != NV_OK) {
        remove_event_tracker(va_space,
                             event_tracker->counter.counter_nodes,
                             UVM_TOTAL_COUNTERS,
                             inserted_lists,
                             &event_tracker->counter.subscribed_counters);
    }

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return status;
}

NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    uvm_va_space_t *va_space;
    uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);

    if (!tracker_is_counter(event_tracker))
        return NV_ERR_INVALID_ARGUMENT;

    va_space = tools_event_tracker_va_space(event_tracker);

    uvm_down_write(&g_tools_va_space_list_lock);
    uvm_down_write(&va_space->perf_events.lock);
    uvm_down_write(&va_space->tools.lock);
    remove_event_tracker(va_space,
                         event_tracker->counter.counter_nodes,
                         UVM_TOTAL_COUNTERS,
                         params->counterTypeFlags,
                         &event_tracker->counter.subscribed_counters);

    // de-registration should not fail
    status = tools_update_status(va_space);
    UVM_ASSERT(status == NV_OK);

    uvm_up_write(&va_space->tools.lock);
    uvm_up_write(&va_space->perf_events.lock);
    uvm_up_write(&g_tools_va_space_list_lock);

    return NV_OK;
}

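// Read or write up to one page of the VA block backing target_va, staging the
// data through stage_mem. The UVM_VA_BLOCK_LOCK_RETRY() wrapper takes the
// block lock and handles any retries required by the underlying call.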
static NV_STATUS tools_access_va_block(uvm_va_block_t *va_block,
                                       uvm_va_block_context_t *block_context,
                                       NvU64 target_va,
                                       NvU64 size,
                                       bool is_write,
                                       uvm_mem_t *stage_mem)
{
    if (is_write) {
        return UVM_VA_BLOCK_LOCK_RETRY(va_block,
                                       NULL,
                                       uvm_va_block_write_from_cpu(va_block, block_context, target_va, stage_mem, size));
    }
    else {
        return UVM_VA_BLOCK_LOCK_RETRY(va_block,
                                       NULL,
                                       uvm_va_block_read_to_cpu(va_block, block_context, stage_mem, target_va, size));

    }
}

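// Copy 'size' bytes between the user buffer at 'user_va' and the target
// process's memory at 'target_va', one page at a time, staging each page
// through a sysmem buffer so that data resident on any processor can be
// accessed. On return, *bytes holds the number of bytes actually copied.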
static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
                                             NvU64 target_va,
                                             NvU64 size,
                                             NvU64 user_va,
                                             NvU64 *bytes,
                                             bool is_write)
{
    NV_STATUS status;
    uvm_mem_t *stage_mem = NULL;
    void *stage_addr;
    uvm_processor_mask_t *retained_gpus = NULL;
    uvm_va_block_context_t *block_context = NULL;
    struct mm_struct *mm = NULL;

    retained_gpus = uvm_processor_mask_cache_alloc();
    if (!retained_gpus)
        return NV_ERR_NO_MEMORY;

    uvm_processor_mask_zero(retained_gpus);

    mm = uvm_va_space_mm_or_current_retain(va_space);

    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(PAGE_SIZE, mm, &stage_mem);
    if (status != NV_OK)
        goto exit;

    block_context = uvm_va_block_context_alloc(mm);
    if (!block_context) {
        status = NV_ERR_NO_MEMORY;
        goto exit;
    }

    stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem);
    *bytes = 0;

    while (*bytes < size) {
        uvm_gpu_t *gpu;
        uvm_va_block_t *block;
        void *user_va_start = (void *) (user_va + *bytes);
        NvU64 target_va_start = target_va + *bytes;
        NvU64 bytes_left = size - *bytes;
        NvU64 page_offset = target_va_start & (PAGE_SIZE - 1);
        NvU64 bytes_now = min(bytes_left, (NvU64)(PAGE_SIZE - page_offset));
        bool map_stage_mem_on_gpus = true;

        if (is_write) {
            NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now);
            if (remaining != 0) {
                status = NV_ERR_INVALID_ARGUMENT;
                goto exit;
            }
        }

        if (mm)
            uvm_down_read_mmap_lock(mm);

        // The RM flavor of the lock is needed to perform ECC checks.
        uvm_va_space_down_read_rm(va_space);
        if (mm)
            status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block_context->hmm.vma, &block);
        else
            status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);

        if (status != NV_OK)
            goto unlock_and_exit;

        // When CC is enabled, the staging memory cannot be mapped on the GPU
        // (it is protected sysmem), but it is still used to store the
        // unencrypted version of the page contents when the page is resident
        // on vidmem.
        if (g_uvm_global.conf_computing_enabled)
            map_stage_mem_on_gpus = false;

        if (map_stage_mem_on_gpus) {
            for_each_gpu_in_mask(gpu, &va_space->registered_gpus) {
                if (uvm_processor_mask_test_and_set(retained_gpus, gpu->id))
                    continue;

                // The retention of each GPU ensures that the staging memory is
                // freed before the unregistration of any of the GPUs it is
                // mapped on. Each GPU is retained once.
                uvm_gpu_retain(gpu);

                // Accessing the VA block may result in copying data between the
                // CPU and a GPU. Conservatively add virtual mappings to all the
                // GPUs (even if those mappings may never be used) as tools
                // read/write is not on a performance critical path.
                status = uvm_mem_map_gpu_kernel(stage_mem, gpu);
                if (status != NV_OK)
                    goto unlock_and_exit;
            }
        }
        else {
            UVM_ASSERT(uvm_processor_mask_empty(retained_gpus));
        }

        // Make sure a CPU resident page has an up to date struct page pointer.
        if (uvm_va_block_is_hmm(block)) {
            status = uvm_hmm_va_block_update_residency_info(block, mm, UVM_PAGE_ALIGN_DOWN(target_va_start), true);
            if (status != NV_OK)
                goto unlock_and_exit;
        }

        status = tools_access_va_block(block, block_context, target_va_start, bytes_now, is_write, stage_mem);

        // For simplicity, check for ECC errors on all GPUs registered in the VA
        // space
        if (status == NV_OK)
            status = uvm_global_gpu_check_ecc_error(&va_space->registered_gpus);

        uvm_va_space_up_read_rm(va_space);
        if (mm)
            uvm_up_read_mmap_lock(mm);

        if (status != NV_OK)
            goto exit;

        if (!is_write) {
            NvU64 remaining;

            // Prevent processor speculation prior to accessing user-mapped
            // memory to avoid leaking information from side-channel attacks.
            // Under speculation, a valid VA range which does not contain
            // target_va could be used, and the block index could run off the
            // end of the array. Information about the state of that kernel
            // memory could be inferred if speculative execution gets to the
            // point where the data is copied out.
            nv_speculation_barrier();

            remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now);
            if (remaining > 0) {
                status = NV_ERR_INVALID_ARGUMENT;
                goto exit;
            }
        }

        *bytes += bytes_now;
    }

unlock_and_exit:
    if (status != NV_OK) {
        uvm_va_space_up_read_rm(va_space);
        if (mm)
            uvm_up_read_mmap_lock(mm);
    }

exit:
    uvm_va_block_context_free(block_context);

    uvm_mem_free(stage_mem);

    uvm_global_gpu_release(retained_gpus);

    uvm_va_space_mm_or_current_release(va_space, mm);

    uvm_processor_mask_cache_free(retained_gpus);

    return status;
}

NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp)
{
    return tools_access_process_memory(uvm_va_space_get(filp),
                                       params->targetVa,
                                       params->size,
                                       params->buffer,
                                       &params->bytesRead,
                                       false);
}

NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp)
{
    return tools_access_process_memory(uvm_va_space_get(filp),
                                       params->targetVa,
                                       params->size,
                                       params->buffer,
                                       &params->bytesWritten,
                                       true);
}

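// Test-only ioctl: records the caller-provided event entry 'count' times
// through the normal V1/V2 event recording paths so that queue delivery can be
// exercised.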
NV_STATUS uvm_test_inject_tools_event(UVM_TEST_INJECT_TOOLS_EVENT_PARAMS *params, struct file *filp)
{
    NvU32 i;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->version != UvmToolsEventQueueVersion_V1 && params->version != UvmToolsEventQueueVersion_V2)
        return NV_ERR_INVALID_ARGUMENT;

    if (params->entry_v1.eventData.eventType >= UvmEventNumTypesAll)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_down_read(&va_space->tools.lock);
    for (i = 0; i < params->count; i++) {
        if (params->version == UvmToolsEventQueueVersion_V1)
            uvm_tools_record_event_v1(va_space, &params->entry_v1);
        else
            uvm_tools_record_event_v2(va_space, &params->entry_v2);
    }
    uvm_up_read(&va_space->tools.lock);
    return NV_OK;
}

NV_STATUS uvm_test_increment_tools_counter(UVM_TEST_INCREMENT_TOOLS_COUNTER_PARAMS *params, struct file *filp)
{
    NvU32 i;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->counter >= UVM_TOTAL_COUNTERS)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_down_read(&va_space->tools.lock);
    for (i = 0; i < params->count; i++)
        uvm_tools_inc_counter(va_space, params->counter, params->amount, &params->processor);
    uvm_up_read(&va_space->tools.lock);

    return NV_OK;
}

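// Fill the caller-provided table with the UUIDs of the CPU and all GPUs
// registered in the VA space, indexed by processor id value. Callers that pass
// count == 0 with a non-NULL tablePtr predate Multi-MIG support and get the V1
// (parent GPU UUID) layout; params->count is updated to the number of valid
// entries.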
NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS *params, struct file *filp)
{
    NvProcessorUuid *uuids;
    NvU64 remaining;
    uvm_gpu_t *gpu;
    NvU32 count = params->count;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    NvU32 version = UvmToolsEventQueueVersion_V2;

    // Prior to Multi-MIG support, params->count was always zero, meaning the
    // input array had size UVM_MAX_PROCESSORS_V1 (33 at that time).
    if (count == 0 && params->tablePtr) {
        version = UvmToolsEventQueueVersion_V1;
        count = UVM_MAX_PROCESSORS_V1;
    }
    else if (count == 0 || count > UVM_ID_MAX_PROCESSORS) {
        // Note that we don't rely on the external API definition
        // UVM_MAX_PROCESSORS since the kernel determines the array size needed
        // and reports the number of processors found to the caller.
        count = UVM_ID_MAX_PROCESSORS;
    }

    // Report which version of the table is being returned.
    params->version = version;

    uuids = uvm_kvmalloc_zero(sizeof(NvProcessorUuid) * count);
    if (uuids == NULL)
        return NV_ERR_NO_MEMORY;

    uvm_uuid_copy(&uuids[UVM_ID_CPU_VALUE], &NV_PROCESSOR_UUID_CPU_DEFAULT);
    params->count = 1;

    uvm_va_space_down_read(va_space);
    for_each_va_space_gpu(gpu, va_space) {
        NvU32 id_value;
        const NvProcessorUuid *uuid;

        // Version 1 only supports processors 0..32 and uses the parent
        // GPU UUID.
        if (version == UvmToolsEventQueueVersion_V1) {
            id_value = uvm_parent_id_value(gpu->parent->id);
            uuid = &gpu->parent->uuid;
        }
        else {
            id_value = uvm_id_value(gpu->id);
            uuid = &gpu->uuid;
        }

        if (id_value < count)
            uvm_uuid_copy(&uuids[id_value], uuid);

        // Return the actual count even if the UUID isn't returned due to
        // limited input array size.
        if (id_value + 1 > params->count)
            params->count = id_value + 1;
    }
    uvm_va_space_up_read(va_space);

    if (params->tablePtr)
        remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * count);
    else
        remaining = 0;
    uvm_kvfree(uuids);

    if (remaining != 0)
        return NV_ERR_INVALID_ADDRESS;

    return NV_OK;
}

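// Flush all pending event notifications: schedule any completed delayed
// (GPU-timestamped) events, then wait for the tools event queue thread to
// drain.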
void uvm_tools_flush_events(void)
{
    tools_schedule_completed_events();

    nv_kthread_q_flush(&g_tools_queue);
}

NV_STATUS uvm_api_tools_flush_events(UVM_TOOLS_FLUSH_EVENTS_PARAMS *params, struct file *filp)
{
    uvm_tools_flush_events();
    return NV_OK;
}

NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpuUuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    // Wait for register-based fault clears to queue the replay event
    if (!gpu->parent->has_clear_faulted_channel_method) {
        uvm_parent_gpu_non_replayable_faults_isr_lock(gpu->parent);
        uvm_parent_gpu_non_replayable_faults_isr_unlock(gpu->parent);
    }

    // Wait for pending fault replay methods to complete (replayable faults on
    // all GPUs, and non-replayable faults on method-based GPUs).
    status = uvm_channel_manager_wait(gpu->channel_manager);

    // Flush any pending events even if (status != NV_OK)
    uvm_tools_flush_events();
    uvm_gpu_release(gpu);

    return status;
}

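// File operations for the /dev/nvidia-uvm-tools character device.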
static const struct file_operations uvm_tools_fops =
{
    .open            = uvm_tools_open_entry,
    .release         = uvm_tools_release_entry,
    .unlocked_ioctl  = uvm_tools_unlocked_ioctl_entry,
#if NVCPU_IS_X86_64
    .compat_ioctl    = uvm_tools_unlocked_ioctl_entry,
#endif
    .poll            = uvm_tools_poll_entry,
    .owner           = THIS_MODULE,
};

static void _uvm_tools_destroy_cache_all(void)
{
    // The cache pointers are initialized to NULL, so it's safe to call
    // destroy on all of them.
    kmem_cache_destroy_safe(&g_tools_event_tracker_cache);
    kmem_cache_destroy_safe(&g_tools_block_migration_data_cache);
    kmem_cache_destroy_safe(&g_tools_migration_data_cache);
    kmem_cache_destroy_safe(&g_tools_replay_data_cache);
    kmem_cache_destroy_safe(&g_tools_block_map_remote_data_cache);
    kmem_cache_destroy_safe(&g_tools_map_remote_data_cache);
}

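// Module initialization for the tools layer: creates the kmem caches used for
// event bookkeeping, starts the tools event queue thread, and registers the
// nvidia-uvm-tools character device. On failure, everything created so far is
// torn down and a negative errno is returned.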
int uvm_tools_init(dev_t uvm_base_dev)
{
    dev_t uvm_tools_dev = MKDEV(MAJOR(uvm_base_dev), NVIDIA_UVM_TOOLS_MINOR_NUMBER);
    int ret = -ENOMEM; // This will be updated later if allocations succeed

    uvm_init_rwsem(&g_tools_va_space_list_lock, UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);

    g_tools_event_tracker_cache = NV_KMEM_CACHE_CREATE("uvm_tools_event_tracker_t",
                                                       uvm_tools_event_tracker_t);
    if (!g_tools_event_tracker_cache)
        goto err_cache_destroy;

    g_tools_block_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_migration_data_t",
                                                              block_migration_data_t);
    if (!g_tools_block_migration_data_cache)
        goto err_cache_destroy;

    g_tools_migration_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_migration_data_t",
                                                        migration_data_t);
    if (!g_tools_migration_data_cache)
        goto err_cache_destroy;

    g_tools_replay_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_replay_data_t",
                                                     replay_data_t);
    if (!g_tools_replay_data_cache)
        goto err_cache_destroy;

    g_tools_block_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_block_map_remote_data_t",
                                                               block_map_remote_data_t);
    if (!g_tools_block_map_remote_data_cache)
        goto err_cache_destroy;

    g_tools_map_remote_data_cache = NV_KMEM_CACHE_CREATE("uvm_tools_map_remote_data_t",
                                                         map_remote_data_t);
    if (!g_tools_map_remote_data_cache)
        goto err_cache_destroy;

    uvm_spin_lock_init(&g_tools_channel_list_lock, UVM_LOCK_ORDER_LEAF);

    ret = nv_kthread_q_init(&g_tools_queue, "UVM Tools Event Queue");
    if (ret < 0)
        goto err_cache_destroy;

    uvm_init_character_device(&g_uvm_tools_cdev, &uvm_tools_fops);
    ret = cdev_add(&g_uvm_tools_cdev, uvm_tools_dev, 1);
    if (ret != 0) {
        UVM_ERR_PRINT("cdev_add (major %u, minor %u) failed: %d\n", MAJOR(uvm_tools_dev),
                      MINOR(uvm_tools_dev), ret);
        goto err_stop_thread;
    }

    return ret;

err_stop_thread:
    nv_kthread_q_stop(&g_tools_queue);

err_cache_destroy:
    _uvm_tools_destroy_cache_all();
    return ret;
}

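// Module teardown: unregister the character device, stop the event queue
// thread, and verify that no events or VA spaces are still subscribed before
// destroying the caches.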
void uvm_tools_exit(void)
{
    unsigned i;
    cdev_del(&g_uvm_tools_cdev);

    nv_kthread_q_stop(&g_tools_queue);

    for (i = 0; i < UvmEventNumTypesAll; ++i)
        UVM_ASSERT(g_tools_enabled_event_count[i] == 0);

    UVM_ASSERT(list_empty(&g_tools_va_space_list));

    _uvm_tools_destroy_cache_all();
}