/*******************************************************************************
    Copyright (c) 2018-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_kvmalloc.h"
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include "uvm_ats.h"
#include "uvm_api.h"
#include "uvm_test.h"
#include "uvm_test_ioctl.h"

//
// This comment block describes some implementation rationale. See the header
// for the API descriptions.
//
// ========================= Retain count vs mm_users ==========================
//
// To guarantee the mm is available and won't be destroyed we require
// userspace to open a second file descriptor (uvm_mm_fd) and
// initialize it with uvm_api_mm_initialize(). During initialization
// we take a mm_users reference to ensure the mm remains valid until
// the file descriptor is closed.
//
// To ensure userspace can't close the file descriptor and drop the
// mm_users refcount while it is in use, threads must call either
// uvm_va_space_mm_retain() or uvm_va_space_mm_or_current_retain() to
// increment the retained count. This also checks that userspace has
// initialized the uvm_mm_fd and therefore holds a valid pagetable
// pin.
//
// Closing uvm_mm_fd will call uvm_va_space_mm_shutdown() prior to
// mmput(), which ensures there are no active users of the mm. This
// indirection is required because not all threads can call mmput()
// directly. In particular, the replayable GPU fault handling path
// can't call mmput() because it may result in exit_mmap(), which could
// result in RM calls and VA space destroy, and those need to wait for
// the GPU fault handler to finish.
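//
// A minimal sketch of the pattern this implies for callers (the retain and
// release helpers are defined later in this file; the worker body shown
// here is hypothetical):
//
//     struct mm_struct *mm = uvm_va_space_mm_retain(va_space);
//
//     if (mm) {
//         // The non-zero retained count keeps uvm_va_space_mm_shutdown()
//         // from completing, and hence mm_users from being dropped, while
//         // we operate on the mm.
//         service_faults_for_mm(va_space, mm); // hypothetical worker body
//         uvm_va_space_mm_release(va_space);
//     }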
//
// ============================ Handling mm teardown ===========================
//
// When the process is exiting we will get notified either via an
// explicit close of uvm_mm_fd or implicitly as part of
// exit_files(). We are guaranteed to get this call because we don't
// allow mmap on uvm_mm_fd, and the userspace pagetables (mm_users)
// are guaranteed to exist because we hold a mm_users refcount
// which is released as part of file close.
//
// This allows any outstanding GPU faults to be processed. To prevent
// new faults from occurring, uvm_va_space_mm_shutdown() is called to stop
// all GPU memory accesses to the mm. Once all GPU memory accesses have been
// stopped, no new retainers of the va_space will be allowed and the
// mm_users reference will be dropped, potentially tearing down the mm
// and associated pagetables.
//
// This essentially shuts down the VA space for new work. The VA space
// object remains valid for most teardown ioctls until the file is
// closed, because it's legal for the associated process to die and then
// for another process with a reference on the file to perform the
// unregisters or associated ioctls. This is particularly true for
// tools users.
//
// An exception to the above is UvmUnregisterChannel. Since channels are
// completely removed from the VA space on mm teardown, later channel
// unregisters will fail to find the handles and will return an error.
//
// At a high level, the sequence of operations to perform prior to mm
// teardown is:
//
// 1) Stop all channels
//      - Prevents new faults and accesses on non-MPS
// 2) Detach all channels
//      - Prevents pending faults from being translated to this VA space
//      - Non-replayable faults will be dropped so no new ones can arrive
//      - Access counter notifications will be prevented from getting new
//        translations to this VA space. Pending entries may attempt to retain
//        the mm, but will drop the notification if they can't be serviced.
// 3) Flush the fault buffer
//      - The only reason to flush the fault buffer is to avoid spurious
//        cancels. If we didn't flush the fault buffer before marking the mm
//        as dead, then remaining faults which require the mm would be
//        cancelled. Since the faults might be stale, we would record cancel
//        events which didn't really happen (the access didn't happen after
//        the mm died). By flushing we clear out all stale faults, and in
//        the case of MPS, cancel real faults after.
// 4) UnsetPageDir
//      - Prevents new accesses on MPS
// 5) Mark the va_space_mm as released
//      - Prevents new retainers from using the mm. There won't be any more on
//        the fault handling paths, but there could be others in worker threads.
//
// Here are some tables of each step in the sequence, and what operations can
// still be performed after each step. This is all from the perspective of a
// single VA space. "Untranslated" means that the fault entry has not been
// translated to a uvm_va_space yet.
//
// Replayable non-MPS Behavior:
//
//                  Can              Pending         Pending         Can be
//                  access   Can     untranslated    translated      servicing
//                  memory   fault   faults          faults          faults
// -----------------------------------------------------------------------------
// Shutdown start   Yes      Yes     Service         Service         Yes
// Stop channels    No       No      Service [1]     Service [1]     Yes [1]
// Detach channels  No       No      Flush buffer    Service [1]     Yes [1], [2]
// Flush buffer     No       No      None possible   None possible   No
// UnsetPageDir     No       No      None possible   None possible   No
//
//
// Replayable MPS Behavior:
//
//                  Can              Pending         Pending         Can be
//                  access   Can     untranslated    translated      servicing
//                  memory   fault   faults          faults          faults
// -----------------------------------------------------------------------------
// Shutdown start   Yes      Yes     Service         Service         Yes
// Stop channels    Yes      Yes     Service         Service         Yes
// Detach channels  Yes      Yes     Cancel, flush   Service         Yes
// Flush buffer     Yes      Yes     Cancel, flush   None possible   No
// UnsetPageDir     No [3]   Yes     Cancel, flush   None possible   No
//
//
// [1]: All pending faults in this VA space are stale since channel stop
//      preempted the context.
// [2]: Faults in this VA space can't be serviced concurrently with detach since
//      detach holds the VA space lock in write mode. Faults in other VA spaces
//      can be serviced, and stale faults in this VA space can resume service
//      after detach is done.
// [3]: Due to the nature of MPS, remaining work which had started under the VA
//      space could still execute and attempt to make memory accesses. However,
//      since the PDB at that point is empty and ATS is disabled (if available),
//      all accesses will fault and be cancelled rather than successfully
//      translate to physical memory.
//
// =============================================================================

static void uvm_va_space_mm_shutdown(uvm_va_space_t *va_space);

static int uvm_enable_va_space_mm = 1;
module_param(uvm_enable_va_space_mm, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_va_space_mm,
                 "Set to 0 to disable UVM from using mmu_notifiers to create "
                 "an association between a UVM VA space and a process. This "
                 "will also disable pageable memory access via either ATS or "
                 "HMM.");
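// For reference, this association (and with it pageable memory access) can be
// disabled at module load time by setting the parameter to 0, for example
// (assuming the driver is loaded as the nvidia-uvm module):
//
//     modprobe nvidia-uvm uvm_enable_va_space_mm=0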

bool uvm_va_space_mm_enabled_system(void)
{
    return UVM_CAN_USE_MMU_NOTIFIERS() && uvm_enable_va_space_mm;
}

bool uvm_va_space_mm_enabled(uvm_va_space_t *va_space)
{
    // A va_space doesn't have any association with an mm in multi-process
    // sharing mode.
    if (va_space->initialization_flags & UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE)
        return false;

    return uvm_va_space_mm_enabled_system();
}

#if UVM_CAN_USE_MMU_NOTIFIERS()
    static uvm_va_space_t *get_va_space(struct mmu_notifier *mn)
    {
        // This may be called without a thread context present, so be careful
        // what is used here.
        return container_of(mn, uvm_va_space_t, va_space_mm.mmu_notifier);
    }

    static void uvm_mmu_notifier_invalidate_range_ats(struct mmu_notifier *mn,
                                                      struct mm_struct *mm,
                                                      unsigned long start,
                                                      unsigned long end)
    {
        // In most cases ->invalidate_range() is called with exclusive end.
        // uvm_ats_invalidate() expects an inclusive end so we have to
        // convert it.
        //
        // There's a special case however. Kernel TLB gathering sometimes
        // identifies "fullmm" invalidates by setting both start and end to ~0.
        //
        // It's unclear if there are any other cases in which the kernel will
        // call us with start == end. Since we can't definitively say no, we
        // conservatively treat all such calls as full invalidates.
        if (start == end) {
            start = 0;
            end = ~0UL;
        }
        else {
            --end;
        }
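        // For example (hypothetical values): an exclusive-end invalidate of
        // [0x1000, 0x3000) arrives as start == 0x1000, end == 0x3000 and is
        // passed down as the inclusive range start == 0x1000, end == 0x2fff.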

        UVM_ENTRY_VOID(uvm_ats_invalidate(get_va_space(mn), start, end));
    }

    static struct mmu_notifier_ops uvm_mmu_notifier_ops_ats =
    {
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
        .invalidate_range = uvm_mmu_notifier_invalidate_range_ats,
#elif defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
        .arch_invalidate_secondary_tlbs = uvm_mmu_notifier_invalidate_range_ats,
#else
        #error One of invalidate_range/arch_invalidate_secondary_tlbs must be present
#endif
    };

    static int uvm_mmu_notifier_register(uvm_va_space_mm_t *va_space_mm)
    {
        UVM_ASSERT(va_space_mm->mm);
        uvm_assert_mmap_lock_locked_write(va_space_mm->mm);

        va_space_mm->mmu_notifier.ops = &uvm_mmu_notifier_ops_ats;
        return __mmu_notifier_register(&va_space_mm->mmu_notifier, va_space_mm->mm);
    }

    static void uvm_mmu_notifier_unregister(uvm_va_space_mm_t *va_space_mm)
    {
        mmu_notifier_unregister(&va_space_mm->mmu_notifier, va_space_mm->mm);
    }
#else
    static int uvm_mmu_notifier_register(uvm_va_space_mm_t *va_space_mm)
    {
        UVM_ASSERT(0);
        return 0;
    }

    static void uvm_mmu_notifier_unregister(uvm_va_space_mm_t *va_space_mm)
    {
        UVM_ASSERT(0);
    }
#endif // UVM_CAN_USE_MMU_NOTIFIERS()

NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
{
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    int ret;

    uvm_assert_mmap_lock_locked_write(current->mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    va_space_mm->state = UVM_VA_SPACE_MM_STATE_UNINITIALIZED;

    if (!uvm_va_space_mm_enabled(va_space))
        return NV_OK;

    UVM_ASSERT(!va_space_mm->mm);
    va_space_mm->mm = current->mm;
    uvm_mmgrab(va_space_mm->mm);

    // We must be prepared to handle callbacks as soon as we make this call,
    // except for ->release() which can't be called since the mm belongs to
    // current.
    if (UVM_ATS_IBM_SUPPORTED_IN_DRIVER() && g_uvm_global.ats.enabled) {
        ret = uvm_mmu_notifier_register(va_space_mm);
        if (ret) {
            // Inform uvm_va_space_mm_unregister() that it has nothing to do.
            uvm_mmdrop(va_space_mm->mm);
            va_space_mm->mm = NULL;
            return errno_to_nv_status(ret);
        }
    }

    if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
        uvm_va_space_pageable_mem_access_supported(va_space)) {

        #if UVM_CAN_USE_MMU_NOTIFIERS()
            // Initialize MMU interval notifiers for this process. This allows
            // mmu_interval_notifier_insert() to be called without holding the
            // mmap_lock for write.
            // Note: there is no __mmu_notifier_unregister(); this call just
            // allocates memory which is attached to the mm_struct and freed
            // when the mm_struct is freed.
            ret = __mmu_notifier_register(NULL, current->mm);
            if (ret)
                return errno_to_nv_status(ret);
        #else
            UVM_ASSERT(0);
        #endif
    }

    return NV_OK;
}

void uvm_va_space_mm_unregister(uvm_va_space_t *va_space)
{
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;

    // We can't hold the VA space lock or mmap_lock because
    // uvm_va_space_mm_shutdown() waits for retainers which may take
    // these locks.
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_MMAP_LOCK);
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);

    uvm_va_space_mm_shutdown(va_space);
    UVM_ASSERT(va_space_mm->retained_count == 0);

    // Only happens if uvm_va_space_mm_register() fails
    if (!va_space_mm->mm)
        return;

    if (uvm_va_space_mm_enabled(va_space)) {
        if (UVM_ATS_IBM_SUPPORTED_IN_DRIVER() && g_uvm_global.ats.enabled)
            uvm_mmu_notifier_unregister(va_space_mm);
        uvm_mmdrop(va_space_mm->mm);
    }
}

struct mm_struct *uvm_va_space_mm_retain(uvm_va_space_t *va_space)
{
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    struct mm_struct *mm = NULL;

    if (!uvm_va_space_mm_enabled(va_space))
        return NULL;

    uvm_spin_lock(&va_space_mm->lock);

    if (!uvm_va_space_mm_alive(va_space_mm))
        goto out;

    ++va_space_mm->retained_count;

    mm = va_space_mm->mm;
    UVM_ASSERT(mm);

out:

    // uvm_api_mm_initialize() holds a reference
    if (mm)
        UVM_ASSERT(atomic_read(&mm->mm_users) > 0);

    uvm_spin_unlock(&va_space_mm->lock);

    return mm;
}

struct mm_struct *uvm_va_space_mm_or_current_retain(uvm_va_space_t *va_space)
{
    // We should only attempt to use current->mm from a user thread
    UVM_ASSERT(!(current->flags & PF_KTHREAD));

    // current->mm is NULL when we're in process teardown. In that case it
    // doesn't make sense to use any mm.
    if (!current->mm)
        return NULL;

    // If !uvm_va_space_mm_enabled() we use current->mm on the ioctl
    // paths. In that case we don't need to mmget(current->mm) because
    // the current thread's mm is always valid. On
    // uvm_va_space_mm_enabled() systems we skip trying to retain the
    // mm if it is current->mm, because userspace may not have
    // initialized the mm fd but UVM callers on the ioctl path still
    // assume retaining current->mm will succeed.
    if (!uvm_va_space_mm_enabled(va_space))
        return current->mm;

    return uvm_va_space_mm_retain(va_space);
}
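// A minimal usage sketch for the or_current variant on an ioctl path. It
// mirrors the test ioctl later in this file; the error-return choice is just
// one option:
//
//     struct mm_struct *mm = uvm_va_space_mm_or_current_retain(va_space);
//
//     if (!mm)
//         return NV_ERR_PAGE_TABLE_NOT_AVAIL;
//
//     // ... operate on the mm ...
//
//     uvm_va_space_mm_or_current_release(va_space, mm);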

void uvm_va_space_mm_release(uvm_va_space_t *va_space)
{
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;

    UVM_ASSERT(uvm_va_space_mm_enabled(va_space));

    // The mm must not have been torn down while we have it retained
    UVM_ASSERT(va_space_mm->mm);

    uvm_spin_lock(&va_space_mm->lock);

    UVM_ASSERT(va_space_mm->retained_count > 0);
    --va_space_mm->retained_count;

    // If we're the last retainer on a dead mm, signal any potential waiters
    if (va_space_mm->retained_count == 0 && !uvm_va_space_mm_alive(va_space_mm)) {
        uvm_spin_unlock(&va_space_mm->lock);

        // There could be a thread in uvm_va_space_mm_shutdown()
        // waiting on us, so wake it up.
        wake_up(&va_space_mm->last_retainer_wait_queue);
    }
    else {
        uvm_spin_unlock(&va_space_mm->lock);
    }
}

void uvm_va_space_mm_or_current_release(uvm_va_space_t *va_space, struct mm_struct *mm)
{
    if (!uvm_va_space_mm_enabled(va_space) || !mm)
        return;

    uvm_va_space_mm_release(va_space);
}

static void uvm_va_space_mm_shutdown(uvm_va_space_t *va_space)
{
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_gpu_t *gpu;
    uvm_processor_mask_t *retained_gpus = &va_space_mm->scratch_processor_mask;
    uvm_parent_processor_mask_t flushed_parent_gpus;
    LIST_HEAD(deferred_free_list);

    uvm_va_space_down_write(va_space);

    // Prevent future registrations of any kind. We'll be iterating over all
    // GPUs and GPU VA spaces below but taking and dropping the VA space lock.
    // It's ok for other threads to unregister those objects, but not to
    // register new ones.
    //
    // We also need to prevent new channel work from arriving since we're trying
    // to stop memory accesses.
    va_space->disallow_new_registers = true;

    uvm_va_space_downgrade_write_rm(va_space);

    // Stop channels to prevent new accesses and new faults on non-MPS
    uvm_va_space_stop_all_user_channels(va_space);

    uvm_va_space_up_read_rm(va_space);

    // Detach all channels to prevent pending untranslated faults from getting
    // to this VA space. This also removes those channels from the VA space and
    // puts them on the deferred free list.
    uvm_va_space_down_write(va_space);
    uvm_va_space_detach_all_user_channels(va_space, &deferred_free_list);
    uvm_processor_mask_and(retained_gpus, &va_space->registered_gpus, &va_space->faultable_processors);
    uvm_global_gpu_retain(retained_gpus);
    uvm_va_space_up_write(va_space);

    // It's ok to use retained_gpus outside the lock since there can only be one
    // thread executing in uvm_va_space_mm_shutdown at a time.

    // Flush the fault buffer on all registered faultable GPUs.
    // This will avoid spurious cancels of stale pending translated
    // faults after we set UVM_VA_SPACE_MM_STATE_RELEASED later.
    uvm_parent_processor_mask_zero(&flushed_parent_gpus);
    for_each_gpu_in_mask(gpu, retained_gpus) {
        if (!uvm_parent_processor_mask_test_and_set(&flushed_parent_gpus, gpu->parent->id))
            uvm_gpu_fault_buffer_flush(gpu);
    }

    uvm_global_gpu_release(retained_gpus);

    // Call nvUvmInterfaceUnsetPageDirectory. This has no effect on non-MPS.
    // Under MPS this guarantees that no new GPU accesses will be made using
    // this mm.
    //
    // We need only one thread to make this call, but we could have one thread
    // in here and one in destroy_gpu_va_space. Serialize these by starting in
    // write mode then downgrading to read.
    uvm_va_space_down_write(va_space);
    uvm_va_space_downgrade_write_rm(va_space);
    for_each_gpu_va_space(gpu_va_space, va_space)
        uvm_gpu_va_space_unset_page_dir(gpu_va_space);
    uvm_va_space_up_read_rm(va_space);

    // The above call to uvm_gpu_va_space_unset_page_dir handles the GPU VA
    // spaces which are known to be registered. However, we could've raced with
    // a concurrent uvm_va_space_unregister_gpu_va_space, giving this sequence:
    //
    // unregister_gpu_va_space                  uvm_va_space_mm_shutdown
    //     uvm_va_space_down_write
    //     remove_gpu_va_space
    //     uvm_va_space_up_write
    //                                          uvm_va_space_down_write(va_space);
    //                                          // No GPU VA spaces
    //                                          Unlock, return
    //     uvm_deferred_free_object_list
    //         uvm_gpu_va_space_unset_page_dir
    //
    // We have to be sure that all accesses in this GPU VA space are done before
    // returning, so we have to wait for the other thread to finish its
    // uvm_gpu_va_space_unset_page_dir call.
    //
    // We can be sure that num_pending will eventually go to zero because we've
    // prevented new GPU VA spaces from being registered above.
    wait_event(va_space->gpu_va_space_deferred_free.wait_queue,
               atomic_read(&va_space->gpu_va_space_deferred_free.num_pending) == 0);

    // Now that there won't be any new GPU faults, prevent subsequent retainers
    // from accessing this mm.
    uvm_spin_lock(&va_space_mm->lock);
    va_space_mm->state = UVM_VA_SPACE_MM_STATE_RELEASED;
    uvm_spin_unlock(&va_space_mm->lock);

    // Finish channel destroy. This can be done at any point after detach as
    // long as we don't hold the VA space lock.
    uvm_deferred_free_object_list(&deferred_free_list);

    // Flush out all pending retainers
    wait_event(va_space_mm->last_retainer_wait_queue, va_space_mm->retained_count == 0);
}

static NV_STATUS mm_read64(struct mm_struct *mm, NvU64 addr, NvU64 *val)
{
    long ret;
    struct page *page;
    NvU64 *mapping;

    UVM_ASSERT(IS_ALIGNED(addr, sizeof(*val)));

    uvm_down_read_mmap_lock(mm);
    ret = NV_PIN_USER_PAGES_REMOTE(mm, (unsigned long)addr, 1, 0, &page, NULL);
    uvm_up_read_mmap_lock(mm);

    if (ret < 0)
        return errno_to_nv_status(ret);

    UVM_ASSERT(ret == 1);

    mapping = (NvU64 *)((char *)kmap(page) + (addr % PAGE_SIZE));
    *val = *mapping;
    kunmap(page);
    NV_UNPIN_USER_PAGE(page);

    return NV_OK;
}

NV_STATUS uvm_test_va_space_mm_retain(UVM_TEST_VA_SPACE_MM_RETAIN_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = NULL;
    struct mm_struct *mm = NULL;
    NV_STATUS status = NV_OK;

    if (!IS_ALIGNED(params->addr, sizeof(params->val_before)))
        return NV_ERR_INVALID_ARGUMENT;

    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

    list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node) {
        if ((uintptr_t)va_space == params->va_space_ptr) {
            mm = uvm_va_space_mm_retain(va_space);
            break;
        }
    }

    uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);

    if ((uintptr_t)va_space != params->va_space_ptr)
        return NV_ERR_MISSING_TABLE_ENTRY;

    if (!mm)
        return NV_ERR_PAGE_TABLE_NOT_AVAIL;

    status = mm_read64(mm, params->addr, &params->val_before);

    if (status == NV_OK && params->sleep_us) {
        usleep_range(params->sleep_us, params->sleep_us + 1000);
        status = mm_read64(mm, params->addr, &params->val_after);
    }

    uvm_va_space_mm_release(va_space);
    return status;
}

NV_STATUS uvm_test_va_space_mm_or_current_retain(UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN_PARAMS *params,
                                                 struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    struct mm_struct *mm;
    NV_STATUS status = NV_OK;

    mm = uvm_va_space_mm_or_current_retain(va_space);
    if (!mm)
        return NV_ERR_PAGE_TABLE_NOT_AVAIL;

    if (params->retain_done_ptr) {
        NvU64 flag = true;

        if (nv_copy_to_user((void __user *)params->retain_done_ptr, &flag, sizeof(flag)))
            status = NV_ERR_INVALID_ARGUMENT;
    }

    if (status == NV_OK && params->sleep_us)
        usleep_range(params->sleep_us, params->sleep_us + 1000);

    uvm_va_space_mm_or_current_release(va_space, mm);

    return status;
}