/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_ats.h"
#include "uvm_global.h"
#include "uvm_gpu_replayable_faults.h"
#include "uvm_mem.h"
#include "uvm_perf_events.h"
#include "uvm_processors.h"
#include "uvm_procfs.h"
#include "uvm_thread_context.h"
#include "uvm_va_range.h"
#include "uvm_kvmalloc.h"
#include "uvm_mmu.h"
#include "uvm_perf_heuristics.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_migrate.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_va_space_mm.h"
#include "nv_uvm_interface.h"

uvm_global_t g_uvm_global;
static struct UvmOpsUvmEvents g_exported_uvm_ops;
static bool g_ops_registered = false;

static NV_STATUS uvm_register_callbacks(void)
{
    NV_STATUS status = NV_OK;

    g_exported_uvm_ops.suspend = uvm_suspend_entry;
    g_exported_uvm_ops.resume = uvm_resume_entry;
    g_exported_uvm_ops.startDevice = NULL;
    g_exported_uvm_ops.stopDevice  = NULL;
    g_exported_uvm_ops.isrTopHalf  = uvm_isr_top_half_entry;

    // Register the UVM callbacks with the main GPU driver:
    status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
    if (status != NV_OK)
        return status;

    g_ops_registered = true;
    return NV_OK;
}

// Calling this function more than once is harmless:
static void uvm_unregister_callbacks(void)
{
    if (g_ops_registered) {
        uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmOps());
        g_ops_registered = false;
    }
}

NV_STATUS uvm_global_init(void)
{
    NV_STATUS status;
    UvmPlatformInfo platform_info;

    // Initialization of thread contexts happened already, during registration
    // (addition) of the thread context associated with the UVM module entry
    // point that is calling this function.
    UVM_ASSERT(uvm_thread_context_global_initialized());

    uvm_mutex_init(&g_uvm_global.global_lock, UVM_LOCK_ORDER_GLOBAL);
    uvm_init_rwsem(&g_uvm_global.pm.lock, UVM_LOCK_ORDER_GLOBAL_PM);
    uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
    uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
    INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);

    status = uvm_kvmalloc_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_kvmalloc_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.global_q, "UVM global queue"));
    if (status != NV_OK) {
        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
    if (status != NV_OK) {
        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_procfs_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_rm_locked_call(nvUvmInterfaceSessionCreate(&g_uvm_global.rm_session_handle, &platform_info));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceSessionCreate() failed: %s\n", nvstatusToString(status));
        return status;
    }

    uvm_ats_init(&platform_info);
    g_uvm_global.num_simulated_devices = 0;
    g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;

    status = uvm_processor_mask_cache_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_processor_mask_cache_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_gpu_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_gpu_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_pmm_sysmem_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_pmm_sysmem_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_mmu_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_mmu_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_mem_global_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_mem_global_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_va_policy_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_va_policy_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_va_range_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_va_range_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_range_group_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_range_group_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_migrate_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_migrate_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_perf_events_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_perf_events_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_perf_heuristics_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_perf_heuristics_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_service_block_context_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_service_block_context_init failed: %s\n", nvstatusToString(status));
        goto error;
    }

    // This sets up the ISR (interrupt service routine) by hooking into RM's
    // top-half ISR callback. As soon as this call completes, GPU interrupts
    // will start arriving, so it's important to be prepared to receive
    // interrupts before this point:
    status = uvm_register_callbacks();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
        goto error;
    }

    return NV_OK;

error:
    uvm_global_exit();
    return status;
}

void uvm_global_exit(void)
{
    uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);

    // Guarantee completion of any release callbacks scheduled after the flush
    // in uvm_resume().
    nv_kthread_q_flush(&g_uvm_global.deferred_release_q);

    uvm_unregister_callbacks();
    uvm_service_block_context_exit();

    uvm_perf_heuristics_exit();
    uvm_perf_events_exit();
    uvm_migrate_exit();
    uvm_range_group_exit();
    uvm_va_range_exit();
    uvm_va_policy_exit();
    uvm_mem_global_exit();
    uvm_pmm_sysmem_exit();
    uvm_gpu_exit();
    uvm_processor_mask_cache_exit();

    if (g_uvm_global.rm_session_handle != 0)
        uvm_rm_locked_call_void(nvUvmInterfaceSessionDestroy(g_uvm_global.rm_session_handle));

    uvm_procfs_exit();

    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
    nv_kthread_q_stop(&g_uvm_global.global_q);

    uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
    UVM_ASSERT(list_empty(&g_uvm_global.va_spaces.list));

    uvm_thread_context_global_exit();
    uvm_kvmalloc_exit();
}

// Signal to the top-half ISR whether calls from the RM's top-half ISR are to
// be completed without processing.
static void uvm_parent_gpu_set_isr_suspended(uvm_parent_gpu_t *parent_gpu, bool is_suspended)
{
    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

    parent_gpu->isr.is_suspended = is_suspended;

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}

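// Quiesce UVM in preparation for a system sleep cycle: flush the replayable
// fault buffers, suspend ISR and access counter processing on each retained
// GPU, and lock out user-facing entry points via pm.lock and the per-VA-space
// locks.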
static NV_STATUS uvm_suspend(void)
{
    uvm_va_space_t *va_space = NULL;
    uvm_gpu_id_t gpu_id;
    uvm_gpu_t *gpu;

    // Upon entry into this function, the following is true:
    //   * GPU interrupts are enabled
    //   * Any number of fault or access counter notifications could
    //     be pending
    //   * No new fault notifications will appear, but new access
    //     counter notifications could
    //   * Any of the bottom halves could be running
    //   * New bottom halves of all types could be scheduled as GPU
    //     interrupts are handled
    // Due to this, the sequence of suspend operations for each GPU is the
    // following:
    //   * Flush the fault buffer to prevent fault interrupts when
    //     the top-half ISR is suspended
    //   * Suspend access counter processing
    //   * Suspend the top-half ISR
    //   * Flush relevant kthread queues (bottom half, etc.)

    // Some locks acquired by this function, such as pm.lock, are released
    // by uvm_resume().  This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

    // Take the global power management lock in write mode to lock out
    // most user-facing entry points.
    uvm_down_write(&g_uvm_global.pm.lock);

    nv_kthread_q_flush(&g_uvm_global.global_q);

    // Though global_lock isn't held here, pm.lock indirectly prevents the
    // addition and removal of GPUs, since these operations can currently
    // only occur in response to ioctl() calls.
    for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
        gpu = uvm_gpu_get(gpu_id);

        // Since fault buffer state may be lost across sleep cycles, UVM must
        // ensure any outstanding replayable faults are dismissed.  The RM
        // guarantees that all user channels have been preempted before
        // uvm_suspend() is called, which implies that no user channels can be
        // stalled on faults when this point is reached.
        if (gpu->parent->replayable_faults_supported)
            uvm_gpu_fault_buffer_flush(gpu);

        // TODO: Bug 2535118: flush the non-replayable fault buffer

        // Stop access counter interrupt processing for the duration of this
        // sleep cycle to defend against potential interrupt storms in
        // the suspend path: if rate limiting is applied to access counter
        // interrupts in the bottom half in the future, the bottom half flush
        // below will no longer be able to guarantee that all outstanding
        // notifications have been handled.
        uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);

        uvm_parent_gpu_set_isr_suspended(gpu->parent, true);

        nv_kthread_q_flush(&gpu->parent->isr.bottom_half_q);

        if (gpu->parent->isr.non_replayable_faults.handling)
            nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
    }

    // Acquire each VA space's lock in write mode to lock out VMA open and
    // release callbacks.  These entry points do not have feasible early exit
    // options, and so aren't suitable for synchronization with pm.lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

    list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
        uvm_va_space_down_write(va_space);

    uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);

    uvm_thread_context_lock_enable_tracking();

    g_uvm_global.pm.is_suspended = true;

    return NV_OK;
}

NV_STATUS uvm_suspend_entry(void)
{
    UVM_ENTRY_RET(uvm_suspend());
}

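// Undo the quiescing performed by uvm_suspend(): restore fault buffer state,
// re-enable ISR and access counter processing on each retained GPU, and
// release the locks taken during suspend.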
static NV_STATUS uvm_resume(void)
{
    uvm_va_space_t *va_space = NULL;
    uvm_gpu_id_t gpu_id;
    uvm_gpu_t *gpu;

    g_uvm_global.pm.is_suspended = false;

    // Some locks released by this function, such as pm.lock, were acquired
    // by uvm_suspend().  This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

    // Release each VA space's lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

    list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
        uvm_va_space_up_write(va_space);

    uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);

    // pm.lock is held in lieu of global_lock to prevent GPU addition/removal
    for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
        gpu = uvm_gpu_get(gpu_id);

        // Bring the fault buffer software state back in sync with the
        // hardware state.
        uvm_parent_gpu_fault_buffer_resume(gpu->parent);

        uvm_parent_gpu_set_isr_suspended(gpu->parent, false);

        // Reenable access counter interrupt processing unless notifications
        // have been set to be suppressed.
        uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);
    }

    uvm_up_write(&g_uvm_global.pm.lock);

    uvm_thread_context_lock_enable_tracking();

    // Force completion of any release callbacks successfully queued for
    // deferred completion while suspended.  The deferred release
    // queue is not guaranteed to remain empty following this flush since
    // some threads that failed to acquire pm.lock in uvm_release() may
    // not have scheduled their handlers yet.
    nv_kthread_q_flush(&g_uvm_global.deferred_release_q);

    return NV_OK;
}

NV_STATUS uvm_resume_entry(void)
{
    UVM_ENTRY_RET(uvm_resume());
}

bool uvm_global_is_suspended(void)
{
    return g_uvm_global.pm.is_suspended;
}

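// Record a global fatal error. Only the first error is stored; subsequent
// errors are logged but do not overwrite the original value.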
void uvm_global_set_fatal_error_impl(NV_STATUS error)
{
    NV_STATUS previous_error;

    UVM_ASSERT(error != NV_OK);

    previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);

    if (previous_error == NV_OK) {
        UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
    }
    else {
        UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has already been set: %s\n",
                      nvstatusToString(error),
                      nvstatusToString(previous_error));
    }
}

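// Clear the global fatal error and return the value that was set. Intended
// for use by built-in tests only.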
NV_STATUS uvm_global_reset_fatal_error(void)
{
    if (!uvm_enable_builtin_tests) {
        UVM_ASSERT_MSG(0, "Resetting global fatal error without tests being enabled\n");
        return NV_ERR_INVALID_STATE;
    }

    return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}

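// Retain a reference on every GPU set in the mask.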
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)
{
    uvm_gpu_t *gpu;

    for_each_gpu_in_mask(gpu, mask)
        uvm_gpu_retain(gpu);
}

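// Release a reference on every GPU set in the mask while holding the global
// lock, since dropping a reference may destroy the GPU.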
void uvm_global_gpu_release(const uvm_processor_mask_t *mask)
{
    uvm_gpu_id_t gpu_id;

    if (uvm_processor_mask_empty(mask))
        return;

    uvm_mutex_lock(&g_uvm_global.global_lock);

    // Do not use for_each_gpu_in_mask, since it reads the GPU state and the
    // GPU might get destroyed by the release.
    for_each_gpu_id_in_mask(gpu_id, mask)
        uvm_gpu_release_locked(uvm_gpu_get(gpu_id));

    uvm_mutex_unlock(&g_uvm_global.global_lock);
}

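// Check each GPU in the mask for ECC errors, returning the first error
// encountered.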
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
{
    uvm_gpu_t *gpu;

    for_each_gpu_in_mask(gpu, gpus) {
        NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}