/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_ats.h"
#include "uvm_global.h"
#include "uvm_gpu_replayable_faults.h"
#include "uvm_mem.h"
#include "uvm_perf_events.h"
#include "uvm_processors.h"
#include "uvm_procfs.h"
#include "uvm_thread_context.h"
#include "uvm_va_range.h"
#include "uvm_kvmalloc.h"
#include "uvm_mmu.h"
#include "uvm_perf_heuristics.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_migrate.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_va_space_mm.h"
#include "nv_uvm_interface.h"

uvm_global_t g_uvm_global;
static struct UvmOpsUvmEvents g_exported_uvm_ops;
static bool g_ops_registered = false;

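// Register UVM's suspend, resume, and top-half ISR entry points with RM so
// that RM can call back into UVM for power management transitions and GPU
// interrupts.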
static NV_STATUS uvm_register_callbacks(void)
{
    NV_STATUS status = NV_OK;

    g_exported_uvm_ops.suspend = uvm_suspend_entry;
    g_exported_uvm_ops.resume = uvm_resume_entry;
    g_exported_uvm_ops.startDevice = NULL;
    g_exported_uvm_ops.stopDevice = NULL;
    g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;

    // Register the UVM callbacks with the main GPU driver:
    status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
    if (status != NV_OK)
        return status;

    g_ops_registered = true;
    return NV_OK;
}

// Calling this function more than once is harmless:
static void uvm_unregister_callbacks(void)
{
    if (g_ops_registered) {
        uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmOps());
        g_ops_registered = false;
    }
}

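// One-time initialization of global UVM driver state. Subsystems are brought
// up in dependency order; on failure, the error path calls uvm_global_exit(),
// which must therefore cope with partially-initialized state.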
NV_STATUS uvm_global_init(void)
{
    NV_STATUS status;
    UvmPlatformInfo platform_info;

    // Initialization of thread contexts happened already, during registration
    // (addition) of the thread context associated with the UVM module entry
    // point that is calling this function.
    UVM_ASSERT(uvm_thread_context_global_initialized());

    uvm_mutex_init(&g_uvm_global.global_lock, UVM_LOCK_ORDER_GLOBAL);
    uvm_init_rwsem(&g_uvm_global.pm.lock, UVM_LOCK_ORDER_GLOBAL_PM);
    uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
    uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
    INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);

    status = uvm_kvmalloc_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_kvmalloc_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.global_q, "UVM global queue"));
    if (status != NV_OK) {
        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
    if (status != NV_OK) {
        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_procfs_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_rm_locked_call(nvUvmInterfaceSessionCreate(&g_uvm_global.rm_session_handle, &platform_info));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceSessionCreate() failed: %s\n", nvstatusToString(status));
        return status;
    }

    uvm_ats_init(&platform_info);
    g_uvm_global.num_simulated_devices = 0;
    g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;

    status = uvm_processor_mask_cache_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_processor_mask_cache_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_gpu_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_gpu_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_pmm_sysmem_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_pmm_sysmem_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_mmu_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_mmu_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_mem_global_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_mem_global_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_va_policy_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_va_policy_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_va_range_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_va_range_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_range_group_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_range_group_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_migrate_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_migrate_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_perf_events_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_perf_events_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_perf_heuristics_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_perf_heuristics_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_service_block_context_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_service_block_context_init failed: %s\n", nvstatusToString(status));
        goto error;
    }

    // This sets up the ISR (interrupt service routine), by hooking into RM's
    // top-half ISR callback. As soon as this call completes, GPU interrupts
    // will start arriving, so it's important to be prepared to receive
    // interrupts before this point:
    status = uvm_register_callbacks();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
        goto error;
    }

    return NV_OK;

error:
    uvm_global_exit();
    return status;
}

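// Tear down global UVM state, largely in the reverse order of
// uvm_global_init(). This also serves as the error path of uvm_global_init(),
// so each exit routine must handle its corresponding init not having run.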
void uvm_global_exit(void)
{
    uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);

    // Guarantee completion of any release callbacks scheduled after the flush
    // in uvm_resume().
    nv_kthread_q_flush(&g_uvm_global.deferred_release_q);

    uvm_unregister_callbacks();
    uvm_service_block_context_exit();

    uvm_perf_heuristics_exit();
    uvm_perf_events_exit();
    uvm_migrate_exit();
    uvm_range_group_exit();
    uvm_va_range_exit();
    uvm_va_policy_exit();
    uvm_mem_global_exit();
    uvm_pmm_sysmem_exit();
    uvm_gpu_exit();
    uvm_processor_mask_cache_exit();

    if (g_uvm_global.rm_session_handle != 0)
        uvm_rm_locked_call_void(nvUvmInterfaceSessionDestroy(g_uvm_global.rm_session_handle));

    uvm_procfs_exit();

    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
    nv_kthread_q_stop(&g_uvm_global.global_q);

    uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
    UVM_ASSERT(list_empty(&g_uvm_global.va_spaces.list));

    uvm_thread_context_global_exit();
    uvm_kvmalloc_exit();
}

// Signal to the top-half ISR whether calls from the RM's top-half ISR are to
// be completed without processing.
static void uvm_parent_gpu_set_isr_suspended(uvm_parent_gpu_t *parent_gpu, bool is_suspended)
{
    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

    parent_gpu->isr.is_suspended = is_suspended;

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}

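// Prepare for system sleep. RM calls this through the suspend callback
// registered in uvm_register_callbacks().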
static NV_STATUS uvm_suspend(void)
{
    uvm_va_space_t *va_space = NULL;
    uvm_gpu_id_t gpu_id;
    uvm_gpu_t *gpu;

    // Upon entry into this function, the following is true:
    //   * GPU interrupts are enabled
    //   * Any number of fault or access counter notifications could
    //     be pending
    //   * No new fault notifications will appear, but new access
    //     counter notifications could
    //   * Any of the bottom halves could be running
    //   * New bottom halves of all types could be scheduled as GPU
    //     interrupts are handled
    // Due to this, the sequence of suspend operations for each GPU is the
    // following:
    //   * Flush the fault buffer to prevent fault interrupts when
    //     the top-half ISR is suspended
    //   * Suspend access counter processing
    //   * Suspend the top-half ISR
    //   * Flush relevant kthread queues (bottom half, etc.)

    // Some locks acquired by this function, such as pm.lock, are released
    // by uvm_resume(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

    // Take the global power management lock in write mode to lock out
    // most user-facing entry points.
    uvm_down_write(&g_uvm_global.pm.lock);

    nv_kthread_q_flush(&g_uvm_global.global_q);

    // Though global_lock isn't held here, pm.lock indirectly prevents the
    // addition and removal of GPUs, since these operations can currently
    // only occur in response to ioctl() calls.
    for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
        gpu = uvm_gpu_get(gpu_id);

        // Since fault buffer state may be lost across sleep cycles, UVM must
        // ensure any outstanding replayable faults are dismissed. The RM
        // guarantees that all user channels have been preempted before
        // uvm_suspend() is called, which implies that no user channels can be
        // stalled on faults when this point is reached.
        if (gpu->parent->replayable_faults_supported)
            uvm_gpu_fault_buffer_flush(gpu);

        // TODO: Bug 2535118: flush the non-replayable fault buffer

        // Stop access counter interrupt processing for the duration of this
        // sleep cycle to defend against potential interrupt storms in
        // the suspend path: if rate limiting is applied to access counter
        // interrupts in the bottom half in the future, the bottom half flush
        // below will no longer be able to guarantee that all outstanding
        // notifications have been handled.
        uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);

        uvm_parent_gpu_set_isr_suspended(gpu->parent, true);

        nv_kthread_q_flush(&gpu->parent->isr.bottom_half_q);

        if (gpu->parent->isr.non_replayable_faults.handling)
            nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
    }

    // Acquire each VA space's lock in write mode to lock out VMA open and
    // release callbacks. These entry points do not have feasible early exit
    // options, and so aren't suitable for synchronization with pm.lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

    list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
        uvm_va_space_down_write(va_space);

    uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);

    uvm_thread_context_lock_enable_tracking();

    g_uvm_global.pm.is_suspended = true;

    return NV_OK;
}

NV_STATUS uvm_suspend_entry(void)
{
    UVM_ENTRY_RET(uvm_suspend());
}

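// Undo uvm_suspend() after the system returns from sleep: restore per-GPU
// fault buffer state, re-enable interrupt processing, and release the locks
// taken by uvm_suspend().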
static NV_STATUS uvm_resume(void)
{
    uvm_va_space_t *va_space = NULL;
    uvm_gpu_id_t gpu_id;
    uvm_gpu_t *gpu;

    g_uvm_global.pm.is_suspended = false;

    // Some locks released by this function, such as pm.lock, were acquired
    // by uvm_suspend(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

    // Release each VA space's lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

    list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
        uvm_va_space_up_write(va_space);

    uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);

    // pm.lock is held in lieu of global_lock to prevent GPU addition/removal
    for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
        gpu = uvm_gpu_get(gpu_id);

        // Bring the fault buffer software state back in sync with the
        // hardware state.
        uvm_parent_gpu_fault_buffer_resume(gpu->parent);

        uvm_parent_gpu_set_isr_suspended(gpu->parent, false);

        // Reenable access counter interrupt processing unless notifications
        // have been set to be suppressed.
        uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);
    }

    uvm_up_write(&g_uvm_global.pm.lock);

    uvm_thread_context_lock_enable_tracking();

    // Force completion of any release callbacks successfully queued for
    // deferred completion while suspended. The deferred release
    // queue is not guaranteed to remain empty following this flush since
    // some threads that failed to acquire pm.lock in uvm_release() may
    // not have scheduled their handlers yet.
    nv_kthread_q_flush(&g_uvm_global.deferred_release_q);

    return NV_OK;
}

NV_STATUS uvm_resume_entry(void)
{
    UVM_ENTRY_RET(uvm_resume());
}

bool uvm_global_is_suspended(void)
{
    return g_uvm_global.pm.is_suspended;
}

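// Record the first global fatal error. Later errors are logged, but the
// originally recorded error code is preserved.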
void uvm_global_set_fatal_error_impl(NV_STATUS error)
{
    NV_STATUS previous_error;

    UVM_ASSERT(error != NV_OK);

    previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);

    if (previous_error == NV_OK) {
        UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
    }
    else {
        UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has already been set: %s\n",
                      nvstatusToString(error), nvstatusToString(previous_error));
    }
}

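// Clear the global fatal error and return the previous value. Only intended
// for use by builtin tests.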
NV_STATUS uvm_global_reset_fatal_error(void)
{
    if (!uvm_enable_builtin_tests) {
        UVM_ASSERT_MSG(0, "Resetting global fatal error without tests being enabled\n");
        return NV_ERR_INVALID_STATE;
    }

    return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}

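// Retain a reference on every GPU set in the mask.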
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)
{
    uvm_gpu_t *gpu;

    for_each_gpu_in_mask(gpu, mask)
        uvm_gpu_retain(gpu);
}

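// Release a reference on every GPU set in the mask. The global lock is held
// across the releases since dropping the last reference may destroy the GPU.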
void uvm_global_gpu_release(const uvm_processor_mask_t *mask)
{
    uvm_gpu_id_t gpu_id;

    if (uvm_processor_mask_empty(mask))
        return;

    uvm_mutex_lock(&g_uvm_global.global_lock);

    // Do not use for_each_gpu_in_mask here, as it reads GPU state that the
    // release below may have destroyed.
    for_each_gpu_id_in_mask(gpu_id, mask)
        uvm_gpu_release_locked(uvm_gpu_get(gpu_id));

    uvm_mutex_unlock(&g_uvm_global.global_lock);
}

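// Check each GPU in the mask for ECC errors, returning the first error
// encountered, if any.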
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
{
    uvm_gpu_t *gpu;

    for_each_gpu_in_mask(gpu, gpus) {
        NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}