/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_GLOBAL_H__
#define __UVM_GLOBAL_H__

#include "nv_uvm_types.h"
#include "uvm_extern_decl.h"
#include "uvm_linux.h"
#include "uvm_common.h"
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"

// Global state of the uvm driver
struct uvm_global_struct
{
    // Mask of retained GPUs.
    // Note that GPUs are added to this mask as the last step of add_gpu() and
    // removed from it as the first step of remove_gpu(), implying that a GPU
    // that's being initialized or deinitialized will not be in it.
    uvm_processor_mask_t retained_gpus;

    // Array of the parent GPUs registered with UVM. Note that GPUs will have
    // ids offset by 1 to accommodate the UVM_ID_CPU, so e.g. parent_gpus[0]
    // will have GPU id = 1. A GPU entry is unused iff it does not exist
    // (is a NULL pointer) in this table.
    uvm_parent_gpu_t *parent_gpus[UVM_PARENT_ID_MAX_GPUS];

    // A global RM session (RM client)
    // Created on module load and destroyed on module unload
    uvmGpuSessionHandle rm_session_handle;

    // Peer-to-peer table.
    // Peer info is added to and removed from this table when the usermode
    // driver calls UvmEnablePeerAccess and UvmDisablePeerAccess,
    // respectively.
    uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];

    // Peer-to-peer copy mode.
    // Pascal+ GPUs support virtual addresses in p2p copies.
    // Ampere+ GPUs add support for physical addresses in p2p copies.
    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Stores an NV_STATUS. Once it becomes != NV_OK, the driver should refuse
    // to do anything other than try to clean up as much as possible.
    // An example of a fatal error is an unrecoverable ECC error on one of the
    // GPUs.
    atomic_t fatal_error;

    // A flag to disable the assert on fatal error
    // To be used by tests and only consulted if tests are enabled.
    bool disable_fatal_error_assert;

    // Lock protecting the global state
    uvm_mutex_t global_lock;

    struct
    {
        // Lock synchronizing user threads with power management activity
        uvm_rw_semaphore_t lock;

        // Power management state flag; tested by UVM_GPU_WRITE_ONCE()
        // and UVM_GPU_READ_ONCE() to detect accesses to GPUs when
        // UVM is suspended.
        bool is_suspended;
    } pm;

    // This lock synchronizes addition and removal of GPUs from UVM's global
    // table. It must be held whenever g_uvm_global.parent_gpus[] is written.
    // In order to read from this table, you must hold either the
    // gpu_table_lock or the global_lock.
    //
    // This is a leaf lock.
    uvm_spinlock_irqsave_t gpu_table_lock;

    // Number of simulated/emulated devices that have registered with UVM
    unsigned num_simulated_devices;

    // A single queue for deferred work that is non-GPU-specific.
    nv_kthread_q_t global_q;

    // A single queue for deferred f_ops->release() handling. Items scheduled
    // to run on it may block for the duration of system sleep cycles, stalling
    // the queue and preventing any other items from running.
    nv_kthread_q_t deferred_release_q;

    struct
    {
        // Indicates whether the system HW supports ATS. This field is set once
        // during global initialization (uvm_global_init), and can be read
        // afterwards without acquiring any locks.
        bool supported;

        // On top of HW platform support, ATS support can be overridden using
        // the module parameter uvm_ats_mode. This field is set once during
        // global initialization (uvm_global_init), and can be read afterwards
        // without acquiring any locks.
        bool enabled;
    } ats;

#if UVM_IBM_NPU_SUPPORTED()
    // On IBM systems this array tracks the active NPUs (the NPUs which are
    // attached to retained GPUs).
    uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif

    // List of all active VA spaces
    struct
    {
        uvm_mutex_t lock;
        struct list_head list;
    } va_spaces;

    // Notify a registered process about the driver state after it's unloaded.
    // The intent is to systematically report any error during the driver
    // teardown. unload_state is used for testing only.
    struct
    {
        // ptr points to an 8-byte buffer within page.
        NvU64 *ptr;
        struct page *page;
    } unload_state;

    // True if the VM has AMD's SEV, or equivalent HW security extensions such
    // as Intel's TDX, enabled. The flag is always false on the host.
    //
    // This value moves in tandem with that of Confidential Computing in the
    // GPU(s) in all supported configurations, so it is used as a proxy for the
    // Confidential Computing state.
    //
    // This field is set once during global initialization (uvm_global_init),
    // and can be read afterwards without acquiring any locks.
    bool conf_computing_enabled;
};

// Initialize global uvm state
NV_STATUS uvm_global_init(void);

// Deinitialize global state (called from module exit)
void uvm_global_exit(void);

// Prepare for entry into a system sleep state
NV_STATUS uvm_suspend_entry(void);

// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);

// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    UVM_ASSERT(!g_uvm_global.parent_gpus[gpu_index]);
    g_uvm_global.parent_gpus[gpu_index] = parent_gpu;
}
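
// Illustrative sketch (not part of the driver): the lock acquisition order
// required before calling uvm_global_add_parent_gpu(). The wrapper function
// below is hypothetical; the lock helpers follow the uvm_lock.h naming.
//
//     static void example_publish_parent_gpu(uvm_parent_gpu_t *parent_gpu)
//     {
//         uvm_mutex_lock(&g_uvm_global.global_lock);
//         uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
//
//         uvm_global_add_parent_gpu(parent_gpu);
//
//         uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
//         uvm_mutex_unlock(&g_uvm_global.global_lock);
//     }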

// Remove parent GPU from the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);

    g_uvm_global.parent_gpus[gpu_index] = NULL;
}

// Get a parent GPU by its id.
// Returns a pointer to the parent GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
// retained at least one of the child GPUs.
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_parent_gpu_id_t id)
{
    return g_uvm_global.parent_gpus[uvm_parent_id_gpu_index(id)];
}

// Get a GPU by its GPU id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get(uvm_gpu_id_t gpu_id)
{
    uvm_parent_gpu_t *parent_gpu;

    parent_gpu = g_uvm_global.parent_gpus[uvm_parent_id_gpu_index_from_gpu_id(gpu_id)];
    if (!parent_gpu)
        return NULL;

    return parent_gpu->gpus[uvm_id_sub_processor_index(gpu_id)];
}
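
// Illustrative sketch (not part of the driver): looking up a GPU under the
// gpu_table_lock from a context that has not retained it. The function below
// is hypothetical. Note that the looked-up pointer is only safe to dereference
// while the lock is held or while the GPU is retained, so only a boolean
// result escapes the critical section here.
//
//     static bool example_gpu_is_registered(uvm_gpu_id_t gpu_id)
//     {
//         uvm_gpu_t *gpu;
//
//         uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
//         gpu = uvm_gpu_get(gpu_id);
//         uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
//
//         return gpu != NULL;
//     }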

static uvmGpuSessionHandle uvm_global_session_handle(void)
{
    return g_uvm_global.rm_session_handle;
}

// Returns true if UVM is suspended for power management. Defined here so the
// macros below can assert against it.
static bool uvm_global_is_suspended(void)
{
    return g_uvm_global.pm.is_suspended;
}

// Use these READ_ONCE()/WRITE_ONCE() wrappers when accessing GPU resources
// in BAR0/BAR1 to detect cases in which GPUs are accessed when UVM is
// suspended.
#define UVM_GPU_WRITE_ONCE(x, val) do {         \
        UVM_ASSERT(!uvm_global_is_suspended()); \
        UVM_WRITE_ONCE(x, val);                 \
    } while (0)

#define UVM_GPU_READ_ONCE(x) ({                 \
        UVM_ASSERT(!uvm_global_is_suspended()); \
        UVM_READ_ONCE(x);                       \
    })
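
// Illustrative sketch (not part of the driver): wrapping a BAR mapping access
// so that a GPU touch during system sleep trips the assert in the macros
// above. Both the function and the doorbell mapping are hypothetical.
//
//     static void example_ring_doorbell(volatile NvU32 *doorbell_mapping, NvU32 token)
//     {
//         UVM_GPU_WRITE_ONCE(*doorbell_mapping, token);
//     }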

static bool global_is_fatal_error_assert_disabled(void)
{
    // Only allow the assert to be disabled if tests are enabled
    if (!uvm_enable_builtin_tests)
        return false;

    return g_uvm_global.disable_fatal_error_assert;
}

// Set a global fatal error.
// Once that happens, the driver should refuse to do anything other than try
// to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
// Use a macro so that the assert below provides precise file and line info and
// a backtrace.
#define uvm_global_set_fatal_error(error)                                       \
    do {                                                                        \
        if (!global_is_fatal_error_assert_disabled())                           \
            UVM_ASSERT_MSG(0, "Fatal error: %s\n", nvstatusToString(error));    \
        uvm_global_set_fatal_error_impl(error);                                 \
    } while (0)
void uvm_global_set_fatal_error_impl(NV_STATUS error);

// Get the global status
static NV_STATUS uvm_global_get_status(void)
{
    return atomic_read(&g_uvm_global.fatal_error);
}
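
// Illustrative sketch (not part of the driver): the typical caller pattern
// around the fatal error API. Entry points bail out early once a fatal error
// has been recorded, and error handlers record unrecoverable conditions. Both
// function names are hypothetical.
//
//     static NV_STATUS example_entry_point(void)
//     {
//         NV_STATUS status = uvm_global_get_status();
//
//         if (status != NV_OK)
//             return status;
//
//         // ... normal operation ...
//         return NV_OK;
//     }
//
//     static void example_on_unrecoverable_ecc(NV_STATUS ecc_status)
//     {
//         uvm_global_set_fatal_error(ecc_status);
//     }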

// Reset global fatal error
// This is to be used by tests triggering the global error on purpose only.
// Returns the value of the global error field that existed just before this
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);

static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *gpus)
{
    uvm_gpu_t *gpu;
    uvm_gpu_id_t gpu_id = uvm_processor_mask_find_first_gpu_id(gpus);

    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // If there is a valid GPU id in the mask, assert that the corresponding
    // uvm_gpu_t is present. Otherwise it would stop a
    // for_each_gpu_in_mask() loop prematurely. Today, this could only
    // happen in remove_gpu() because the GPU being removed is deleted from the
    // global table very early.
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}

static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
    uvm_gpu_id_t gpu_id;

    UVM_ASSERT(gpu);

    gpu_id = uvm_processor_mask_find_next_id(gpus, uvm_gpu_id_next(gpu->id));
    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // See the comment in uvm_processor_mask_find_first_gpu().
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}

// Helper to iterate over all GPUs in the input mask
#define for_each_gpu_in_mask(gpu, mask)                         \
    for (gpu = uvm_processor_mask_find_first_gpu(mask);         \
         gpu != NULL;                                           \
         gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))

// Helper to iterate over all GPUs retained by the UVM driver
// (across all VA spaces).
#define for_each_gpu(gpu)                                                              \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                         \
           gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);});    \
           gpu != NULL;                                                                \
           gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
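
// Illustrative sketch (not part of the driver): using the iterators above. The
// function is hypothetical; for_each_gpu() itself asserts that the global lock
// is held.
//
//     static NvU32 example_count_retained_gpus(void)
//     {
//         uvm_gpu_t *gpu;
//         NvU32 count = 0;
//
//         uvm_mutex_lock(&g_uvm_global.global_lock);
//
//         for_each_gpu(gpu)
//             count++;
//
//         uvm_mutex_unlock(&g_uvm_global.global_lock);
//
//         return count;
//     }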

// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 i;

    if (parent_gpu) {
        NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
        i = gpu_index + 1;
    }
    else {
        i = 0;
    }

    parent_gpu = NULL;

    while (i < UVM_PARENT_ID_MAX_GPUS) {
        if (g_uvm_global.parent_gpus[i]) {
            parent_gpu = g_uvm_global.parent_gpus[i];
            break;
        }

        i++;
    }

    return parent_gpu;
}

// LOCKING: Must hold the global_lock
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
    uvm_gpu_t *gpu = NULL;
    uvm_gpu_id_t gpu_id;
    NvU32 sub_processor_index;
    NvU32 cur_sub_processor_index;

    UVM_ASSERT(parent_gpu);

    gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
    cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;

    sub_processor_index = find_next_bit(parent_gpu->valid_gpus,
                                        UVM_PARENT_ID_MAX_SUB_PROCESSORS,
                                        cur_sub_processor_index + 1);
    if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
        gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
        UVM_ASSERT(gpu != NULL);
    }

    return gpu;
}

// LOCKING: Must hold either the global_lock or the gpu_table_lock
#define for_each_parent_gpu(parent_gpu)                                                         \
    for ((parent_gpu) = uvm_global_find_next_parent_gpu(NULL);                                  \
         (parent_gpu) != NULL;                                                                  \
         (parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))

// LOCKING: Must hold the global_lock
#define for_each_gpu_in_parent(parent_gpu, gpu)                                                 \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                                  \
         (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);});                  \
         (gpu) != NULL;                                                                         \
         (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
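
// Illustrative sketch (not part of the driver): visiting every registered
// parent GPU and each of its valid child GPUs. The function is hypothetical;
// the global lock requirement comes from the macros above.
//
//     static void example_visit_all_gpus(void)
//     {
//         uvm_parent_gpu_t *parent_gpu;
//         uvm_gpu_t *gpu;
//
//         uvm_assert_mutex_locked(&g_uvm_global.global_lock);
//
//         for_each_parent_gpu(parent_gpu) {
//             for_each_gpu_in_parent(parent_gpu, gpu) {
//                 // ... operate on gpu ...
//             }
//         }
//     }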

// Helper which calls uvm_gpu_retain() on each GPU in the mask.
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask);

// Helper which calls uvm_gpu_release_locked() on each GPU in the mask.
//
// LOCKING: this function takes and releases the global lock if the input mask
//          is not empty
void uvm_global_gpu_release(const uvm_processor_mask_t *mask);

// Check for ECC errors for all GPUs in a mask.
// Notably, this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);
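
// Illustrative sketch (not part of the driver): retaining a mask of GPUs
// across work that may block, then checking them for ECC errors before
// releasing. The function is hypothetical.
//
//     static NV_STATUS example_work_on_gpu_mask(uvm_processor_mask_t *mask)
//     {
//         NV_STATUS status;
//
//         uvm_global_gpu_retain(mask);
//
//         // ... work that must keep the GPUs from being removed ...
//
//         status = uvm_global_gpu_check_ecc_error(mask);
//         uvm_global_gpu_release(mask);
//
//         return status;
//     }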

// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);

// Release fault service contexts if any exist.
void uvm_service_block_context_exit(void);

#endif // __UVM_GLOBAL_H__