/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_GLOBAL_H__
#define __UVM_GLOBAL_H__

#include "nv_uvm_types.h"
#include "uvm_extern_decl.h"
#include "uvm_linux.h"
#include "uvm_common.h"
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"

// Global state of the uvm driver
struct uvm_global_struct
{
    // Mask of retained GPUs.
    // Note that GPUs are added to this mask as the last step of add_gpu() and
    // removed from it as the first step of remove_gpu() implying that a GPU
    // that's being initialized or deinitialized will not be in it.
    uvm_processor_mask_t retained_gpus;

    // Array of the parent GPUs registered with UVM. Note that GPUs will have
    // ids offset by 1 to accommodate the UVM_ID_CPU so e.g., parent_gpus[0]
    // will have GPU id = 1. A GPU entry is unused iff it does not exist
    // (is a NULL pointer) in this table.
    uvm_parent_gpu_t *parent_gpus[UVM_PARENT_ID_MAX_GPUS];

    // A global RM session (RM client)
    // Created on module load and destroyed on module unload
    uvmGpuSessionHandle rm_session_handle;

    // peer-to-peer table
    // peer info is added and removed from this table when usermode
    // driver calls UvmEnablePeerAccess and UvmDisablePeerAccess
    // respectively.
    uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];

    // peer-to-peer copy mode
    // Pascal+ GPUs support virtual addresses in p2p copies.
    // Ampere+ GPUs add support for physical addresses in p2p copies.
    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Stores an NV_STATUS, once it becomes != NV_OK, the driver should refuse
    // to do most anything other than try and clean up as much as possible.
    // An example of a fatal error is an unrecoverable ECC error on one of the
    // GPUs.
    atomic_t fatal_error;

    // A flag to disable the assert on fatal error
    // To be used by tests and only consulted if tests are enabled.
    bool disable_fatal_error_assert;

    // Lock protecting the global state
    uvm_mutex_t global_lock;

    struct
    {
        // Lock synchronizing user threads with power management activity
        uvm_rw_semaphore_t lock;

        // Power management state flag; tested by UVM_GPU_WRITE_ONCE()
        // and UVM_GPU_READ_ONCE() to detect accesses to GPUs when
        // UVM is suspended.
        bool is_suspended;
    } pm;

    // This lock synchronizes addition and removal of GPUs from UVM's global
    // table. It must be held whenever g_uvm_global.parent_gpus[] is written. In
    // order to read from this table, you must hold either the gpu_table_lock,
    // or the global_lock.
    //
    // This is a leaf lock.
    uvm_spinlock_irqsave_t gpu_table_lock;

    // Number of simulated/emulated devices that have registered with UVM
    unsigned num_simulated_devices;

    // A single queue for deferred work that is non-GPU-specific.
    nv_kthread_q_t global_q;

    // A single queue for deferred f_ops->release() handling. Items scheduled
    // to run on it may block for the duration of system sleep cycles, stalling
    // the queue and preventing any other items from running.
    nv_kthread_q_t deferred_release_q;

    struct
    {
        // Indicates whether the system HW supports ATS. This field is set once
        // during global initialization (uvm_global_init), and can be read
        // afterwards without acquiring any locks.
        bool supported;

        // On top of HW platform support, ATS support can be overridden using
        // the module parameter uvm_ats_mode. This field is set once during
        // global initialization (uvm_global_init), and can be read afterwards
        // without acquiring any locks.
        bool enabled;
    } ats;

#if UVM_IBM_NPU_SUPPORTED()
    // On IBM systems this array tracks the active NPUs (the NPUs which are
    // attached to retained GPUs).
    uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif

    // List of all active VA spaces
    struct
    {
        // Protects the list below.
        uvm_mutex_t lock;
        struct list_head list;
    } va_spaces;

    // Notify a registered process about the driver state after it's unloaded.
    // The intent is to systematically report any error during the driver
    // teardown. unload_state is used for testing only.
    struct
    {
        // ptr points to a 8-byte buffer within page.
        NvU64 *ptr;
        struct page *page;
    } unload_state;

    // True if the VM has AMD's SEV, or equivalent HW security extensions such
    // as Intel's TDX, enabled. The flag is always false on the host.
    //
    // This value moves in tandem with that of Confidential Computing in the
    // GPU(s) in all supported configurations, so it is used as a proxy for the
    // Confidential Computing state.
    //
    // This field is set once during global initialization (uvm_global_init),
    // and can be read afterwards without acquiring any locks.
    bool conf_computing_enabled;
};

// Initialize global uvm state
NV_STATUS uvm_global_init(void);

// Deinitialize global state (called from module exit)
void uvm_global_exit(void);

// Prepare for entry into a system sleep state
NV_STATUS uvm_suspend_entry(void);

// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);

// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    // The slot must be empty: adding the same parent GPU id twice would be a
    // driver bug.
    UVM_ASSERT(!g_uvm_global.parent_gpus[gpu_index]);
    g_uvm_global.parent_gpus[gpu_index] = parent_gpu;
}

// Remove parent GPU from the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    // The entry is allowed to already be NULL (removal of a GPU whose add
    // never completed), but must never point at a different parent GPU.
    UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);

    g_uvm_global.parent_gpus[gpu_index] = NULL;
}

// Get a parent gpu by its id.
// Returns a pointer to the parent GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
// retained at least one of the child GPUs.
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_parent_gpu_id_t id)
{
    return g_uvm_global.parent_gpus[uvm_parent_id_gpu_index(id)];
}

// Get a gpu by its GPU id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get(uvm_gpu_id_t gpu_id)
{
    uvm_parent_gpu_t *parent_gpu;

    // Look up the parent first; a missing parent implies no child GPUs exist
    // for this id.
    parent_gpu = g_uvm_global.parent_gpus[uvm_parent_id_gpu_index_from_gpu_id(gpu_id)];
    if (!parent_gpu)
        return NULL;

    return parent_gpu->gpus[uvm_id_sub_processor_index(gpu_id)];
}

// Get the global RM session handle created at module load.
static uvmGpuSessionHandle uvm_global_session_handle(void)
{
    return g_uvm_global.rm_session_handle;
}

// Use these READ_ONCE()/WRITE_ONCE() wrappers when accessing GPU resources
// in BAR0/BAR1 to detect cases in which GPUs are accessed when UVM is
// suspended.
#define UVM_GPU_WRITE_ONCE(x, val) do {         \
        UVM_ASSERT(!uvm_global_is_suspended()); \
        UVM_WRITE_ONCE(x, val);                 \
    } while (0)

#define UVM_GPU_READ_ONCE(x) ({                 \
        UVM_ASSERT(!uvm_global_is_suspended()); \
        UVM_READ_ONCE(x);                       \
    })

// Returns true iff the assert in uvm_global_set_fatal_error() should be
// skipped. Only possible when built-in tests are enabled.
static bool global_is_fatal_error_assert_disabled(void)
{
    // Only allow the assert to be disabled if tests are enabled
    if (!uvm_enable_builtin_tests)
        return false;

    return g_uvm_global.disable_fatal_error_assert;
}

// Set a global fatal error
// Once that happens the driver should refuse to do anything other than try
// and clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
// Use a macro so that the assert below provides precise file and line info and
// a backtrace.
#define uvm_global_set_fatal_error(error)                                    \
    do {                                                                     \
        if (!global_is_fatal_error_assert_disabled())                        \
            UVM_ASSERT_MSG(0, "Fatal error: %s\n", nvstatusToString(error)); \
        uvm_global_set_fatal_error_impl(error);                              \
    } while (0)
void uvm_global_set_fatal_error_impl(NV_STATUS error);

// Get the global status
static NV_STATUS uvm_global_get_status(void)
{
    return atomic_read(&g_uvm_global.fatal_error);
}

// Reset global fatal error
// This is to be used by tests triggering the global error on purpose only.
// Returns the value of the global error field that existed just before this
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);

// Return the uvm_gpu_t for the first GPU id set in the mask, or NULL if the
// mask contains no valid GPU id.
static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *gpus)
{
    uvm_gpu_t *gpu;
    uvm_gpu_id_t gpu_id = uvm_processor_mask_find_first_gpu_id(gpus);

    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // If there is a valid GPU id in the mask, assert that the corresponding
    // uvm_gpu_t is present. Otherwise it would stop a
    // for_each_gpu_in_mask() loop prematurely. Today, this could only
    // happen in remove_gpu() because the GPU being removed is deleted from the
    // global table very early.
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}

// Return the uvm_gpu_t for the next GPU id set in the mask after gpu's id, or
// NULL if there is none. Iteration helper for for_each_gpu_in_mask().
static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
    uvm_gpu_id_t gpu_id;

    UVM_ASSERT(gpu);

    gpu_id = uvm_processor_mask_find_next_id(gpus, uvm_gpu_id_next(gpu->id));
    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // See comment in uvm_processor_mask_find_first_gpu().
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}

// Helper to iterate over all GPUs in the input mask
#define for_each_gpu_in_mask(gpu, mask)                         \
    for (gpu = uvm_processor_mask_find_first_gpu(mask);         \
         gpu != NULL;                                           \
         gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))

// Helper to iterate over all GPUs retained by the UVM driver
// (across all va spaces).
#define for_each_gpu(gpu)                                                           \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                      \
           gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
         gpu != NULL;                                                               \
         gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))

// Return the parent GPU in the global table following parent_gpu, or the
// first registered parent GPU when parent_gpu is NULL. Returns NULL when
// there are no further entries.
//
// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 i;

    if (parent_gpu) {
        NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
        i = gpu_index + 1;
    }
    else {
        i = 0;
    }

    parent_gpu = NULL;

    // Scan forward for the next non-NULL table entry.
    while (i < UVM_PARENT_ID_MAX_GPUS) {
        if (g_uvm_global.parent_gpus[i]) {
            parent_gpu = g_uvm_global.parent_gpus[i];
            break;
        }

        i++;
    }

    return parent_gpu;
}

// Return the next valid child GPU of parent_gpu after cur_gpu, or the first
// valid child when cur_gpu is NULL. Returns NULL when there are no further
// valid child GPUs.
//
// LOCKING: Must hold the global_lock
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
    uvm_gpu_t *gpu = NULL;
    uvm_gpu_id_t gpu_id;
    NvU32 sub_processor_index;
    NvU32 cur_sub_processor_index;

    UVM_ASSERT(parent_gpu);

    gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);

    // When cur_gpu is NULL, -1 stored in the unsigned NvU32 wraps to the max
    // value, and the "+ 1" below wraps it back to 0 so the search starts at
    // sub-processor index 0.
    cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;

    sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
    if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
        gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
        UVM_ASSERT(gpu != NULL);
    }

    return gpu;
}

// LOCKING: Must hold either the global_lock or the gpu_table_lock
#define for_each_parent_gpu(parent_gpu)                                \
    for ((parent_gpu) = uvm_global_find_next_parent_gpu(NULL);         \
         (parent_gpu) != NULL;                                         \
         (parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))

// LOCKING: Must hold the global_lock
#define for_each_gpu_in_parent(parent_gpu, gpu)                                  \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                   \
           (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
         (gpu) != NULL;                                                          \
         (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))

// Helper which calls uvm_gpu_retain() on each GPU in mask.
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask);

// Helper which calls uvm_gpu_release_locked on each GPU in mask.
//
// LOCKING: this function takes and releases the global lock if the input mask
// is not empty
void uvm_global_gpu_release(const uvm_processor_mask_t *mask);

// Check for ECC errors for all GPUs in a mask
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);

// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);

// Release fault service contexts if any exist.
void uvm_service_block_context_exit(void);

#endif // __UVM_GLOBAL_H__