1 /******************************************************************************* 2 Copyright (c) 2013-2023 NVIDIA Corporation 3 4 This program is free software; you can redistribute it and/or 5 modify it under the terms of the GNU General Public License 6 as published by the Free Software Foundation; either version 2 7 of the License, or (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17 02110-1301, USA. 18 *******************************************************************************/ 19 20 #include "uvm_common.h" 21 #include "uvm_linux.h" 22 #include "uvm_forward_decl.h" 23 24 // TODO: Bug 1710855: Tweak this number through benchmarks 25 #define UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS (10*1000ULL) 26 #define UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC 30ULL 27 28 // Default to debug prints being enabled for debug and develop builds and 29 // disabled for release builds. 30 static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP(); 31 32 // Make the module param writable so that prints can be enabled or disabled at 33 // any time by modifying the module parameter. 34 module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR); 35 MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints."); 36 37 bool uvm_debug_prints_enabled(void) 38 { 39 return uvm_debug_prints != 0; 40 } 41 42 // This parameter allows a program in user mode to call the kernel tests 43 // defined in this module. This parameter should only be used for testing and 44 // must not be set to true otherwise since it breaks security when it is 45 // enabled. By default and for safety reasons this parameter is set to false. 46 int uvm_enable_builtin_tests __read_mostly = 0; 47 module_param(uvm_enable_builtin_tests, int, S_IRUGO); 48 MODULE_PARM_DESC(uvm_enable_builtin_tests, 49 "Enable the UVM built-in tests. (This is a security risk)"); 50 51 // Default to release asserts being enabled. 52 int uvm_release_asserts __read_mostly = 1; 53 54 // Make the module param writable so that release asserts can be enabled or 55 // disabled at any time by modifying the module parameter. 56 module_param(uvm_release_asserts, int, S_IRUGO|S_IWUSR); 57 MODULE_PARM_DESC(uvm_release_asserts, "Enable uvm asserts included in release builds."); 58 59 // Default to failed release asserts not dumping stack. 60 int uvm_release_asserts_dump_stack __read_mostly = 0; 61 62 // Make the module param writable so that dumping the stack can be enabled and 63 // disabled at any time by modifying the module parameter. 64 module_param(uvm_release_asserts_dump_stack, int, S_IRUGO|S_IWUSR); 65 MODULE_PARM_DESC(uvm_release_asserts_dump_stack, "dump_stack() on failed UVM release asserts."); 66 67 // Default to failed release asserts not setting the global UVM error. 68 int uvm_release_asserts_set_global_error __read_mostly = 0; 69 70 // Make the module param writable so that setting the global fatal error can be 71 // enabled and disabled at any time by modifying the module parameter. 72 module_param(uvm_release_asserts_set_global_error, int, S_IRUGO|S_IWUSR); 73 MODULE_PARM_DESC(uvm_release_asserts_set_global_error, "Set UVM global fatal error on failed release asserts."); 74 75 // A separate flag to enable setting global error, to be used by tests only. 76 bool uvm_release_asserts_set_global_error_for_tests __read_mostly = false; 77 78 // 79 // Convert kernel errno codes to corresponding NV_STATUS 80 // 81 NV_STATUS errno_to_nv_status(int errnoCode) 82 { 83 if (errnoCode < 0) 84 errnoCode = -errnoCode; 85 86 switch (errnoCode) 87 { 88 case 0: 89 return NV_OK; 90 91 case E2BIG: 92 case EINVAL: 93 return NV_ERR_INVALID_ARGUMENT; 94 95 case EACCES: 96 return NV_ERR_INVALID_ACCESS_TYPE; 97 98 case EADDRINUSE: 99 case EADDRNOTAVAIL: 100 return NV_ERR_UVM_ADDRESS_IN_USE; 101 102 case EFAULT: 103 return NV_ERR_INVALID_ADDRESS; 104 105 case EOVERFLOW: 106 return NV_ERR_OUT_OF_RANGE; 107 108 case EINTR: 109 case EBUSY: 110 case EAGAIN: 111 return NV_ERR_BUSY_RETRY; 112 113 case ENXIO: 114 case ENODEV: 115 return NV_ERR_MODULE_LOAD_FAILED; 116 117 case ENOMEM: 118 return NV_ERR_NO_MEMORY; 119 120 case EPERM: 121 return NV_ERR_INSUFFICIENT_PERMISSIONS; 122 123 case ESRCH: 124 return NV_ERR_PID_NOT_FOUND; 125 126 case ETIMEDOUT: 127 return NV_ERR_TIMEOUT; 128 129 case EEXIST: 130 return NV_ERR_IN_USE; 131 132 case ENOSYS: 133 case EOPNOTSUPP: 134 return NV_ERR_NOT_SUPPORTED; 135 136 case ENOENT: 137 return NV_ERR_NO_VALID_PATH; 138 139 case EIO: 140 return NV_ERR_RC_ERROR; 141 142 case ENODATA: 143 return NV_ERR_OBJECT_NOT_FOUND; 144 145 default: 146 return NV_ERR_GENERIC; 147 }; 148 } 149 150 // Returns POSITIVE errno 151 int nv_status_to_errno(NV_STATUS status) 152 { 153 switch (status) { 154 case NV_OK: 155 return 0; 156 157 case NV_ERR_BUSY_RETRY: 158 return EAGAIN; 159 160 case NV_ERR_INSUFFICIENT_PERMISSIONS: 161 return EPERM; 162 163 case NV_ERR_GPU_UUID_NOT_FOUND: 164 return ENODEV; 165 166 case NV_ERR_INSUFFICIENT_RESOURCES: 167 case NV_ERR_NO_MEMORY: 168 return ENOMEM; 169 170 case NV_ERR_INVALID_ACCESS_TYPE: 171 return EACCES; 172 173 case NV_ERR_INVALID_ADDRESS: 174 return EFAULT; 175 176 case NV_ERR_INVALID_ARGUMENT: 177 case NV_ERR_INVALID_DEVICE: 178 case NV_ERR_INVALID_PARAMETER: 179 case NV_ERR_INVALID_REQUEST: 180 case NV_ERR_INVALID_STATE: 181 return EINVAL; 182 183 case NV_ERR_NOT_SUPPORTED: 184 return ENOSYS; 185 186 case NV_ERR_OBJECT_NOT_FOUND: 187 return ENODATA; 188 189 case NV_ERR_MODULE_LOAD_FAILED: 190 return ENXIO; 191 192 case NV_ERR_OVERLAPPING_UVM_COMMIT: 193 case NV_ERR_UVM_ADDRESS_IN_USE: 194 return EADDRINUSE; 195 196 case NV_ERR_PID_NOT_FOUND: 197 return ESRCH; 198 199 case NV_ERR_TIMEOUT: 200 case NV_ERR_TIMEOUT_RETRY: 201 return ETIMEDOUT; 202 203 case NV_ERR_IN_USE: 204 return EEXIST; 205 206 case NV_ERR_NO_VALID_PATH: 207 return ENOENT; 208 209 case NV_ERR_RC_ERROR: 210 case NV_ERR_ECC_ERROR: 211 return EIO; 212 213 case NV_ERR_OUT_OF_RANGE: 214 return EOVERFLOW; 215 216 default: 217 UVM_ASSERT_MSG(0, "No errno conversion set up for NV_STATUS %s\n", nvstatusToString(status)); 218 return EINVAL; 219 } 220 } 221 222 // 223 // This routine retrieves the process ID of current, but makes no attempt to 224 // refcount or lock the pid in place. 225 // 226 unsigned uvm_get_stale_process_id(void) 227 { 228 return (unsigned)task_tgid_vnr(current); 229 } 230 231 unsigned uvm_get_stale_thread_id(void) 232 { 233 return (unsigned)task_pid_vnr(current); 234 } 235 236 void on_uvm_test_fail(void) 237 { 238 (void)NULL; 239 } 240 241 void on_uvm_assert(void) 242 { 243 (void)NULL; 244 #ifdef __COVERITY__ 245 __coverity_panic__() 246 #endif 247 } 248 249 NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin) 250 { 251 NvU64 curr = NV_GETTIME(); 252 253 // This schedule() is required for functionality, not just system 254 // performance. It allows RM to run and unblock the UVM driver: 255 // 256 // - UVM must service faults in order for RM to idle/preempt a context 257 // - RM must service interrupts which stall UVM (SW methods, stalling CE 258 // interrupts, etc) in order for UVM to service faults 259 // 260 // Even though UVM's bottom half is preemptable, we have encountered cases 261 // in which a user thread running in RM won't preempt the UVM driver's 262 // thread unless the UVM driver thread gives up its timeslice. This is also 263 // theoretically possible if the RM thread has a low nice priority. 264 // 265 // TODO: Bug 1710855: Look into proper prioritization of these threads as a longer-term 266 // solution. 267 if (curr - spin->start_time_ns >= UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS && NV_MAY_SLEEP()) { 268 schedule(); 269 curr = NV_GETTIME(); 270 } 271 272 cpu_relax(); 273 274 // TODO: Bug 1710855: Also check fatal_signal_pending() here if the caller can handle it. 275 276 if (curr - spin->print_time_ns >= 1000*1000*1000*UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC) { 277 spin->print_time_ns = curr; 278 return NV_ERR_TIMEOUT_RETRY; 279 } 280 281 return NV_OK; 282 } 283 284 // This formats a GPU UUID, in a UVM-friendly way. That is, nearly the same as 285 // what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so 286 // that we know that we have a real, binary formatted UUID that will work in 287 // the UVM APIs. 288 // 289 // It comes out like this: 290 // 291 // UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27 292 // 293 // This routine will always null-terminate the string for you. This is true 294 // even if the buffer was too small! 295 // 296 // Return value is the number of non-null characters written. 297 // 298 // Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command 299 // return it's default format, which is ascii, not binary, then you would get 300 // this back: 301 // 302 // GPU-d802726c-df8d-a3c3-ec53-48bdec201c27 303 // 304 // ...which is actually a character string, and won't work for UVM API calls. 305 // So it's very important to be able to see the difference. 306 // 307 static char uvm_digit_to_hex(unsigned value) 308 { 309 if (value >= 10) 310 return value - 10 + 'a'; 311 else 312 return value + '0'; 313 } 314 315 int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct) 316 { 317 char *str = buffer+8; 318 unsigned i; 319 unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10; 320 321 memcpy(buffer, "UVM-GPU-", 8); 322 if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/)) 323 return *buffer = 0; 324 325 for (i = 0; i < 16; i++) { 326 *str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4); 327 *str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF); 328 329 if (dashMask & (1 << (i+1))) 330 *str++ = '-'; 331 } 332 333 *str = 0; 334 335 return (int)(str-buffer); 336 } 337 338