1 /*******************************************************************************
2 Copyright (c) 2013-2023 NVIDIA Corporation
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 02110-1301, USA.
18 *******************************************************************************/
19
20 #include "uvm_common.h"
21 #include "uvm_linux.h"
22 #include "uvm_forward_decl.h"
23
// TODO: Bug 1710855: Tweak this number through benchmarks
//
// How long, in nanoseconds (10 microseconds), a spin loop may run before
// uvm_spin_loop() starts yielding the CPU with schedule().
#define UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS (10ULL * 1000)

// Interval, in seconds, after which uvm_spin_loop() returns
// NV_ERR_TIMEOUT_RETRY to its caller.
#define UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC 30ULL
27
28 // Default to debug prints being enabled for debug and develop builds and
29 // disabled for release builds.
30 static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
31
32 // Make the module param writable so that prints can be enabled or disabled at
33 // any time by modifying the module parameter.
34 module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
35 MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");
36
uvm_debug_prints_enabled(void)37 bool uvm_debug_prints_enabled(void)
38 {
39 return uvm_debug_prints != 0;
40 }
41
42 // This parameter allows a program in user mode to call the kernel tests
43 // defined in this module. This parameter should only be used for testing and
44 // must not be set to true otherwise since it breaks security when it is
45 // enabled. By default and for safety reasons this parameter is set to false.
46 int uvm_enable_builtin_tests __read_mostly = 0;
47 module_param(uvm_enable_builtin_tests, int, S_IRUGO);
48 MODULE_PARM_DESC(uvm_enable_builtin_tests,
49 "Enable the UVM built-in tests. (This is a security risk)");
50
51 // Default to release asserts being enabled.
52 int uvm_release_asserts __read_mostly = 1;
53
54 // Make the module param writable so that release asserts can be enabled or
55 // disabled at any time by modifying the module parameter.
56 module_param(uvm_release_asserts, int, S_IRUGO|S_IWUSR);
57 MODULE_PARM_DESC(uvm_release_asserts, "Enable uvm asserts included in release builds.");
58
59 // Default to failed release asserts not dumping stack.
60 int uvm_release_asserts_dump_stack __read_mostly = 0;
61
62 // Make the module param writable so that dumping the stack can be enabled and
63 // disabled at any time by modifying the module parameter.
64 module_param(uvm_release_asserts_dump_stack, int, S_IRUGO|S_IWUSR);
65 MODULE_PARM_DESC(uvm_release_asserts_dump_stack, "dump_stack() on failed UVM release asserts.");
66
67 // Default to failed release asserts not setting the global UVM error.
68 int uvm_release_asserts_set_global_error __read_mostly = 0;
69
70 // Make the module param writable so that setting the global fatal error can be
71 // enabled and disabled at any time by modifying the module parameter.
72 module_param(uvm_release_asserts_set_global_error, int, S_IRUGO|S_IWUSR);
73 MODULE_PARM_DESC(uvm_release_asserts_set_global_error, "Set UVM global fatal error on failed release asserts.");
74
75 // A separate flag to enable setting global error, to be used by tests only.
76 bool uvm_release_asserts_set_global_error_for_tests __read_mostly = false;
77
78 //
79 // Convert kernel errno codes to corresponding NV_STATUS
80 //
errno_to_nv_status(int errnoCode)81 NV_STATUS errno_to_nv_status(int errnoCode)
82 {
83 if (errnoCode < 0)
84 errnoCode = -errnoCode;
85
86 switch (errnoCode)
87 {
88 case 0:
89 return NV_OK;
90
91 case E2BIG:
92 case EINVAL:
93 return NV_ERR_INVALID_ARGUMENT;
94
95 case EACCES:
96 return NV_ERR_INVALID_ACCESS_TYPE;
97
98 case EADDRINUSE:
99 case EADDRNOTAVAIL:
100 return NV_ERR_UVM_ADDRESS_IN_USE;
101
102 case EFAULT:
103 return NV_ERR_INVALID_ADDRESS;
104
105 case EOVERFLOW:
106 return NV_ERR_OUT_OF_RANGE;
107
108 case EINTR:
109 case EBUSY:
110 case EAGAIN:
111 return NV_ERR_BUSY_RETRY;
112
113 case ENXIO:
114 case ENODEV:
115 return NV_ERR_MODULE_LOAD_FAILED;
116
117 case ENOMEM:
118 return NV_ERR_NO_MEMORY;
119
120 case EPERM:
121 return NV_ERR_INSUFFICIENT_PERMISSIONS;
122
123 case ESRCH:
124 return NV_ERR_PID_NOT_FOUND;
125
126 case ETIMEDOUT:
127 return NV_ERR_TIMEOUT;
128
129 case EEXIST:
130 return NV_ERR_IN_USE;
131
132 case ENOSYS:
133 case EOPNOTSUPP:
134 return NV_ERR_NOT_SUPPORTED;
135
136 case ENOENT:
137 return NV_ERR_NO_VALID_PATH;
138
139 case EIO:
140 return NV_ERR_RC_ERROR;
141
142 case ENODATA:
143 return NV_ERR_OBJECT_NOT_FOUND;
144
145 default:
146 return NV_ERR_GENERIC;
147 };
148 }
149
150 // Returns POSITIVE errno
nv_status_to_errno(NV_STATUS status)151 int nv_status_to_errno(NV_STATUS status)
152 {
153 switch (status) {
154 case NV_OK:
155 return 0;
156
157 case NV_ERR_BUSY_RETRY:
158 return EAGAIN;
159
160 case NV_ERR_INSUFFICIENT_PERMISSIONS:
161 return EPERM;
162
163 case NV_ERR_GPU_UUID_NOT_FOUND:
164 return ENODEV;
165
166 case NV_ERR_INSUFFICIENT_RESOURCES:
167 case NV_ERR_NO_MEMORY:
168 return ENOMEM;
169
170 case NV_ERR_INVALID_ACCESS_TYPE:
171 return EACCES;
172
173 case NV_ERR_INVALID_ADDRESS:
174 return EFAULT;
175
176 case NV_ERR_INVALID_ARGUMENT:
177 case NV_ERR_INVALID_DEVICE:
178 case NV_ERR_INVALID_PARAMETER:
179 case NV_ERR_INVALID_REQUEST:
180 case NV_ERR_INVALID_STATE:
181 return EINVAL;
182
183 case NV_ERR_NOT_SUPPORTED:
184 return ENOSYS;
185
186 case NV_ERR_OBJECT_NOT_FOUND:
187 return ENODATA;
188
189 case NV_ERR_MODULE_LOAD_FAILED:
190 return ENXIO;
191
192 case NV_ERR_OVERLAPPING_UVM_COMMIT:
193 case NV_ERR_UVM_ADDRESS_IN_USE:
194 return EADDRINUSE;
195
196 case NV_ERR_PID_NOT_FOUND:
197 return ESRCH;
198
199 case NV_ERR_TIMEOUT:
200 case NV_ERR_TIMEOUT_RETRY:
201 return ETIMEDOUT;
202
203 case NV_ERR_IN_USE:
204 return EEXIST;
205
206 case NV_ERR_NO_VALID_PATH:
207 return ENOENT;
208
209 case NV_ERR_RC_ERROR:
210 case NV_ERR_ECC_ERROR:
211 return EIO;
212
213 case NV_ERR_OUT_OF_RANGE:
214 return EOVERFLOW;
215
216 default:
217 UVM_ASSERT_MSG(0, "No errno conversion set up for NV_STATUS %s\n", nvstatusToString(status));
218 return EINVAL;
219 }
220 }
221
222 //
223 // This routine retrieves the process ID of current, but makes no attempt to
224 // refcount or lock the pid in place.
225 //
uvm_get_stale_process_id(void)226 unsigned uvm_get_stale_process_id(void)
227 {
228 return (unsigned)task_tgid_vnr(current);
229 }
230
uvm_get_stale_thread_id(void)231 unsigned uvm_get_stale_thread_id(void)
232 {
233 return (unsigned)task_pid_vnr(current);
234 }
235
// Intentionally empty hook.
// NOTE(review): presumably called when a UVM test fails so that a breakpoint
// or probe can be attached to a single symbol - confirm against the callers.
void on_uvm_test_fail(void)
{
    // No-op statement; the function has no effect of its own.
    (void)NULL;
}
240
// Hook that does nothing in a normal build.
// NOTE(review): presumably called from the UVM assert macros on a failed
// assert so that a breakpoint or probe can be attached - confirm against the
// callers.
//
// When built for Coverity analysis (__COVERITY__ defined), the hook calls
// __coverity_panic__() so the analyzer treats a failed assert as the end of
// the execution path.
void on_uvm_assert(void)
{
    // No-op statement; the function has no runtime effect of its own.
    (void)NULL;
#ifdef __COVERITY__
    // The call must be a complete statement: without the trailing semicolon
    // this function fails to parse whenever __COVERITY__ is defined.
    __coverity_panic__();
#endif
}
248
uvm_spin_loop(uvm_spin_loop_t * spin)249 NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin)
250 {
251 NvU64 curr = NV_GETTIME();
252
253 // This schedule() is required for functionality, not just system
254 // performance. It allows RM to run and unblock the UVM driver:
255 //
256 // - UVM must service faults in order for RM to idle/preempt a context
257 // - RM must service interrupts which stall UVM (SW methods, stalling CE
258 // interrupts, etc) in order for UVM to service faults
259 //
260 // Even though UVM's bottom half is preemptable, we have encountered cases
261 // in which a user thread running in RM won't preempt the UVM driver's
262 // thread unless the UVM driver thread gives up its timeslice. This is also
263 // theoretically possible if the RM thread has a low nice priority.
264 //
265 // TODO: Bug 1710855: Look into proper prioritization of these threads as a longer-term
266 // solution.
267 if (curr - spin->start_time_ns >= UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS && NV_MAY_SLEEP()) {
268 schedule();
269 curr = NV_GETTIME();
270 }
271
272 cpu_relax();
273
274 // TODO: Bug 1710855: Also check fatal_signal_pending() here if the caller can handle it.
275
276 if (curr - spin->print_time_ns >= 1000*1000*1000*UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC) {
277 spin->print_time_ns = curr;
278 return NV_ERR_TIMEOUT_RETRY;
279 }
280
281 return NV_OK;
282 }
283
284 // This formats a GPU UUID, in a UVM-friendly way. That is, nearly the same as
285 // what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so
286 // that we know that we have a real, binary formatted UUID that will work in
287 // the UVM APIs.
288 //
289 // It comes out like this:
290 //
291 // UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
292 //
293 // This routine will always null-terminate the string for you. This is true
294 // even if the buffer was too small!
295 //
296 // Return value is the number of non-null characters written.
297 //
// Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command
// return its default format, which is ascii, not binary, then you would get
// this back:
301 //
302 // GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
303 //
304 // ...which is actually a character string, and won't work for UVM API calls.
305 // So it's very important to be able to see the difference.
306 //
// Maps a 4-bit value (0..15) to its lowercase hexadecimal character:
// 0..9 become '0'..'9' and 10..15 become 'a'..'f'.
static char uvm_digit_to_hex(unsigned value)
{
    return (value < 10) ? ('0' + value) : ('a' + (value - 10));
}
314
format_uuid_to_buffer(char * buffer,unsigned bufferLength,const NvProcessorUuid * pUuidStruct)315 int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct)
316 {
317 char *str = buffer+8;
318 unsigned i;
319 unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
320
321 if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
322 return *buffer = 0;
323
324 memcpy(buffer, "UVM-GPU-", 8);
325
326 for (i = 0; i < 16; i++) {
327 *str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
328 *str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
329
330 if (dashMask & (1 << (i+1)))
331 *str++ = '-';
332 }
333
334 *str = 0;
335
336 return (int)(str-buffer);
337 }
338
339