/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define __NO_VERSION__

#include "os-interface.h"
#include "nv-linux.h"
#include "nv-caps-imex.h"

#include "nv-time.h"

#include <linux/mmzone.h>
#include <linux/numa.h>
#include <linux/cpuset.h>

#include <linux/pid.h>
#if defined(CONFIG_LOCKDEP)
#include <linux/lockdep.h>
#endif // CONFIG_LOCKDEP

extern char *NVreg_TemporaryFilePath;

#define MAX_ERROR_STRING 528
static char nv_error_string[MAX_ERROR_STRING];
static NV_DEFINE_SPINLOCK(nv_error_string_lock);

extern nv_linux_state_t nv_ctl_device;

extern nv_kthread_q_t nv_kthread_q;

NvU32 os_page_size = PAGE_SIZE;
NvU64 os_page_mask = NV_PAGE_MASK;
NvU8 os_page_shift = PAGE_SHIFT;
NvBool os_cc_enabled = 0;
NvBool os_cc_tdx_enabled = 0;

#if defined(CONFIG_DMA_SHARED_BUFFER)
NvBool os_dma_buf_enabled = NV_TRUE;
#else
NvBool os_dma_buf_enabled = NV_FALSE;
#endif // CONFIG_DMA_SHARED_BUFFER

NvBool os_imex_channel_is_supported = NV_TRUE;

void NV_API_CALL os_disable_console_access(void)
{
    console_lock();
}

void NV_API_CALL os_enable_console_access(void)
{
    console_unlock();
}

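//
// Note: the RM mutex below is implemented on top of a Linux binary
// semaphore ('struct semaphore') rather than 'struct mutex'. Unlike
// 'struct mutex', a semaphore has no owner tracking, so it may be
// released from a different thread (or context) than the one that
// acquired it.
//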
typedef struct semaphore os_mutex_t;

//
// os_alloc_mutex - Allocate the RM mutex
//
// ppMutex - filled in with pointer to opaque structure to mutex data type
//
NV_STATUS NV_API_CALL os_alloc_mutex
(
    void **ppMutex
)
{
    NV_STATUS rmStatus;
    os_mutex_t *os_mutex;

    rmStatus = os_alloc_mem(ppMutex, sizeof(os_mutex_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate mutex!\n");
        return rmStatus;
    }
    os_mutex = (os_mutex_t *)*ppMutex;
    NV_INIT_MUTEX(os_mutex);

    return NV_OK;
}

//
// os_free_mutex - Free resources associated with mutex allocated
//                 via os_alloc_mutex above.
//
// pMutex - Pointer to opaque structure to mutex data type
//
void NV_API_CALL os_free_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;

    if (os_mutex != NULL)
    {
        os_free_mem(pMutex);
    }
}

//
// os_acquire_mutex - Acquire the RM mutex; may sleep, and so must not be
//                    called from an atomic or interrupt context.
//
// pMutex - Pointer to opaque structure to mutex data type
//
NV_STATUS NV_API_CALL os_acquire_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down(os_mutex);

    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;
    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }

    if (down_trylock(os_mutex))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}


void NV_API_CALL os_release_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;
    up(os_mutex);
}

typedef struct semaphore os_semaphore_t;


void* NV_API_CALL os_alloc_semaphore
(
    NvU32 initialValue
)
{
    NV_STATUS rmStatus;
    os_semaphore_t *os_sema;

    rmStatus = os_alloc_mem((void *)&os_sema, sizeof(os_semaphore_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate semaphore!\n");
        return NULL;
    }

    sema_init(os_sema, initialValue);

    return (void *)os_sema;
}

void NV_API_CALL os_free_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;

    os_free_mem(os_sema);
}

NV_STATUS NV_API_CALL os_acquire_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down(os_sema);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;
    //
    // NOTE: down_trylock() is safe to call from IRQ, so we don't need an
    // NV_MAY_SLEEP() check here. We do check it in os_cond_acquire_mutex(),
    // even though it is also calling down_trylock(), since that keeps it
    // in line with the kernel's 'struct mutex' API.
    //
    if (down_trylock(os_sema))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}

NV_STATUS NV_API_CALL os_release_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;
    up(os_sema);
    return NV_OK;
}

typedef struct
{
    struct rw_semaphore sem;

#if defined(CONFIG_LOCKDEP)
    /**
     * The lock class key. It is registered with the Lockdep validator, so
     * that the usages and dependencies of all instances contribute to
     * constructing correct locking rules and this lock is tracked by the
     * Lockdep validator.
     */
    struct lock_class_key key;
#endif // CONFIG_LOCKDEP
} os_rwlock_t;

void* NV_API_CALL os_alloc_rwlock(void)
{
    os_rwlock_t *os_rwlock = NULL;

    NV_STATUS rmStatus = os_alloc_mem((void *)&os_rwlock, sizeof(os_rwlock_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate a struct os_rwlock_t!\n");
        return NULL;
    }

    init_rwsem(&os_rwlock->sem);

#if defined(CONFIG_LOCKDEP)
    // Register the dynamically allocated key to Lockdep.
    lockdep_register_key(&os_rwlock->key);
    lockdep_set_class(&os_rwlock->sem, &os_rwlock->key);
#endif // CONFIG_LOCKDEP

    return os_rwlock;
}

void NV_API_CALL os_free_rwlock(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

#if defined(CONFIG_LOCKDEP)
    // Unregister the dynamically allocated key.
    lockdep_unregister_key(&os_rwlock->key);
#endif // CONFIG_LOCKDEP

    os_free_mem(os_rwlock);
}

NV_STATUS NV_API_CALL os_acquire_rwlock_read(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down_read(&os_rwlock->sem);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_acquire_rwlock_write(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down_write(&os_rwlock->sem);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    //
    // Note that down_read_trylock() returns nonzero on *success* - the
    // opposite convention of down_trylock() - so invert the result.
    //
    if (!down_read_trylock(&os_rwlock->sem))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    // down_write_trylock() likewise returns nonzero on success.
    if (!down_write_trylock(&os_rwlock->sem))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}

void NV_API_CALL os_release_rwlock_read(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;
    up_read(&os_rwlock->sem);
}

void NV_API_CALL os_release_rwlock_write(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;
    up_write(&os_rwlock->sem);
}

NvBool NV_API_CALL os_semaphore_may_sleep(void)
{
    return NV_MAY_SLEEP();
}

NvBool NV_API_CALL os_is_isr(void)
{
    return (in_irq());
}

// return TRUE if the caller is the super-user
NvBool NV_API_CALL os_is_administrator(void)
{
    return NV_IS_SUSER();
}

NvBool NV_API_CALL os_allow_priority_override(void)
{
    return capable(CAP_SYS_NICE);
}

char* NV_API_CALL os_string_copy(
    char *dst,
    const char *src
)
{
    return strcpy(dst, src);
}

NvU32 NV_API_CALL os_string_length(
    const char* str
)
{
    return strlen(str);
}

NvU32 NV_API_CALL os_strtoul(const char *str, char **endp, NvU32 base)
{
    return (NvU32)simple_strtoul(str, endp, base);
}

NvS32 NV_API_CALL os_string_compare(const char *str1, const char *str2)
{
    return strcmp(str1, str2);
}

void *os_mem_copy_custom(
    void *dstPtr,
    const void *srcPtr,
    NvU32 length
)
{
    void *ret = dstPtr;
    NvU32 dwords, bytes = length;
    NvU8 *dst = dstPtr;
    const NvU8 *src = srcPtr;

    //
    // Copy a dword at a time when both buffers are 4-byte aligned and the
    // copy is large enough (128 bytes) for the setup to pay off.
    //
    if ((length >= 128) &&
        (((NvUPtr)dst & 3) == 0) && (((NvUPtr)src & 3) == 0))
    {
        dwords = (length / sizeof(NvU32));
        bytes = (length % sizeof(NvU32));

        while (dwords != 0)
        {
            *(NvU32 *)dst = *(const NvU32 *)src;
            dst += sizeof(NvU32);
            src += sizeof(NvU32);
            dwords--;
        }
    }

    while (bytes != 0)
    {
        *dst = *src;
        dst++;
        src++;
        bytes--;
    }

    return ret;
}

void *NV_API_CALL os_mem_copy(
    void *dst,
    const void *src,
    NvU32 length
)
{
#if defined(NVCPU_AARCH64)
    /*
     * TODO: Remove once memset/memcpy restructure is complete
     *
     * When performing memcpy for memory mapped as device, memcpy_[to/from]io
     * must be used. WAR to check the source and destination to determine the
     * correct memcpy_io to use.
     *
     * This WAR is limited to just aarch64 for now because the address range used
     * to map ioremap and vmalloc is different on ppc64le, and is_vmalloc_addr()
     * does not correctly handle this. is_ioremap_addr() is needed instead. This
     * will have to be addressed when reorganizing RM to use the new memset model.
     */
    if (is_vmalloc_addr(dst) && !is_vmalloc_addr(src))
    {
        memcpy_toio(dst, src, length);
        return dst;
    }
    else if (!is_vmalloc_addr(dst) && is_vmalloc_addr(src))
    {
        memcpy_fromio(dst, src, length);
        return dst;
    }
    else if (is_vmalloc_addr(dst) && is_vmalloc_addr(src))
    {
        return os_mem_copy_custom(dst, src, length);
    }
    else
#endif
    {
#if defined(CONFIG_CC_OPTIMIZE_FOR_SIZE)
        /*
         * When the kernel is configured with CC_OPTIMIZE_FOR_SIZE=y, Kbuild uses
         * -Os universally. With -Os, GCC will aggressively inline builtins, even
         * if -fno-builtin is specified, including memcpy with a tiny byte-copy
         * loop on x86 (rep movsb). This is horrible for performance - a strict
         * dword copy is much faster - so when we detect this case, just provide
         * our own implementation.
         */
        return os_mem_copy_custom(dst, src, length);
#else
        /*
         * Generally speaking, the kernel-provided memcpy will be the fastest
         * (optimized much better for the target architecture than the above
         * loop), so we want to use that whenever we can get to it.
         */
        return memcpy(dst, src, length);
#endif
    }
}

NV_STATUS NV_API_CALL os_memcpy_from_user(
    void *to,
    const void *from,
    NvU32 n
)
{
    return (NV_COPY_FROM_USER(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
}

NV_STATUS NV_API_CALL os_memcpy_to_user(
    void *to,
    const void *from,
    NvU32 n
)
{
    return (NV_COPY_TO_USER(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
}

void* NV_API_CALL os_mem_set(
    void *dst,
    NvU8 c,
    NvU32 length
)
{
#if defined(NVCPU_AARCH64)
    /*
     * TODO: Remove once memset/memcpy restructure is complete
     *
     * WAR to check the destination to determine if the memory is of type Device
     * or Normal, and use the correct memset.
     *
     * This WAR is limited to just aarch64 for now because the address range used
     * to map ioremap and vmalloc is different on ppc64le, and is_vmalloc_addr()
     * does not correctly handle this. is_ioremap_addr() is needed instead. This
     * will have to be addressed when reorganizing RM to use the new memset model.
     */
    if (is_vmalloc_addr(dst))
    {
        memset_io(dst, (int)c, length);
        return dst;
    }
    else
#endif
        return memset(dst, (int)c, length);
}

NvS32 NV_API_CALL os_mem_cmp(
    const NvU8 *buf0,
    const NvU8 *buf1,
    NvU32 length
)
{
    return memcmp(buf0, buf1, length);
}


/*
 * Operating System Memory Functions
 *
 * There are 2 interesting aspects of resource manager memory allocations
 * that need special consideration on Linux:
 *
 * 1. They are typically very large, (e.g. single allocations of 164KB)
 *
 * 2. The resource manager assumes that it can safely allocate memory in
 *    interrupt handlers.
 *
 * The first requires that we call vmalloc, the second kmalloc. We decide
 * which one to use at run time, based on the size of the request and the
 * context. Allocations larger than 128KB require vmalloc; in the context
 * of an ISR they fail.
 */

#if defined(NV_VGX_HYPER)
/*
 * Citrix Hypervisor-8.0 Dom0 sysmem ends up getting fragmented, because
 * of which high-order kmalloc allocations fail. We try to avoid that by
 * requesting allocations no larger than 8K.
 *
 * KVM is also heavily affected by low-memory-pressure situations,
 * particularly if hugetlbfs hugepages are being used; hence the 8K limit
 * applies there too.
 */
#define KMALLOC_LIMIT 8192
#else
#define KMALLOC_LIMIT 131072
#endif

#define VMALLOC_ALLOCATION_SIZE_FLAG (1 << 0)
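//
// The flag above is ORed into the allocation size recorded by the memory
// tracking layer: bit 0 tags a vmalloc-backed allocation, so that
// os_free_mem() can route the buffer to nv_vfree() rather than NV_KFREE().
// (This relies on the recorded sizes always being even.)
//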

NV_STATUS NV_API_CALL os_alloc_mem(
    void **address,
    NvU64 size
)
{
    NvU64 original_size = size;
    unsigned long alloc_size;

    if (address == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    *address = NULL;
    NV_MEM_TRACKING_PAD_SIZE(size);

    // check for integer overflow on size
    if (size < original_size)
        return NV_ERR_INVALID_ARGUMENT;

    //
    // On 32-bit x86, NV_KMALLOC and nv_vmalloc take a 4-byte size argument.
    // The check below is required to avoid truncation and a wrongly sized
    // allocation.
    //
    alloc_size = size;

    if (alloc_size != size)
        return NV_ERR_INVALID_PARAMETER;

    if (!NV_MAY_SLEEP())
    {
        if (alloc_size <= KMALLOC_LIMIT)
            NV_KMALLOC_ATOMIC(*address, alloc_size);
    }
    else
    {
        if (alloc_size <= KMALLOC_LIMIT)
        {
            NV_KMALLOC_NO_OOM(*address, alloc_size);
        }
        if (*address == NULL)
        {
            *address = nv_vmalloc(alloc_size);
            alloc_size |= VMALLOC_ALLOCATION_SIZE_FLAG;
        }
    }

    NV_MEM_TRACKING_HIDE_SIZE(address, alloc_size);

    return ((*address != NULL) ? NV_OK : NV_ERR_NO_MEMORY);
}

void NV_API_CALL os_free_mem(void *address)
{
    NvU64 size;

    NV_MEM_TRACKING_RETRIEVE_SIZE(address, size);

    if (size & VMALLOC_ALLOCATION_SIZE_FLAG)
    {
        size &= ~VMALLOC_ALLOCATION_SIZE_FLAG;
        nv_vfree(address, size);
    }
    else
        NV_KFREE(address, size);
}


/*****************************************************************************
*
*  Name: osGetCurrentTime
*
*****************************************************************************/

NV_STATUS NV_API_CALL os_get_current_time(
    NvU32 *seconds,
    NvU32 *useconds
)
{
    struct timespec64 tm;

    ktime_get_real_ts64(&tm);

    *seconds = tm.tv_sec;
    *useconds = tm.tv_nsec / NSEC_PER_USEC;

    return NV_OK;
}

//
// Get the High resolution tick count of the system uptime
//
NvU64 NV_API_CALL os_get_current_tick_hr(void)
{
    struct timespec64 tm;
    ktime_get_raw_ts64(&tm);
    return (NvU64) timespec64_to_ns(&tm);
}

#if BITS_PER_LONG >= 64

NvU64 NV_API_CALL os_get_current_tick(void)
{
#if defined(NV_JIFFIES_TO_TIMESPEC_PRESENT)
    struct timespec ts;
    jiffies_to_timespec(jiffies, &ts);
    return (NvU64) timespec_to_ns(&ts);
#else
    struct timespec64 ts;
    jiffies_to_timespec64(jiffies, &ts);
    return (NvU64) timespec64_to_ns(&ts);
#endif
}

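//
// The tick resolution is one jiffy, expressed in nanoseconds. For example,
// with HZ=250, jiffies_to_usecs(1) is 4000, so this returns 4,000,000 ns.
//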
NvU64 NV_API_CALL os_get_tick_resolution(void)
{
    return (NvU64)jiffies_to_usecs(1) * NSEC_PER_USEC;
}

#else

NvU64 NV_API_CALL os_get_current_tick(void)
{
    /*
     * 'jiffies' overflows regularly on 32-bit builds (unsigned long is 4 bytes
     * instead of 8 bytes), so it's unwise to build a tick counter on it, since
     * the rest of the Resman assumes the 'tick' returned from this function is
     * monotonically increasing and never overflows.
     *
     * Instead, use the previous implementation that we've lived with since the
     * beginning, which uses system clock time to calculate the tick. This is
     * subject to problems if the system clock time changes dramatically
     * (more than a second or so) while the Resman is actively tracking a
     * timeout.
     */
    NvU32 seconds, useconds;

    (void) os_get_current_time(&seconds, &useconds);

    return ((NvU64)seconds * NSEC_PER_SEC +
            (NvU64)useconds * NSEC_PER_USEC);
}

NvU64 NV_API_CALL os_get_tick_resolution(void)
{
    /*
     * os_get_current_tick() uses os_get_current_time(), which has
     * microsecond resolution.
     */
    return 1000ULL;
}

#endif

//---------------------------------------------------------------------------
//
//  Misc services.
//
//---------------------------------------------------------------------------

NV_STATUS NV_API_CALL os_delay_us(NvU32 MicroSeconds)
{
    return nv_sleep_us(MicroSeconds);
}

NV_STATUS NV_API_CALL os_delay(NvU32 MilliSeconds)
{
    return nv_sleep_ms(MilliSeconds);
}

NvU64 NV_API_CALL os_get_cpu_frequency(void)
{
    NvU64 cpu_hz = 0;
#if defined(CONFIG_CPU_FREQ)
    cpu_hz = (cpufreq_get(0) * 1000);
#elif defined(NVCPU_X86_64)
    NvU64 tsc[2];

    tsc[0] = nv_rdtsc();
    mdelay(250);
    tsc[1] = nv_rdtsc();

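    // The two TSC samples are 250 ms apart, so multiply the delta by 4
    // to scale it to cycles per second.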
    cpu_hz = ((tsc[1] - tsc[0]) * 4);
#endif
    return cpu_hz;
}

NvU32 NV_API_CALL os_get_current_process(void)
{
    return NV_GET_CURRENT_PROCESS();
}

void NV_API_CALL os_get_current_process_name(char *buf, NvU32 len)
{
    task_lock(current);
    strncpy(buf, current->comm, len - 1);
    buf[len - 1] = '\0';
    task_unlock(current);
}

NV_STATUS NV_API_CALL os_get_current_thread(NvU64 *threadId)
{
    if (in_interrupt())
        *threadId = 0;
    else
        *threadId = (NvU64) current->pid;

    return NV_OK;
}

/*******************************************************************************/
/*                                                                             */
/* Debug and logging utilities follow                                          */
/*                                                                             */
/*******************************************************************************/

// The current debug display level (default to maximum debug level)
NvU32 cur_debuglevel = 0xffffffff;

/*
 * The binary core of RM (nv-kernel.o) calls both out_string, and nv_printf.
 */
inline void NV_API_CALL out_string(const char *str)
{
    printk("%s", str);
}

/*
 * nv_printf() prints to the kernel log for the driver.
 * Returns the number of characters written.
 */
int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...)
{
    va_list arglist;
    int chars_written = 0;

    if (debuglevel >= ((cur_debuglevel >> 4) & 0x3))
    {
        size_t length;
        unsigned long flags;

        // When printk is called to extend the output of the previous line
        // (i.e. when the previous line did not end in \n), the printk call
        // must contain KERN_CONT. Older kernels still print the line
        // correctly, but KERN_CONT was technically always required.

        // This means that every call to printk() needs to have a KERN_xxx
        // prefix. The only way to get this is to rebuild the format string
        // into a new buffer, with a KERN_xxx prefix prepended.

        // Unfortunately, we can't guarantee that two calls to nv_printf()
        // won't be interrupted by a printk from another driver. So to be
        // safe, we always prepend KERN_CONT. It's still technically wrong,
        // but it works.

        // The long-term fix is to modify all NV_PRINTF-ish calls so that the
        // string always contains only one \n (at the end) and NV_PRINTF_EX
        // is deleted. But that is unlikely to ever happen.

        length = strlen(printf_format);
        if (length < 1)
            return 0;

        NV_SPIN_LOCK_IRQSAVE(&nv_error_string_lock, flags);

        // KERN_CONT changed in the 3.6 kernel, so we can't assume its
        // composition or size.
        memcpy(nv_error_string, KERN_CONT, sizeof(KERN_CONT) - 1);
        memcpy(nv_error_string + sizeof(KERN_CONT) - 1, printf_format, length + 1);

        va_start(arglist, printf_format);
        chars_written = vprintk(nv_error_string, arglist);
        va_end(arglist);

        NV_SPIN_UNLOCK_IRQRESTORE(&nv_error_string_lock, flags);
    }

    return chars_written;
}

NvS32 NV_API_CALL os_snprintf(char *buf, NvU32 size, const char *fmt, ...)
{
    va_list arglist;
    int chars_written;

    va_start(arglist, fmt);
    chars_written = vsnprintf(buf, size, fmt, arglist);
    va_end(arglist);

    return chars_written;
}

NvS32 NV_API_CALL os_vsnprintf(char *buf, NvU32 size, const char *fmt, va_list arglist)
{
    return vsnprintf(buf, size, fmt, arglist);
}

void NV_API_CALL os_log_error(const char *fmt, va_list ap)
{
    unsigned long flags;

    NV_SPIN_LOCK_IRQSAVE(&nv_error_string_lock, flags);

    vsnprintf(nv_error_string, MAX_ERROR_STRING, fmt, ap);
    nv_error_string[MAX_ERROR_STRING - 1] = 0;
    printk(KERN_ERR "%s", nv_error_string);

    NV_SPIN_UNLOCK_IRQRESTORE(&nv_error_string_lock, flags);
}

void NV_API_CALL os_io_write_byte(
    NvU32 address,
    NvU8 value
)
{
    outb(value, address);
}

void NV_API_CALL os_io_write_word(
    NvU32 address,
    NvU16 value
)
{
    outw(value, address);
}

void NV_API_CALL os_io_write_dword(
    NvU32 address,
    NvU32 value
)
{
    outl(value, address);
}

NvU8 NV_API_CALL os_io_read_byte(
    NvU32 address
)
{
    return inb(address);
}

NvU16 NV_API_CALL os_io_read_word(
    NvU32 address
)
{
    return inw(address);
}

NvU32 NV_API_CALL os_io_read_dword(
    NvU32 address
)
{
    return inl(address);
}


static NvBool NV_API_CALL xen_support_fully_virtualized_kernel(void)
{
#if defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
    return (os_is_vgx_hyper());
#endif
    return NV_FALSE;
}

void* NV_API_CALL os_map_kernel_space(
    NvU64 start,
    NvU64 size_bytes,
    NvU32 mode
)
{
    void *vaddr;

    if (!xen_support_fully_virtualized_kernel() && start == 0)
    {
        if (mode != NV_MEMORY_CACHED)
        {
            nv_printf(NV_DBG_ERRORS,
                "NVRM: os_map_kernel_space: won't map address 0x%0llx UC!\n", start);
            return NULL;
        }
        else
            return (void *)PAGE_OFFSET;
    }

    if (!NV_MAY_SLEEP())
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: os_map_kernel_space: can't map 0x%0llx, invalid context!\n", start);
        os_dbg_breakpoint();
        return NULL;
    }

    switch (mode)
    {
        case NV_MEMORY_CACHED:
            vaddr = nv_ioremap_cache(start, size_bytes);
            break;
        case NV_MEMORY_WRITECOMBINED:
            vaddr = rm_disable_iomap_wc() ?
                    nv_ioremap_nocache(start, size_bytes) :
                    nv_ioremap_wc(start, size_bytes);
            break;
        case NV_MEMORY_UNCACHED:
        case NV_MEMORY_DEFAULT:
            vaddr = nv_ioremap_nocache(start, size_bytes);
            break;
        default:
            nv_printf(NV_DBG_ERRORS,
                "NVRM: os_map_kernel_space: unsupported mode!\n");
            return NULL;
    }

    return vaddr;
}

void NV_API_CALL os_unmap_kernel_space(
    void *addr,
    NvU64 size_bytes
)
{
    if (addr == (void *)PAGE_OFFSET)
        return;

    nv_iounmap(addr, size_bytes);
}

#if NVCPU_IS_AARCH64

static inline void nv_flush_cache_cpu(void *info)
{
    if (!nvos_is_chipset_io_coherent())
    {
#if defined(NV_FLUSH_CACHE_ALL_PRESENT)
        flush_cache_all();
#else
        WARN_ONCE(0, "kernel does not provide flush_cache_all()\n");
#endif
    }
}

// flush the cache of all cpus
NV_STATUS NV_API_CALL os_flush_cpu_cache_all(void)
{
    on_each_cpu(nv_flush_cache_cpu, NULL, 1);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_flush_user_cache(void)
{
    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    //
    // The Linux kernel does not export an interface for flushing a range,
    // although it is possible. For now, just flush the entire cache to be
    // safe.
    //
    on_each_cpu(nv_flush_cache_cpu, NULL, 1);
    return NV_OK;
}

#else // NVCPU_IS_AARCH64

NV_STATUS NV_API_CALL os_flush_cpu_cache_all(void)
{
    return NV_ERR_NOT_SUPPORTED;
}

NV_STATUS NV_API_CALL os_flush_user_cache(void)
{
    return NV_ERR_NOT_SUPPORTED;
}

#endif

void NV_API_CALL os_flush_cpu_write_combine_buffer(void)
{
#if defined(NVCPU_X86_64)
    asm volatile("sfence" ::: "memory");
#elif defined(NVCPU_PPC64LE)
    __asm__ __volatile__ ("sync" : : : "memory");
#elif defined(NVCPU_AARCH64)
    asm volatile("dsb st" : : : "memory");
#else
    mb();
#endif
}

// override initial debug level from registry
void NV_API_CALL os_dbg_init(void)
{
    NvU32 new_debuglevel;
    nvidia_stack_t *sp = NULL;

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return;
    }

    if (NV_OK == rm_read_registry_dword(sp, NULL,
                                        "ResmanDebugLevel",
                                        &new_debuglevel))
    {
        if (new_debuglevel != (NvU32)~0)
            cur_debuglevel = new_debuglevel;
    }

    nv_kmem_cache_free_stack(sp);
}

void NV_API_CALL os_dbg_set_level(NvU32 new_debuglevel)
{
    nv_printf(NV_DBG_SETUP, "NVRM: Changing debuglevel from 0x%x to 0x%x\n",
        cur_debuglevel, new_debuglevel);
    cur_debuglevel = new_debuglevel;
}

NvU64 NV_API_CALL os_get_max_user_va(void)
{
    return TASK_SIZE;
}

NV_STATUS NV_API_CALL os_schedule(void)
{
    if (NV_MAY_SLEEP())
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
        return NV_OK;
    }
    else
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: os_schedule: Attempted to yield"
                                 " the CPU while in atomic or interrupt"
                                 " context\n");
        return NV_ERR_ILLEGAL_ACTION;
    }
}

typedef struct {
    nv_kthread_q_item_t item;
    void *data;
} os_queue_data_t;
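//
// Wrapper passed to the kthread queue: os_execute_work_item() below takes
// ownership of the os_queue_data_t, freeing it before the RM callback runs
// on a freshly allocated kernel stack.
//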

static void os_execute_work_item(void *_oqd)
{
    os_queue_data_t *oqd = _oqd;
    nvidia_stack_t *sp = NULL;
    void *data = oqd->data;

    NV_KFREE(oqd, sizeof(os_queue_data_t));

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return;
    }

    rm_execute_work_item(sp, data);

    nv_kmem_cache_free_stack(sp);
}

NV_STATUS NV_API_CALL os_queue_work_item(struct os_work_queue *queue, void *data)
{
    os_queue_data_t *oqd;
    nv_kthread_q_t *kthread;

    /* Use the global queue unless a valid queue was provided */
    kthread = queue ? &queue->nvk : &nv_kthread_q;

    /* Make sure the kthread is active */
    if (unlikely(!kthread->q_kthread)) {
        nv_printf(NV_DBG_ERRORS, "NVRM: queue is not enabled\n");
        return NV_ERR_NOT_READY;
    }

    /* Allocate atomically just in case we're called in atomic context. */
    NV_KMALLOC_ATOMIC(oqd, sizeof(os_queue_data_t));
    if (!oqd)
        return NV_ERR_NO_MEMORY;

    nv_kthread_q_item_init(&oqd->item, os_execute_work_item, oqd);
    oqd->data = data;

    nv_kthread_q_schedule_q_item(kthread, &oqd->item);

    return NV_OK;
}

NV_STATUS NV_API_CALL os_flush_work_queue(struct os_work_queue *queue)
{
    nv_kthread_q_t *kthread;

    /* Use the global queue unless a valid queue was provided */
    kthread = queue ? &queue->nvk : &nv_kthread_q;

    if (NV_MAY_SLEEP())
    {
        if (kthread->q_kthread)
            nv_kthread_q_flush(kthread);

        return NV_OK;
    }
    else
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: os_flush_work_queue: attempted to execute passive "
            "work from an atomic or interrupt context.\n");
        return NV_ERR_ILLEGAL_ACTION;
    }
}

extern NvU32 NVreg_EnableDbgBreakpoint;

void NV_API_CALL os_dbg_breakpoint(void)
{
    if (NVreg_EnableDbgBreakpoint == 0)
    {
        return;
    }

#if defined(CONFIG_X86_REMOTE_DEBUG) || defined(CONFIG_KGDB) || defined(CONFIG_XMON)
#if defined(NVCPU_X86_64)
    __asm__ __volatile__ ("int $3");
#elif defined(NVCPU_ARM)
    __asm__ __volatile__ (".word %c0" :: "i" (KGDB_COMPILED_BREAK));
#elif defined(NVCPU_AARCH64)
# warning "Need to implement os_dbg_breakpoint() for aarch64"
#elif defined(NVCPU_PPC64LE)
    __asm__ __volatile__ ("trap");
#endif // NVCPU_*
#elif defined(CONFIG_KDB)
    KDB_ENTER();
#endif // CONFIG_X86_REMOTE_DEBUG || CONFIG_KGDB || CONFIG_XMON
}

NvU32 NV_API_CALL os_get_cpu_number(void)
{
    NvU32 cpu_id = get_cpu();
    put_cpu();
    return cpu_id;
}

NvU32 NV_API_CALL os_get_cpu_count(void)
{
    return NV_NUM_CPUS();
}

NvBool NV_API_CALL os_pat_supported(void)
{
    return (nv_pat_mode != NV_PAT_MODE_DISABLED);
}

NvBool NV_API_CALL os_is_efi_enabled(void)
{
    return efi_enabled(EFI_BOOT);
}

void NV_API_CALL os_dump_stack(void)
{
    dump_stack();
}

typedef struct os_spinlock_s
{
    nv_spinlock_t lock;
    unsigned long eflags;
} os_spinlock_t;
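//
// The IRQ flags saved at acquire time are stashed in the lock itself:
// os_release_spinlock() below restores os_spinlock->eflags and ignores its
// 'oldIrql' argument, while os_acquire_spinlock() returns only the
// architecture's interrupt-state bit from the saved flags.
//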

NV_STATUS NV_API_CALL os_alloc_spinlock(void **ppSpinlock)
{
    NV_STATUS rmStatus;
    os_spinlock_t *os_spinlock;

    rmStatus = os_alloc_mem(ppSpinlock, sizeof(os_spinlock_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate spinlock!\n");
        return rmStatus;
    }

    os_spinlock = (os_spinlock_t *)*ppSpinlock;
    NV_SPIN_LOCK_INIT(&os_spinlock->lock);
    os_spinlock->eflags = 0;
    return NV_OK;
}

void NV_API_CALL os_free_spinlock(void *pSpinlock)
{
    os_free_mem(pSpinlock);
}

NvU64 NV_API_CALL os_acquire_spinlock(void *pSpinlock)
{
    os_spinlock_t *os_spinlock = (os_spinlock_t *)pSpinlock;
    unsigned long eflags;

    NV_SPIN_LOCK_IRQSAVE(&os_spinlock->lock, eflags);
    os_spinlock->eflags = eflags;

#if defined(NVCPU_X86_64)
    eflags &= X86_EFLAGS_IF;
#elif defined(NVCPU_AARCH64)
    eflags &= PSR_I_BIT;
#endif
    return eflags;
}

void NV_API_CALL os_release_spinlock(void *pSpinlock, NvU64 oldIrql)
{
    os_spinlock_t *os_spinlock = (os_spinlock_t *)pSpinlock;
    unsigned long eflags;

    eflags = os_spinlock->eflags;
    os_spinlock->eflags = 0;
    NV_SPIN_UNLOCK_IRQRESTORE(&os_spinlock->lock, eflags);
}

#define NV_KERNEL_RELEASE    ((LINUX_VERSION_CODE >> 16) & 0x0ff)
#define NV_KERNEL_VERSION    ((LINUX_VERSION_CODE >> 8) & 0x0ff)
#define NV_KERNEL_SUBVERSION ((LINUX_VERSION_CODE) & 0x0ff)
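//
// LINUX_VERSION_CODE packs the kernel version as KERNEL_VERSION(a,b,c) =
// (a << 16) + (b << 8) + c. For example, kernel 5.15.7 yields 0x050F07:
// release 5, version 15, subversion 7.
//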

NV_STATUS NV_API_CALL os_get_version_info(os_version_info *pOsVersionInfo)
{
    NV_STATUS status = NV_OK;

    pOsVersionInfo->os_major_version = NV_KERNEL_RELEASE;
    pOsVersionInfo->os_minor_version = NV_KERNEL_VERSION;
    pOsVersionInfo->os_build_number = NV_KERNEL_SUBVERSION;

#if defined(UTS_RELEASE)
    pOsVersionInfo->os_build_version_str = UTS_RELEASE;
#endif

#if defined(UTS_VERSION)
    pOsVersionInfo->os_build_date_plus_str = UTS_VERSION;
#endif

    return status;
}

NvBool NV_API_CALL os_is_xen_dom0(void)
{
#if defined(NV_DOM0_KERNEL_PRESENT)
    return NV_TRUE;
#else
    return NV_FALSE;
#endif
}

NvBool NV_API_CALL os_is_vgx_hyper(void)
{
#if defined(NV_VGX_HYPER)
    return NV_TRUE;
#else
    return NV_FALSE;
#endif
}

NV_STATUS NV_API_CALL os_inject_vgx_msi(NvU16 guestID, NvU64 msiAddr, NvU32 msiData)
{
#if defined(NV_VGX_HYPER) && defined(NV_DOM0_KERNEL_PRESENT) && \
    defined(NV_XEN_IOEMU_INJECT_MSI)
    int rc = 0;
    rc = xen_ioemu_inject_msi(guestID, msiAddr, msiData);
    if (rc)
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: %s: can't inject MSI to guest:%d, addr:0x%llx, data:0x%x, err:%d\n",
            __FUNCTION__, guestID, msiAddr, msiData, rc);
        return NV_ERR_OPERATING_SYSTEM;
    }
    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NvBool NV_API_CALL os_is_grid_supported(void)
{
#if defined(NV_GRID_BUILD)
    return NV_TRUE;
#else
    return NV_FALSE;
#endif
}

NvU32 NV_API_CALL os_get_grid_csp_support(void)
{
#if defined(NV_GRID_BUILD_CSP)
    return NV_GRID_BUILD_CSP;
#else
    return 0;
#endif
}

void NV_API_CALL os_bug_check(NvU32 bugCode, const char *bugCodeStr)
{
    // Pass the string as an argument rather than as the format, so that
    // any '%' in bugCodeStr is not interpreted by panic().
    panic("%s", bugCodeStr);
}

NV_STATUS NV_API_CALL os_get_euid(NvU32 *pSecToken)
{
    *pSecToken = NV_CURRENT_EUID();
    return NV_OK;
}

#if defined(NVCPU_X86_64) || defined(NVCPU_AARCH64)

static NvBool os_verify_checksum(const NvU8 *pMappedAddr, NvU32 length)
{
    NvU8 sum = 0;
    NvU32 iter = 0;

    for (iter = 0; iter < length; iter++)
        sum += pMappedAddr[iter];

    return sum == 0;
}

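//
// SMBIOS entry-point layout, as used by the checks below: a 32-bit (2.x)
// entry point starts with the anchor "_SM_" and stores its own length at
// byte offset 5, with an intermediate "_DMI_" anchor at offset 16; a
// 64-bit (3.0) entry point starts with "_SM3_" and stores its length at
// byte offset 6. In both cases the entry point bytes sum to zero.
//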
#define _VERIFY_SMBIOS3(_pMappedAddr)                         \
    _pMappedAddr &&                                           \
    (os_mem_cmp(_pMappedAddr, "_SM3_", 5) == 0 &&             \
     _pMappedAddr[6] < 32 &&                                  \
     _pMappedAddr[6] > 0 &&                                   \
     os_verify_checksum(_pMappedAddr, _pMappedAddr[6]))

#define OS_VERIFY_SMBIOS3(pMappedAddr) _VERIFY_SMBIOS3((pMappedAddr))

#define _VERIFY_SMBIOS(_pMappedAddr)                          \
    _pMappedAddr &&                                           \
    (os_mem_cmp(_pMappedAddr, "_SM_", 4) == 0 &&              \
     _pMappedAddr[5] < 32 &&                                  \
     _pMappedAddr[5] > 0 &&                                   \
     os_verify_checksum(_pMappedAddr, _pMappedAddr[5]) &&     \
     os_mem_cmp((_pMappedAddr + 16), "_DMI_", 5) == 0 &&      \
     os_verify_checksum((_pMappedAddr + 16), 15))

#define OS_VERIFY_SMBIOS(pMappedAddr) _VERIFY_SMBIOS((pMappedAddr))

#define SMBIOS_LEGACY_BASE 0xF0000
#define SMBIOS_LEGACY_SIZE 0x10000

static NV_STATUS os_get_smbios_header_legacy(NvU64 *pSmbsAddr)
{
#if !defined(NVCPU_X86_64)
    return NV_ERR_NOT_SUPPORTED;
#else
    NV_STATUS status = NV_ERR_OPERATING_SYSTEM;
    NvU8 *pMappedAddr = NULL;
    NvU8 *pIterAddr = NULL;

    pMappedAddr = (NvU8*)os_map_kernel_space(SMBIOS_LEGACY_BASE,
                                             SMBIOS_LEGACY_SIZE,
                                             NV_MEMORY_CACHED);
    if (pMappedAddr == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    pIterAddr = pMappedAddr;

    for (; pIterAddr < (pMappedAddr + SMBIOS_LEGACY_SIZE); pIterAddr += 16)
    {
        if (OS_VERIFY_SMBIOS3(pIterAddr))
        {
            *pSmbsAddr = SMBIOS_LEGACY_BASE + (pIterAddr - pMappedAddr);
            status = NV_OK;
            break;
        }

        if (OS_VERIFY_SMBIOS(pIterAddr))
        {
            *pSmbsAddr = SMBIOS_LEGACY_BASE + (pIterAddr - pMappedAddr);
            status = NV_OK;
            break;
        }
    }

    os_unmap_kernel_space(pMappedAddr, SMBIOS_LEGACY_SIZE);

    return status;
#endif
}

// This function is needed only if "efi" is enabled.
#if (defined(NV_LINUX_EFI_H_PRESENT) && defined(CONFIG_EFI))
static NV_STATUS os_verify_smbios_header_uefi(NvU64 smbsAddr)
{
    NV_STATUS status = NV_ERR_OBJECT_NOT_FOUND;
    NvU64 start = 0, offset = 0, size = 32;
    NvU8 *pMappedAddr = NULL, *pBufAddr = NULL;

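    //
    // Round the mapping to page boundaries: e.g. with 4 KiB pages and
    // smbsAddr = 0xF5ABC, start becomes 0xF5000, offset 0xABC, and the
    // 32-byte window rounds up to a one-page (0x1000) mapping.
    //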
    start = smbsAddr;
    offset = (start & ~os_page_mask);
    start &= os_page_mask;
    size = ((size + offset + ~os_page_mask) & os_page_mask);

    pBufAddr = (NvU8*)os_map_kernel_space(start,
                                          size,
                                          NV_MEMORY_CACHED);
    if (pBufAddr == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    pMappedAddr = pBufAddr + offset;

    if (OS_VERIFY_SMBIOS3(pMappedAddr))
    {
        status = NV_OK;
        goto done;
    }

    if (OS_VERIFY_SMBIOS(pMappedAddr))
    {
        status = NV_OK;
    }

done:
    os_unmap_kernel_space(pBufAddr, size);
    return status;
}
#endif

static NV_STATUS os_get_smbios_header_uefi(NvU64 *pSmbsAddr)
{
    NV_STATUS status = NV_ERR_OPERATING_SYSTEM;

    // Make sure that efi.h is present before using "struct efi".
#if (defined(NV_LINUX_EFI_H_PRESENT) && defined(CONFIG_EFI))

    // Make sure that efi.h has SMBIOS3_TABLE_GUID present.
#if defined(SMBIOS3_TABLE_GUID)
    if (efi.smbios3 != EFI_INVALID_TABLE_ADDR)
    {
        status = os_verify_smbios_header_uefi(efi.smbios3);
        if (status == NV_OK)
        {
            *pSmbsAddr = efi.smbios3;
            return NV_OK;
        }
    }
#endif

    if (efi.smbios != EFI_INVALID_TABLE_ADDR)
    {
        status = os_verify_smbios_header_uefi(efi.smbios);
        if (status == NV_OK)
        {
            *pSmbsAddr = efi.smbios;
            return NV_OK;
        }
    }
#endif

    return status;
}

#endif // defined(NVCPU_X86_64) || defined(NVCPU_AARCH64)

// The function locates the SMBIOS entry point.
NV_STATUS NV_API_CALL os_get_smbios_header(NvU64 *pSmbsAddr)
{

#if !defined(NVCPU_X86_64) && !defined(NVCPU_AARCH64)
    return NV_ERR_NOT_SUPPORTED;
#else
    NV_STATUS status = NV_OK;

    if (os_is_efi_enabled())
    {
        status = os_get_smbios_header_uefi(pSmbsAddr);
    }
    else
    {
        status = os_get_smbios_header_legacy(pSmbsAddr);
    }

    return status;
#endif
}

NV_STATUS NV_API_CALL os_get_acpi_rsdp_from_uefi
(
    NvU32 *pRsdpAddr
)
{
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;

    if (pRsdpAddr == NULL)
    {
        return NV_ERR_INVALID_STATE;
    }

    *pRsdpAddr = 0;

    // Make sure that efi.h is present before using "struct efi".
#if (defined(NV_LINUX_EFI_H_PRESENT) && defined(CONFIG_EFI))

    if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
    {
        *pRsdpAddr = efi.acpi20;
        status = NV_OK;
    }
    else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
    {
        *pRsdpAddr = efi.acpi;
        status = NV_OK;
    }
    else
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: RSDP Not found!\n");
        status = NV_ERR_OPERATING_SYSTEM;
    }
#endif

    return status;
}

void NV_API_CALL os_add_record_for_crashLog(void *pbuffer, NvU32 size)
{
}

void NV_API_CALL os_delete_record_for_crashLog(void *pbuffer)
{
}

#if !defined(NV_VGPU_KVM_BUILD)
NV_STATUS NV_API_CALL os_call_vgpu_vfio(void *pvgpu_vfio_info, NvU32 cmd_type)
{
    return NV_ERR_NOT_SUPPORTED;
}
#endif

NV_STATUS NV_API_CALL os_alloc_pages_node
(
    NvS32 nid,
    NvU32 size,
    NvU32 flag,
    NvU64 *pAddress
)
{
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;

#if defined(__GFP_THISNODE) && defined(GFP_HIGHUSER_MOVABLE) && \
    defined(__GFP_COMP) && defined(__GFP_NORETRY) && defined(__GFP_NOWARN)
    gfp_t gfp_mask;
    struct page *alloc_addr;
    unsigned int order = get_order(size);

    /*
     * Explanation of flags used:
     *
     * 1. __GFP_THISNODE:       This will make sure the allocation happens
     *                          on the node specified by nid.
     *
     * 2. GFP_HIGHUSER_MOVABLE: This makes allocations from ZONE_MOVABLE.
     *
     * 3. __GFP_COMP:           This will make allocations with compound
     *                          pages, which is needed in order to use the
     *                          vm_insert_page API.
     *
     * 4. __GFP_NORETRY:        Used to avoid the Linux kernel OOM killer.
     *
     * 5. __GFP_NOWARN:         Used to avoid a WARN_ON in the slowpath if
     *                          the requested order is too large (just fail
     *                          instead).
     *
     * 6. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
     *                          This is part of GFP_USER and consequently
     *                          GFP_HIGHUSER_MOVABLE.
     *
     * Some of these flags are relatively more recent, with the last of them
     * (GFP_HIGHUSER_MOVABLE) having been added with this Linux kernel commit:
     *
     * 2007-07-17 769848c03895b63e5662eb7e4ec8c4866f7d0183
     *
     * Assume that this feature will only be used on kernels that support all
     * of the needed GFP flags.
     */

    gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
               __GFP_NORETRY | __GFP_NOWARN;

#if defined(__GFP_RECLAIM)
    if (flag & NV_ALLOC_PAGES_NODE_SKIP_RECLAIM)
    {
        gfp_mask &= ~(__GFP_RECLAIM);
    }
#endif // defined(__GFP_RECLAIM)

    alloc_addr = alloc_pages_node(nid, gfp_mask, order);
    if (alloc_addr == NULL)
    {
        nv_printf(NV_DBG_INFO,
            "NVRM: alloc_pages_node(node = %d, order = %u) failed\n",
            nid, order);
        status = NV_ERR_NO_MEMORY;
    }
    else if (page_to_nid(alloc_addr) != nid)
    {
        //
        // We can hit this case when a Linux kernel bug is not patched.
        // The needed patch is https://patchwork.kernel.org/patch/10427387/
        //
        nv_printf(NV_DBG_ERRORS,
            "NVRM: alloc_pages_node(node = %d, order = %u) wrong node ID.\n",
            nid, order);
        __free_pages(alloc_addr, order);
        status = NV_ERR_NO_MEMORY;
    }
    else
    {
        *pAddress = (NvU64)page_to_phys(alloc_addr);
        status = NV_OK;
    }
#endif // GFP flags

    return status;
}

NV_STATUS NV_API_CALL os_get_page
(
    NvU64 address
)
{
    get_page(NV_GET_PAGE_STRUCT(address));
    return NV_OK;
}

NV_STATUS NV_API_CALL os_put_page
(
    NvU64 address
)
{
    put_page(NV_GET_PAGE_STRUCT(address));
    return NV_OK;
}

NvU32 NV_API_CALL os_get_page_refcount
(
    NvU64 address
)
{
    return NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(address));
}

NvU32 NV_API_CALL os_count_tail_pages
(
    NvU64 address
)
{
    // compound_order() is the log2 of the number of pages in the compound
    // page, so the total page count is 1 << order.
    NvU32 order = compound_order(compound_head(NV_GET_PAGE_STRUCT(address)));

    return 1 << order;
}

void NV_API_CALL os_free_pages_phys
(
    NvU64 address,
    NvU32 size
)
{
    __free_pages(NV_GET_PAGE_STRUCT(address), get_order(size));
}

NV_STATUS NV_API_CALL os_numa_memblock_size
(
    NvU64 *memblock_size
)
{
#if NV_IS_EXPORT_SYMBOL_PRESENT_memory_block_size_bytes
    *memblock_size = memory_block_size_bytes();
    return NV_OK;
#endif
    if (nv_ctl_device.numa_memblock_size == 0)
        return NV_ERR_INVALID_STATE;
    *memblock_size = nv_ctl_device.numa_memblock_size;
    return NV_OK;
}

NV_STATUS NV_API_CALL os_open_temporary_file
(
    void **ppFile
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
#if defined(O_TMPFILE)
    struct file *file;
    const char *default_path = "/tmp";
    const int flags = O_TMPFILE | O_LARGEFILE | O_RDWR;
    const char *path = NVreg_TemporaryFilePath;

    /*
     * The filp_open() call below depends on the current task's fs_struct
     * (current->fs), which may already be NULL if this is called during
     * process teardown.
     */
    if (current->fs == NULL)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    if (!path)
    {
        path = default_path;
    }

    file = filp_open(path, flags, 0);
    if (IS_ERR(file))
    {
        if ((path != default_path) && (PTR_ERR(file) == -ENOENT))
        {
            nv_printf(NV_DBG_ERRORS,
                      "NVRM: The temporary file path specified via the NVreg_TemporaryFilePath\n"
                      "NVRM: module parameter does not exist. Defaulting to /tmp.\n");

            file = filp_open(default_path, flags, 0);
        }
    }

    if (IS_ERR(file))
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    *ppFile = (void *)file;

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

void NV_API_CALL os_close_file
(
    void *pFile
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    filp_close(pFile, NULL);
#endif
}

#define NV_MAX_NUM_FILE_IO_RETRIES 10
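//
// kernel_read()/kernel_write() may transfer fewer bytes than requested.
// The helpers below advance the buffer past any partial transfer and retry
// up to NV_MAX_NUM_FILE_IO_RETRIES times, yielding with cond_resched()
// between attempts.
//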

NV_STATUS NV_API_CALL os_write_file
(
    void *pFile,
    NvU8 *pBuffer,
    NvU64 size,
    NvU64 offset
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    loff_t f_pos = offset;
    ssize_t num_written;
    int num_retries = NV_MAX_NUM_FILE_IO_RETRIES;

retry:
#if defined(NV_KERNEL_WRITE_HAS_POINTER_POS_ARG)
    num_written = kernel_write(pFile, pBuffer, size, &f_pos);
#else
    num_written = kernel_write(pFile, pBuffer, size, f_pos);
#endif
    if (num_written < 0)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }
    else if (num_written < size)
    {
        if (num_written > 0)
        {
            pBuffer += num_written;
            size -= num_written;
        }
        if (--num_retries > 0)
        {
            cond_resched();
            goto retry;
        }
        return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL os_read_file
(
    void *pFile,
    NvU8 *pBuffer,
    NvU64 size,
    NvU64 offset
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    loff_t f_pos = offset;
    ssize_t num_read;
    int num_retries = NV_MAX_NUM_FILE_IO_RETRIES;

retry:
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
    num_read = kernel_read(pFile, pBuffer, size, &f_pos);
#else
    num_read = kernel_read(pFile, f_pos, pBuffer, size);
#endif
    if (num_read < 0)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }
    else if (num_read < size)
    {
        if (num_read > 0)
        {
            pBuffer += num_read;
            size -= num_read;
        }
        if (--num_retries > 0)
        {
            cond_resched();
            goto retry;
        }
        return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL os_open_readonly_file
(
    const char *filename,
    void **ppFile
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    struct file *file;

    /*
     * The filp_open() call below depends on the current task's fs_struct
     * (current->fs), which may already be NULL if this is called during
     * process teardown.
     */
    if (current->fs == NULL)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    file = filp_open(filename, O_RDONLY, 0);
    if (IS_ERR(file))
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    *ppFile = (void *)file;

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL os_open_and_read_file
(
    const char *filename,
    NvU8 *buf,
    NvU64 count
)
{
    void *fileHandle;
    NV_STATUS status;

    status = os_open_readonly_file(filename, &fileHandle);
    if (status != NV_OK)
    {
        return status;
    }

    status = os_read_file(fileHandle, buf, count, 0);

    os_close_file(fileHandle);

    return status;
}

NvBool NV_API_CALL os_is_nvswitch_present(void)
{
    struct pci_device_id nvswitch_pci_table[] = {
        {
            PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID),
            .class      = PCI_CLASS_BRIDGE_OTHER << 8,
            .class_mask = PCI_ANY_ID
        },
        {0}
    };

    return !!pci_dev_present(nvswitch_pci_table);
}

/*
 * This function may sleep (interruptible).
 */
NV_STATUS NV_API_CALL os_get_random_bytes
(
    NvU8 *bytes,
    NvU16 numBytes
)
{
#if defined(NV_WAIT_FOR_RANDOM_BYTES_PRESENT)
    if (wait_for_random_bytes() < 0)
        return NV_ERR_NOT_READY;
#endif

    get_random_bytes(bytes, numBytes);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_alloc_wait_queue
(
    os_wait_queue **wq
)
{
    NV_KMALLOC(*wq, sizeof(os_wait_queue));
    if (*wq == NULL)
        return NV_ERR_NO_MEMORY;

    init_completion(&(*wq)->q);

    return NV_OK;
}

void NV_API_CALL os_free_wait_queue
(
    os_wait_queue *wq
)
{
    NV_KFREE(wq, sizeof(os_wait_queue));
}

void NV_API_CALL os_wait_uninterruptible
(
    os_wait_queue *wq
)
{
    wait_for_completion(&wq->q);
}

void NV_API_CALL os_wait_interruptible
(
    os_wait_queue *wq
)
{
    wait_for_completion_interruptible(&wq->q);
}

void NV_API_CALL os_wake_up
(
    os_wait_queue *wq
)
{
    complete_all(&wq->q);
}

nv_cap_t* NV_API_CALL os_nv_cap_init
(
    const char *path
)
{
    return nv_cap_init(path);
}

nv_cap_t* NV_API_CALL os_nv_cap_create_dir_entry
(
    nv_cap_t *parent_cap,
    const char *name,
    int mode
)
{
    return nv_cap_create_dir_entry(parent_cap, name, mode);
}

nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry
(
    nv_cap_t *parent_cap,
    const char *name,
    int mode
)
{
    return nv_cap_create_file_entry(parent_cap, name, mode);
}

void NV_API_CALL os_nv_cap_destroy_entry
(
    nv_cap_t *cap
)
{
    nv_cap_destroy_entry(cap);
}

int NV_API_CALL os_nv_cap_validate_and_dup_fd
(
    const nv_cap_t *cap,
    int fd
)
{
    return nv_cap_validate_and_dup_fd(cap, fd);
}

void NV_API_CALL os_nv_cap_close_fd
(
    int fd
)
{
    nv_cap_close_fd(fd);
}

NvS32 NV_API_CALL os_imex_channel_count
(
    void
)
{
    return nv_caps_imex_channel_count();
}

NvS32 NV_API_CALL os_imex_channel_get
(
    NvU64 descriptor
)
{
    return nv_caps_imex_channel_get((int)descriptor);
}

2128 /*
2129 * Reads the total memory and free memory of a NUMA node from the kernel.
2130 */
os_get_numa_node_memory_usage(NvS32 node_id,NvU64 * free_memory_bytes,NvU64 * total_memory_bytes)2131 NV_STATUS NV_API_CALL os_get_numa_node_memory_usage
2132 (
2133 NvS32 node_id,
2134 NvU64 *free_memory_bytes,
2135 NvU64 *total_memory_bytes
2136 )
2137 {
2138 struct pglist_data *pgdat;
2139 struct zone *zone;
2140 NvU32 zone_id;
2141
2142 if (node_id >= MAX_NUMNODES)
2143 {
2144 nv_printf(NV_DBG_ERRORS, "Invalid NUMA node ID\n");
2145 return NV_ERR_INVALID_ARGUMENT;
2146 }
2147
2148 pgdat = NODE_DATA(node_id);
2149
2150 *free_memory_bytes = 0;
2151 *total_memory_bytes = 0;
2152
2153 for (zone_id = 0; zone_id < MAX_NR_ZONES; zone_id++)
2154 {
2155 zone = &(pgdat->node_zones[zone_id]);
2156 if (!populated_zone(zone))
2157 continue;
2158 *free_memory_bytes += (zone_page_state_snapshot(zone, NR_FREE_PAGES) * PAGE_SIZE);
2159 *total_memory_bytes += (zone->present_pages * PAGE_SIZE);
2160 }
2161
2162 return NV_OK;
2163 }
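
/*
 * Illustrative usage sketch (an assumption, not code from this file).
 * zone_page_state_snapshot() folds the per-CPU counter deltas into
 * NR_FREE_PAGES for an accurate reading, and present_pages counts the pages
 * a zone spans minus holes, so free <= total always holds:
 *
 *     NvU64 free_bytes, total_bytes;
 *     if (os_get_numa_node_memory_usage(0, &free_bytes, &total_bytes) == NV_OK)
 *     {
 *         nv_printf(NV_DBG_SETUP, "NVRM: node 0: 0x%llx of 0x%llx bytes free\n",
 *                   free_bytes, total_bytes);
 *     }
 */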

typedef struct os_numa_gpu_mem_hotplug_notifier_s
{
    NvU64 start_pa;
    NvU64 size;
    nv_pci_info_t pci_info;
    struct notifier_block memory_notifier;
} os_numa_gpu_mem_hotplug_notifier_t;

static int os_numa_verify_gpu_memory_zone(struct notifier_block *nb,
                                          unsigned long action, void *data)
{
    os_numa_gpu_mem_hotplug_notifier_t *notifier = container_of(nb,
        os_numa_gpu_mem_hotplug_notifier_t,
        memory_notifier);
    struct memory_notify *mhp = data;
    NvU64 start_pa = PFN_PHYS(mhp->start_pfn);
    NvU64 size = PFN_PHYS(mhp->nr_pages);

    if (action == MEM_GOING_ONLINE)
    {
        // Check whether the memory being onlined falls in the GPU memory range
        if ((start_pa >= notifier->start_pa) &&
            (start_pa + size) <= (notifier->start_pa + notifier->size))
        {
            /*
             * Verify that the GPU memory NUMA node is being onlined into
             * ZONE_MOVABLE. With an incorrect auto-online setting, the memory
             * could land in a zone where the kernel makes its own allocations,
             * leaving the GPU memory impossible to hot-unplug without a
             * system reboot.
             */
            if (page_zonenum((pfn_to_page(mhp->start_pfn))) != ZONE_MOVABLE)
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: Failing GPU memory onlining as the onlining zone "
                          "is not movable. pa: 0x%llx size: 0x%llx\n"
                          "NVRM: The NVIDIA GPU %04x:%02x:%02x.%x installed in the system\n"
                          "NVRM: requires auto onlining mode online_movable enabled in\n"
                          "NVRM: /sys/devices/system/memory/auto_online_blocks\n",
                          start_pa, size, notifier->pci_info.domain, notifier->pci_info.bus,
                          notifier->pci_info.slot, notifier->pci_info.function);
                return NOTIFY_BAD;
            }
        }
    }
    return NOTIFY_OK;
}

#define ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS 4
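
/*
 * Worked example (illustrative numbers): with size = 16 GiB and a 128 MiB
 * memblock size, the add and remove paths below compute
 *
 *     calculated_segment_size = NV_ALIGN_UP(16 GiB / 4, 128 MiB) = 4 GiB
 *
 * and process the range as four 4 GiB segments with a schedule() between
 * them. When size is not a multiple of the segment size, the final segment
 * is clamped to the bytes remaining.
 */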

NV_STATUS NV_API_CALL os_numa_add_gpu_memory
(
    void *handle,
    NvU64 offset,
    NvU64 size,
    NvU32 *nodeId
)
{
#if defined(NV_ADD_MEMORY_DRIVER_MANAGED_PRESENT)
    int node = 0;
    nv_linux_state_t *nvl = pci_get_drvdata(handle);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
    int ret = 0;
    NvU64 memblock_size;
    NvU64 size_remaining;
    NvU64 calculated_segment_size;
    NvU64 segment_size;
    NvU64 segment_base;
    os_numa_gpu_mem_hotplug_notifier_t notifier =
    {
        .start_pa = base,
        .size = size,
        .pci_info = nv->pci_info,
        .memory_notifier.notifier_call = os_numa_verify_gpu_memory_zone,
    };

    if (nodeId == NULL)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (bitmap_empty(nvl->coherent_link_info.free_node_bitmap, MAX_NUMNODES))
    {
        return NV_ERR_IN_USE;
    }
    node = find_first_bit(nvl->coherent_link_info.free_node_bitmap, MAX_NUMNODES);
    if (node == MAX_NUMNODES)
    {
        return NV_ERR_INVALID_STATE;
    }

    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_ONLINE_IN_PROGRESS);

    ret = register_memory_notifier(&notifier.memory_notifier);
    if (ret)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Memory hotplug notifier registration failed\n");
        goto failed;
    }

    //
    // Adding all memory at once can take a long time. Split up memory into
    // segments with schedule() in between to prevent soft lockups. Memory
    // segments for add_memory_driver_managed() need to be aligned to the
    // memblock size.
    //
    // If there are any issues splitting into segments, then add all memory
    // at once.
    //
    if (os_numa_memblock_size(&memblock_size) == NV_OK)
    {
        calculated_segment_size = NV_ALIGN_UP(size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
    }
    else
    {
        // Don't split into segments; add all memory at once
        calculated_segment_size = size;
    }

    segment_size = calculated_segment_size;
    segment_base = base;
    size_remaining = size;

    while ((size_remaining > 0) &&
           (ret == 0))
    {
        if (segment_size > size_remaining)
        {
            segment_size = size_remaining;
        }

#ifdef NV_ADD_MEMORY_DRIVER_MANAGED_HAS_MHP_FLAGS_ARG
        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)", MHP_NONE);
#else
        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)");
#endif
        nv_printf(NV_DBG_SETUP, "NVRM: add_memory_driver_managed() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
                  ret, segment_base, segment_size);

        segment_base += segment_size;
        size_remaining -= segment_size;

        // Yield the CPU to prevent soft lockups
        schedule();
    }
    unregister_memory_notifier(&notifier.memory_notifier);

    if (ret == 0)
    {
        struct zone *zone = &NODE_DATA(node)->node_zones[ZONE_MOVABLE];
        NvU64 start_pfn = base >> PAGE_SHIFT;
        NvU64 end_pfn = (base + size) >> PAGE_SHIFT;

        /* Verify that the full GPU memory range was onlined */
        if (zone->zone_start_pfn != start_pfn ||
            zone_end_pfn(zone) != end_pfn)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: GPU memory zone movable auto onlining failed!\n");

#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
            // Since zone movable auto onlining failed, the added memory must be removed.
            segment_size = calculated_segment_size;
            segment_base = base;
            size_remaining = size;

            while (size_remaining > 0)
            {
                if (segment_size > size_remaining)
                {
                    segment_size = size_remaining;
                }

#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
                ret = offline_and_remove_memory(node, segment_base, segment_size);
#else
                ret = offline_and_remove_memory(segment_base, segment_size);
#endif
                nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
                          ret, segment_base, segment_size);

                segment_base += segment_size;
                size_remaining -= segment_size;

                // Yield the CPU to prevent soft lockups
                schedule();
            }
#endif
            goto failed;
        }

        /*
         * On systems with the cpuset cgroup controller enabled, memory
         * allocation on this just-hotplugged GPU memory node can fail if
         * cpuset_hotplug_work has not been scheduled yet. cpuset_hotplug_work
         * is where current->mems_allowed is updated, in the path
         * cpuset_hotplug_workfn->update_tasks_nodemask. When cpuset is
         * enabled and current->mems_allowed has not been updated, memory
         * allocation with __GFP_THISNODE and this node ID fails. The
         * cpuset_wait_for_hotplug kernel function could be used to wait for
         * the work to finish, but it is not exported. Poll for
         * current->mems_allowed to be updated as a WAR while an upstream
         * kernel fix is being explored. Bug 4385903
         */
        if (!node_isset(node, cpuset_current_mems_allowed))
        {
            unsigned long delay;

            delay = jiffies + (HZ / 10); // 100ms
            while (time_before(jiffies, delay) &&
                   !node_isset(node, cpuset_current_mems_allowed))
            {
                os_schedule();
            }

            if (!node_isset(node, cpuset_current_mems_allowed))
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: Hotplugged GPU memory NUMA node: %d "
                          "not set in current->mems_allowed!\n", node);
            }
        }

        *nodeId = node;
        clear_bit(node, nvl->coherent_link_info.free_node_bitmap);
        NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_ONLINE);
        return NV_OK;
    }
    nv_printf(NV_DBG_ERRORS, "NVRM: Memory add failed. base: 0x%llx size: 0x%llx ret: %d\n",
              base, size, ret);
failed:
    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_ONLINE_FAILED);
    return NV_ERR_OPERATING_SYSTEM;
#endif
    return NV_ERR_NOT_SUPPORTED;
}

typedef struct {
    NvU64 base;
    NvU64 size;
    NvU32 nodeId;
    int ret;
} remove_numa_memory_info_t;

static void offline_numa_memory_callback
(
    void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
    int ret = 0;
    NvU64 memblock_size;
    NvU64 size_remaining;
    NvU64 calculated_segment_size;
    NvU64 segment_size;
    NvU64 segment_base;

    //
    // Removing all memory at once can take a long time. Split up memory into
    // segments with schedule() in between to prevent soft lockups. Memory
    // segments for offline_and_remove_memory() need to be aligned to the
    // memblock size.
    //
    // If there are any issues splitting into segments, then remove all memory
    // at once.
    //
    if (os_numa_memblock_size(&memblock_size) == NV_OK)
    {
        calculated_segment_size = NV_ALIGN_UP(pNumaInfo->size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
    }
    else
    {
        // Don't split into segments; remove all memory at once
        calculated_segment_size = pNumaInfo->size;
    }

    segment_size = calculated_segment_size;
    segment_base = pNumaInfo->base;
    size_remaining = pNumaInfo->size;

    while (size_remaining > 0)
    {
        if (segment_size > size_remaining)
        {
            segment_size = size_remaining;
        }

#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
        ret = offline_and_remove_memory(pNumaInfo->nodeId,
                                        segment_base,
                                        segment_size);
#else
        ret = offline_and_remove_memory(segment_base,
                                        segment_size);
#endif
        nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
                  ret, segment_base, segment_size);
        pNumaInfo->ret |= ret;

        segment_base += segment_size;
        size_remaining -= segment_size;

        // Yield the CPU to prevent soft lockups
        schedule();
    }
#endif
}

NV_STATUS NV_API_CALL os_numa_remove_gpu_memory
(
    void *handle,
    NvU64 offset,
    NvU64 size,
    NvU32 nodeId
)
{
#ifdef NV_ADD_MEMORY_DRIVER_MANAGED_PRESENT
    nv_linux_state_t *nvl = pci_get_drvdata(handle);
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
    remove_numa_memory_info_t numa_info;
    nv_kthread_q_item_t remove_numa_memory_q_item;
    int ret;
#endif

    if (nodeId >= MAX_NUMNODES)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }
    if ((nodeId == NUMA_NO_NODE) || test_bit(nodeId, nvl->coherent_link_info.free_node_bitmap))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS);

#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    numa_info.base = base;
    numa_info.size = size;
    numa_info.nodeId = nodeId;
    numa_info.ret = 0;

    nv_kthread_q_item_init(&remove_numa_memory_q_item,
                           offline_numa_memory_callback,
                           &numa_info);
    nv_kthread_q_schedule_q_item(&nvl->remove_numa_memory_q,
                                 &remove_numa_memory_q_item);
    nv_kthread_q_flush(&nvl->remove_numa_memory_q);

    ret = numa_info.ret;

    if (ret == 0)
    {
        set_bit(nodeId, nvl->coherent_link_info.free_node_bitmap);

        NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_OFFLINE);
        return NV_OK;
    }

    nv_printf(NV_DBG_ERRORS, "NVRM: Memory remove failed. base: 0x%llx size: 0x%llx ret: %d\n",
              base, size, ret);
#endif
    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_OFFLINE_FAILED);
    return NV_ERR_OPERATING_SYSTEM;
#endif
    return NV_ERR_NOT_SUPPORTED;
}
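
/*
 * Design note (an inference from the code above, not an authoritative
 * statement): offline_numa_memory_callback() runs on the per-device kthread
 * queue rather than in the caller's context. Scheduling the work item and
 * immediately flushing the queue keeps the call synchronous for the caller
 * while the long offline_and_remove_memory() loop executes in a dedicated
 * kernel-thread context:
 *
 *     nv_kthread_q_item_init(&item, offline_numa_memory_callback, &numa_info);
 *     nv_kthread_q_schedule_q_item(&nvl->remove_numa_memory_q, &item);
 *     nv_kthread_q_flush(&nvl->remove_numa_memory_q); // waits for the callback
 */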

NV_STATUS NV_API_CALL os_offline_page_at_address
(
    NvU64 address
)
{
#if defined(CONFIG_MEMORY_FAILURE)
    int flags = 0;
    int ret;
    NvU64 pfn;
    struct page *page = NV_GET_PAGE_STRUCT(address);

    if (page == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Failed to get page struct for address: 0x%llx\n",
                  address);
        return NV_ERR_INVALID_ARGUMENT;
    }

    pfn = page_to_pfn(page);

#ifdef NV_MEMORY_FAILURE_MF_SW_SIMULATED_DEFINED
    //
    // Set the MF_SW_SIMULATED flag so the Linux kernel can differentiate this
    // from a HW memory failure. HW memory failures cannot be unset via the
    // unpoison_memory() API.
    //
    // Currently, RM does not use unpoison_memory(), so it makes no difference
    // whether or not MF_SW_SIMULATED is set. Regardless, it is semantically
    // more correct to set MF_SW_SIMULATED.
    //
    flags |= MF_SW_SIMULATED;
#endif

#ifdef NV_MEMORY_FAILURE_HAS_TRAPNO_ARG
    ret = memory_failure(pfn, 0, flags);
#else
    ret = memory_failure(pfn, flags);
#endif

    if (ret != 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: page offlining failed. address: 0x%llx pfn: 0x%llx ret: %d\n",
                  address, pfn, ret);
        return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
#else // !defined(CONFIG_MEMORY_FAILURE)
    nv_printf(NV_DBG_ERRORS, "NVRM: memory_failure() not supported by kernel. page offlining failed. address: 0x%llx\n",
              address);
    return NV_ERR_NOT_SUPPORTED;
#endif
}
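
/*
 * Illustrative usage sketch (an assumption, not code from this file):
 * memory_failure() marks the page backing the given PFN as poisoned, as if a
 * hardware memory error had been reported, so the kernel stops using it:
 *
 *     // Hypothetical retirement of a physical address reported bad by ECC.
 *     if (os_offline_page_at_address(bad_address) != NV_OK)
 *     {
 *         // The page could not be offlined; log and continue.
 *     }
 */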

void* NV_API_CALL os_get_pid_info(void)
{
    return get_task_pid(current, PIDTYPE_PID);
}

void NV_API_CALL os_put_pid_info(void *pid_info)
{
    if (pid_info != NULL)
        put_pid(pid_info);
}

NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid)
{
    if ((pid_info == NULL) || (ns_pid == NULL))
        return NV_ERR_INVALID_ARGUMENT;

    *ns_pid = pid_vnr((struct pid *)pid_info);

    // pid_vnr() returns 0 if the PID is not visible in the current namespace
    if (*ns_pid == 0)
        return NV_ERR_OBJECT_NOT_FOUND;

    return NV_OK;
}
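
/*
 * Illustrative usage sketch (an assumption, not code from this file): the
 * three helpers above form a get/translate/put triple. os_get_pid_info()
 * takes a reference on the current task's struct pid, and os_find_ns_pid()
 * translates it into the caller's PID namespace via pid_vnr():
 *
 *     void *info = os_get_pid_info();
 *     NvU32 ns_pid;
 *     if (os_find_ns_pid(info, &ns_pid) == NV_OK)
 *     {
 *         // ns_pid is the task's PID as seen from the current namespace.
 *     }
 *     os_put_pid_info(info); // drop the reference
 */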