/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define __NO_VERSION__

#include "os-interface.h"
#include "nv-linux.h"
#include "nv-caps-imex.h"

#include "nv-time.h"

#include <linux/mmzone.h>
#include <linux/numa.h>
#include <linux/cpuset.h>

#include <linux/pid.h>
#if defined(CONFIG_LOCKDEP)
#include <linux/lockdep.h>
#endif // CONFIG_LOCKDEP

extern char *NVreg_TemporaryFilePath;

#define MAX_ERROR_STRING 528
static char nv_error_string[MAX_ERROR_STRING];
static NV_DEFINE_SPINLOCK(nv_error_string_lock);

extern nv_linux_state_t nv_ctl_device;

extern nv_kthread_q_t nv_kthread_q;

NvU32 os_page_size  = PAGE_SIZE;
NvU64 os_page_mask  = NV_PAGE_MASK;
NvU8  os_page_shift = PAGE_SHIFT;
NvBool os_cc_enabled = 0;
NvBool os_cc_tdx_enabled = 0;

#if defined(CONFIG_DMA_SHARED_BUFFER)
NvBool os_dma_buf_enabled = NV_TRUE;
#else
NvBool os_dma_buf_enabled = NV_FALSE;
#endif // CONFIG_DMA_SHARED_BUFFER

NvBool os_imex_channel_is_supported = NV_TRUE;

void NV_API_CALL os_disable_console_access(void)
{
    console_lock();
}

void NV_API_CALL os_enable_console_access(void)
{
    console_unlock();
}

typedef struct semaphore os_mutex_t;

//
// os_alloc_mutex - Allocate the RM mutex
//
//  ppMutex - filled in with pointer to opaque structure to mutex data type
//
NV_STATUS NV_API_CALL os_alloc_mutex
(
    void **ppMutex
)
{
    NV_STATUS rmStatus;
    os_mutex_t *os_mutex;

    rmStatus = os_alloc_mem(ppMutex, sizeof(os_mutex_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate mutex!\n");
        return rmStatus;
    }
    os_mutex = (os_mutex_t *)*ppMutex;
    NV_INIT_MUTEX(os_mutex);

    return NV_OK;
}
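
/*
 * Illustrative usage (hypothetical caller, not part of this file): the RM
 * mutex API is used in alloc/acquire/release/free pairs, and acquisition is
 * only valid in contexts that may sleep:
 *
 *     void *mtx = NULL;
 *     if (os_alloc_mutex(&mtx) == NV_OK)
 *     {
 *         if (os_acquire_mutex(mtx) == NV_OK)  // fails if !NV_MAY_SLEEP()
 *         {
 *             // ... critical section ...
 *             os_release_mutex(mtx);
 *         }
 *         os_free_mutex(mtx);
 *     }
 */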

//
// os_free_mutex - Free resources associated with a mutex allocated
//                 via os_alloc_mutex above.
//
//  pMutex - Pointer to opaque structure to mutex data type
//
void NV_API_CALL os_free_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;

    if (os_mutex != NULL)
    {
        os_free_mem(pMutex);
    }
}

//
//  pMutex - Pointer to opaque structure to mutex data type
//

NV_STATUS NV_API_CALL os_acquire_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down(os_mutex);

    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;
    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }

    if (down_trylock(os_mutex))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}

void NV_API_CALL os_release_mutex
(
    void *pMutex
)
{
    os_mutex_t *os_mutex = (os_mutex_t *)pMutex;
    up(os_mutex);
}

typedef struct semaphore os_semaphore_t;

void* NV_API_CALL os_alloc_semaphore
(
    NvU32 initialValue
)
{
    NV_STATUS rmStatus;
    os_semaphore_t *os_sema;

    rmStatus = os_alloc_mem((void *)&os_sema, sizeof(os_semaphore_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate semaphore!\n");
        return NULL;
    }

    sema_init(os_sema, initialValue);

    return (void *)os_sema;
}

void NV_API_CALL os_free_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;

    os_free_mem(os_sema);
}

NV_STATUS NV_API_CALL os_acquire_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down(os_sema);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;
    //
    // NOTE: down_trylock() is safe to call from IRQ context, so we don't
    // need an NV_MAY_SLEEP() check here. We do check it in
    // os_cond_acquire_mutex(), even though it is also calling
    // down_trylock(), since that keeps it in line with the kernel's
    // 'struct mutex' API.
    //
    if (down_trylock(os_sema))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}

NV_STATUS NV_API_CALL os_release_semaphore
(
    void *pSema
)
{
    os_semaphore_t *os_sema = (os_semaphore_t *)pSema;
    up(os_sema);
    return NV_OK;
}
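
/*
 * Illustrative usage (hypothetical contexts, not part of this file): a
 * semaphore created with an initial value of 0 acts as an event. A
 * process-context consumer blocks in os_acquire_semaphore(), while a
 * producer (including an ISR) signals it:
 *
 *     void *sema = os_alloc_semaphore(0);
 *
 *     // consumer, process context:
 *     if (os_acquire_semaphore(sema) == NV_OK) { ... handle event ... }
 *
 *     // producer, any context (up() is IRQ-safe):
 *     os_release_semaphore(sema);
 */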

typedef struct
{
    struct rw_semaphore sem;

#if defined(CONFIG_LOCKDEP)
    /**
     * A lock class key. It is registered with the lockdep validator so that
     * the usage and dependencies of all instances contribute to constructing
     * correct locking rules, and this lock is tracked by the lockdep
     * validator.
     */
    struct lock_class_key key;
#endif // CONFIG_LOCKDEP
} os_rwlock_t;

void* NV_API_CALL os_alloc_rwlock(void)
{
    os_rwlock_t *os_rwlock = NULL;

    NV_STATUS rmStatus = os_alloc_mem((void *)&os_rwlock, sizeof(os_rwlock_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate a struct os_rwlock_t!\n");
        return NULL;
    }

    init_rwsem(&os_rwlock->sem);

#if defined(CONFIG_LOCKDEP)
    // Register the dynamically allocated key with lockdep.
    lockdep_register_key(&os_rwlock->key);
    lockdep_set_class(&os_rwlock->sem, &os_rwlock->key);
#endif // CONFIG_LOCKDEP

    return os_rwlock;
}

void NV_API_CALL os_free_rwlock(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

#if defined(CONFIG_LOCKDEP)
    // Unregister the dynamically allocated key.
    lockdep_unregister_key(&os_rwlock->key);
#endif // CONFIG_LOCKDEP

    os_free_mem(os_rwlock);
}

NV_STATUS NV_API_CALL os_acquire_rwlock_read(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down_read(&os_rwlock->sem);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_acquire_rwlock_write(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_INVALID_REQUEST;
    }
    down_write(&os_rwlock->sem);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    // Note: unlike down_trylock(), down_read_trylock() returns nonzero on
    // success, so the result must be inverted here.
    if (!down_read_trylock(&os_rwlock->sem))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}

NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;

    // Note: unlike down_trylock(), down_write_trylock() returns nonzero on
    // success, so the result must be inverted here.
    if (!down_write_trylock(&os_rwlock->sem))
    {
        return NV_ERR_TIMEOUT_RETRY;
    }

    return NV_OK;
}

void NV_API_CALL os_release_rwlock_read(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;
    up_read(&os_rwlock->sem);
}

void NV_API_CALL os_release_rwlock_write(void *pRwLock)
{
    os_rwlock_t *os_rwlock = (os_rwlock_t *)pRwLock;
    up_write(&os_rwlock->sem);
}
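
/*
 * Illustrative usage (hypothetical caller, not part of this file): readers
 * may hold the lock concurrently; the conditional variants report
 * NV_ERR_TIMEOUT_RETRY instead of sleeping when the lock is contended:
 *
 *     void *rwlock = os_alloc_rwlock();
 *     if (rwlock != NULL)
 *     {
 *         if (os_acquire_rwlock_read(rwlock) == NV_OK)
 *         {
 *             // ... read-side critical section ...
 *             os_release_rwlock_read(rwlock);
 *         }
 *         os_free_rwlock(rwlock);
 *     }
 */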

NvBool NV_API_CALL os_semaphore_may_sleep(void)
{
    return NV_MAY_SLEEP();
}

NvBool NV_API_CALL os_is_isr(void)
{
    return (in_irq());
}

// return TRUE if the caller is the super-user
NvBool NV_API_CALL os_is_administrator(void)
{
    return NV_IS_SUSER();
}

NvBool NV_API_CALL os_allow_priority_override(void)
{
    return capable(CAP_SYS_NICE);
}

char* NV_API_CALL os_string_copy(
    char *dst,
    const char *src
)
{
    return strcpy(dst, src);
}

NvU32 NV_API_CALL os_string_length(
    const char* str
)
{
    return strlen(str);
}

NvU32 NV_API_CALL os_strtoul(const char *str, char **endp, NvU32 base)
{
    return (NvU32)simple_strtoul(str, endp, base);
}

NvS32 NV_API_CALL os_string_compare(const char *str1, const char *str2)
{
    return strcmp(str1, str2);
}

void *os_mem_copy_custom(
    void       *dstPtr,
    const void *srcPtr,
    NvU32       length
)
{
    void *ret = dstPtr;
    NvU32 dwords, bytes = length;
    NvU8 *dst = dstPtr;
    const NvU8 *src = srcPtr;

    if ((length >= 128) &&
        (((NvUPtr)dst & 3) == 0) &&
        (((NvUPtr)src & 3) == 0))
    {
        dwords = (length / sizeof(NvU32));
        bytes = (length % sizeof(NvU32));

        while (dwords != 0)
        {
            *(NvU32 *)dst = *(const NvU32 *)src;
            dst += sizeof(NvU32);
            src += sizeof(NvU32);
            dwords--;
        }
    }

    while (bytes != 0)
    {
        *dst = *src;
        dst++;
        src++;
        bytes--;
    }

    return ret;
}

void *NV_API_CALL os_mem_copy(
    void       *dst,
    const void *src,
    NvU32       length
)
{
#if defined(NVCPU_AARCH64)
    /*
     * TODO: Remove once memset/memcpy restructure is complete
     *
     * When performing memcpy for memory mapped as Device, memcpy_[to/from]io
     * must be used. WAR to check the source and destination to determine the
     * correct memcpy_io to use.
     *
     * This WAR is limited to just aarch64 for now because the address ranges
     * used to map ioremap and vmalloc are different on ppc64le, and
     * is_vmalloc_addr() does not correctly handle this. is_ioremap_addr() is
     * needed instead. This will have to be addressed when reorganizing RM to
     * use the new memset model.
     */
    if (is_vmalloc_addr(dst) && !is_vmalloc_addr(src))
    {
        memcpy_toio(dst, src, length);
        return dst;
    }
    else if (!is_vmalloc_addr(dst) && is_vmalloc_addr(src))
    {
        memcpy_fromio(dst, src, length);
        return dst;
    }
    else if (is_vmalloc_addr(dst) && is_vmalloc_addr(src))
    {
        return os_mem_copy_custom(dst, src, length);
    }
    else
#endif
    {
#if defined(CONFIG_CC_OPTIMIZE_FOR_SIZE)
        /*
         * When the kernel is configured with CC_OPTIMIZE_FOR_SIZE=y, Kbuild
         * uses -Os universally. With -Os, GCC will aggressively inline
         * builtins, even if -fno-builtin is specified, including memcpy with
         * a tiny byte-copy loop on x86 (rep movsb). This is horrible for
         * performance - a strict dword copy is much faster - so when we
         * detect this case, just provide our own implementation.
         */
        return os_mem_copy_custom(dst, src, length);
#else
        /*
         * Generally speaking, the kernel-provided memcpy will be the fastest
         * (optimized much better for the target architecture than the above
         * loop), so we want to use that whenever we can get to it.
         */
        return memcpy(dst, src, length);
#endif
    }
}
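
/*
 * Worked example for os_mem_copy_custom() above: with length == 130 and
 * both pointers 4-byte aligned, dwords == 130 / 4 == 32 and bytes ==
 * 130 % 4 == 2, so 128 bytes move as aligned 32-bit loads/stores followed
 * by a 2-byte tail. Requests that are unaligned or shorter than 128 bytes
 * take the byte loop for the entire length.
 */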

NV_STATUS NV_API_CALL os_memcpy_from_user(
    void       *to,
    const void *from,
    NvU32       n
)
{
    return (NV_COPY_FROM_USER(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
}

NV_STATUS NV_API_CALL os_memcpy_to_user(
    void       *to,
    const void *from,
    NvU32       n
)
{
    return (NV_COPY_TO_USER(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
}

void* NV_API_CALL os_mem_set(
    void  *dst,
    NvU8   c,
    NvU32  length
)
{
#if defined(NVCPU_AARCH64)
    /*
     * TODO: Remove once memset/memcpy restructure is complete
     *
     * WAR to check the destination to determine if the memory is of type
     * Device or Normal, and use the correct memset.
     *
     * This WAR is limited to just aarch64 for now because the address ranges
     * used to map ioremap and vmalloc are different on ppc64le, and
     * is_vmalloc_addr() does not correctly handle this. is_ioremap_addr() is
     * needed instead. This will have to be addressed when reorganizing RM to
     * use the new memset model.
     */
    if (is_vmalloc_addr(dst))
    {
        memset_io(dst, (int)c, length);
        return dst;
    }
    else
#endif
        return memset(dst, (int)c, length);
}

NvS32 NV_API_CALL os_mem_cmp(
    const NvU8 *buf0,
    const NvU8 *buf1,
    NvU32       length
)
{
    return memcmp(buf0, buf1, length);
}


/*
 * Operating System Memory Functions
 *
 * There are 2 interesting aspects of resource manager memory allocations
 * that need special consideration on Linux:
 *
 * 1. They are typically very large (e.g. single allocations of 164KB).
 *
 * 2. The resource manager assumes that it can safely allocate memory in
 *    interrupt handlers.
 *
 * The first requires that we call vmalloc, the second kmalloc. We decide
 * which one to use at run time, based on the size of the request and the
 * context. Allocations larger than 128KB require vmalloc; in the context
 * of an ISR, they fail.
 */

#if defined(NV_VGX_HYPER)
/*
 * Citrix Hypervisor-8.0 Dom0 system memory ends up getting fragmented,
 * which causes high-order kmalloc allocations to fail. We try to avoid
 * that by requesting allocations no larger than 8K.
 *
 * KVM is also heavily affected in low-memory-pressure situations,
 * particularly if hugetlbfs hugepages are being used. Hence, the 8K limit
 * applies there too.
 */
#define KMALLOC_LIMIT 8192
#else
#define KMALLOC_LIMIT 131072
#endif

#define VMALLOC_ALLOCATION_SIZE_FLAG (1 << 0)

NV_STATUS NV_API_CALL os_alloc_mem(
    void **address,
    NvU64  size
)
{
    NvU64 original_size = size;
    unsigned long alloc_size;

    if (address == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    *address = NULL;
    NV_MEM_TRACKING_PAD_SIZE(size);

    // check for integer overflow on size
    if (size < original_size)
        return NV_ERR_INVALID_ARGUMENT;

    //
    // NV_KMALLOC and nv_vmalloc take a 4-byte size argument on x86. To avoid
    // truncation and a wrongly sized allocation, the check below is required.
    //
    alloc_size = size;

    if (alloc_size != size)
        return NV_ERR_INVALID_PARAMETER;

    if (!NV_MAY_SLEEP())
    {
        if (alloc_size <= KMALLOC_LIMIT)
            NV_KMALLOC_ATOMIC(*address, alloc_size);
    }
    else
    {
        if (alloc_size <= KMALLOC_LIMIT)
        {
            NV_KMALLOC_NO_OOM(*address, alloc_size);
        }
        if (*address == NULL)
        {
            *address = nv_vmalloc(alloc_size);
            alloc_size |= VMALLOC_ALLOCATION_SIZE_FLAG;
        }
    }

    NV_MEM_TRACKING_HIDE_SIZE(address, alloc_size);

    return ((*address != NULL) ? NV_OK : NV_ERR_NO_MEMORY);
}
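
/*
 * A minimal sketch of the size tagging above (assuming the
 * NV_MEM_TRACKING_* macros pad the size to an even value and stash it in a
 * hidden header ahead of the returned pointer): bit 0 of the recorded size
 * doubles as the allocator tag. A 16 KB kmalloc records 0x4000, while a
 * 256 KB vmalloc records 0x40000 | 1 == 0x40001; os_free_mem() below tests
 * and clears that bit to route the block back to nv_vfree() or NV_KFREE().
 */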

void NV_API_CALL os_free_mem(void *address)
{
    NvU64 size;

    NV_MEM_TRACKING_RETRIEVE_SIZE(address, size);

    if (size & VMALLOC_ALLOCATION_SIZE_FLAG)
    {
        size &= ~VMALLOC_ALLOCATION_SIZE_FLAG;
        nv_vfree(address, size);
    }
    else
        NV_KFREE(address, size);
}


/*****************************************************************************
*
*    Name: osGetCurrentTime
*
*****************************************************************************/

NV_STATUS NV_API_CALL os_get_current_time(
    NvU32 *seconds,
    NvU32 *useconds
)
{
    struct timespec64 tm;

    ktime_get_real_ts64(&tm);

    *seconds  = tm.tv_sec;
    *useconds = tm.tv_nsec / NSEC_PER_USEC;

    return NV_OK;
}

//
// Get the high resolution tick count of the system uptime
//
NvU64 NV_API_CALL os_get_current_tick_hr(void)
{
    struct timespec64 tm;
    ktime_get_raw_ts64(&tm);
    return (NvU64) timespec64_to_ns(&tm);
}

#if BITS_PER_LONG >= 64

NvU64 NV_API_CALL os_get_current_tick(void)
{
#if defined(NV_JIFFIES_TO_TIMESPEC_PRESENT)
    struct timespec ts;
    jiffies_to_timespec(jiffies, &ts);
    return (NvU64) timespec_to_ns(&ts);
#else
    struct timespec64 ts;
    jiffies_to_timespec64(jiffies, &ts);
    return (NvU64) timespec64_to_ns(&ts);
#endif
}

NvU64 NV_API_CALL os_get_tick_resolution(void)
{
    return (NvU64)jiffies_to_usecs(1) * NSEC_PER_USEC;
}

#else

NvU64 NV_API_CALL os_get_current_tick(void)
{
    /*
     * 'jiffies' overflows regularly on 32-bit builds (unsigned long is 4
     * bytes instead of 8 bytes), so it's unwise to build a tick counter on
     * it, since the rest of the Resman assumes the 'tick' returned from
     * this function is monotonically increasing and never overflows.
     *
     * Instead, use the previous implementation that we've lived with since
     * the beginning, which uses system clock time to calculate the tick.
     * This is subject to problems if the system clock time changes
     * dramatically (more than a second or so) while the Resman is actively
     * tracking a timeout.
     */
    NvU32 seconds, useconds;

    (void) os_get_current_time(&seconds, &useconds);

    return ((NvU64)seconds * NSEC_PER_SEC +
            (NvU64)useconds * NSEC_PER_USEC);
}

NvU64 NV_API_CALL os_get_tick_resolution(void)
{
    /*
     * os_get_current_tick() uses os_get_current_time(), which has
     * microsecond resolution.
     */
    return 1000ULL;
}

#endif
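
/*
 * Worked example for os_get_tick_resolution() in the 64-bit branch above:
 * with HZ == 250, one jiffy is jiffies_to_usecs(1) == 4000 us, so the
 * reported resolution is 4000 * NSEC_PER_USEC == 4,000,000 ns. The 32-bit
 * branch reports a fixed 1000 ns because its ticks are derived from the
 * microsecond-resolution system clock rather than from jiffies.
 */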

//---------------------------------------------------------------------------
//
//  Misc services.
//
//---------------------------------------------------------------------------

NV_STATUS NV_API_CALL os_delay_us(NvU32 MicroSeconds)
{
    return nv_sleep_us(MicroSeconds);
}

NV_STATUS NV_API_CALL os_delay(NvU32 MilliSeconds)
{
    return nv_sleep_ms(MilliSeconds);
}

NvU64 NV_API_CALL os_get_cpu_frequency(void)
{
    NvU64 cpu_hz = 0;
#if defined(CONFIG_CPU_FREQ)
    cpu_hz = (cpufreq_get(0) * 1000);
#elif defined(NVCPU_X86_64)
    NvU64 tsc[2];

    tsc[0] = nv_rdtsc();
    mdelay(250);
    tsc[1] = nv_rdtsc();

    cpu_hz = ((tsc[1] - tsc[0]) * 4);
#endif
    return cpu_hz;
}
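
/*
 * Worked example for the TSC branch above: the two nv_rdtsc() samples
 * bracket an mdelay(250), i.e. a quarter of a second, so multiplying the
 * delta by 4 scales the count to cycles per second. A CPU running at 3 GHz
 * would show a delta of about 750,000,000, yielding
 * cpu_hz == 3,000,000,000.
 */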

NvU32 NV_API_CALL os_get_current_process(void)
{
    return NV_GET_CURRENT_PROCESS();
}

void NV_API_CALL os_get_current_process_name(char *buf, NvU32 len)
{
    task_lock(current);
    strncpy(buf, current->comm, len - 1);
    buf[len - 1] = '\0';
    task_unlock(current);
}

NV_STATUS NV_API_CALL os_get_current_thread(NvU64 *threadId)
{
    if (in_interrupt())
        *threadId = 0;
    else
        *threadId = (NvU64) current->pid;

    return NV_OK;
}

/*******************************************************************************/
/*                                                                             */
/* Debug and logging utilities follow                                          */
/*                                                                             */
/*******************************************************************************/

// The current debug display level (default to maximum debug level)
NvU32 cur_debuglevel = 0xffffffff;

/*
 * The binary core of RM (nv-kernel.o) calls both out_string and nv_printf.
 */
inline void NV_API_CALL out_string(const char *str)
{
    printk("%s", str);
}

/*
 * nv_printf() prints to the kernel log for the driver.
 * Returns the number of characters written.
 */
int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...)
{
    va_list arglist;
    int chars_written = 0;

    if (debuglevel >= ((cur_debuglevel >> 4) & 0x3))
    {
        size_t length;
        unsigned long flags;

        // When printk is called to extend the output of the previous line
        // (i.e. when the previous line did not end in \n), the printk call
        // must contain KERN_CONT. Older kernels still print the line
        // correctly, but KERN_CONT was technically always required.

        // This means that every call to printk() needs to have a KERN_xxx
        // prefix. The only way to get this is to rebuild the format string
        // into a new buffer, with a KERN_xxx prefix prepended.

        // Unfortunately, we can't guarantee that two calls to nv_printf()
        // won't be interrupted by a printk from another driver. So to be
        // safe, we always prepend KERN_CONT. It's still technically wrong,
        // but it works.

        // The long-term fix is to modify all NV_PRINTF-ish calls so that the
        // string always contains only one \n (at the end) and NV_PRINTF_EX
        // is deleted. But that is unlikely to ever happen.

        length = strlen(printf_format);
        if (length < 1)
            return 0;

        NV_SPIN_LOCK_IRQSAVE(&nv_error_string_lock, flags);

        // KERN_CONT changed in the 3.6 kernel, so we can't assume its
        // composition or size.
        memcpy(nv_error_string, KERN_CONT, sizeof(KERN_CONT) - 1);
        memcpy(nv_error_string + sizeof(KERN_CONT) - 1, printf_format, length + 1);

        va_start(arglist, printf_format);
        chars_written = vprintk(nv_error_string, arglist);
        va_end(arglist);

        NV_SPIN_UNLOCK_IRQRESTORE(&nv_error_string_lock, flags);
    }

    return chars_written;
}

NvS32 NV_API_CALL os_snprintf(char *buf, NvU32 size, const char *fmt, ...)
{
    va_list arglist;
    int chars_written;

    va_start(arglist, fmt);
    chars_written = vsnprintf(buf, size, fmt, arglist);
    va_end(arglist);

    return chars_written;
}

NvS32 NV_API_CALL os_vsnprintf(char *buf, NvU32 size, const char *fmt, va_list arglist)
{
    return vsnprintf(buf, size, fmt, arglist);
}

void NV_API_CALL os_log_error(const char *fmt, va_list ap)
{
    unsigned long flags;

    NV_SPIN_LOCK_IRQSAVE(&nv_error_string_lock, flags);

    vsnprintf(nv_error_string, MAX_ERROR_STRING, fmt, ap);
    nv_error_string[MAX_ERROR_STRING - 1] = 0;
    printk(KERN_ERR "%s", nv_error_string);

    NV_SPIN_UNLOCK_IRQRESTORE(&nv_error_string_lock, flags);
}

void NV_API_CALL os_io_write_byte(
    NvU32 address,
    NvU8  value
)
{
    outb(value, address);
}

void NV_API_CALL os_io_write_word(
    NvU32 address,
    NvU16 value
)
{
    outw(value, address);
}

void NV_API_CALL os_io_write_dword(
    NvU32 address,
    NvU32 value
)
{
    outl(value, address);
}

NvU8 NV_API_CALL os_io_read_byte(
    NvU32 address
)
{
    return inb(address);
}

NvU16 NV_API_CALL os_io_read_word(
    NvU32 address
)
{
    return inw(address);
}

NvU32 NV_API_CALL os_io_read_dword(
    NvU32 address
)
{
    return inl(address);
}


static NvBool NV_API_CALL xen_support_fully_virtualized_kernel(void)
{
#if defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
    return (os_is_vgx_hyper());
#endif
    return NV_FALSE;
}

void* NV_API_CALL os_map_kernel_space(
    NvU64 start,
    NvU64 size_bytes,
    NvU32 mode
)
{
    void *vaddr;

    if (!xen_support_fully_virtualized_kernel() && start == 0)
    {
        if (mode != NV_MEMORY_CACHED)
        {
            nv_printf(NV_DBG_ERRORS,
                "NVRM: os_map_kernel_space: won't map address 0x%0llx UC!\n", start);
            return NULL;
        }
        else
            return (void *)PAGE_OFFSET;
    }

    if (!NV_MAY_SLEEP())
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: os_map_kernel_space: can't map 0x%0llx, invalid context!\n", start);
        os_dbg_breakpoint();
        return NULL;
    }

    switch (mode)
    {
        case NV_MEMORY_CACHED:
            vaddr = nv_ioremap_cache(start, size_bytes);
            break;
        case NV_MEMORY_WRITECOMBINED:
            vaddr = rm_disable_iomap_wc() ?
                    nv_ioremap_nocache(start, size_bytes) :
                    nv_ioremap_wc(start, size_bytes);
            break;
        case NV_MEMORY_UNCACHED:
        case NV_MEMORY_DEFAULT:
            vaddr = nv_ioremap_nocache(start, size_bytes);
            break;
        default:
            nv_printf(NV_DBG_ERRORS,
                "NVRM: os_map_kernel_space: unsupported mode!\n");
            return NULL;
    }

    return vaddr;
}

void NV_API_CALL os_unmap_kernel_space(
    void *addr,
    NvU64 size_bytes
)
{
    if (addr == (void *)PAGE_OFFSET)
        return;

    nv_iounmap(addr, size_bytes);
}
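
/*
 * Illustrative usage (hypothetical addresses, not part of this file):
 * mapping a 4 KB device register window uncached and tearing it down:
 *
 *     void *regs = os_map_kernel_space(0xf0000000ull, 0x1000,
 *                                      NV_MEMORY_UNCACHED);
 *     if (regs != NULL)
 *     {
 *         // ... MMIO accesses through 'regs' ...
 *         os_unmap_kernel_space(regs, 0x1000);
 *     }
 */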

#if NVCPU_IS_AARCH64

static inline void nv_flush_cache_cpu(void *info)
{
    if (!nvos_is_chipset_io_coherent())
    {
#if defined(NV_FLUSH_CACHE_ALL_PRESENT)
        flush_cache_all();
#else
        WARN_ONCE(0, "kernel does not provide flush_cache_all()\n");
#endif
    }
}

// flush the cache of all cpus
NV_STATUS NV_API_CALL os_flush_cpu_cache_all(void)
{
    on_each_cpu(nv_flush_cache_cpu, NULL, 1);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_flush_user_cache(void)
{
    if (!NV_MAY_SLEEP())
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    //
    // The Linux kernel does not export an interface for flushing a range,
    // although it is possible. For now, just flush the entire cache to be
    // safe.
    //
    on_each_cpu(nv_flush_cache_cpu, NULL, 1);
    return NV_OK;
}

#else // NVCPU_IS_AARCH64

NV_STATUS NV_API_CALL os_flush_cpu_cache_all(void)
{
    return NV_ERR_NOT_SUPPORTED;
}

NV_STATUS NV_API_CALL os_flush_user_cache(void)
{
    return NV_ERR_NOT_SUPPORTED;
}

#endif

void NV_API_CALL os_flush_cpu_write_combine_buffer(void)
{
    wmb();
}
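
/*
 * A minimal sketch of why os_flush_cpu_write_combine_buffer() matters
 * (hypothetical producer, not part of this file): CPU writes through a
 * write-combined mapping may linger in WC buffers, so they are drained
 * before the GPU is told the data is ready:
 *
 *     os_mem_copy(wc_mapping, payload, len);   // may sit in WC buffers
 *     os_flush_cpu_write_combine_buffer();     // wmb() drains them
 *     // ... now notify the GPU (e.g. a doorbell write) ...
 */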

// override initial debug level from registry
void NV_API_CALL os_dbg_init(void)
{
    NvU32 new_debuglevel;
    nvidia_stack_t *sp = NULL;

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return;
    }

    if (NV_OK == rm_read_registry_dword(sp, NULL,
                                        "ResmanDebugLevel",
                                        &new_debuglevel))
    {
        if (new_debuglevel != (NvU32)~0)
            cur_debuglevel = new_debuglevel;
    }

    nv_kmem_cache_free_stack(sp);
}

void NV_API_CALL os_dbg_set_level(NvU32 new_debuglevel)
{
    nv_printf(NV_DBG_SETUP, "NVRM: Changing debuglevel from 0x%x to 0x%x\n",
        cur_debuglevel, new_debuglevel);
    cur_debuglevel = new_debuglevel;
}

NvU64 NV_API_CALL os_get_max_user_va(void)
{
    return TASK_SIZE;
}

NV_STATUS NV_API_CALL os_schedule(void)
{
    if (NV_MAY_SLEEP())
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
        return NV_OK;
    }
    else
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: os_schedule: Attempted to yield"
                                 " the CPU while in atomic or interrupt"
                                 " context\n");
        return NV_ERR_ILLEGAL_ACTION;
    }
}

typedef struct {
    nv_kthread_q_item_t item;
    void *data;
} os_queue_data_t;

static void os_execute_work_item(void *_oqd)
{
    os_queue_data_t *oqd = _oqd;
    nvidia_stack_t *sp = NULL;
    void *data = oqd->data;

    NV_KFREE(oqd, sizeof(os_queue_data_t));

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return;
    }

    rm_execute_work_item(sp, data);

    nv_kmem_cache_free_stack(sp);
}

NV_STATUS NV_API_CALL os_queue_work_item(struct os_work_queue *queue, void *data)
{
    os_queue_data_t *oqd;
    nv_kthread_q_t *kthread;

    /* Use the global queue unless a valid queue was provided */
    kthread = queue ? &queue->nvk : &nv_kthread_q;

    /* Make sure the kthread is active */
    if (unlikely(!kthread->q_kthread)) {
        nv_printf(NV_DBG_ERRORS, "NVRM: queue is not enabled\n");
        return NV_ERR_NOT_READY;
    }

    /* Allocate atomically just in case we're called in atomic context. */
    NV_KMALLOC_ATOMIC(oqd, sizeof(os_queue_data_t));
    if (!oqd)
        return NV_ERR_NO_MEMORY;

    nv_kthread_q_item_init(&oqd->item, os_execute_work_item, oqd);
    oqd->data = data;

    nv_kthread_q_schedule_q_item(kthread, &oqd->item);

    return NV_OK;
}

NV_STATUS NV_API_CALL os_flush_work_queue(struct os_work_queue *queue)
{
    nv_kthread_q_t *kthread;

    /* Use the global queue unless a valid queue was provided */
    kthread = queue ? &queue->nvk : &nv_kthread_q;

    if (NV_MAY_SLEEP())
    {
        if (kthread->q_kthread)
            nv_kthread_q_flush(kthread);

        return NV_OK;
    }
    else
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: os_flush_work_queue: attempted to execute passive"
                  " work from an atomic or interrupt context.\n");
        return NV_ERR_ILLEGAL_ACTION;
    }
}

extern NvU32 NVreg_EnableDbgBreakpoint;

void NV_API_CALL os_dbg_breakpoint(void)
{
    if (NVreg_EnableDbgBreakpoint == 0)
    {
        return;
    }

#if defined(CONFIG_X86_REMOTE_DEBUG) || defined(CONFIG_KGDB) || defined(CONFIG_XMON)
#if defined(NVCPU_X86_64)
    __asm__ __volatile__ ("int $3");
#elif defined(NVCPU_ARM)
    __asm__ __volatile__ (".word %c0" :: "i" (KGDB_COMPILED_BREAK));
#elif defined(NVCPU_AARCH64)
# warning "Need to implement os_dbg_breakpoint() for aarch64"
#elif defined(NVCPU_PPC64LE)
    __asm__ __volatile__ ("trap");
#endif // NVCPU_*
#elif defined(CONFIG_KDB)
    KDB_ENTER();
#endif // CONFIG_X86_REMOTE_DEBUG || CONFIG_KGDB || CONFIG_XMON
}

NvU32 NV_API_CALL os_get_cpu_number(void)
{
    NvU32 cpu_id = get_cpu();
    put_cpu();
    return cpu_id;
}

NvU32 NV_API_CALL os_get_cpu_count(void)
{
    return NV_NUM_CPUS();
}

NvBool NV_API_CALL os_pat_supported(void)
{
    return (nv_pat_mode != NV_PAT_MODE_DISABLED);
}

NvBool NV_API_CALL os_is_efi_enabled(void)
{
    return efi_enabled(EFI_BOOT);
}

void NV_API_CALL os_dump_stack(void)
{
    dump_stack();
}

typedef struct os_spinlock_s
{
    nv_spinlock_t lock;
    unsigned long eflags;
} os_spinlock_t;

NV_STATUS NV_API_CALL os_alloc_spinlock(void **ppSpinlock)
{
    NV_STATUS rmStatus;
    os_spinlock_t *os_spinlock;

    rmStatus = os_alloc_mem(ppSpinlock, sizeof(os_spinlock_t));
    if (rmStatus != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate spinlock!\n");
        return rmStatus;
    }

    os_spinlock = (os_spinlock_t *)*ppSpinlock;
    NV_SPIN_LOCK_INIT(&os_spinlock->lock);
    os_spinlock->eflags = 0;
    return NV_OK;
}

void NV_API_CALL os_free_spinlock(void *pSpinlock)
{
    os_free_mem(pSpinlock);
}

NvU64 NV_API_CALL os_acquire_spinlock(void *pSpinlock)
{
    os_spinlock_t *os_spinlock = (os_spinlock_t *)pSpinlock;
    unsigned long eflags;

    NV_SPIN_LOCK_IRQSAVE(&os_spinlock->lock, eflags);
    os_spinlock->eflags = eflags;

#if defined(NVCPU_X86_64)
    eflags &= X86_EFLAGS_IF;
#elif defined(NVCPU_AARCH64)
    eflags &= PSR_I_BIT;
#endif
    return eflags;
}

void NV_API_CALL os_release_spinlock(void *pSpinlock, NvU64 oldIrql)
{
    os_spinlock_t *os_spinlock = (os_spinlock_t *)pSpinlock;
    unsigned long eflags;

    eflags = os_spinlock->eflags;
    os_spinlock->eflags = 0;
    NV_SPIN_UNLOCK_IRQRESTORE(&os_spinlock->lock, eflags);
}
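
/*
 * Illustrative usage (hypothetical caller, not part of this file): note
 * that os_acquire_spinlock() returns only the saved interrupt-enable bit;
 * the complete flags word is kept in the lock itself and is what
 * os_release_spinlock() actually restores:
 *
 *     NvU64 irql = os_acquire_spinlock(lock);
 *     // ... critical section with local interrupts disabled ...
 *     os_release_spinlock(lock, irql);
 */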

#define NV_KERNEL_RELEASE    ((LINUX_VERSION_CODE >> 16) & 0x0ff)
#define NV_KERNEL_VERSION    ((LINUX_VERSION_CODE >> 8) & 0x0ff)
#define NV_KERNEL_SUBVERSION ((LINUX_VERSION_CODE) & 0x0ff)

NV_STATUS NV_API_CALL os_get_version_info(os_version_info *pOsVersionInfo)
{
    NV_STATUS status = NV_OK;

    pOsVersionInfo->os_major_version = NV_KERNEL_RELEASE;
    pOsVersionInfo->os_minor_version = NV_KERNEL_VERSION;
    pOsVersionInfo->os_build_number  = NV_KERNEL_SUBVERSION;

#if defined(UTS_RELEASE)
    pOsVersionInfo->os_build_version_str = UTS_RELEASE;
#endif

#if defined(UTS_VERSION)
    pOsVersionInfo->os_build_date_plus_str = UTS_VERSION;
#endif

    return status;
}

NvBool NV_API_CALL os_is_xen_dom0(void)
{
#if defined(NV_DOM0_KERNEL_PRESENT)
    return NV_TRUE;
#else
    return NV_FALSE;
#endif
}

NvBool NV_API_CALL os_is_vgx_hyper(void)
{
#if defined(NV_VGX_HYPER)
    return NV_TRUE;
#else
    return NV_FALSE;
#endif
}

NV_STATUS NV_API_CALL os_inject_vgx_msi(NvU16 guestID, NvU64 msiAddr, NvU32 msiData)
{
#if defined(NV_VGX_HYPER) && defined(NV_DOM0_KERNEL_PRESENT) && \
    defined(NV_XEN_IOEMU_INJECT_MSI)
    int rc = 0;
    rc = xen_ioemu_inject_msi(guestID, msiAddr, msiData);
    if (rc)
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: %s: can't inject MSI to guest:%d, addr:0x%x, data:0x%x, err:%d\n",
            __FUNCTION__, guestID, msiAddr, msiData, rc);
        return NV_ERR_OPERATING_SYSTEM;
    }
    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NvBool NV_API_CALL os_is_grid_supported(void)
{
#if defined(NV_GRID_BUILD)
    return NV_TRUE;
#else
    return NV_FALSE;
#endif
}

NvU32 NV_API_CALL os_get_grid_csp_support(void)
{
#if defined(NV_GRID_BUILD_CSP)
    return NV_GRID_BUILD_CSP;
#else
    return 0;
#endif
}

void NV_API_CALL os_bug_check(NvU32 bugCode, const char *bugCodeStr)
{
    panic(bugCodeStr);
}

NV_STATUS NV_API_CALL os_get_euid(NvU32 *pSecToken)
{
    *pSecToken = NV_CURRENT_EUID();
    return NV_OK;
}

#if defined(NVCPU_X86_64) || defined(NVCPU_AARCH64)

static NvBool os_verify_checksum(const NvU8 *pMappedAddr, NvU32 length)
{
    NvU8 sum = 0;
    NvU32 iter = 0;

    for (iter = 0; iter < length; iter++)
        sum += pMappedAddr[iter];

    return sum == 0;
}

#define _VERIFY_SMBIOS3(_pMappedAddr)                       \
    _pMappedAddr &&                                         \
    (os_mem_cmp(_pMappedAddr, "_SM3_", 5) == 0 &&           \
     _pMappedAddr[6] < 32 &&                                \
     _pMappedAddr[6] > 0 &&                                 \
     os_verify_checksum(_pMappedAddr, _pMappedAddr[6]))

#define OS_VERIFY_SMBIOS3(pMappedAddr) _VERIFY_SMBIOS3((pMappedAddr))

#define _VERIFY_SMBIOS(_pMappedAddr)                        \
    _pMappedAddr &&                                         \
    (os_mem_cmp(_pMappedAddr, "_SM_", 4) == 0 &&            \
     _pMappedAddr[5] < 32 &&                                \
     _pMappedAddr[5] > 0 &&                                 \
     os_verify_checksum(_pMappedAddr, _pMappedAddr[5]) &&   \
     os_mem_cmp((_pMappedAddr + 16), "_DMI_", 5) == 0 &&    \
     os_verify_checksum((_pMappedAddr + 16), 15))

#define OS_VERIFY_SMBIOS(pMappedAddr) _VERIFY_SMBIOS((pMappedAddr))

#define SMBIOS_LEGACY_BASE 0xF0000
#define SMBIOS_LEGACY_SIZE 0x10000

static NV_STATUS os_get_smbios_header_legacy(NvU64 *pSmbsAddr)
{
#if !defined(NVCPU_X86_64)
    return NV_ERR_NOT_SUPPORTED;
#else
    NV_STATUS status = NV_ERR_OPERATING_SYSTEM;
    NvU8 *pMappedAddr = NULL;
    NvU8 *pIterAddr = NULL;

    pMappedAddr = (NvU8*)os_map_kernel_space(SMBIOS_LEGACY_BASE,
                                             SMBIOS_LEGACY_SIZE,
                                             NV_MEMORY_CACHED);
    if (pMappedAddr == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    pIterAddr = pMappedAddr;

    for (; pIterAddr < (pMappedAddr + SMBIOS_LEGACY_SIZE); pIterAddr += 16)
    {
        if (OS_VERIFY_SMBIOS3(pIterAddr))
        {
            *pSmbsAddr = SMBIOS_LEGACY_BASE + (pIterAddr - pMappedAddr);
            status = NV_OK;
            break;
        }

        if (OS_VERIFY_SMBIOS(pIterAddr))
        {
            *pSmbsAddr = SMBIOS_LEGACY_BASE + (pIterAddr - pMappedAddr);
            status = NV_OK;
            break;
        }
    }

    os_unmap_kernel_space(pMappedAddr, SMBIOS_LEGACY_SIZE);

    return status;
#endif
}
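
/*
 * Worked example for os_verify_checksum() above: all bytes of the entry
 * point, including the checksum byte itself, are summed modulo 256, and a
 * valid table sums to exactly 0. The legacy scan walks the
 * 0xF0000..0xFFFFF BIOS window in 16-byte steps because the SMBIOS
 * specification places the "_SM_"/"_SM3_" anchors on paragraph (16-byte)
 * boundaries.
 */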

// This function is needed only if "efi" is enabled.
#if (defined(NV_LINUX_EFI_H_PRESENT) && defined(CONFIG_EFI))
static NV_STATUS os_verify_smbios_header_uefi(NvU64 smbsAddr)
{
    NV_STATUS status = NV_ERR_OBJECT_NOT_FOUND;
    NvU64 start = 0, offset = 0, size = 32;
    NvU8 *pMappedAddr = NULL, *pBufAddr = NULL;

    start = smbsAddr;
    offset = (start & ~os_page_mask);
    start &= os_page_mask;
    size = ((size + offset + ~os_page_mask) & os_page_mask);

    pBufAddr = (NvU8*)os_map_kernel_space(start,
                                          size,
                                          NV_MEMORY_CACHED);
    if (pBufAddr == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    pMappedAddr = pBufAddr + offset;

    if (OS_VERIFY_SMBIOS3(pMappedAddr))
    {
        status = NV_OK;
        goto done;
    }

    if (OS_VERIFY_SMBIOS(pMappedAddr))
    {
        status = NV_OK;
    }

done:
    os_unmap_kernel_space(pBufAddr, size);
    return status;
}
#endif

static NV_STATUS os_get_smbios_header_uefi(NvU64 *pSmbsAddr)
{
    NV_STATUS status = NV_ERR_OPERATING_SYSTEM;

// Make sure that efi.h is present before using "struct efi".
#if (defined(NV_LINUX_EFI_H_PRESENT) && defined(CONFIG_EFI))

// Make sure that efi.h has SMBIOS3_TABLE_GUID present.
#if defined(SMBIOS3_TABLE_GUID)
    if (efi.smbios3 != EFI_INVALID_TABLE_ADDR)
    {
        status = os_verify_smbios_header_uefi(efi.smbios3);
        if (status == NV_OK)
        {
            *pSmbsAddr = efi.smbios3;
            return NV_OK;
        }
    }
#endif

    if (efi.smbios != EFI_INVALID_TABLE_ADDR)
    {
        status = os_verify_smbios_header_uefi(efi.smbios);
        if (status == NV_OK)
        {
            *pSmbsAddr = efi.smbios;
            return NV_OK;
        }
    }
#endif

    return status;
}

#endif // defined(NVCPU_X86_64) || defined(NVCPU_AARCH64)

// This function locates the SMBIOS entry point.
NV_STATUS NV_API_CALL os_get_smbios_header(NvU64 *pSmbsAddr)
{
#if !defined(NVCPU_X86_64) && !defined(NVCPU_AARCH64)
    return NV_ERR_NOT_SUPPORTED;
#else
    NV_STATUS status = NV_OK;

    if (os_is_efi_enabled())
    {
        status = os_get_smbios_header_uefi(pSmbsAddr);
    }
    else
    {
        status = os_get_smbios_header_legacy(pSmbsAddr);
    }

    return status;
#endif
}

NV_STATUS NV_API_CALL os_get_acpi_rsdp_from_uefi
(
    NvU32 *pRsdpAddr
)
{
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;

    if (pRsdpAddr == NULL)
    {
        return NV_ERR_INVALID_STATE;
    }

    *pRsdpAddr = 0;

// Make sure that efi.h is present before using "struct efi".
#if (defined(NV_LINUX_EFI_H_PRESENT) && defined(CONFIG_EFI))

    if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
    {
        *pRsdpAddr = efi.acpi20;
        status = NV_OK;
    }
    else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
    {
        *pRsdpAddr = efi.acpi;
        status = NV_OK;
    }
    else
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: RSDP Not found!\n");
        status = NV_ERR_OPERATING_SYSTEM;
    }
#endif

    return status;
}

void NV_API_CALL os_add_record_for_crashLog(void *pbuffer, NvU32 size)
{
}

void NV_API_CALL os_delete_record_for_crashLog(void *pbuffer)
{
}

#if !defined(NV_VGPU_KVM_BUILD)
NV_STATUS NV_API_CALL os_call_vgpu_vfio(void *pvgpu_vfio_info, NvU32 cmd_type)
{
    return NV_ERR_NOT_SUPPORTED;
}
#endif

NV_STATUS NV_API_CALL os_alloc_pages_node
(
    NvS32  nid,
    NvU32  size,
    NvU32  flag,
    NvU64 *pAddress
)
{
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;

#if defined(__GFP_THISNODE) && defined(GFP_HIGHUSER_MOVABLE) && \
    defined(__GFP_COMP) && defined(__GFP_NORETRY) && defined(__GFP_NOWARN)
    gfp_t gfp_mask;
    struct page *alloc_addr;
    unsigned int order = get_order(size);

    /*
     * Explanation of flags used:
     *
     * 1. __GFP_THISNODE:          This will make sure the allocation happens
     *                             on the node specified by nid.
     *
     * 2. GFP_HIGHUSER_MOVABLE:    This makes allocations from ZONE_MOVABLE.
     *
     * 3. __GFP_COMP:              This will make allocations with compound
     *                             pages, which is needed in order to use the
     *                             vm_insert_page API.
     *
     * 4. __GFP_NORETRY:           Used to avoid the Linux kernel OOM killer.
     *
     * 5. __GFP_NOWARN:            Used to avoid a WARN_ON in the slowpath if
     *                             the requested order is too large (just
     *                             fail instead).
     *
     * 6. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim. This is
     *                             part of GFP_USER and consequently
     *                             GFP_HIGHUSER_MOVABLE.
     *
     * Some of these flags are relatively more recent, with the last of them
     * (GFP_HIGHUSER_MOVABLE) having been added with this Linux kernel commit:
     *
     * 2007-07-17 769848c03895b63e5662eb7e4ec8c4866f7d0183
     *
     * Assume that this feature will only be used on kernels that support all
     * of the needed GFP flags.
     */

    gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
               __GFP_NORETRY | __GFP_NOWARN;

#if defined(__GFP_RECLAIM)
    if (flag & NV_ALLOC_PAGES_NODE_SKIP_RECLAIM)
    {
        gfp_mask &= ~(__GFP_RECLAIM);
    }
#endif // defined(__GFP_RECLAIM)

    alloc_addr = alloc_pages_node(nid, gfp_mask, order);
    if (alloc_addr == NULL)
    {
        nv_printf(NV_DBG_INFO,
            "NVRM: alloc_pages_node(node = %d, order = %u) failed\n",
            nid, order);
        status = NV_ERR_NO_MEMORY;
    }
    else if (page_to_nid(alloc_addr) != nid)
    {
        //
        // We can hit this case when a Linux kernel bug is not patched.
        // The needed patch is https://patchwork.kernel.org/patch/10427387/
        //
        nv_printf(NV_DBG_ERRORS,
            "NVRM: alloc_pages_node(node = %d, order = %u) wrong node ID.\n",
            nid, order);
        __free_pages(alloc_addr, order);
        status = NV_ERR_NO_MEMORY;
    }
    else
    {
        *pAddress = (NvU64)page_to_phys(alloc_addr);
        status = NV_OK;
    }
#endif // GFP flags

    return status;
}
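
/*
 * Illustrative usage (hypothetical caller, not part of this file):
 * allocating 2 MB of movable memory on NUMA node 1 while skipping reclaim,
 * then releasing it with os_free_pages_phys() defined below:
 *
 *     NvU64 pa = 0;
 *     if (os_alloc_pages_node(1, 2 << 20,
 *                             NV_ALLOC_PAGES_NODE_SKIP_RECLAIM,
 *                             &pa) == NV_OK)
 *     {
 *         // ... use the physically contiguous range at 'pa' ...
 *         os_free_pages_phys(pa, 2 << 20);
 *     }
 */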

NV_STATUS NV_API_CALL os_get_page
(
    NvU64 address
)
{
    get_page(NV_GET_PAGE_STRUCT(address));
    return NV_OK;
}

NV_STATUS NV_API_CALL os_put_page
(
    NvU64 address
)
{
    put_page(NV_GET_PAGE_STRUCT(address));
    return NV_OK;
}

NvU32 NV_API_CALL os_get_page_refcount
(
    NvU64 address
)
{
    return NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(address));
}

NvU32 NV_API_CALL os_count_tail_pages
(
    NvU64 address
)
{
    NvU32 order = compound_order(compound_head(NV_GET_PAGE_STRUCT(address)));

    return 1 << order;
}

void NV_API_CALL os_free_pages_phys
(
    NvU64 address,
    NvU32 size
)
{
    __free_pages(NV_GET_PAGE_STRUCT(address), get_order(size));
}

NV_STATUS NV_API_CALL os_numa_memblock_size
(
    NvU64 *memblock_size
)
{
#if NV_IS_EXPORT_SYMBOL_PRESENT_memory_block_size_bytes
    *memblock_size = memory_block_size_bytes();
    return NV_OK;
#endif
    if (nv_ctl_device.numa_memblock_size == 0)
        return NV_ERR_INVALID_STATE;
    *memblock_size = nv_ctl_device.numa_memblock_size;
    return NV_OK;
}

NV_STATUS NV_API_CALL os_open_temporary_file
(
    void **ppFile
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
#if defined(O_TMPFILE)
    struct file *file;
    const char *default_path = "/tmp";
    const int flags = O_TMPFILE | O_LARGEFILE | O_RDWR;
    const char *path = NVreg_TemporaryFilePath;

    /*
     * The filp_open() call below depends on the current task's fs_struct
     * (current->fs), which may already be NULL if this is called during
     * process teardown.
     */
    if (current->fs == NULL)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    if (!path)
    {
        path = default_path;
    }

    file = filp_open(path, flags, 0);
    if (IS_ERR(file))
    {
        if ((path != default_path) && (PTR_ERR(file) == -ENOENT))
        {
            nv_printf(NV_DBG_ERRORS,
                      "NVRM: The temporary file path specified via the NVreg_TemporaryFilePath\n"
                      "NVRM: module parameter does not exist. Defaulting to /tmp.\n");

            file = filp_open(default_path, flags, 0);
        }
    }

    if (IS_ERR(file))
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    *ppFile = (void *)file;

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

void NV_API_CALL os_close_file
(
    void *pFile
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    filp_close(pFile, NULL);
#endif
}

#define NV_MAX_NUM_FILE_IO_RETRIES 10

NV_STATUS NV_API_CALL os_write_file
(
    void *pFile,
    NvU8 *pBuffer,
    NvU64 size,
    NvU64 offset
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    loff_t f_pos = offset;
    ssize_t num_written;
    int num_retries = NV_MAX_NUM_FILE_IO_RETRIES;

retry:
#if defined(NV_KERNEL_WRITE_HAS_POINTER_POS_ARG)
    num_written = kernel_write(pFile, pBuffer, size, &f_pos);
#else
    num_written = kernel_write(pFile, pBuffer, size, f_pos);
#endif
    if (num_written < 0)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }
    else if (num_written < size)
    {
        if (num_written > 0)
        {
            pBuffer += num_written;
            size -= num_written;
        }
        if (--num_retries > 0)
        {
            cond_resched();
            goto retry;
        }
        return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}
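
/*
 * Illustrative usage (hypothetical caller, not part of this file):
 * spilling a buffer to an anonymous temporary file and reading it back
 * with os_read_file(), defined below:
 *
 *     void *f = NULL;
 *     if (os_open_temporary_file(&f) == NV_OK)
 *     {
 *         if (os_write_file(f, buf, len, 0) == NV_OK)
 *             (void) os_read_file(f, buf, len, 0);
 *         os_close_file(f);
 *     }
 */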

NV_STATUS NV_API_CALL os_read_file
(
    void *pFile,
    NvU8 *pBuffer,
    NvU64 size,
    NvU64 offset
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    loff_t f_pos = offset;
    ssize_t num_read;
    int num_retries = NV_MAX_NUM_FILE_IO_RETRIES;

retry:
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
    num_read = kernel_read(pFile, pBuffer, size, &f_pos);
#else
    num_read = kernel_read(pFile, f_pos, pBuffer, size);
#endif
    if (num_read < 0)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }
    else if (num_read < size)
    {
        if (num_read > 0)
        {
            pBuffer += num_read;
            size -= num_read;
        }
        if (--num_retries > 0)
        {
            cond_resched();
            goto retry;
        }
        return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL os_open_readonly_file
(
    const char  *filename,
    void       **ppFile
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
    struct file *file;

    /*
     * The filp_open() call below depends on the current task's fs_struct
     * (current->fs), which may already be NULL if this is called during
     * process teardown.
     */
    if (current->fs == NULL)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    file = filp_open(filename, O_RDONLY, 0);
    if (IS_ERR(file))
    {
        return NV_ERR_OPERATING_SYSTEM;
    }

    *ppFile = (void *)file;

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL os_open_and_read_file
(
    const char *filename,
    NvU8       *buf,
    NvU64       count
)
{
    void *fileHandle;
    NV_STATUS status;

    status = os_open_readonly_file(filename, &fileHandle);
    if (status != NV_OK)
    {
        return status;
    }

    status = os_read_file(fileHandle, buf, count, 0);

    os_close_file(fileHandle);

    return status;
}

NvBool NV_API_CALL os_is_nvswitch_present(void)
{
    struct pci_device_id nvswitch_pci_table[] = {
        {
            PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID),
            .class      = PCI_CLASS_BRIDGE_OTHER << 8,
            .class_mask = PCI_ANY_ID
        },
        {0}
    };

    return !!pci_dev_present(nvswitch_pci_table);
}

/*
 * This function may sleep (interruptible).
 */
NV_STATUS NV_API_CALL os_get_random_bytes
(
    NvU8 *bytes,
    NvU16 numBytes
)
{
#if defined(NV_WAIT_FOR_RANDOM_BYTES_PRESENT)
    if (wait_for_random_bytes() < 0)
        return NV_ERR_NOT_READY;
#endif

    get_random_bytes(bytes, numBytes);
    return NV_OK;
}

NV_STATUS NV_API_CALL os_alloc_wait_queue
(
    os_wait_queue **wq
)
{
    NV_KMALLOC(*wq, sizeof(os_wait_queue));
    if (*wq == NULL)
        return NV_ERR_NO_MEMORY;

    init_completion(&(*wq)->q);

    return NV_OK;
}

void NV_API_CALL os_free_wait_queue
(
    os_wait_queue *wq
)
{
    NV_KFREE(wq, sizeof(os_wait_queue));
}

void NV_API_CALL os_wait_uninterruptible
(
    os_wait_queue *wq
)
{
    wait_for_completion(&wq->q);
}

void NV_API_CALL os_wait_interruptible
(
    os_wait_queue *wq
)
{
    wait_for_completion_interruptible(&wq->q);
}

void NV_API_CALL os_wake_up
(
    os_wait_queue *wq
)
{
    complete_all(&wq->q);
}

nv_cap_t* NV_API_CALL os_nv_cap_init
(
    const char *path
)
{
    return nv_cap_init(path);
}

nv_cap_t* NV_API_CALL os_nv_cap_create_dir_entry
(
    nv_cap_t *parent_cap,
    const char *name,
    int mode
)
{
    return nv_cap_create_dir_entry(parent_cap, name, mode);
}

nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry
(
    nv_cap_t *parent_cap,
    const char *name,
    int mode
)
{
    return nv_cap_create_file_entry(parent_cap, name, mode);
}

void NV_API_CALL os_nv_cap_destroy_entry
(
    nv_cap_t *cap
)
{
    nv_cap_destroy_entry(cap);
}

int NV_API_CALL os_nv_cap_validate_and_dup_fd
(
    const nv_cap_t *cap,
    int fd
)
{
    return nv_cap_validate_and_dup_fd(cap, fd);
}

void NV_API_CALL os_nv_cap_close_fd
(
    int fd
)
{
    nv_cap_close_fd(fd);
}

NvS32 NV_API_CALL os_imex_channel_count
(
    void
)
{
    return nv_caps_imex_channel_count();
}

NvS32 NV_API_CALL os_imex_channel_get
(
    NvU64 descriptor
)
{
    return nv_caps_imex_channel_get((int)descriptor);
}

/*
 * Reads the total memory and free memory of a NUMA node from the kernel.
 */
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage
(
    NvS32 node_id,
    NvU64 *free_memory_bytes,
    NvU64 *total_memory_bytes
)
{
    struct pglist_data *pgdat;
    struct zone *zone;
    NvU32 zone_id;

    if (node_id >= MAX_NUMNODES)
    {
        nv_printf(NV_DBG_ERRORS, "Invalid NUMA node ID\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    pgdat = NODE_DATA(node_id);

    *free_memory_bytes = 0;
    *total_memory_bytes = 0;

    for (zone_id = 0; zone_id < MAX_NR_ZONES; zone_id++)
    {
        zone = &(pgdat->node_zones[zone_id]);
        if (!populated_zone(zone))
            continue;
        *free_memory_bytes += (zone_page_state_snapshot(zone, NR_FREE_PAGES) * PAGE_SIZE);
        *total_memory_bytes += (zone->present_pages * PAGE_SIZE);
    }

    return NV_OK;
}

typedef struct os_numa_gpu_mem_hotplug_notifier_s
{
    NvU64 start_pa;
    NvU64 size;
    nv_pci_info_t pci_info;
    struct notifier_block memory_notifier;
} os_numa_gpu_mem_hotplug_notifier_t;

static int os_numa_verify_gpu_memory_zone(struct notifier_block *nb,
                                          unsigned long action, void *data)
{
    os_numa_gpu_mem_hotplug_notifier_t *notifier = container_of(nb,
        os_numa_gpu_mem_hotplug_notifier_t,
        memory_notifier);
    struct memory_notify *mhp = data;
    NvU64 start_pa = PFN_PHYS(mhp->start_pfn);
    NvU64 size = PFN_PHYS(mhp->nr_pages);

    if (action == MEM_GOING_ONLINE)
    {
        // Check if the onlining memory falls in the GPU memory range
        if ((start_pa >= notifier->start_pa) &&
            (start_pa + size) <= (notifier->start_pa + notifier->size))
        {
            /*
             * Verify that the GPU memory NUMA node has memory only in
             * ZONE_MOVABLE before onlining the memory, so that an incorrect
             * auto-online setting doesn't cause the memory to be onlined in
             * a zone where kernel allocations could happen. That would make
             * the GPU memory impossible to hot-unplug, requiring a system
             * reboot.
             */
            if (page_zonenum((pfn_to_page(mhp->start_pfn))) != ZONE_MOVABLE)
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: Failing GPU memory onlining as the onlining zone "
                          "is not movable. pa: 0x%llx size: 0x%llx\n"
                          "NVRM: The NVIDIA GPU %04x:%02x:%02x.%x installed in the system\n"
                          "NVRM: requires auto onlining mode online_movable enabled in\n"
                          "NVRM: /sys/devices/system/memory/auto_online_blocks\n",
                          start_pa, size, notifier->pci_info.domain, notifier->pci_info.bus,
                          notifier->pci_info.slot, notifier->pci_info.function);
                return NOTIFY_BAD;
            }
        }
    }
    return NOTIFY_OK;
}

#define ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS 4
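
/*
 * Worked example for the segmented hotplug below (hypothetical sizes):
 * onlining 64 GB of GPU memory with a 1 GB memblock size gives a segment
 * size of NV_ALIGN_UP(64 GB / 4, 1 GB) == 16 GB, so
 * add_memory_driver_managed() runs four times with a schedule() between
 * calls to avoid soft lockups. If the memblock size cannot be determined,
 * the whole range is added in one call.
 */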

NV_STATUS NV_API_CALL os_numa_add_gpu_memory
(
    void *handle,
    NvU64 offset,
    NvU64 size,
    NvU32 *nodeId
)
{
#if defined(NV_ADD_MEMORY_DRIVER_MANAGED_PRESENT)
    int node = 0;
    nv_linux_state_t *nvl = pci_get_drvdata(handle);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
    int ret = 0;
    NvU64 memblock_size;
    NvU64 size_remaining;
    NvU64 calculated_segment_size;
    NvU64 segment_size;
    NvU64 segment_base;
    os_numa_gpu_mem_hotplug_notifier_t notifier =
    {
        .start_pa = base,
        .size = size,
        .pci_info = nv->pci_info,
        .memory_notifier.notifier_call = os_numa_verify_gpu_memory_zone,
    };

    if (nodeId == NULL)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (bitmap_empty(nvl->coherent_link_info.free_node_bitmap, MAX_NUMNODES))
    {
        return NV_ERR_IN_USE;
    }
    node = find_first_bit(nvl->coherent_link_info.free_node_bitmap, MAX_NUMNODES);
    if (node == MAX_NUMNODES)
    {
        return NV_ERR_INVALID_STATE;
    }

    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_ONLINE_IN_PROGRESS);

    ret = register_memory_notifier(&notifier.memory_notifier);
    if (ret)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Memory hotplug notifier registration failed\n");
        goto failed;
    }

    //
    // Adding all memory at once can take a long time. Split up memory into
    // segments with schedule() in between to prevent soft lockups. Memory
    // segments for add_memory_driver_managed() need to be aligned to the
    // memblock size.
    //
    // If there are any issues splitting into segments, then add all memory
    // at once.
    //
    if (os_numa_memblock_size(&memblock_size) == NV_OK)
    {
        calculated_segment_size = NV_ALIGN_UP(size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
    }
    else
    {
        // Don't split into segments, add all memory at once
        calculated_segment_size = size;
    }

    segment_size = calculated_segment_size;
    segment_base = base;
    size_remaining = size;

    while ((size_remaining > 0) &&
           (ret == 0))
    {
        if (segment_size > size_remaining)
        {
            segment_size = size_remaining;
        }

#ifdef NV_ADD_MEMORY_DRIVER_MANAGED_HAS_MHP_FLAGS_ARG
        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)", MHP_NONE);
#else
        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)");
#endif
        nv_printf(NV_DBG_SETUP, "NVRM: add_memory_driver_managed() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
                  ret, segment_base, segment_size);

        segment_base += segment_size;
        size_remaining -= segment_size;

        // Yield CPU to prevent soft lockups
        schedule();
    }
    unregister_memory_notifier(&notifier.memory_notifier);
    if (ret == 0)
    {
        struct zone *zone = &NODE_DATA(node)->node_zones[ZONE_MOVABLE];
        NvU64 start_pfn = base >> PAGE_SHIFT;
        NvU64 end_pfn = (base + size) >> PAGE_SHIFT;

        /* Verify that the full GPU memory range was onlined */
        if (zone->zone_start_pfn != start_pfn ||
            zone_end_pfn(zone) != end_pfn)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: GPU memory zone movable auto onlining failed!\n");

#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
            // Since zone movable auto onlining failed, the added memory must be removed.
            segment_size = calculated_segment_size;
            segment_base = base;
            size_remaining = size;

            while (size_remaining > 0)
            {
                if (segment_size > size_remaining)
                {
                    segment_size = size_remaining;
                }

#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
                ret = offline_and_remove_memory(node, segment_base, segment_size);
#else
                ret = offline_and_remove_memory(segment_base, segment_size);
#endif
                nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
                          ret, segment_base, segment_size);

                segment_base += segment_size;
                size_remaining -= segment_size;

                // Yield the CPU to prevent soft lockups
                schedule();
            }
#endif
            goto failed;
        }

        /*
         * On systems with the cpuset cgroup controller enabled, memory
         * allocation on this just-hotplugged GPU memory node can fail if
         * cpuset_hotplug_work has not been scheduled yet. cpuset_hotplug_work
         * is where current->mems_allowed is updated, in the path
         * cpuset_hotplug_workfn->update_tasks_nodemask. While cpuset is
         * enabled and current->mems_allowed has not been updated, memory
         * allocation with __GFP_THISNODE and this node id fails. The
         * cpuset_wait_for_hotplug() kernel function could be used to wait for
         * the work to finish, but it is not exported. As a WAR, poll
         * current->mems_allowed in a timed loop until it is updated, while an
         * upstream kernel fix is being explored. Bug 4385903
         */
        if (!node_isset(node, cpuset_current_mems_allowed))
        {
            unsigned long delay;

            delay = jiffies + (HZ / 10); // 100ms
            while (time_before(jiffies, delay) &&
                   !node_isset(node, cpuset_current_mems_allowed))
            {
                os_schedule();
            }

            if (!node_isset(node, cpuset_current_mems_allowed))
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: Hotplugged GPU memory NUMA node: %d "
                          "not set in current->mems_allowed!\n", node);
            }
        }

        *nodeId = node;
        clear_bit(node, nvl->coherent_link_info.free_node_bitmap);
        NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_ONLINE);
        return NV_OK;
    }
    nv_printf(NV_DBG_ERRORS, "NVRM: Memory add failed. base: 0x%llx size: 0x%llx ret: %d\n",
              base, size, ret);
failed:
    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_ONLINE_FAILED);
    return NV_ERR_OPERATING_SYSTEM;
#endif
    return NV_ERR_NOT_SUPPORTED;
}
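//
// offline_numa_memory_callback - Offline and remove hotplugged GPU memory,
// mirroring the segmented add path above. This runs on the per-GPU
// remove_numa_memory_q kthread queue (see os_numa_remove_gpu_memory below);
// the OR of all offline_and_remove_memory() return values is reported back
// through the ret field of the remove_numa_memory_info_t argument.
//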
typedef struct {
    NvU64 base;
    NvU64 size;
    NvU32 nodeId;
    int ret;
} remove_numa_memory_info_t;

static void offline_numa_memory_callback
(
    void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
    int ret = 0;
    NvU64 memblock_size;
    NvU64 size_remaining;
    NvU64 calculated_segment_size;
    NvU64 segment_size;
    NvU64 segment_base;

    //
    // Removing all memory at once can take a long time. Split up memory into
    // segments with schedule() in between to prevent soft lockups. Memory
    // segments for offline_and_remove_memory() need to be aligned to the
    // memblock size.
    //
    // If there are any issues splitting into segments, then remove all memory
    // at once.
    //
    if (os_numa_memblock_size(&memblock_size) == NV_OK)
    {
        calculated_segment_size = NV_ALIGN_UP(pNumaInfo->size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
    }
    else
    {
        // Don't split into segments; remove all memory at once
        calculated_segment_size = pNumaInfo->size;
    }

    segment_size = calculated_segment_size;
    segment_base = pNumaInfo->base;
    size_remaining = pNumaInfo->size;

    while (size_remaining > 0)
    {
        if (segment_size > size_remaining)
        {
            segment_size = size_remaining;
        }

#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
        ret = offline_and_remove_memory(pNumaInfo->nodeId,
                                        segment_base,
                                        segment_size);
#else
        ret = offline_and_remove_memory(segment_base,
                                        segment_size);
#endif
        nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
                  ret, segment_base, segment_size);
        pNumaInfo->ret |= ret;

        segment_base += segment_size;
        size_remaining -= segment_size;

        // Yield the CPU to prevent soft lockups
        schedule();
    }
#endif
}
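//
// os_numa_remove_gpu_memory - Offline and remove the GPU memory that
// os_numa_add_gpu_memory onlined earlier, and mark the NUMA node as free
// again on success.
//
// The offline work is queued to the remove_numa_memory_q kthread and then
// flushed synchronously, so this call does not return until the callback
// above has completed.
//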
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory
(
    void *handle,
    NvU64 offset,
    NvU64 size,
    NvU32 nodeId
)
{
#ifdef NV_ADD_MEMORY_DRIVER_MANAGED_PRESENT
    nv_linux_state_t *nvl = pci_get_drvdata(handle);
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
    remove_numa_memory_info_t numa_info;
    nv_kthread_q_item_t remove_numa_memory_q_item;
    int ret;
#endif

    if (nodeId >= MAX_NUMNODES)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }
    if ((nodeId == NUMA_NO_NODE) || test_bit(nodeId, nvl->coherent_link_info.free_node_bitmap))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS);

#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    numa_info.base = base;
    numa_info.size = size;
    numa_info.nodeId = nodeId;
    numa_info.ret = 0;

    nv_kthread_q_item_init(&remove_numa_memory_q_item,
                           offline_numa_memory_callback,
                           &numa_info);
    nv_kthread_q_schedule_q_item(&nvl->remove_numa_memory_q,
                                 &remove_numa_memory_q_item);
    nv_kthread_q_flush(&nvl->remove_numa_memory_q);

    ret = numa_info.ret;

    if (ret == 0)
    {
        set_bit(nodeId, nvl->coherent_link_info.free_node_bitmap);

        NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_OFFLINE);
        return NV_OK;
    }

    nv_printf(NV_DBG_ERRORS, "NVRM: Memory remove failed. base: 0x%llx size: 0x%llx ret: %d\n",
              base, size, ret);
#endif
    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_OFFLINE_FAILED);
    return NV_ERR_OPERATING_SYSTEM;
#endif
    return NV_ERR_NOT_SUPPORTED;
}

NV_STATUS NV_API_CALL os_offline_page_at_address
(
    NvU64 address
)
{
#if defined(CONFIG_MEMORY_FAILURE)
    int flags = 0;
    int ret;
    NvU64 pfn;
    struct page *page = NV_GET_PAGE_STRUCT(address);

    if (page == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Failed to get page struct for address: 0x%llx\n",
                  address);
        return NV_ERR_INVALID_ARGUMENT;
    }

    pfn = page_to_pfn(page);

#ifdef NV_MEMORY_FAILURE_MF_SW_SIMULATED_DEFINED
    //
    // Set the MF_SW_SIMULATED flag so the Linux kernel can differentiate this
    // from a HW memory failure. HW memory failures cannot be unset via the
    // unpoison_memory() API.
    //
    // Currently, RM does not use unpoison_memory(), so it makes no difference
    // whether or not MF_SW_SIMULATED is set. Regardless, it is semantically
    // more correct to set MF_SW_SIMULATED.
    //
    flags |= MF_SW_SIMULATED;
#endif

#ifdef NV_MEMORY_FAILURE_HAS_TRAPNO_ARG
    ret = memory_failure(pfn, 0, flags);
#else
    ret = memory_failure(pfn, flags);
#endif

    if (ret != 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: page offlining failed. address: 0x%llx pfn: 0x%llx ret: %d\n",
                  address, pfn, ret);
        return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
#else // !defined(CONFIG_MEMORY_FAILURE)
    nv_printf(NV_DBG_ERRORS, "NVRM: memory_failure() not supported by kernel. page offlining failed. address: 0x%llx\n",
              address);
    return NV_ERR_NOT_SUPPORTED;
#endif
}

void* NV_API_CALL os_get_pid_info(void)
{
    return get_task_pid(current, PIDTYPE_PID);
}

void NV_API_CALL os_put_pid_info(void *pid_info)
{
    if (pid_info != NULL)
        put_pid(pid_info);
}

NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid)
{
    if ((pid_info == NULL) || (ns_pid == NULL))
        return NV_ERR_INVALID_ARGUMENT;

    *ns_pid = pid_vnr((struct pid *)pid_info);

    // pid_vnr() returns 0 if the PID is not visible in the current namespace
    if (*ns_pid == 0)
        return NV_ERR_OBJECT_NOT_FOUND;

    return NV_OK;
}
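//
// Usage sketch for the PID helpers above (hypothetical caller, not part of
// this file):
//
//     void *pid_info = os_get_pid_info();
//     NvU32 ns_pid;
//
//     if (os_find_ns_pid(pid_info, &ns_pid) == NV_OK)
//     {
//         // ns_pid is the caller's PID as seen from the current namespace
//     }
//     os_put_pid_info(pid_info);
//
// os_get_pid_info() takes a reference on the struct pid of current, so each
// call must be balanced with os_put_pid_info() to drop that reference.
//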