/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>  // for MODULE_FIRMWARE

// must precede "nv.h" and "nv-firmware.h" includes
#define NV_FIRMWARE_PATH_FOR_FILENAME(filename)  "nvidia/" NV_VERSION_STRING "/" filename
#define NV_FIRMWARE_DECLARE_GSP_FILENAME(filename) \
    MODULE_FIRMWARE(NV_FIRMWARE_PATH_FOR_FILENAME(filename));
#include "nv-firmware.h"

#include "nvmisc.h"
#include "os-interface.h"
#include "nv-linux.h"
#include "nv-p2p.h"
#include "nv-reg.h"
#include "nv-msi.h"
#include "nv-pci-table.h"

#if defined(NV_UVM_ENABLE)
#include "nv_uvm_interface.h"
#endif

#if defined(NV_VGPU_KVM_BUILD)
#include "nv-vgpu-vfio-interface.h"
#endif

#include "nvlink_proto.h"
#include "nvlink_caps.h"

#include "nv-frontend.h"
#include "nv-hypervisor.h"
#include "nv-ibmnpu.h"
#include "nv-rsync.h"
#include "nv-kthread-q.h"
#include "nv-pat.h"
#include "nv-dmabuf.h"

#if !defined(CONFIG_RETPOLINE)
#include "nv-retpoline.h"
#endif

#include <linux/firmware.h>

#include <sound/core.h>             /* HDA struct snd_card */

#include <asm/cache.h>

#if defined(NV_SOUND_HDAUDIO_H_PRESENT)
#include "sound/hdaudio.h"
#endif

#if defined(NV_SOUND_HDA_CODEC_H_PRESENT)
#include <sound/core.h>
#include <sound/hda_codec.h>
#include <sound/hda_verbs.h>
#endif

#if defined(NV_SEQ_READ_ITER_PRESENT)
#include <linux/uio.h>
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#endif

#include <linux/dmi.h>              /* System DMI info */

#include <linux/ioport.h>

#include "conftest/patches.h"

#define RM_THRESHOLD_TOTAL_IRQ_COUNT     100000
#define RM_THRESHOLD_UNHANDLED_IRQ_COUNT 99900
#define RM_UNHANDLED_TIMEOUT_US          100000
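
/*
 * Worked example of the thresholds above, as consumed by nvidia_isr(): each
 * tracked IRQ accumulates a total count, and once it reaches
 * RM_THRESHOLD_TOTAL_IRQ_COUNT (100,000) the window is evaluated; if more
 * than RM_THRESHOLD_UNHANDLED_IRQ_COUNT (99,900, i.e. 99.9%) of those
 * interrupts went unhandled, an error is logged before the counters reset.
 * A quiet gap longer than RM_UNHANDLED_TIMEOUT_US (100,000 us = 0.1 s)
 * restarts the unhandled streak at 1.
 */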

const NvBool nv_is_rm_firmware_supported_os = NV_TRUE;

// Deprecated, use NV_REG_ENABLE_GPU_FIRMWARE instead
char *rm_firmware_active = NULL;
NV_MODULE_STRING_PARAMETER(rm_firmware_active);

/*
 * Global NVIDIA capability state, for GPU driver
 */
nv_cap_t *nvidia_caps_root = NULL;

/*
 * our global state; one per device
 */
NvU32 num_nv_devices = 0;
NvU32 num_probed_nv_devices = 0;

nv_linux_state_t *nv_linux_devices;

/*
 * And one for the control device
 */
nv_linux_state_t nv_ctl_device = { { 0 } };
extern NvU32 nv_dma_remap_peer_mmio;

nv_kthread_q_t nv_kthread_q;
nv_kthread_q_t nv_deferred_close_kthread_q;

struct rw_semaphore nv_system_pm_lock;

#if defined(CONFIG_PM)
static nv_power_state_t nv_system_power_state;
static nv_pm_action_depth_t nv_system_pm_action_depth;
struct semaphore nv_system_power_state_lock;
#endif

void *nvidia_p2p_page_t_cache;
static void *nvidia_pte_t_cache;
void *nvidia_stack_t_cache;
static nvidia_stack_t *__nv_init_sp;

static int nv_tce_bypass_mode = NV_TCE_BYPASS_MODE_DEFAULT;

struct semaphore nv_linux_devices_lock;

static NvTristate nv_chipset_is_io_coherent = NV_TRISTATE_INDETERMINATE;

// True if all the successfully probed devices support ATS
// Assigned at device probe (module init) time
NvBool nv_ats_supported = NVCPU_IS_PPC64LE;

// allow an easy way to convert all debug printfs related to events
// back and forth between 'info' and 'errors'
#if defined(NV_DBG_EVENTS)
#define NV_DBG_EVENTINFO NV_DBG_ERRORS
#else
#define NV_DBG_EVENTINFO NV_DBG_INFO
#endif

#if defined(HDA_MAX_CODECS)
#define NV_HDA_MAX_CODECS HDA_MAX_CODECS
#else
#define NV_HDA_MAX_CODECS 8
#endif

/***
 *** STATIC functions, only in this file
 ***/

/* nvos_ functions.. do not take a state device parameter */
static int nvos_count_devices(void);

static nv_alloc_t *nvos_create_alloc(struct device *, int);
static int nvos_free_alloc(nv_alloc_t *);

/***
 *** EXPORTS to Linux Kernel
 ***/

static irqreturn_t nvidia_isr_common_bh(void *);
static void nvidia_isr_bh_unlocked(void *);
static int nvidia_ctl_open(struct inode *, struct file *);
static int nvidia_ctl_close(struct inode *, struct file *);

const char *nv_device_name = MODULE_NAME;
static const char *nvidia_stack_cache_name = MODULE_NAME "_stack_cache";
static const char *nvidia_pte_cache_name = MODULE_NAME "_pte_cache";
static const char *nvidia_p2p_page_cache_name = MODULE_NAME "_p2p_page_cache";

static int nvidia_open(struct inode *, struct file *);
static int nvidia_close(struct inode *, struct file *);
static unsigned int nvidia_poll(struct file *, poll_table *);
static int nvidia_ioctl(struct inode *, struct file *, unsigned int, unsigned long);

/* character device entry points */
nvidia_module_t nv_fops = {
    .owner       = THIS_MODULE,
    .module_name = MODULE_NAME,
    .instance    = MODULE_INSTANCE_NUMBER,
    .open        = nvidia_open,
    .close       = nvidia_close,
    .ioctl       = nvidia_ioctl,
    .mmap        = nvidia_mmap,
    .poll        = nvidia_poll,
};
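
/*
 * Illustrative dispatch path, not additional driver code: nv_drivers_init()
 * below passes this table to nvidia_register_module(&nv_fops), after which
 * a userspace open() of a device node (conventionally /dev/nvidiactl or
 * /dev/nvidia0) reaches nvidia_open(), and close/ioctl/mmap/poll are routed
 * the same way. nvidia_open() then distinguishes the control device from
 * actual GPUs via nv_is_control_device() and find_minor().
 */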

#if defined(CONFIG_PM)
static int nv_pmops_suspend(struct device *dev);
static int nv_pmops_resume(struct device *dev);
static int nv_pmops_freeze(struct device *dev);
static int nv_pmops_thaw(struct device *dev);
static int nv_pmops_restore(struct device *dev);
static int nv_pmops_poweroff(struct device *dev);
static int nv_pmops_runtime_suspend(struct device *dev);
static int nv_pmops_runtime_resume(struct device *dev);

struct dev_pm_ops nv_pm_ops = {
    .suspend         = nv_pmops_suspend,
    .resume          = nv_pmops_resume,
    .freeze          = nv_pmops_freeze,
    .thaw            = nv_pmops_thaw,
    .poweroff        = nv_pmops_poweroff,
    .restore         = nv_pmops_restore,
    .runtime_suspend = nv_pmops_runtime_suspend,
    .runtime_resume  = nv_pmops_runtime_resume,
};
#endif

/***
 *** see nv.h for functions exported to other parts of resman
 ***/

/***
 *** STATIC functions
 ***/

#if defined(NVCPU_X86_64)
#define NV_AMD_SEV_BIT BIT(1)

static
NvBool nv_is_sev_supported(
    void
)
{
    unsigned int eax, ebx, ecx, edx;

    /* Check for the SME/SEV support leaf */
    eax = 0x80000000;
    ecx = 0;
    native_cpuid(&eax, &ebx, &ecx, &edx);
    if (eax < 0x8000001f)
        return NV_FALSE;

    eax = 0x8000001f;
    ecx = 0;
    native_cpuid(&eax, &ebx, &ecx, &edx);

    /* Check whether SEV is supported */
    if (!(eax & NV_AMD_SEV_BIT))
        return NV_FALSE;

    return NV_TRUE;
}
#endif

static
void nv_sev_init(
    void
)
{
#if defined(MSR_AMD64_SEV) && defined(NVCPU_X86_64)
    NvU32 lo_val, hi_val;

    if (!nv_is_sev_supported())
        return;

    rdmsr(MSR_AMD64_SEV, lo_val, hi_val);

    os_sev_status = lo_val;
#if defined(MSR_AMD64_SEV_ENABLED)
    os_sev_enabled = (os_sev_status & MSR_AMD64_SEV_ENABLED);
#endif
#endif
}
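
/*
 * Sketch of the CPUID sequence above, assuming AMD's documented layout:
 * leaf 0x80000000 reports the highest extended leaf in EAX, so a maximum
 * below 0x8000001f means the SME/SEV leaf does not exist; leaf 0x8000001f
 * then reports SEV capability in EAX bit 1 (NV_AMD_SEV_BIT). The rdmsr() in
 * nv_sev_init() is still needed afterwards, because CPUID only says the
 * feature exists, while MSR_AMD64_SEV says whether it is enabled.
 */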

static
nv_alloc_t *nvos_create_alloc(
    struct device *dev,
    int num_pages
)
{
    nv_alloc_t *at;
    unsigned int pt_size;
    unsigned int i;

    NV_KZALLOC(at, sizeof(nv_alloc_t));
    if (at == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate alloc info\n");
        return NULL;
    }

    at->dev = dev;
    pt_size = num_pages * sizeof(nvidia_pte_t *);

    if (os_alloc_mem((void **)&at->page_table, pt_size) != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate page table\n");
        NV_KFREE(at, sizeof(nv_alloc_t));
        return NULL;
    }

    memset(at->page_table, 0, pt_size);
    at->num_pages = num_pages;
    NV_ATOMIC_SET(at->usage_count, 0);

    for (i = 0; i < at->num_pages; i++)
    {
        at->page_table[i] = NV_KMEM_CACHE_ALLOC(nvidia_pte_t_cache);
        if (at->page_table[i] == NULL)
        {
            nv_printf(NV_DBG_ERRORS,
                      "NVRM: failed to allocate page table entry\n");
            nvos_free_alloc(at);
            return NULL;
        }
        memset(at->page_table[i], 0, sizeof(nvidia_pte_t));
    }

    at->pid = os_get_current_process();

    return at;
}

static
int nvos_free_alloc(
    nv_alloc_t *at
)
{
    unsigned int i;

    if (at == NULL)
        return -1;

    if (NV_ATOMIC_READ(at->usage_count))
        return 1;

    for (i = 0; i < at->num_pages; i++)
    {
        if (at->page_table[i] != NULL)
            NV_KMEM_CACHE_FREE(at->page_table[i], nvidia_pte_t_cache);
    }
    os_free_mem(at->page_table);

    NV_KFREE(at, sizeof(nv_alloc_t));

    return 0;
}
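
/*
 * Typical lifecycle of the two helpers above, in sketch form only:
 *
 *     nv_alloc_t *at = nvos_create_alloc(dev, num_pages);
 *     if (at != NULL)
 *     {
 *         // ... populate and use at->page_table[0..num_pages-1] ...
 *         nvos_free_alloc(at);
 *     }
 *
 * nvos_free_alloc() refuses to free (returns 1) while at->usage_count is
 * non-zero, so all references must be dropped before teardown.
 */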

static void
nv_module_resources_exit(nv_stack_t *sp)
{
    nv_kmem_cache_free_stack(sp);

    NV_KMEM_CACHE_DESTROY(nvidia_p2p_page_t_cache);
    NV_KMEM_CACHE_DESTROY(nvidia_pte_t_cache);
    NV_KMEM_CACHE_DESTROY(nvidia_stack_t_cache);
}

static int __init
nv_module_resources_init(nv_stack_t **sp)
{
    int rc = -ENOMEM;

    nvidia_stack_t_cache = NV_KMEM_CACHE_CREATE(nvidia_stack_cache_name,
                                                nvidia_stack_t);
    if (nvidia_stack_t_cache == NULL)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: nvidia_stack_t cache allocation failed.\n");
        goto exit;
    }

    nvidia_pte_t_cache = NV_KMEM_CACHE_CREATE(nvidia_pte_cache_name,
                                              nvidia_pte_t);
    if (nvidia_pte_t_cache == NULL)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: nvidia_pte_t cache allocation failed.\n");
        goto exit;
    }

    nvidia_p2p_page_t_cache = NV_KMEM_CACHE_CREATE(nvidia_p2p_page_cache_name,
                                                   nvidia_p2p_page_t);
    if (nvidia_p2p_page_t_cache == NULL)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: nvidia_p2p_page_t cache allocation failed.\n");
        goto exit;
    }

    rc = nv_kmem_cache_alloc_stack(sp);
    if (rc < 0)
    {
        goto exit;
    }

exit:
    if (rc < 0)
    {
        nv_kmem_cache_free_stack(*sp);

        NV_KMEM_CACHE_DESTROY(nvidia_p2p_page_t_cache);
        NV_KMEM_CACHE_DESTROY(nvidia_pte_t_cache);
        NV_KMEM_CACHE_DESTROY(nvidia_stack_t_cache);
    }

    return rc;
}

static void
nvlink_drivers_exit(void)
{
#if NVCPU_IS_64_BITS
    nvswitch_exit();
#endif

#if defined(NVCPU_PPC64LE)
    ibmnpu_exit();
#endif

    nvlink_core_exit();
}

static int __init
nvlink_drivers_init(void)
{
    int rc = 0;

    rc = nvlink_core_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: NVLink core init failed.\n");
        return rc;
    }

#if defined(NVCPU_PPC64LE)
    rc = ibmnpu_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: IBM NPU init failed.\n");
        nvlink_core_exit();
        return rc;
    }
#endif

#if NVCPU_IS_64_BITS
    rc = nvswitch_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: NVSwitch init failed.\n");
#if defined(NVCPU_PPC64LE)
        ibmnpu_exit();
#endif
        nvlink_core_exit();
    }
#endif

    return rc;
}

static void
nv_module_state_exit(nv_stack_t *sp)
{
    nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);

    nv_teardown_pat_support();

    nv_kthread_q_stop(&nv_deferred_close_kthread_q);
    nv_kthread_q_stop(&nv_kthread_q);

    nv_lock_destroy_locks(sp, nv);
}

static int
nv_module_state_init(nv_stack_t *sp)
{
    int rc;
    nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);

    nv->os_state = (void *)&nv_ctl_device;

    if (!nv_lock_init_locks(sp, nv))
    {
        return -ENOMEM;
    }

    rc = nv_kthread_q_init(&nv_kthread_q, "nv_queue");
    if (rc != 0)
    {
        goto exit;
    }

    rc = nv_kthread_q_init(&nv_deferred_close_kthread_q, "nv_queue");
    if (rc != 0)
    {
        nv_kthread_q_stop(&nv_kthread_q);
        goto exit;
    }

    rc = nv_init_pat_support(sp);
    if (rc < 0)
    {
        nv_kthread_q_stop(&nv_deferred_close_kthread_q);
        nv_kthread_q_stop(&nv_kthread_q);
        goto exit;
    }

    nv_linux_devices = NULL;
    NV_INIT_MUTEX(&nv_linux_devices_lock);
    init_rwsem(&nv_system_pm_lock);

#if defined(CONFIG_PM)
    NV_INIT_MUTEX(&nv_system_power_state_lock);
    nv_system_power_state = NV_POWER_STATE_RUNNING;
    nv_system_pm_action_depth = NV_PM_ACTION_DEPTH_DEFAULT;
#endif

    NV_SPIN_LOCK_INIT(&nv_ctl_device.snapshot_timer_lock);

exit:
    if (rc < 0)
    {
        nv_lock_destroy_locks(sp, nv);
    }

    return rc;
}

static void __init
nv_registry_keys_init(nv_stack_t *sp)
{
    NV_STATUS status;
    nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);
    NvU32 data;

    /*
     * Determine the TCE bypass mode here so it can be used during
     * device probe. Also determine whether we should allow
     * user-mode NUMA onlining of device memory.
     */
    if (NVCPU_IS_PPC64LE)
    {
        status = rm_read_registry_dword(sp, nv,
                                        NV_REG_TCE_BYPASS_MODE,
                                        &data);
        if ((status == NV_OK) && ((int)data != NV_TCE_BYPASS_MODE_DEFAULT))
        {
            nv_tce_bypass_mode = data;
        }

        if (NVreg_EnableUserNUMAManagement)
        {
            /* Force on the core RM registry key to match. */
            status = rm_write_registry_dword(sp, nv, "RMNumaOnlining", 1);
            WARN_ON(status != NV_OK);
        }
    }

    status = rm_read_registry_dword(sp, nv, NV_DMA_REMAP_PEER_MMIO, &data);
    if (status == NV_OK)
    {
        nv_dma_remap_peer_mmio = data;
    }
}

static void __init
nv_report_applied_patches(void)
{
    unsigned i;

    for (i = 0; __nv_patches[i].short_description; i++)
    {
        if (i == 0)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: Applied patches:\n");
        }

        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Patch #%d: %s\n", i + 1, __nv_patches[i].short_description);
    }
}

static void
nv_drivers_exit(void)
{
    nv_pci_unregister_driver();

    nvidia_unregister_module(&nv_fops);
}

static int __init
nv_drivers_init(void)
{
    int rc;

    rc = nvidia_register_module(&nv_fops);
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: failed to register character device.\n");
        return rc;
    }

    rc = nv_pci_register_driver();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA PCI devices found.\n");
        rc = -ENODEV;
        goto exit;
    }

exit:
    if (rc < 0)
    {
        nvidia_unregister_module(&nv_fops);
    }

    return rc;
}

static void
nv_module_exit(nv_stack_t *sp)
{
    nv_module_state_exit(sp);

    rm_shutdown_rm(sp);

    nv_destroy_rsync_info();
    nvlink_drivers_exit();

    nv_cap_drv_exit();

    nv_module_resources_exit(sp);
}

static int __init
nv_module_init(nv_stack_t **sp)
{
    int rc;

    rc = nv_module_resources_init(sp);
    if (rc < 0)
    {
        return rc;
    }

    rc = nv_cap_drv_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: nv-cap-drv init failed.\n");
        goto cap_drv_exit;
    }

    rc = nvlink_drivers_init();
    if (rc < 0)
    {
        goto cap_drv_exit;
    }

    nv_init_rsync_info();
    nv_sev_init();

    if (!rm_init_rm(*sp))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: rm_init_rm() failed!\n");
        rc = -EIO;
        goto nvlink_exit;
    }

    rc = nv_module_state_init(*sp);
    if (rc < 0)
    {
        goto init_rm_exit;
    }

    return rc;

init_rm_exit:
    rm_shutdown_rm(*sp);

nvlink_exit:
    nv_destroy_rsync_info();
    nvlink_drivers_exit();

cap_drv_exit:
    nv_cap_drv_exit();
    nv_module_resources_exit(*sp);

    return rc;
}
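
/*
 * Initialization order at a glance: resources (kmem caches and an
 * alt-stack), then the capability driver, the NVLink stack, rsync/SEV
 * state, core RM via rm_init_rm(), and finally module-wide state (locks,
 * kthread queues, PAT support). nv_module_exit() unwinds the same steps in
 * reverse, which is also the order in which the error labels above fall
 * through.
 */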

/*
 * In this function we check for the cases where GPU exclusion is not
 * honored, and issue a warning.
 *
 * Only GPUs that support a mechanism to query UUID prior to
 * initializing the GPU can be excluded, so that we can detect and
 * exclude them during device probe. This function checks that an
 * initialized GPU was not specified in the exclusion list, and issues a
 * warning if so.
 */
static void
nv_assert_not_in_gpu_exclusion_list(
    nvidia_stack_t *sp,
    nv_state_t *nv
)
{
    char *uuid = rm_get_gpu_uuid(sp, nv);

    if (uuid == NULL)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "Unable to read UUID");
        return;
    }

    if (nv_is_uuid_in_gpu_exclusion_list(uuid))
    {
        NV_DEV_PRINTF(NV_DBG_WARNINGS, nv,
                      "Could not exclude GPU %s because PBI is not supported\n",
                      uuid);
        WARN_ON(1);
    }

    os_free_mem(uuid);

    return;
}

static int __init nv_caps_root_init(void)
{
    nvidia_caps_root = os_nv_cap_init("driver/" MODULE_NAME);

    return (nvidia_caps_root == NULL) ? -ENOENT : 0;
}

static void nv_caps_root_exit(void)
{
    os_nv_cap_destroy_entry(nvidia_caps_root);
    nvidia_caps_root = NULL;
}
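
/*
 * The capability hierarchy is rooted at "driver/" MODULE_NAME (with the
 * usual MODULE_NAME definition this is "driver/nvidia"); other parts of the
 * driver create their per-feature capability entries underneath
 * nvidia_caps_root.
 */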

int __init nvidia_init_module(void)
{
    int rc;
    NvU32 count;
    nvidia_stack_t *sp = NULL;
    const NvBool is_nvswitch_present = os_is_nvswitch_present();

    nv_memdbg_init();

    rc = nv_procfs_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize procfs.\n");
        return rc;
    }

    rc = nv_caps_root_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize capabilities.\n");
        goto procfs_exit;
    }

    rc = nv_module_init(&sp);
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
        goto caps_root_exit;
    }

    count = nvos_count_devices();
    if ((count == 0) && (!is_nvswitch_present))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA GPU found.\n");
        rc = -ENODEV;
        goto module_exit;
    }

    rc = nv_drivers_init();
    if (rc < 0)
    {
        goto module_exit;
    }

    if (num_probed_nv_devices != count)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: The NVIDIA probe routine was not called for %d device(s).\n",
                  count - num_probed_nv_devices);
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: This can occur when a driver such as: \n"
                  "NVRM: nouveau, rivafb, nvidiafb or rivatv "
                  "\nNVRM: was loaded and obtained ownership of the NVIDIA device(s).\n");
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Try unloading the conflicting kernel module (and/or\n"
                  "NVRM: reconfigure your kernel without the conflicting\n"
                  "NVRM: driver(s)), then try loading the NVIDIA kernel module\n"
                  "NVRM: again.\n");
    }

    if ((num_probed_nv_devices == 0) && (!is_nvswitch_present))
    {
        rc = -ENODEV;
        nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA devices probed.\n");
        goto drivers_exit;
    }

    if (num_probed_nv_devices != num_nv_devices)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: The NVIDIA probe routine failed for %d device(s).\n",
                  num_probed_nv_devices - num_nv_devices);
    }

    if ((num_nv_devices == 0) && (!is_nvswitch_present))
    {
        rc = -ENODEV;
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: None of the NVIDIA devices were initialized.\n");
        goto drivers_exit;
    }

    /*
     * Initialize registry keys after PCI driver registration has
     * completed successfully to support per-device module
     * parameters.
     */
    nv_registry_keys_init(sp);

    nv_report_applied_patches();

    nv_printf(NV_DBG_ERRORS, "NVRM: loading %s\n", pNVRM_ID);

#if defined(NV_UVM_ENABLE)
    rc = nv_uvm_init();
    if (rc != 0)
    {
        goto drivers_exit;
    }
#endif

    __nv_init_sp = sp;

    return 0;

drivers_exit:
    nv_drivers_exit();

module_exit:
    nv_module_exit(sp);

caps_root_exit:
    nv_caps_root_exit();

procfs_exit:
    nv_procfs_exit();

    return rc;
}

void nvidia_exit_module(void)
{
    nvidia_stack_t *sp = __nv_init_sp;

#if defined(NV_UVM_ENABLE)
    nv_uvm_exit();
#endif

    nv_drivers_exit();

    nv_module_exit(sp);

    nv_caps_root_exit();

    nv_procfs_exit();

    nv_memdbg_exit();
}

static void *nv_alloc_file_private(void)
{
    nv_linux_file_private_t *nvlfp;
    unsigned int i;

    NV_KZALLOC(nvlfp, sizeof(nv_linux_file_private_t));
    if (!nvlfp)
        return NULL;

    if (rm_is_altstack_in_use())
    {
        for (i = 0; i < NV_FOPS_STACK_INDEX_COUNT; ++i)
        {
            NV_INIT_MUTEX(&nvlfp->fops_sp_lock[i]);
        }
    }

    init_waitqueue_head(&nvlfp->waitqueue);
    NV_SPIN_LOCK_INIT(&nvlfp->fp_lock);

    return nvlfp;
}

static void nv_free_file_private(nv_linux_file_private_t *nvlfp)
{
    nvidia_event_t *nvet;

    if (nvlfp == NULL)
        return;

    for (nvet = nvlfp->event_data_head; nvet != NULL; nvet = nvlfp->event_data_head)
    {
        nvlfp->event_data_head = nvlfp->event_data_head->next;
        NV_KFREE(nvet, sizeof(nvidia_event_t));
    }

    if (nvlfp->mmap_context.page_array != NULL)
    {
        os_free_mem(nvlfp->mmap_context.page_array);
    }

    NV_KFREE(nvlfp, sizeof(nv_linux_file_private_t));
}

static int nv_is_control_device(
    struct inode *inode
)
{
    return (minor((inode)->i_rdev) == NV_CONTROL_DEVICE_MINOR);
}

/*
 * Search the global list of nv devices for the one with the given minor device
 * number. If found, nvl is returned with nvl->ldata_lock taken.
 */
static nv_linux_state_t *find_minor(NvU32 minor)
{
    nv_linux_state_t *nvl;

    LOCK_NV_LINUX_DEVICES();
    nvl = nv_linux_devices;
    while (nvl != NULL)
    {
        if (nvl->minor_num == minor)
        {
            down(&nvl->ldata_lock);
            break;
        }
        nvl = nvl->next;
    }

    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

/*
 * Search the global list of nv devices for the one with the given gpu_id.
 * If found, nvl is returned with nvl->ldata_lock taken.
 */
static nv_linux_state_t *find_gpu_id(NvU32 gpu_id)
{
    nv_linux_state_t *nvl;

    LOCK_NV_LINUX_DEVICES();
    nvl = nv_linux_devices;
    while (nvl != NULL)
    {
        nv_state_t *nv = NV_STATE_PTR(nvl);
        if (nv->gpu_id == gpu_id)
        {
            down(&nvl->ldata_lock);
            break;
        }
        nvl = nvl->next;
    }

    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}
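
/*
 * Locking pattern shared by the find_*() helpers here: the device list is
 * only walked under LOCK_NV_LINUX_DEVICES(), and a successful lookup
 * returns with nvl->ldata_lock held. A caller is therefore expected to
 * release it when done, for example:
 *
 *     nv_linux_state_t *nvl = find_minor(minor);
 *     if (nvl != NULL)
 *     {
 *         // ... operate on the locked device ...
 *         up(&nvl->ldata_lock);
 *     }
 */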

/*
 * Search the global list of nv devices for the one with the given UUID. Devices
 * with missing UUID information are ignored. If found, nvl is returned with
 * nvl->ldata_lock taken.
 */
nv_linux_state_t *find_uuid(const NvU8 *uuid)
{
    nv_linux_state_t *nvl = NULL;
    nv_state_t *nv;
    const NvU8 *dev_uuid;

    LOCK_NV_LINUX_DEVICES();

    for (nvl = nv_linux_devices; nvl; nvl = nvl->next)
    {
        nv = NV_STATE_PTR(nvl);
        down(&nvl->ldata_lock);
        dev_uuid = nv_get_cached_uuid(nv);
        if (dev_uuid && memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
            goto out;
        up(&nvl->ldata_lock);
    }

out:
    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

/*
 * Search the global list of nv devices. The search logic is:
 *
 * 1) If any device has the given UUID, return it
 *
 * 2) If no device has the given UUID but at least one device is missing
 *    its UUID (for example because rm_init_adapter has not run on it yet),
 *    return that device.
 *
 * 3) If no device has the given UUID and all UUIDs are present, return NULL.
 *
 * In cases 1 and 2, nvl is returned with nvl->ldata_lock taken.
 *
 * The reason for this weird logic is because UUIDs aren't always available. See
 * bug 1642200.
 */
static nv_linux_state_t *find_uuid_candidate(const NvU8 *uuid)
{
    nv_linux_state_t *nvl = NULL;
    nv_state_t *nv;
    const NvU8 *dev_uuid;
    int use_missing;
    int has_missing = 0;

    LOCK_NV_LINUX_DEVICES();

    /*
     * Take two passes through the list. The first pass just looks for the UUID.
     * The second looks for the target or missing UUIDs. It would be nice if
     * this could be done in a single pass by remembering which nvls are missing
     * UUIDs, but we have to hold the nvl lock after we check for the UUID.
     */
    for (use_missing = 0; use_missing <= 1; use_missing++)
    {
        for (nvl = nv_linux_devices; nvl; nvl = nvl->next)
        {
            nv = NV_STATE_PTR(nvl);
            down(&nvl->ldata_lock);
            dev_uuid = nv_get_cached_uuid(nv);
            if (dev_uuid)
            {
                /* Case 1: If a device has the given UUID, return it */
                if (memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
                    goto out;
            }
            else
            {
                /* Case 2: If no device has the given UUID but at least one
                 * device is missing its UUID, return that device. */
                if (use_missing)
                    goto out;
                has_missing = 1;
            }
            up(&nvl->ldata_lock);
        }

        /* Case 3: If no device has the given UUID and all UUIDs are present,
         * return NULL. */
        if (!has_missing)
            break;
    }

out:
    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

void nv_dev_free_stacks(nv_linux_state_t *nvl)
{
    NvU32 i;
    for (i = 0; i < NV_DEV_STACK_COUNT; i++)
    {
        if (nvl->sp[i])
        {
            nv_kmem_cache_free_stack(nvl->sp[i]);
            nvl->sp[i] = NULL;
        }
    }
}

static int nv_dev_alloc_stacks(nv_linux_state_t *nvl)
{
    NvU32 i;
    int rc;

    for (i = 0; i < NV_DEV_STACK_COUNT; i++)
    {
        rc = nv_kmem_cache_alloc_stack(&nvl->sp[i]);
        if (rc != 0)
        {
            nv_dev_free_stacks(nvl);
            return rc;
        }
    }

    return 0;
}

static int validate_numa_start_state(nv_linux_state_t *nvl)
{
    int rc = 0;
    int numa_status = nv_get_numa_status(nvl);

    if (numa_status != NV_IOCTL_NUMA_STATUS_DISABLED)
    {
        if (nv_ctl_device.numa_memblock_size == 0)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: numa memblock size of zero "
                      "found during device start");
            rc = -EINVAL;
        }
        else
        {
            /* Keep the individual devices consistent with the control device */
            nvl->numa_memblock_size = nv_ctl_device.numa_memblock_size;
        }
    }

    return rc;
}

NV_STATUS NV_API_CALL nv_get_num_dpaux_instances(nv_state_t *nv, NvU32 *num_instances)
{
    *num_instances = nv->num_dpaux_instance;
    return NV_OK;
}

void NV_API_CALL
nv_schedule_uvm_isr(nv_state_t *nv)
{
#if defined(NV_UVM_ENABLE)
    nv_uvm_event_interrupt(nv_get_cached_uuid(nv));
#endif
}

/*
 * Brings up the device on the first file open. Assumes nvl->ldata_lock is held.
 */
static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    NvU32 msi_config = 0;
#endif
    int rc = 0;
    NvBool kthread_init = NV_FALSE;
    NvBool power_ref = NV_FALSE;

    rc = nv_get_rsync_info();
    if (rc != 0)
    {
        return rc;
    }

    rc = validate_numa_start_state(nvl);
    if (rc != 0)
    {
        goto failed;
    }

    if (dev_is_pci(nvl->dev) && (nv->pci_info.device_id == 0))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: open of non-existent GPU with minor number %d\n", nvl->minor_num);
        rc = -ENXIO;
        goto failed;
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE) != NV_OK)
        {
            rc = -EINVAL;
            goto failed;
        }
        power_ref = NV_TRUE;
    }
    else
    {
        if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE) != NV_OK)
        {
            rc = -EINVAL;
            goto failed;
        }
        power_ref = NV_TRUE;
    }

    rc = nv_init_ibmnpu_devices(nv);
    if (rc != 0)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: failed to initialize ibmnpu devices attached to GPU with minor number %d\n",
                  nvl->minor_num);
        goto failed;
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        rc = nv_dev_alloc_stacks(nvl);
        if (rc != 0)
            goto failed;
    }
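
    /*
     * The interrupt setup below prefers MSI-X, then MSI, then legacy INTx:
     * nv_init_msix() is attempted first when the PCI capability is present,
     * nv_init_msi() only if MSI-X did not claim the device, and a PCI
     * device left with none of these and no interrupt_line is rejected
     * outright a little further down.
     */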
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    if (dev_is_pci(nvl->dev))
    {
        if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
        {
            rm_read_registry_dword(sp, nv, NV_REG_ENABLE_MSI, &msi_config);
            if (msi_config == 1)
            {
                if (pci_find_capability(nvl->pci_dev, PCI_CAP_ID_MSIX))
                {
                    nv_init_msix(nv);
                }
                if (pci_find_capability(nvl->pci_dev, PCI_CAP_ID_MSI) &&
                    !(nv->flags & NV_FLAG_USES_MSIX))
                {
                    nv_init_msi(nv);
                }
            }
        }
    }
#endif

    if (((!(nv->flags & NV_FLAG_USES_MSI)) && (!(nv->flags & NV_FLAG_USES_MSIX)))
        && (nv->interrupt_line == 0) && !(nv->flags & NV_FLAG_SOC_DISPLAY)
        && !(nv->flags & NV_FLAG_SOC_IGPU))
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                      "No interrupts of any type are available. Cannot use this GPU.\n");
        rc = -EIO;
        goto failed;
    }

    rc = 0;
    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        if (nv->flags & NV_FLAG_SOC_DISPLAY)
        {
        }
        else if (!(nv->flags & NV_FLAG_USES_MSIX))
        {
            rc = request_threaded_irq(nv->interrupt_line, nvidia_isr,
                                      nvidia_isr_kthread_bh, nv_default_irq_flags(nv),
                                      nv_device_name, (void *)nvl);
        }
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
        else
        {
            rc = nv_request_msix_irq(nvl);
        }
#endif
    }
    if (rc != 0)
    {
        if ((nv->interrupt_line != 0) && (rc == -EBUSY))
        {
            NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                          "Tried to get IRQ %d, but another driver\n",
                          (unsigned int) nv->interrupt_line);
            nv_printf(NV_DBG_ERRORS, "NVRM: has it and is not sharing it.\n");
            nv_printf(NV_DBG_ERRORS, "NVRM: You may want to verify that no audio driver");
            nv_printf(NV_DBG_ERRORS, " is using the IRQ.\n");
        }
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "request_irq() failed (%d)\n", rc);
        goto failed;
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        rc = os_alloc_mutex(&nvl->isr_bh_unlocked_mutex);
        if (rc != 0)
            goto failed;
        nv_kthread_q_item_init(&nvl->bottom_half_q_item, nvidia_isr_bh_unlocked, (void *)nv);
        rc = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name);
        if (rc != 0)
            goto failed;
        kthread_init = NV_TRUE;

        rc = nv_kthread_q_init(&nvl->queue.nvk, "nv_queue");
        if (rc)
            goto failed;
        nv->queue = &nvl->queue;
    }

    if (!rm_init_adapter(sp, nv))
    {
        if (!(nv->flags & NV_FLAG_USES_MSIX) &&
            !(nv->flags & NV_FLAG_SOC_DISPLAY) &&
            !(nv->flags & NV_FLAG_SOC_IGPU))
        {
            free_irq(nv->interrupt_line, (void *) nvl);
        }
        else if (nv->flags & NV_FLAG_SOC_DISPLAY)
        {
        }
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
        else
        {
            nv_free_msix_irq(nvl);
        }
#endif
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                      "rm_init_adapter failed, device minor number %d\n",
                      nvl->minor_num);
        rc = -EIO;
        goto failed;
    }

    {
        const NvU8 *uuid = rm_get_gpu_uuid_raw(sp, nv);

        if (uuid != NULL)
        {
#if defined(NV_UVM_ENABLE)
            nv_uvm_notify_start_device(uuid);
#endif
        }
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        nv_acpi_register_notifier(nvl);
    }

    nv->flags |= NV_FLAG_OPEN;

    /*
     * Now that RM init is done, allow dynamic power to control the GPU in FINE
     * mode, if enabled. (If the mode is COARSE, this unref will do nothing
     * which will cause the GPU to remain powered up.)
     * This is balanced by a FINE ref increment at the beginning of
     * nv_stop_device().
     */
    rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);

    return 0;

failed:
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    if (nv->flags & NV_FLAG_USES_MSI)
    {
        nv->flags &= ~NV_FLAG_USES_MSI;
        NV_PCI_DISABLE_MSI(nvl->pci_dev);
        if (nvl->irq_count)
            NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
    }
    else if (nv->flags & NV_FLAG_USES_MSIX)
    {
        nv->flags &= ~NV_FLAG_USES_MSIX;
        pci_disable_msix(nvl->pci_dev);
        NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
        NV_KFREE(nvl->msix_entries, nvl->num_intr * sizeof(struct msix_entry));
    }

    if (nvl->msix_bh_mutex)
    {
        os_free_mutex(nvl->msix_bh_mutex);
        nvl->msix_bh_mutex = NULL;
    }
#endif

    if (nv->queue && !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        nv->queue = NULL;
        nv_kthread_q_stop(&nvl->queue.nvk);
    }

    if (kthread_init && !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
        nv_kthread_q_stop(&nvl->bottom_half_q);

    if (nvl->isr_bh_unlocked_mutex)
    {
        os_free_mutex(nvl->isr_bh_unlocked_mutex);
        nvl->isr_bh_unlocked_mutex = NULL;
    }

    nv_dev_free_stacks(nvl);

    nv_unregister_ibmnpu_devices(nv);

    if (power_ref)
    {
        rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE);
    }

    nv_put_rsync_info();

    return rc;
}
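
/*
 * Dynamic power reference pairing for the function above, in sketch form:
 *
 *     nv_start_device(): rm_ref_dynamic_power(COARSE or FINE)   // taken
 *                        rm_unref_dynamic_power(FINE)           // on success
 *     nv_stop_device():  rm_ref_dynamic_power(FINE)             // re-taken
 *                        rm_unref_dynamic_power(COARSE or FINE)
 *
 * In COARSE mode the trailing FINE unref is a no-op, so the GPU stays
 * powered for the whole open-to-close interval; in FINE mode it permits
 * runtime power management while the device remains open.
 */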

/*
 * Makes sure the device is ready for operations and increases nvl->usage_count.
 * Assumes nvl->ldata_lock is held.
 */
static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    int rc;
    NV_STATUS status;

    if (os_is_vgx_hyper())
    {
        /* fail open if GPU is being unbound */
        if (nv->flags & NV_FLAG_UNBIND_LOCK)
        {
            NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                          "Open failed as GPU is locked for unbind operation\n");
            return -ENODEV;
        }
    }

    NV_DEV_PRINTF(NV_DBG_INFO, nv, "Opening GPU with minor number %d\n",
                  nvl->minor_num);

    status = nv_check_gpu_state(nv);
    if (status == NV_ERR_GPU_IS_LOST)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "Device in removal process\n");
        return -ENODEV;
    }

    if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX))
        return -EMFILE;

    if ( ! (nv->flags & NV_FLAG_OPEN))
    {
        /* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */
        if (NV_ATOMIC_READ(nvl->usage_count) != 0)
        {
            NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                          "Minor device %u is referenced without being open!\n",
                          nvl->minor_num);
            WARN_ON(1);
            return -EBUSY;
        }

        rc = nv_start_device(nv, sp);
        if (rc != 0)
            return rc;
    }
    else if (rm_is_device_sequestered(sp, nv))
    {
        /* Do not increment the usage count of sequestered devices. */
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "Device is currently unavailable\n");
        return -EBUSY;
    }

    NV_ATOMIC_INC(nvl->usage_count);
    return 0;
}

static void nv_init_mapping_revocation(nv_linux_state_t *nvl,
                                       struct file *file,
                                       nv_linux_file_private_t *nvlfp,
                                       struct inode *inode)
{
    down(&nvl->mmap_lock);

    /* Set up struct address_space for use with unmap_mapping_range() */
    address_space_init_once(&nvlfp->mapping);
    nvlfp->mapping.host = inode;
    nvlfp->mapping.a_ops = inode->i_mapping->a_ops;
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
    nvlfp->mapping.backing_dev_info = inode->i_mapping->backing_dev_info;
#endif
    file->f_mapping = &nvlfp->mapping;

    /* Add nvlfp to list of open files in nvl for mapping revocation */
    list_add(&nvlfp->entry, &nvl->open_files);

    up(&nvl->mmap_lock);
}
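
/*
 * With file->f_mapping pointed at this per-open address_space, mappings
 * created through the file can later be revoked with something along the
 * lines of unmap_mapping_range(&nvlfp->mapping, 0, 0, 1) (illustrative
 * arguments; a zero length unmaps to the end), after which user accesses
 * fault instead of touching the stale pages.
 */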

/*
** nvidia_open
**
** nv driver open entry point. Sessions are created here.
*/
int
nvidia_open(
    struct inode *inode,
    struct file *file
)
{
    nv_state_t *nv = NULL;
    nv_linux_state_t *nvl = NULL;
    int rc = 0;
    nv_linux_file_private_t *nvlfp = NULL;
    nvidia_stack_t *sp = NULL;
    unsigned int i;
    unsigned int k;

    nv_printf(NV_DBG_INFO, "NVRM: nvidia_open...\n");

    nvlfp = nv_alloc_file_private();
    if (nvlfp == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate file private!\n");
        return -ENOMEM;
    }

    rc = nv_kmem_cache_alloc_stack(&sp);
    if (rc != 0)
    {
        nv_free_file_private(nvlfp);
        return rc;
    }

    for (i = 0; i < NV_FOPS_STACK_INDEX_COUNT; ++i)
    {
        rc = nv_kmem_cache_alloc_stack(&nvlfp->fops_sp[i]);
        if (rc != 0)
        {
            nv_kmem_cache_free_stack(sp);
            for (k = 0; k < i; ++k)
            {
                nv_kmem_cache_free_stack(nvlfp->fops_sp[k]);
            }
            nv_free_file_private(nvlfp);
            return rc;
        }
    }

    NV_SET_FILE_PRIVATE(file, nvlfp);
    nvlfp->sp = sp;

    /* for control device, just jump to its open routine */
    /* after setting up the private data */
    if (nv_is_control_device(inode))
    {
        rc = nvidia_ctl_open(inode, file);
        if (rc != 0)
            goto failed;
        return rc;
    }

    rc = nv_down_read_interruptible(&nv_system_pm_lock);
    if (rc < 0)
        goto failed;

    /* Takes nvl->ldata_lock */
    nvl = find_minor(NV_DEVICE_MINOR_NUMBER(inode));
    if (!nvl)
    {
        rc = -ENODEV;
        up_read(&nv_system_pm_lock);
        goto failed;
    }

    nvlfp->nvptr = nvl;
    nv = NV_STATE_PTR(nvl);

    if ((nv->flags & NV_FLAG_EXCLUDE) != 0)
    {
        char *uuid = rm_get_gpu_uuid(sp, nv);
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                      "open() not permitted for excluded %s\n",
                      (uuid != NULL) ? uuid : "GPU");
        if (uuid != NULL)
            os_free_mem(uuid);
        rc = -EPERM;
        goto failed1;
    }

    rc = nv_open_device(nv, sp);
    /* Fall-through on error */

    nv_assert_not_in_gpu_exclusion_list(sp, nv);

failed1:
    up(&nvl->ldata_lock);

    up_read(&nv_system_pm_lock);
failed:
    if (rc != 0)
    {
        if (nvlfp != NULL)
        {
            nv_kmem_cache_free_stack(sp);
            for (i = 0; i < NV_FOPS_STACK_INDEX_COUNT; ++i)
            {
                nv_kmem_cache_free_stack(nvlfp->fops_sp[i]);
            }
            nv_free_file_private(nvlfp);
            NV_SET_FILE_PRIVATE(file, NULL);
        }
    }
    else
    {
        nv_init_mapping_revocation(nvl, file, nvlfp, inode);
    }

    return rc;
}

static void validate_numa_shutdown_state(nv_linux_state_t *nvl)
{
    int numa_status = nv_get_numa_status(nvl);
    WARN_ON((numa_status != NV_IOCTL_NUMA_STATUS_OFFLINE) &&
            (numa_status != NV_IOCTL_NUMA_STATUS_DISABLED));
}

void nv_shutdown_adapter(nvidia_stack_t *sp,
                         nv_state_t *nv,
                         nv_linux_state_t *nvl)
{
    validate_numa_shutdown_state(nvl);

    rm_disable_adapter(sp, nv);

    // It's safe to call nv_kthread_q_stop even if queue is not initialized
    nv_kthread_q_stop(&nvl->bottom_half_q);

    if (nv->queue != NULL)
    {
        nv->queue = NULL;
        nv_kthread_q_stop(&nvl->queue.nvk);
    }

    if (nvl->isr_bh_unlocked_mutex)
    {
        os_free_mutex(nvl->isr_bh_unlocked_mutex);
        nvl->isr_bh_unlocked_mutex = NULL;
    }

    if (!(nv->flags & NV_FLAG_USES_MSIX) &&
        !(nv->flags & NV_FLAG_SOC_DISPLAY) &&
        !(nv->flags & NV_FLAG_SOC_IGPU))
    {
        free_irq(nv->interrupt_line, (void *)nvl);
        if (nv->flags & NV_FLAG_USES_MSI)
        {
            NV_PCI_DISABLE_MSI(nvl->pci_dev);
            if (nvl->irq_count)
                NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
        }
    }
    else if (nv->flags & NV_FLAG_SOC_DISPLAY)
    {
    }
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    else
    {
        nv_free_msix_irq(nvl);
        pci_disable_msix(nvl->pci_dev);
        nv->flags &= ~NV_FLAG_USES_MSIX;
        NV_KFREE(nvl->msix_entries, nvl->num_intr * sizeof(struct msix_entry));
        NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
    }
#endif

    if (nvl->msix_bh_mutex)
    {
        os_free_mutex(nvl->msix_bh_mutex);
        nvl->msix_bh_mutex = NULL;
    }

    rm_shutdown_adapter(sp, nv);
}

/*
 * Tears down the device on the last file close. Assumes nvl->ldata_lock is
 * held.
 */
static void nv_stop_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    static int persistence_mode_notice_logged;

    /*
     * The GPU needs to be powered on to go through the teardown sequence.
     * This balances the FINE unref at the end of nv_start_device().
     */
    rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);

#if defined(NV_UVM_ENABLE)
    {
        const NvU8 *uuid;

        // Inform UVM before disabling adapter. Use cached copy
        uuid = nv_get_cached_uuid(nv);
        if (uuid != NULL)
        {
            // this function cannot fail
            nv_uvm_notify_stop_device(uuid);
        }
    }
#endif

    /* Adapter is already shutdown as part of nvidia_pci_remove */
    if (!nv->removed)
    {
        if (nv->flags & NV_FLAG_PERSISTENT_SW_STATE)
        {
            rm_disable_adapter(sp, nv);
        }
        else
        {
            nv_acpi_unregister_notifier(nvl);
            nv_shutdown_adapter(sp, nv, nvl);
        }
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        nv_dev_free_stacks(nvl);
    }

    if ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) &&
        (!persistence_mode_notice_logged) && (!os_is_vgx_hyper()))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Persistence mode is deprecated and"
                  " will be removed in a future release. Please use"
                  " nvidia-persistenced instead.\n");
        persistence_mode_notice_logged = 1;
    }

    /* leave INIT flag alone so we don't reinit every time */
    nv->flags &= ~NV_FLAG_OPEN;

    nv_unregister_ibmnpu_devices(nv);

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE);
    }
    else
    {
        /* If in legacy persistence mode, only unref FINE refcount. */
        rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
    }

    nv_put_rsync_info();
}

/*
 * Decreases nvl->usage_count, stopping the device when it reaches 0. Assumes
 * nvl->ldata_lock is held.
 */
static void nv_close_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Attempting to close unopened minor device %u!\n",
                  nvl->minor_num);
        WARN_ON(1);
        return;
    }

    if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count))
        nv_stop_device(nv, sp);
}
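
/*
 * Open/close refcounting in brief: nv_open_device() increments
 * nvl->usage_count, starting the device on the 0 -> 1 transition via
 * nv_start_device(); nv_close_device() decrements it and tears the device
 * down through nv_stop_device() on the final 1 -> 0 transition. Both sides
 * rely on the caller holding nvl->ldata_lock.
 */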

/*
** nvidia_close
**
** Primary driver close entry point.
*/

static void
nvidia_close_callback(
    nv_linux_file_private_t *nvlfp
)
{
    nv_linux_state_t *nvl = nvlfp->nvptr;
    nv_state_t *nv = NV_STATE_PTR(nvl);
    nvidia_stack_t *sp = nvlfp->sp;
    unsigned int i;
    NvBool bRemove = NV_FALSE;

    rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);

    down(&nvl->mmap_lock);
    list_del(&nvlfp->entry);
    up(&nvl->mmap_lock);

    down(&nvl->ldata_lock);
    nv_close_device(nv, sp);

    bRemove = (!NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv)) &&
              (NV_ATOMIC_READ(nvl->usage_count) == 0) &&
              rm_get_device_remove_flag(sp, nv->gpu_id);

    for (i = 0; i < NV_FOPS_STACK_INDEX_COUNT; ++i)
    {
        nv_kmem_cache_free_stack(nvlfp->fops_sp[i]);
    }

    nv_free_file_private(nvlfp);

    /*
     * In case of surprise removal of device, we have 2 cases as below:
     *
     * 1> When nvidia_pci_remove is scheduled prior to nvidia_close.
     * nvidia_pci_remove will not destroy linux layer locks & nv linux state
     * struct but will set variable nv->removed for nvidia_close.
     * Once all the clients are closed, last nvidia_close will clean up linux
     * layer locks and nv linux state struct.
     *
     * 2> When nvidia_close is scheduled prior to nvidia_pci_remove.
     * This will be treated as normal working case. nvidia_close will not do
     * any cleanup related to linux layer locks and nv linux state struct.
     * nvidia_pci_remove when scheduled will do necessary cleanup.
     */
    if ((NV_ATOMIC_READ(nvl->usage_count) == 0) && nv->removed)
    {
        nvidia_frontend_remove_device((void *)&nv_fops, nvl);
        nv_lock_destroy_locks(sp, nv);
        NV_KFREE(nvl, sizeof(nv_linux_state_t));
    }
    else
    {
        up(&nvl->ldata_lock);

#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE)
        if (bRemove)
        {
            NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(nvl->pci_dev);
        }
#endif
    }

    nv_kmem_cache_free_stack(sp);
}

static void nvidia_close_deferred(void *data)
{
    nv_linux_file_private_t *nvlfp = data;

    down_read(&nv_system_pm_lock);

    nvidia_close_callback(nvlfp);

    up_read(&nv_system_pm_lock);
}

int
nvidia_close(
    struct inode *inode,
    struct file *file
)
{
    int rc;
    nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
    nv_linux_state_t *nvl = nvlfp->nvptr;
    nv_state_t *nv = NV_STATE_PTR(nvl);

    NV_DEV_PRINTF(NV_DBG_INFO, nv, "nvidia_close on GPU with minor number %d\n", NV_DEVICE_MINOR_NUMBER(inode));

    if (nv_is_control_device(inode))
    {
        return nvidia_ctl_close(inode, file);
    }

    NV_SET_FILE_PRIVATE(file, NULL);

    rc = nv_down_read_interruptible(&nv_system_pm_lock);
    if (rc == 0)
    {
        nvidia_close_callback(nvlfp);
        up_read(&nv_system_pm_lock);
    }
    else
    {
        nv_kthread_q_item_init(&nvlfp->deferred_close_q_item,
                               nvidia_close_deferred,
                               nvlfp);
        rc = nv_kthread_q_schedule_q_item(&nv_deferred_close_kthread_q,
                                          &nvlfp->deferred_close_q_item);
        WARN_ON(rc == 0);
    }

    return 0;
}
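
/*
 * Note on the deferred path above: close() cannot meaningfully fail, so if
 * nv_down_read_interruptible() is interrupted by a signal the teardown is
 * queued onto nv_deferred_close_kthread_q instead, and runs later in
 * nvidia_close_deferred(), where blocking on nv_system_pm_lock is safe.
 */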

unsigned int
nvidia_poll(
    struct file *file,
    poll_table *wait
)
{
    unsigned int mask = 0;
    nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
    unsigned long eflags;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    NV_STATUS status;

    status = nv_check_gpu_state(nv);
    if (status == NV_ERR_GPU_IS_LOST)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping nvidia_poll\n");
        return POLLHUP;
    }

    if ((file->f_flags & O_NONBLOCK) == 0)
        poll_wait(file, &nvlfp->waitqueue, wait);

    NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);

    if ((nvlfp->event_data_head != NULL) || nvlfp->dataless_event_pending)
    {
        mask = (POLLPRI | POLLIN);
        nvlfp->dataless_event_pending = NV_FALSE;
    }

    NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);

    return mask;
}

#define NV_CTL_DEVICE_ONLY(nv)                    \
{                                                 \
    if (((nv)->flags & NV_FLAG_CONTROL) == 0)     \
    {                                             \
        status = -EINVAL;                         \
        goto done;                                \
    }                                             \
}

#define NV_ACTUAL_DEVICE_ONLY(nv)                 \
{                                                 \
    if (((nv)->flags & NV_FLAG_CONTROL) != 0)     \
    {                                             \
        status = -EINVAL;                         \
        goto done;                                \
    }                                             \
}

/*
 * Fills the ci array with the state of num_entries devices. Returns -EINVAL if
 * num_entries isn't big enough to hold all available devices.
 */
static int nvidia_read_card_info(nv_ioctl_card_info_t *ci, size_t num_entries)
{
    nv_state_t *nv;
    nv_linux_state_t *nvl;
    size_t i = 0;
    int rc = 0;

    /* Clear each card's flags field the lazy way */
    memset(ci, 0, num_entries * sizeof(ci[0]));

    LOCK_NV_LINUX_DEVICES();

    if (num_entries < num_nv_devices)
    {
        rc = -EINVAL;
        goto out;
    }

    for (nvl = nv_linux_devices; nvl && i < num_entries; nvl = nvl->next)
    {
        nv = NV_STATE_PTR(nvl);

        /* We do not include excluded GPUs in the list... */
        if ((nv->flags & NV_FLAG_EXCLUDE) != 0)
            continue;

        ci[i].valid = NV_TRUE;
        ci[i].pci_info.domain = nv->pci_info.domain;
        ci[i].pci_info.bus = nv->pci_info.bus;
        ci[i].pci_info.slot = nv->pci_info.slot;
        ci[i].pci_info.vendor_id = nv->pci_info.vendor_id;
        ci[i].pci_info.device_id = nv->pci_info.device_id;
        ci[i].gpu_id = nv->gpu_id;
        ci[i].interrupt_line = nv->interrupt_line;
        ci[i].reg_address = nv->regs->cpu_address;
        ci[i].reg_size = nv->regs->size;
        ci[i].minor_number = nvl->minor_num;
        if (dev_is_pci(nvl->dev))
        {
            ci[i].fb_address = nv->fb->cpu_address;
            ci[i].fb_size = nv->fb->size;
        }
        i++;
    }

out:
    UNLOCK_NV_LINUX_DEVICES();
    return rc;
}

int
nvidia_ioctl(
    struct inode *inode,
    struct file *file,
    unsigned int cmd,
    unsigned long i_arg)
{
    NV_STATUS rmStatus;
    int status = 0;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
    nvidia_stack_t *sp = NULL;
    nv_ioctl_xfer_t ioc_xfer;
    void *arg_ptr = (void *) i_arg;
    void *arg_copy = NULL;
    size_t arg_size = 0;
    int arg_cmd;

    nv_printf(NV_DBG_INFO, "NVRM: ioctl(0x%x, 0x%x, 0x%x)\n",
              _IOC_NR(cmd), (unsigned int) i_arg, _IOC_SIZE(cmd));

    status = nv_down_read_interruptible(&nv_system_pm_lock);
    if (status < 0)
        return status;

    sp = nv_nvlfp_get_sp(nvlfp, NV_FOPS_STACK_INDEX_IOCTL);

    rmStatus = nv_check_gpu_state(nv);
    if (rmStatus == NV_ERR_GPU_IS_LOST)
    {
        nv_printf(NV_DBG_INFO, "NVRM: GPU is lost, skipping nvidia_ioctl\n");
        status = -EINVAL;
        goto done;
    }

    arg_size = _IOC_SIZE(cmd);
    arg_cmd  = _IOC_NR(cmd);

    if (arg_cmd == NV_ESC_IOCTL_XFER_CMD)
    {
        if (arg_size != sizeof(nv_ioctl_xfer_t))
        {
            nv_printf(NV_DBG_ERRORS,
                      "NVRM: invalid ioctl XFER structure size!\n");
            status = -EINVAL;
            goto done;
        }

        if (NV_COPY_FROM_USER(&ioc_xfer, arg_ptr, sizeof(ioc_xfer)))
        {
            nv_printf(NV_DBG_ERRORS,
                      "NVRM: failed to copy in ioctl XFER data!\n");
            status = -EFAULT;
            goto done;
        }

        arg_cmd  = ioc_xfer.cmd;
        arg_size = ioc_xfer.size;
        arg_ptr  = NvP64_VALUE(ioc_xfer.ptr);

        if (arg_size > NV_ABSOLUTE_MAX_IOCTL_SIZE)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: invalid ioctl XFER size!\n");
            status = -EINVAL;
            goto done;
        }
    }
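
    /*
     * At this point arg_cmd/arg_size/arg_ptr describe the real request:
     * taken either directly from the ioctl encoding or, for
     * NV_ESC_IOCTL_XFER_CMD, from the nv_ioctl_xfer_t just copied in. The
     * XFER indirection exists because _IOC_SIZE() can only encode a 14-bit
     * payload size, so larger requests pass a small descriptor carrying the
     * out-of-band cmd, size, and user pointer, bounded here by
     * NV_ABSOLUTE_MAX_IOCTL_SIZE.
     */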

    NV_KMALLOC(arg_copy, arg_size);
    if (arg_copy == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate ioctl memory\n");
        status = -ENOMEM;
        goto done;
    }

    if (NV_COPY_FROM_USER(arg_copy, arg_ptr, arg_size))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy in ioctl data!\n");
        status = -EFAULT;
        goto done;
    }

    switch (arg_cmd)
    {
        case NV_ESC_QUERY_DEVICE_INTR:
        {
            nv_ioctl_query_device_intr *query_intr = arg_copy;

            NV_ACTUAL_DEVICE_ONLY(nv);

            if ((arg_size < sizeof(*query_intr)) ||
                (!nv->regs->map))
            {
                status = -EINVAL;
                goto done;
            }

            query_intr->intrStatus =
                *(nv->regs->map + (NV_RM_DEVICE_INTR_ADDRESS >> 2));
            query_intr->status = NV_OK;
            break;
        }

        /* pass out info about the card */
        case NV_ESC_CARD_INFO:
        {
            size_t num_arg_devices = arg_size / sizeof(nv_ioctl_card_info_t);

            NV_CTL_DEVICE_ONLY(nv);

            status = nvidia_read_card_info(arg_copy, num_arg_devices);
            break;
        }

        case NV_ESC_ATTACH_GPUS_TO_FD:
        {
            size_t num_arg_gpus = arg_size / sizeof(NvU32);
            size_t i;

            NV_CTL_DEVICE_ONLY(nv);

            if (num_arg_gpus == 0 || nvlfp->num_attached_gpus != 0 ||
                arg_size % sizeof(NvU32) != 0)
            {
                status = -EINVAL;
                goto done;
            }

            NV_KMALLOC(nvlfp->attached_gpus, arg_size);
            if (nvlfp->attached_gpus == NULL)
            {
                status = -ENOMEM;
                goto done;
            }
            memcpy(nvlfp->attached_gpus, arg_copy, arg_size);
            nvlfp->num_attached_gpus = num_arg_gpus;

            for (i = 0; i < nvlfp->num_attached_gpus; i++)
            {
                if (nvlfp->attached_gpus[i] == 0)
                {
                    continue;
                }

                if (nvidia_dev_get(nvlfp->attached_gpus[i], sp))
                {
                    while (i--)
                    {
                        if (nvlfp->attached_gpus[i] != 0)
                            nvidia_dev_put(nvlfp->attached_gpus[i], sp);
                    }
                    NV_KFREE(nvlfp->attached_gpus, arg_size);
                    nvlfp->num_attached_gpus = 0;

                    status = -EINVAL;
                    break;
                }
            }

            break;
        }

        case NV_ESC_CHECK_VERSION_STR:
        {
            NV_CTL_DEVICE_ONLY(nv);

            rmStatus = rm_perform_version_check(sp, arg_copy, arg_size);
            status = ((rmStatus == NV_OK) ? 0 : -EINVAL);
            break;
        }

        case NV_ESC_SYS_PARAMS:
        {
            nv_ioctl_sys_params_t *api = arg_copy;

            NV_CTL_DEVICE_ONLY(nv);

            if (arg_size != sizeof(nv_ioctl_sys_params_t))
            {
                status = -EINVAL;
                goto done;
            }

            /* numa_memblock_size should only be set once */
            if (nvl->numa_memblock_size == 0)
            {
                nvl->numa_memblock_size = api->memblock_size;
            }
            else
            {
                status = (nvl->numa_memblock_size == api->memblock_size) ?
                    0 : -EBUSY;
                goto done;
            }
            break;
        }

        case NV_ESC_NUMA_INFO:
        {
            nv_ioctl_numa_info_t *api = arg_copy;
            rmStatus = NV_OK;

            NV_ACTUAL_DEVICE_ONLY(nv);

            if (arg_size != sizeof(nv_ioctl_numa_info_t))
            {
                status = -EINVAL;
                goto done;
            }

            api->offline_addresses.numEntries =
                ARRAY_SIZE(api->offline_addresses.addresses);

            rmStatus = rm_get_gpu_numa_info(sp, nv,
                                            &(api->nid),
                                            &(api->numa_mem_addr),
                                            &(api->numa_mem_size),
                                            (api->offline_addresses.addresses),
                                            &(api->offline_addresses.numEntries));
            if (rmStatus != NV_OK)
            {
                status = -EBUSY;
                goto done;
            }

            api->status = nv_get_numa_status(nvl);
            api->memblock_size = nv_ctl_device.numa_memblock_size;
            break;
        }

        case NV_ESC_SET_NUMA_STATUS:
        {
            nv_ioctl_set_numa_status_t *api = arg_copy;
            rmStatus = NV_OK;

            if (!NV_IS_SUSER())
            {
                status = -EACCES;
                goto done;
            }

            NV_ACTUAL_DEVICE_ONLY(nv);

            if (arg_size != sizeof(nv_ioctl_set_numa_status_t))
            {
                status = -EINVAL;
                goto done;
            }

            /*
             * The nv_linux_state_t for the device needs to be locked
             * in order to prevent additional open()/close() calls from
             * manipulating the usage count for the device while we
             * determine if NUMA state can be changed.
             */
            down(&nvl->ldata_lock);

            if (nv_get_numa_status(nvl) != api->status)
            {
                if (api->status == NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS)
                {
                    /*
                     * Only the current client should have an open file
                     * descriptor for the device, to allow safe offlining.
                     */
                    if (NV_ATOMIC_READ(nvl->usage_count) > 1)
                    {
                        status = -EBUSY;
                        goto unlock;
                    }
                    else
                    {
                        /*
                         * If this call fails, it indicates that RM
                         * is not ready to offline memory, and we should keep
                         * the current NUMA status of ONLINE.
                         */
                        rmStatus = rm_gpu_numa_offline(sp, nv);
                        if (rmStatus != NV_OK)
                        {
                            status = -EBUSY;
                            goto unlock;
                        }
                    }
                }

                status = nv_set_numa_status(nvl, api->status);
                if (status < 0)
                {
                    if (api->status == NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS)
                        (void) rm_gpu_numa_online(sp, nv);
                    goto unlock;
                }

                if (api->status == NV_IOCTL_NUMA_STATUS_ONLINE)
                {
                    rmStatus = rm_gpu_numa_online(sp, nv);
                    if (rmStatus != NV_OK)
                    {
                        status = -EBUSY;
                        goto unlock;
                    }
                }
            }

unlock:
            up(&nvl->ldata_lock);

            break;
        }
0 : -EINVAL); 2333 break; 2334 } 2335 2336 done: 2337 nv_nvlfp_put_sp(nvlfp, NV_FOPS_STACK_INDEX_IOCTL); 2338 2339 up_read(&nv_system_pm_lock); 2340 2341 if (arg_copy != NULL) 2342 { 2343 if (status != -EFAULT) 2344 { 2345 if (NV_COPY_TO_USER(arg_ptr, arg_copy, arg_size)) 2346 { 2347 nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy out ioctl data\n"); 2348 status = -EFAULT; 2349 } 2350 } 2351 NV_KFREE(arg_copy, arg_size); 2352 } 2353 2354 return status; 2355 } 2356 2357 irqreturn_t 2358 nvidia_isr_msix( 2359 int irq, 2360 void *arg 2361 ) 2362 { 2363 irqreturn_t ret; 2364 nv_linux_state_t *nvl = (void *) arg; 2365 2366 // nvidia_isr_msix() is called for each of the MSI-X vectors and they can 2367 // run in parallel on different CPUs (cores), but this is not currently 2368 // supported by nvidia_isr() and its children. As a big-hammer fix, just 2369 // spinlock around the nvidia_isr() call to serialize them. 2370 // 2371 // At this point interrupts are disabled on the CPU running our ISR (see 2372 // comments for nv_default_irq_flags()) so a plain spinlock is enough. 2373 NV_SPIN_LOCK(&nvl->msix_isr_lock); 2374 2375 ret = nvidia_isr(irq, arg); 2376 2377 NV_SPIN_UNLOCK(&nvl->msix_isr_lock); 2378 2379 return ret; 2380 } 2381 2382 /* 2383 * The driver received an interrupt; 2384 * if someone is waiting on it, hand it off. 2385 */ 2386 irqreturn_t 2387 nvidia_isr( 2388 int irq, 2389 void *arg 2390 ) 2391 { 2392 nv_linux_state_t *nvl = (void *) arg; 2393 nv_state_t *nv = NV_STATE_PTR(nvl); 2394 NvU32 need_to_run_bottom_half_gpu_lock_held = 0; 2395 NvBool rm_handled = NV_FALSE, uvm_handled = NV_FALSE, rm_fault_handling_needed = NV_FALSE; 2396 NvU32 rm_serviceable_fault_cnt = 0; 2397 NvU32 sec, usec; 2398 NvU16 index = 0; 2399 NvU64 currentTime = 0; 2400 NvBool found_irq = NV_FALSE; 2401 2402 rm_gpu_handle_mmu_faults(nvl->sp[NV_DEV_STACK_ISR], nv, &rm_serviceable_fault_cnt); 2403 rm_fault_handling_needed = (rm_serviceable_fault_cnt != 0); 2404 2405 #if defined (NV_UVM_ENABLE) 2406 // 2407 // Returns NV_OK if the UVM driver handled the interrupt 2408 // 2409 // Returns NV_ERR_NO_INTR_PENDING if the interrupt is not for 2410 // the UVM driver. 2411 // 2412 // Returns NV_WARN_MORE_PROCESSING_REQUIRED if the UVM top-half ISR was 2413 // unable to get its lock(s), due to other (UVM) threads holding them. 2414 // 2415 // RM can normally treat NV_WARN_MORE_PROCESSING_REQUIRED the same as 2416 // NV_ERR_NO_INTR_PENDING, but in some cases the extra information may 2417 // be helpful.
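//
// Whatever UVM returns, rm_isr() below still gets a chance to service the
// interrupt; the IRQ_RETVAL() at the end of this function reports the IRQ
// as handled if RM, UVM, or the RM fault handling path claimed it.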
2418 // 2419 if (nv_uvm_event_interrupt(nv_get_cached_uuid(nv)) == NV_OK) 2420 uvm_handled = NV_TRUE; 2421 #endif 2422 2423 rm_handled = rm_isr(nvl->sp[NV_DEV_STACK_ISR], nv, 2424 &need_to_run_bottom_half_gpu_lock_held); 2425 2426 /* Replicate the logic in the Linux kernel that tracks unhandled interrupts crossing a threshold */ 2427 if ((nv->flags & NV_FLAG_USES_MSI) || (nv->flags & NV_FLAG_USES_MSIX)) 2428 { 2429 if (nvl->irq_count != NULL) 2430 { 2431 for (index = 0; index < nvl->current_num_irq_tracked; index++) 2432 { 2433 if (nvl->irq_count[index].irq == irq) 2434 { 2435 found_irq = NV_TRUE; 2436 break; 2437 } 2438 2439 found_irq = NV_FALSE; 2440 } 2441 2442 if (!found_irq && nvl->current_num_irq_tracked < nvl->num_intr) 2443 { 2444 index = nvl->current_num_irq_tracked; 2445 nvl->irq_count[index].irq = irq; 2446 nvl->current_num_irq_tracked++; 2447 found_irq = NV_TRUE; 2448 } 2449 2450 if (found_irq) 2451 { 2452 nvl->irq_count[index].total++; 2453 2454 if (rm_handled == NV_FALSE) 2455 { 2456 os_get_current_time(&sec, &usec); 2457 currentTime = ((NvU64)sec) * 1000000 + (NvU64)usec; 2458 2459 /* Reset the unhandled count if it's been more than 0.1 seconds since the last unhandled IRQ */ 2460 if ((currentTime - nvl->irq_count[index].last_unhandled) > RM_UNHANDLED_TIMEOUT_US) 2461 nvl->irq_count[index].unhandled = 1; 2462 else 2463 nvl->irq_count[index].unhandled++; 2464 2465 nvl->irq_count[index].last_unhandled = currentTime; 2466 rm_handled = NV_TRUE; 2467 } 2468 2469 if (nvl->irq_count[index].total >= RM_THRESHOLD_TOTAL_IRQ_COUNT) 2470 { 2471 if (nvl->irq_count[index].unhandled > RM_THRESHOLD_UNAHNDLED_IRQ_COUNT) 2472 nv_printf(NV_DBG_ERRORS, "NVRM: Going over RM unhandled interrupt threshold for irq %d\n", irq); 2473 2474 nvl->irq_count[index].total = 0; 2475 nvl->irq_count[index].unhandled = 0; 2476 nvl->irq_count[index].last_unhandled = 0; 2477 } 2478 } 2479 else 2480 nv_printf(NV_DBG_ERRORS, "NVRM: IRQ number out of valid range\n"); 2481 } 2482 } 2483 2484 if (need_to_run_bottom_half_gpu_lock_held) 2485 { 2486 return IRQ_WAKE_THREAD; 2487 } 2488 else 2489 { 2490 // 2491 // If rm_isr() does not need to run a bottom half, but RM fault handling 2492 // is needed, then we enqueue a kthread-based bottom half, as this 2493 // specific bottom half will acquire the GPU lock. 2494 // 2495 if (rm_fault_handling_needed) 2496 nv_kthread_q_schedule_q_item(&nvl->bottom_half_q, &nvl->bottom_half_q_item); 2497 } 2498 2499 return IRQ_RETVAL(rm_handled || uvm_handled || rm_fault_handling_needed); 2500 } 2501 2502 irqreturn_t 2503 nvidia_isr_kthread_bh( 2504 int irq, 2505 void *data 2506 ) 2507 { 2508 return nvidia_isr_common_bh(data); 2509 } 2510 2511 irqreturn_t 2512 nvidia_isr_msix_kthread_bh( 2513 int irq, 2514 void *data 2515 ) 2516 { 2517 NV_STATUS status; 2518 irqreturn_t ret; 2519 nv_state_t *nv = (nv_state_t *) data; 2520 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 2521 2522 // 2523 // Synchronize kthreads servicing bottom halves for different MSI-X vectors 2524 // as they share the same pre-allocated alt-stack.
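//
// Each MSI-X vector has its own threaded handler, but they all service the
// bottom half on the same pre-allocated nvl->sp[NV_DEV_STACK_ISR_BH] stack,
// hence the mutual exclusion below.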
2525 // 2526 status = os_acquire_mutex(nvl->msix_bh_mutex); 2527 // os_acquire_mutex() can only fail if we cannot sleep, and we can sleep here 2528 WARN_ON(status != NV_OK); 2529 2530 ret = nvidia_isr_common_bh(data); 2531 2532 os_release_mutex(nvl->msix_bh_mutex); 2533 2534 return ret; 2535 } 2536 2537 static irqreturn_t 2538 nvidia_isr_common_bh( 2539 void *data 2540 ) 2541 { 2542 nv_state_t *nv = (nv_state_t *) data; 2543 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 2544 nvidia_stack_t *sp = nvl->sp[NV_DEV_STACK_ISR_BH]; 2545 NV_STATUS status; 2546 2547 status = nv_check_gpu_state(nv); 2548 if (status == NV_ERR_GPU_IS_LOST) 2549 { 2550 nv_printf(NV_DBG_INFO, "NVRM: GPU is lost, skipping ISR bottom half\n"); 2551 } 2552 else 2553 { 2554 rm_isr_bh(sp, nv); 2555 } 2556 2557 return IRQ_HANDLED; 2558 } 2559 2560 static void 2561 nvidia_isr_bh_unlocked( 2562 void * args 2563 ) 2564 { 2565 nv_state_t *nv = (nv_state_t *) args; 2566 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 2567 nvidia_stack_t *sp; 2568 NV_STATUS status; 2569 2570 // 2571 // Synchronize kthreads servicing the unlocked bottom half, as they 2572 // share the same pre-allocated alt-stack. 2573 // 2574 status = os_acquire_mutex(nvl->isr_bh_unlocked_mutex); 2575 if (status != NV_OK) 2576 { 2577 nv_printf(NV_DBG_ERRORS, "NVRM: %s: Unable to take bottom_half mutex!\n", 2578 __FUNCTION__); 2579 WARN_ON(1); 2580 } 2581 2582 sp = nvl->sp[NV_DEV_STACK_ISR_BH_UNLOCKED]; 2583 2584 status = nv_check_gpu_state(nv); 2585 if (status == NV_ERR_GPU_IS_LOST) 2586 { 2587 nv_printf(NV_DBG_INFO, 2588 "NVRM: GPU is lost, skipping unlocked ISR bottom half\n"); 2589 } 2590 else 2591 { 2592 rm_isr_bh_unlocked(sp, nv); 2593 } 2594 2595 os_release_mutex(nvl->isr_bh_unlocked_mutex); 2596 } 2597 2598 static void 2599 nvidia_rc_timer_callback( 2600 struct nv_timer *nv_timer 2601 ) 2602 { 2603 nv_linux_state_t *nvl = container_of(nv_timer, nv_linux_state_t, rc_timer); 2604 nv_state_t *nv = NV_STATE_PTR(nvl); 2605 nvidia_stack_t *sp = nvl->sp[NV_DEV_STACK_TIMER]; 2606 NV_STATUS status; 2607 2608 status = nv_check_gpu_state(nv); 2609 if (status == NV_ERR_GPU_IS_LOST) 2610 { 2611 nv_printf(NV_DBG_INFO, 2612 "NVRM: GPU is lost, skipping device timer callbacks\n"); 2613 return; 2614 } 2615 2616 if (rm_run_rc_callback(sp, nv) == NV_OK) 2617 { 2618 // set another timeout 1 sec in the future: 2619 mod_timer(&nvl->rc_timer.kernel_timer, jiffies + HZ); 2620 } 2621 } 2622 2623 /* 2624 ** nvidia_ctl_open 2625 ** 2626 ** nv control driver open entry point. Sessions are created here.
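**
** Note that the control device is the single, shared nv_ctl_device instance
** rather than a per-GPU device: usage_count below counts open sessions, and
** the first open marks it with NV_FLAG_OPEN | NV_FLAG_CONTROL.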
2627 */ 2628 static int 2629 nvidia_ctl_open( 2630 struct inode *inode, 2631 struct file *file 2632 ) 2633 { 2634 nv_linux_state_t *nvl = &nv_ctl_device; 2635 nv_state_t *nv = NV_STATE_PTR(nvl); 2636 nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file); 2637 2638 nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_open\n"); 2639 2640 down(&nvl->ldata_lock); 2641 2642 /* save the nv away in file->private_data */ 2643 nvlfp->nvptr = nvl; 2644 2645 if (NV_ATOMIC_READ(nvl->usage_count) == 0) 2646 { 2647 nv->flags |= (NV_FLAG_OPEN | NV_FLAG_CONTROL); 2648 } 2649 2650 NV_ATOMIC_INC(nvl->usage_count); 2651 up(&nvl->ldata_lock); 2652 2653 return 0; 2654 } 2655 2656 2657 /* 2658 ** nvidia_ctl_close 2659 */ 2660 static int 2661 nvidia_ctl_close( 2662 struct inode *inode, 2663 struct file *file 2664 ) 2665 { 2666 nv_alloc_t *at, *next; 2667 nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file); 2668 nv_state_t *nv = NV_STATE_PTR(nvl); 2669 nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file); 2670 nvidia_stack_t *sp = nvlfp->sp; 2671 unsigned int i; 2672 2673 nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_close\n"); 2674 2675 down(&nvl->ldata_lock); 2676 if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count)) 2677 { 2678 nv->flags &= ~NV_FLAG_OPEN; 2679 } 2680 up(&nvl->ldata_lock); 2681 2682 rm_cleanup_file_private(sp, nv, &nvlfp->nvfp); 2683 2684 if (nvlfp->free_list != NULL) 2685 { 2686 at = nvlfp->free_list; 2687 while (at != NULL) 2688 { 2689 next = at->next; 2690 if (at->pid == os_get_current_process()) 2691 NV_PRINT_AT(NV_DBG_MEMINFO, at); 2692 nv_free_pages(nv, at->num_pages, 2693 at->flags.contig, 2694 at->cache_type, 2695 (void *)at); 2696 at = next; 2697 } 2698 } 2699 2700 if (nvlfp->num_attached_gpus != 0) 2701 { 2702 size_t i; 2703 2704 for (i = 0; i < nvlfp->num_attached_gpus; i++) 2705 { 2706 if (nvlfp->attached_gpus[i] != 0) 2707 nvidia_dev_put(nvlfp->attached_gpus[i], sp); 2708 } 2709 2710 NV_KFREE(nvlfp->attached_gpus, sizeof(NvU32) * nvlfp->num_attached_gpus); 2711 nvlfp->num_attached_gpus = 0; 2712 } 2713 2714 for (i = 0; i < NV_FOPS_STACK_INDEX_COUNT; ++i) 2715 { 2716 nv_kmem_cache_free_stack(nvlfp->fops_sp[i]); 2717 } 2718 2719 nv_free_file_private(nvlfp); 2720 NV_SET_FILE_PRIVATE(file, NULL); 2721 2722 nv_kmem_cache_free_stack(sp); 2723 2724 return 0; 2725 } 2726 2727 2728 void NV_API_CALL 2729 nv_set_dma_address_size( 2730 nv_state_t *nv, 2731 NvU32 phys_addr_bits 2732 ) 2733 { 2734 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 2735 NvU64 start_addr = nv_get_dma_start_address(nv); 2736 NvU64 new_mask = (((NvU64)1) << phys_addr_bits) - 1; 2737 2738 nvl->dma_dev.addressable_range.limit = start_addr + new_mask; 2739 2740 /* 2741 * The only scenario in which we definitely should not update the DMA mask 2742 * is on POWER, when using TCE bypass mode (see nv_get_dma_start_address() 2743 * for details), since the meaning of the DMA mask is overloaded in that 2744 * case. 
2745 */ 2746 if (!nvl->tce_bypass_enabled) 2747 { 2748 dma_set_mask(&nvl->pci_dev->dev, new_mask); 2749 /* Certain kernels have a bug which causes pci_set_consistent_dma_mask 2750 * to call GPL sme_active symbol, this bug has already been fixed in a 2751 * minor release update but detect the failure scenario here to prevent 2752 * an installation regression */ 2753 #if !NV_IS_EXPORT_SYMBOL_GPL_sme_active 2754 dma_set_coherent_mask(&nvl->pci_dev->dev, new_mask); 2755 #endif 2756 } 2757 } 2758 2759 static NvUPtr 2760 nv_map_guest_pages(nv_alloc_t *at, 2761 NvU64 address, 2762 NvU32 page_count, 2763 NvU32 page_idx) 2764 { 2765 struct page **pages; 2766 NvU32 j; 2767 NvUPtr virt_addr; 2768 2769 NV_KMALLOC(pages, sizeof(struct page *) * page_count); 2770 if (pages == NULL) 2771 { 2772 nv_printf(NV_DBG_ERRORS, 2773 "NVRM: failed to allocate vmap() page descriptor table!\n"); 2774 return 0; 2775 } 2776 2777 for (j = 0; j < page_count; j++) 2778 { 2779 pages[j] = NV_GET_PAGE_STRUCT(at->page_table[page_idx+j]->phys_addr); 2780 } 2781 2782 virt_addr = nv_vm_map_pages(pages, page_count, 2783 at->cache_type == NV_MEMORY_CACHED, at->flags.unencrypted); 2784 NV_KFREE(pages, sizeof(struct page *) * page_count); 2785 2786 return virt_addr; 2787 } 2788 2789 NV_STATUS NV_API_CALL 2790 nv_alias_pages( 2791 nv_state_t *nv, 2792 NvU32 page_cnt, 2793 NvU32 contiguous, 2794 NvU32 cache_type, 2795 NvU64 guest_id, 2796 NvU64 *pte_array, 2797 void **priv_data 2798 ) 2799 { 2800 nv_alloc_t *at; 2801 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 2802 NvU32 i=0; 2803 nvidia_pte_t *page_ptr = NULL; 2804 2805 at = nvos_create_alloc(nvl->dev, page_cnt); 2806 2807 if (at == NULL) 2808 { 2809 return NV_ERR_NO_MEMORY; 2810 } 2811 2812 at->cache_type = cache_type; 2813 if (contiguous) 2814 at->flags.contig = NV_TRUE; 2815 #if defined(NVCPU_AARCH64) 2816 if (at->cache_type != NV_MEMORY_CACHED) 2817 at->flags.aliased = NV_TRUE; 2818 #endif 2819 2820 at->flags.guest = NV_TRUE; 2821 2822 at->order = get_order(at->num_pages * PAGE_SIZE); 2823 2824 for (i=0; i < at->num_pages; ++i) 2825 { 2826 page_ptr = at->page_table[i]; 2827 2828 if (contiguous && i>0) 2829 { 2830 page_ptr->dma_addr = pte_array[0] + (i << PAGE_SHIFT); 2831 } 2832 else 2833 { 2834 page_ptr->dma_addr = pte_array[i]; 2835 } 2836 2837 page_ptr->phys_addr = page_ptr->dma_addr; 2838 2839 /* aliased pages will be mapped on demand. */ 2840 page_ptr->virt_addr = 0x0; 2841 } 2842 2843 at->guest_id = guest_id; 2844 *priv_data = at; 2845 NV_ATOMIC_INC(at->usage_count); 2846 2847 NV_PRINT_AT(NV_DBG_MEMINFO, at); 2848 2849 return NV_OK; 2850 } 2851 2852 /* 2853 * This creates a dummy nv_alloc_t for peer IO mem, so that it can 2854 * be mapped using NvRmMapMemory. 
2855 */ 2856 NV_STATUS NV_API_CALL nv_register_peer_io_mem( 2857 nv_state_t *nv, 2858 NvU64 *phys_addr, 2859 NvU64 page_count, 2860 void **priv_data 2861 ) 2862 { 2863 nv_alloc_t *at; 2864 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 2865 NvU64 i; 2866 NvU64 addr; 2867 2868 at = nvos_create_alloc(nvl->dev, page_count); 2869 2870 if (at == NULL) 2871 return NV_ERR_NO_MEMORY; 2872 2873 // IO regions should be uncached and contiguous 2874 at->cache_type = NV_MEMORY_UNCACHED; 2875 at->flags.contig = NV_TRUE; 2876 #if defined(NVCPU_AARCH64) 2877 at->flags.aliased = NV_TRUE; 2878 #endif 2879 at->flags.peer_io = NV_TRUE; 2880 2881 at->order = get_order(at->num_pages * PAGE_SIZE); 2882 2883 addr = phys_addr[0]; 2884 2885 for (i = 0; i < page_count; i++) 2886 { 2887 at->page_table[i]->phys_addr = addr; 2888 addr += PAGE_SIZE; 2889 } 2890 2891 // No struct page array exists for this memory. 2892 at->user_pages = NULL; 2893 2894 *priv_data = at; 2895 2896 NV_PRINT_AT(NV_DBG_MEMINFO, at); 2897 2898 return NV_OK; 2899 } 2900 2901 void NV_API_CALL nv_unregister_peer_io_mem( 2902 nv_state_t *nv, 2903 void *priv_data 2904 ) 2905 { 2906 nv_alloc_t *at = priv_data; 2907 2908 NV_PRINT_AT(NV_DBG_MEMINFO, at); 2909 2910 nvos_free_alloc(at); 2911 } 2912 2913 /* 2914 * By registering user pages, we create a dummy nv_alloc_t for it, so that the 2915 * rest of the RM can treat it like any other alloc. 2916 * 2917 * This also converts the page array to an array of physical addresses. 2918 */ 2919 NV_STATUS NV_API_CALL nv_register_user_pages( 2920 nv_state_t *nv, 2921 NvU64 page_count, 2922 NvU64 *phys_addr, 2923 void *import_priv, 2924 void **priv_data 2925 ) 2926 { 2927 nv_alloc_t *at; 2928 NvU64 i; 2929 struct page **user_pages; 2930 nv_linux_state_t *nvl; 2931 nvidia_pte_t *page_ptr; 2932 2933 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_register_user_pages: 0x%x\n", page_count); 2934 user_pages = *priv_data; 2935 nvl = NV_GET_NVL_FROM_NV_STATE(nv); 2936 2937 at = nvos_create_alloc(nvl->dev, page_count); 2938 2939 if (at == NULL) 2940 { 2941 return NV_ERR_NO_MEMORY; 2942 } 2943 2944 /* 2945 * Anonymous memory currently must be write-back cacheable, and we can't 2946 * enforce contiguity. 2947 */ 2948 at->cache_type = NV_MEMORY_UNCACHED; 2949 #if defined(NVCPU_AARCH64) 2950 at->flags.aliased = NV_TRUE; 2951 #endif 2952 2953 at->flags.user = NV_TRUE; 2954 2955 at->order = get_order(at->num_pages * PAGE_SIZE); 2956 2957 for (i = 0; i < page_count; i++) 2958 { 2959 /* 2960 * We only assign the physical address and not the DMA address, since 2961 * this allocation hasn't been DMA-mapped yet. 
2962 */ 2963 page_ptr = at->page_table[i]; 2964 page_ptr->phys_addr = page_to_phys(user_pages[i]); 2965 2966 phys_addr[i] = page_ptr->phys_addr; 2967 } 2968 2969 /* Save off the user pages array to be restored later */ 2970 at->user_pages = user_pages; 2971 2972 /* Save off the import private data to be returned later */ 2973 if (import_priv != NULL) 2974 { 2975 at->import_priv = import_priv; 2976 } 2977 2978 *priv_data = at; 2979 2980 NV_PRINT_AT(NV_DBG_MEMINFO, at); 2981 2982 return NV_OK; 2983 } 2984 2985 void NV_API_CALL nv_unregister_user_pages( 2986 nv_state_t *nv, 2987 NvU64 page_count, 2988 void **import_priv, 2989 void **priv_data 2990 ) 2991 { 2992 nv_alloc_t *at = *priv_data; 2993 2994 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_user_pages: 0x%x\n", page_count); 2995 2996 NV_PRINT_AT(NV_DBG_MEMINFO, at); 2997 2998 WARN_ON(!at->flags.user); 2999 3000 /* Restore the user pages array for the caller to handle */ 3001 *priv_data = at->user_pages; 3002 3003 /* Return the import private data for the caller to handle */ 3004 if (import_priv != NULL) 3005 { 3006 *import_priv = at->import_priv; 3007 } 3008 3009 nvos_free_alloc(at); 3010 } 3011 3012 /* 3013 * This creates a dummy nv_alloc_t for existing physical allocations, so 3014 * that it can be mapped using NvRmMapMemory and BAR2 code path. 3015 */ 3016 NV_STATUS NV_API_CALL nv_register_phys_pages( 3017 nv_state_t *nv, 3018 NvU64 *phys_addr, 3019 NvU64 page_count, 3020 NvU32 cache_type, 3021 void **priv_data 3022 ) 3023 { 3024 nv_alloc_t *at; 3025 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3026 NvU64 i; 3027 NvU64 addr; 3028 3029 at = nvos_create_alloc(nvl->dev, page_count); 3030 3031 if (at == NULL) 3032 return NV_ERR_NO_MEMORY; 3033 /* 3034 * Setting memory flags to cacheable and discontiguous. 3035 */ 3036 at->cache_type = cache_type; 3037 3038 /* 3039 * Only physical address is available so we don't try to reuse existing 3040 * mappings 3041 */ 3042 at->flags.physical = NV_TRUE; 3043 3044 at->order = get_order(at->num_pages * PAGE_SIZE); 3045 3046 for (i = 0, addr = phys_addr[0]; i < page_count; addr = phys_addr[++i]) 3047 { 3048 at->page_table[i]->phys_addr = addr; 3049 } 3050 3051 at->user_pages = NULL; 3052 *priv_data = at; 3053 3054 NV_PRINT_AT(NV_DBG_MEMINFO, at); 3055 3056 return NV_OK; 3057 } 3058 3059 NV_STATUS NV_API_CALL nv_register_sgt( 3060 nv_state_t *nv, 3061 NvU64 *phys_addr, 3062 NvU64 page_count, 3063 NvU32 cache_type, 3064 void **priv_data, 3065 struct sg_table *import_sgt, 3066 void *import_priv 3067 ) 3068 { 3069 nv_alloc_t *at; 3070 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3071 3072 unsigned int i, j = 0; 3073 NvU64 sg_addr, sg_off, sg_len; 3074 struct scatterlist *sg; 3075 3076 at = nvos_create_alloc(nvl->dev, page_count); 3077 3078 if (at == NULL) 3079 return NV_ERR_NO_MEMORY; 3080 3081 /* Populate phys addrs with DMA addrs from SGT */ 3082 for_each_sg(import_sgt->sgl, sg, import_sgt->nents, i) 3083 { 3084 /* 3085 * It is possible for dma_map_sg() to merge scatterlist entries, so 3086 * make sure we account for that here. 3087 */ 3088 for (sg_addr = sg_dma_address(sg), sg_len = sg_dma_len(sg), sg_off = 0; 3089 (sg_off < sg_len) && (j < page_count); 3090 sg_off += PAGE_SIZE, j++) 3091 { 3092 phys_addr[j] = sg_addr + sg_off; 3093 } 3094 } 3095 3096 /* 3097 * Setting memory flags to cacheable and discontiguous. 
3098 */ 3099 at->cache_type = cache_type; 3100 3101 at->import_sgt = import_sgt; 3102 3103 /* Save off the import private data to be returned later */ 3104 if (import_priv != NULL) 3105 { 3106 at->import_priv = import_priv; 3107 } 3108 3109 at->order = get_order(at->num_pages * PAGE_SIZE); 3110 3111 *priv_data = at; 3112 3113 NV_PRINT_AT(NV_DBG_MEMINFO, at); 3114 3115 return NV_OK; 3116 } 3117 3118 void NV_API_CALL nv_unregister_sgt( 3119 nv_state_t *nv, 3120 struct sg_table **import_sgt, 3121 void **import_priv, 3122 void *priv_data 3123 ) 3124 { 3125 nv_alloc_t *at = priv_data; 3126 3127 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_sgt\n"); 3128 3129 NV_PRINT_AT(NV_DBG_MEMINFO, at); 3130 3131 /* Restore the imported SGT for the caller to handle */ 3132 *import_sgt = at->import_sgt; 3133 3134 /* Return the import private data for the caller to handle */ 3135 if (import_priv != NULL) 3136 { 3137 *import_priv = at->import_priv; 3138 } 3139 3140 nvos_free_alloc(at); 3141 } 3142 3143 void NV_API_CALL nv_unregister_phys_pages( 3144 nv_state_t *nv, 3145 void *priv_data 3146 ) 3147 { 3148 nv_alloc_t *at = priv_data; 3149 NV_PRINT_AT(NV_DBG_MEMINFO, at); 3150 3151 nvos_free_alloc(at); 3152 } 3153 3154 NV_STATUS NV_API_CALL nv_get_num_phys_pages( 3155 void *pAllocPrivate, 3156 NvU32 *pNumPages 3157 ) 3158 { 3159 nv_alloc_t *at = pAllocPrivate; 3160 3161 if (!pNumPages) { 3162 return NV_ERR_INVALID_ARGUMENT; 3163 } 3164 3165 *pNumPages = at->num_pages; 3166 3167 return NV_OK; 3168 } 3169 3170 NV_STATUS NV_API_CALL nv_get_phys_pages( 3171 void *pAllocPrivate, 3172 void *pPages, 3173 NvU32 *pNumPages 3174 ) 3175 { 3176 nv_alloc_t *at = pAllocPrivate; 3177 struct page **pages = (struct page **)pPages; 3178 NvU32 page_count; 3179 int i; 3180 3181 if (!pNumPages || !pPages) { 3182 return NV_ERR_INVALID_ARGUMENT; 3183 } 3184 3185 page_count = NV_MIN(*pNumPages, at->num_pages); 3186 3187 for (i = 0; i < page_count; i++) { 3188 pages[i] = NV_GET_PAGE_STRUCT(at->page_table[i]->phys_addr); 3189 } 3190 3191 *pNumPages = page_count; 3192 3193 return NV_OK; 3194 } 3195 3196 void* NV_API_CALL nv_alloc_kernel_mapping( 3197 nv_state_t *nv, 3198 void *pAllocPrivate, 3199 NvU64 pageIndex, 3200 NvU32 pageOffset, 3201 NvU64 size, 3202 void **pPrivate 3203 ) 3204 { 3205 nv_alloc_t *at = pAllocPrivate; 3206 NvU32 j, page_count; 3207 NvUPtr virt_addr; 3208 struct page **pages; 3209 NvBool isUserAllocatedMem; 3210 3211 // 3212 // For User allocated memory (like ErrorNotifier's) which is NOT allocated 3213 // nor owned by RM, the RM driver just stores the physical address 3214 // corresponding to that memory and does not map it until required. 3215 // In that case, in page tables the virt_addr == 0, so first we need to map 3216 // those pages to obtain virtual address. 3217 // 3218 isUserAllocatedMem = at->flags.user && 3219 !at->page_table[pageIndex]->virt_addr && 3220 at->page_table[pageIndex]->phys_addr; 3221 3222 // 3223 // User memory may NOT have kernel VA. So check this and fallback to else 3224 // case to create one. 3225 // 3226 if (((size + pageOffset) <= PAGE_SIZE) && 3227 !at->flags.guest && !at->flags.aliased && 3228 !isUserAllocatedMem && !at->flags.physical) 3229 { 3230 *pPrivate = NULL; 3231 return (void *)(at->page_table[pageIndex]->virt_addr + pageOffset); 3232 } 3233 else 3234 { 3235 size += pageOffset; 3236 page_count = (size >> PAGE_SHIFT) + ((size & ~NV_PAGE_MASK) ? 
1 : 0); 3237 3238 if (at->flags.guest) 3239 { 3240 virt_addr = nv_map_guest_pages(at, 3241 nv->bars[NV_GPU_BAR_INDEX_REGS].cpu_address, 3242 page_count, pageIndex); 3243 } 3244 else 3245 { 3246 NV_KMALLOC(pages, sizeof(struct page *) * page_count); 3247 if (pages == NULL) 3248 { 3249 nv_printf(NV_DBG_ERRORS, 3250 "NVRM: failed to allocate vmap() page descriptor table!\n"); 3251 return NULL; 3252 } 3253 3254 for (j = 0; j < page_count; j++) 3255 pages[j] = NV_GET_PAGE_STRUCT(at->page_table[pageIndex+j]->phys_addr); 3256 3257 virt_addr = nv_vm_map_pages(pages, page_count, 3258 at->cache_type == NV_MEMORY_CACHED, at->flags.unencrypted); 3259 NV_KFREE(pages, sizeof(struct page *) * page_count); 3260 } 3261 3262 if (virt_addr == 0) 3263 { 3264 nv_printf(NV_DBG_ERRORS, "NVRM: failed to map pages!\n"); 3265 return NULL; 3266 } 3267 3268 *pPrivate = (void *)(NvUPtr)page_count; 3269 return (void *)(virt_addr + pageOffset); 3270 } 3271 3272 return NULL; 3273 } 3274 3275 NV_STATUS NV_API_CALL nv_free_kernel_mapping( 3276 nv_state_t *nv, 3277 void *pAllocPrivate, 3278 void *address, 3279 void *pPrivate 3280 ) 3281 { 3282 nv_alloc_t *at = pAllocPrivate; 3283 NvUPtr virt_addr; 3284 NvU32 page_count; 3285 3286 virt_addr = ((NvUPtr)address & NV_PAGE_MASK); 3287 page_count = (NvUPtr)pPrivate; 3288 3289 if (at->flags.guest) 3290 { 3291 nv_iounmap((void *)virt_addr, (page_count * PAGE_SIZE)); 3292 } 3293 else if (pPrivate != NULL) 3294 { 3295 nv_vm_unmap_pages(virt_addr, page_count); 3296 } 3297 3298 return NV_OK; 3299 } 3300 3301 NV_STATUS NV_API_CALL nv_alloc_pages( 3302 nv_state_t *nv, 3303 NvU32 page_count, 3304 NvBool contiguous, 3305 NvU32 cache_type, 3306 NvBool zeroed, 3307 NvBool unencrypted, 3308 NvS32 node_id, 3309 NvU64 *pte_array, 3310 void **priv_data 3311 ) 3312 { 3313 nv_alloc_t *at; 3314 NV_STATUS status = NV_ERR_NO_MEMORY; 3315 nv_linux_state_t *nvl = NULL; 3316 NvBool will_remap = NV_FALSE; 3317 NvU32 i; 3318 struct device *dev = NULL; 3319 3320 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_alloc_pages: %d pages, nodeid %d\n", page_count, node_id); 3321 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: contig %d cache_type %d\n", 3322 contiguous, cache_type); 3323 3324 // 3325 // system memory allocation can be associated with a client instead of a gpu 3326 // handle the case where per device state is NULL 3327 // 3328 if(nv) 3329 { 3330 nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3331 will_remap = nv_requires_dma_remap(nv); 3332 dev = nvl->dev; 3333 } 3334 3335 if (nv_encode_caching(NULL, cache_type, NV_MEMORY_TYPE_SYSTEM)) 3336 return NV_ERR_NOT_SUPPORTED; 3337 3338 at = nvos_create_alloc(dev, page_count); 3339 if (at == NULL) 3340 return NV_ERR_NO_MEMORY; 3341 3342 at->cache_type = cache_type; 3343 3344 if (contiguous) 3345 at->flags.contig = NV_TRUE; 3346 if (zeroed) 3347 at->flags.zeroed = NV_TRUE; 3348 #if defined(NVCPU_AARCH64) 3349 if (at->cache_type != NV_MEMORY_CACHED) 3350 at->flags.aliased = NV_TRUE; 3351 #endif 3352 if (unencrypted) 3353 at->flags.unencrypted = NV_TRUE; 3354 3355 #if defined(NVCPU_PPC64LE) 3356 /* 3357 * Starting on Power9 systems, DMA addresses for NVLink are no longer the 3358 * same as used over PCIe. There is an address compression scheme required 3359 * for NVLink ONLY which impacts the upper address bits of the DMA address. 
3360 * 3361 * This divergence between PCIe and NVLink DMA mappings breaks assumptions 3362 * in the driver where during initialization we allocate system memory 3363 * for the GPU to access over PCIe before NVLink is trained -- and some of 3364 * these mappings persist on the GPU. If these persistent mappings are not 3365 * equivalent they will cause invalid DMA accesses from the GPU once we 3366 * switch to NVLink. 3367 * 3368 * To work around this we limit all system memory allocations from the driver 3369 * during the period before NVLink is enabled to be from NUMA node 0 (CPU 0) 3370 * which has a CPU real address with the upper address bits (above bit 42) 3371 * set to 0. Effectively making the PCIe and NVLink DMA mappings equivalent 3372 * allowing persistent system memory mappings already programmed on the GPU 3373 * to remain valid after NVLink is enabled. 3374 * 3375 * See Bug 1920398 for more details. 3376 */ 3377 if (nv && nvl->npu && !nvl->dma_dev.nvlink) 3378 { 3379 at->flags.node = NV_TRUE; 3380 at->node_id = 0; 3381 } 3382 #endif 3383 3384 if (node_id != NUMA_NO_NODE) 3385 { 3386 at->flags.node = NV_TRUE; 3387 at->node_id = node_id; 3388 } 3389 3390 if (at->flags.contig) 3391 status = nv_alloc_contig_pages(nv, at); 3392 else 3393 status = nv_alloc_system_pages(nv, at); 3394 3395 if (status != NV_OK) 3396 goto failed; 3397 3398 for (i = 0; i < ((contiguous) ? 1 : page_count); i++) 3399 { 3400 /* 3401 * The contents of the pte_array[] depend on whether or not this device 3402 * requires DMA-remapping. If it does, it should be the phys addresses 3403 * used by the DMA-remapping paths, otherwise it should be the actual 3404 * address that the device should use for DMA (which, confusingly, may 3405 * be different than the CPU physical address, due to a static DMA 3406 * offset). 3407 */ 3408 if ((nv == NULL) || will_remap) 3409 { 3410 pte_array[i] = at->page_table[i]->phys_addr; 3411 } 3412 else 3413 { 3414 pte_array[i] = nv_phys_to_dma(dev, 3415 at->page_table[i]->phys_addr); 3416 } 3417 } 3418 3419 *priv_data = at; 3420 NV_ATOMIC_INC(at->usage_count); 3421 3422 NV_PRINT_AT(NV_DBG_MEMINFO, at); 3423 3424 return NV_OK; 3425 3426 failed: 3427 nvos_free_alloc(at); 3428 3429 return status; 3430 } 3431 3432 NV_STATUS NV_API_CALL nv_free_pages( 3433 nv_state_t *nv, 3434 NvU32 page_count, 3435 NvBool contiguous, 3436 NvU32 cache_type, 3437 void *priv_data 3438 ) 3439 { 3440 NV_STATUS rmStatus = NV_OK; 3441 nv_alloc_t *at = priv_data; 3442 3443 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_free_pages: 0x%x\n", page_count); 3444 3445 NV_PRINT_AT(NV_DBG_MEMINFO, at); 3446 3447 /* 3448 * If the 'at' usage count doesn't drop to zero here, not all of 3449 * the user mappings have been torn down in time - we can't 3450 * safely free the memory. We report success back to the RM, but 3451 * defer the actual free operation until later. 3452 * 3453 * This is described in greater detail in the comments above the 3454 * nvidia_vma_(open|release)() callbacks in nv-mmap.c. 
3455 */ 3456 if (!NV_ATOMIC_DEC_AND_TEST(at->usage_count)) 3457 return NV_OK; 3458 3459 if (!at->flags.guest) 3460 { 3461 if (at->flags.contig) 3462 nv_free_contig_pages(at); 3463 else 3464 nv_free_system_pages(at); 3465 } 3466 3467 nvos_free_alloc(at); 3468 3469 return rmStatus; 3470 } 3471 3472 NvBool nv_lock_init_locks 3473 ( 3474 nvidia_stack_t *sp, 3475 nv_state_t *nv 3476 ) 3477 { 3478 nv_linux_state_t *nvl; 3479 nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3480 3481 NV_INIT_MUTEX(&nvl->ldata_lock); 3482 NV_INIT_MUTEX(&nvl->mmap_lock); 3483 3484 NV_ATOMIC_SET(nvl->usage_count, 0); 3485 3486 if (!rm_init_event_locks(sp, nv)) 3487 return NV_FALSE; 3488 3489 return NV_TRUE; 3490 } 3491 3492 void nv_lock_destroy_locks 3493 ( 3494 nvidia_stack_t *sp, 3495 nv_state_t *nv 3496 ) 3497 { 3498 rm_destroy_event_locks(sp, nv); 3499 } 3500 3501 void NV_API_CALL nv_post_event( 3502 nv_event_t *event, 3503 NvHandle handle, 3504 NvU32 index, 3505 NvU32 info32, 3506 NvU16 info16, 3507 NvBool data_valid 3508 ) 3509 { 3510 nv_linux_file_private_t *nvlfp = nv_get_nvlfp_from_nvfp(event->nvfp); 3511 unsigned long eflags; 3512 nvidia_event_t *nvet; 3513 3514 NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags); 3515 3516 if (data_valid) 3517 { 3518 NV_KMALLOC_ATOMIC(nvet, sizeof(nvidia_event_t)); 3519 if (nvet == NULL) 3520 { 3521 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags); 3522 return; 3523 } 3524 3525 if (nvlfp->event_data_tail != NULL) 3526 nvlfp->event_data_tail->next = nvet; 3527 if (nvlfp->event_data_head == NULL) 3528 nvlfp->event_data_head = nvet; 3529 nvlfp->event_data_tail = nvet; 3530 nvet->next = NULL; 3531 3532 nvet->event = *event; 3533 nvet->event.hObject = handle; 3534 nvet->event.index = index; 3535 nvet->event.info32 = info32; 3536 nvet->event.info16 = info16; 3537 } 3538 // 3539 // 'event_pending' is interpreted by nvidia_poll() and nv_get_event() to 3540 // mean that an event without data is pending. Therefore, only set it to 3541 // true here if newly posted event is dataless. 
3542 // 3543 else 3544 { 3545 nvlfp->dataless_event_pending = NV_TRUE; 3546 } 3547 3548 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags); 3549 3550 wake_up_interruptible(&nvlfp->waitqueue); 3551 } 3552 3553 NvBool NV_API_CALL nv_is_rm_firmware_active( 3554 nv_state_t *nv 3555 ) 3556 { 3557 if (rm_firmware_active) 3558 { 3559 // "all" here means all GPUs 3560 if (strcmp(rm_firmware_active, "all") == 0) 3561 return NV_TRUE; 3562 } 3563 return NV_FALSE; 3564 } 3565 3566 const void* NV_API_CALL nv_get_firmware( 3567 nv_state_t *nv, 3568 nv_firmware_type_t fw_type, 3569 nv_firmware_chip_family_t fw_chip_family, 3570 const void **fw_buf, 3571 NvU32 *fw_size 3572 ) 3573 { 3574 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3575 const struct firmware *fw; 3576 3577 // path is relative to /lib/firmware 3578 // if this fails it will print an error to dmesg 3579 if (request_firmware(&fw, nv_firmware_path(fw_type, fw_chip_family), nvl->dev) != 0) 3580 return NULL; 3581 3582 *fw_size = fw->size; 3583 *fw_buf = fw->data; 3584 3585 return fw; 3586 } 3587 3588 void NV_API_CALL nv_put_firmware( 3589 const void *fw_handle 3590 ) 3591 { 3592 release_firmware(fw_handle); 3593 } 3594 3595 nv_file_private_t* NV_API_CALL nv_get_file_private( 3596 NvS32 fd, 3597 NvBool ctl, 3598 void **os_private 3599 ) 3600 { 3601 struct file *filp = NULL; 3602 nv_linux_file_private_t *nvlfp = NULL; 3603 dev_t rdev = 0; 3604 3605 filp = fget(fd); 3606 3607 if (filp == NULL || !NV_FILE_INODE(filp)) 3608 { 3609 goto fail; 3610 } 3611 3612 rdev = (NV_FILE_INODE(filp))->i_rdev; 3613 3614 if (MAJOR(rdev) != NV_MAJOR_DEVICE_NUMBER) 3615 { 3616 goto fail; 3617 } 3618 3619 if (ctl) 3620 { 3621 if (MINOR(rdev) != NV_CONTROL_DEVICE_MINOR) 3622 goto fail; 3623 } 3624 else 3625 { 3626 NvBool found = NV_FALSE; 3627 int i; 3628 3629 for (i = 0; i <= NV_FRONTEND_CONTROL_DEVICE_MINOR_MIN; i++) 3630 { 3631 if ((nv_minor_num_table[i] != NULL) && (MINOR(rdev) == i)) 3632 { 3633 found = NV_TRUE; 3634 break; 3635 } 3636 } 3637 3638 if (!found) 3639 goto fail; 3640 } 3641 3642 nvlfp = NV_GET_LINUX_FILE_PRIVATE(filp); 3643 3644 *os_private = filp; 3645 3646 return &nvlfp->nvfp; 3647 3648 fail: 3649 3650 if (filp != NULL) 3651 { 3652 fput(filp); 3653 } 3654 3655 return NULL; 3656 } 3657 3658 void NV_API_CALL nv_put_file_private( 3659 void *os_private 3660 ) 3661 { 3662 struct file *filp = os_private; 3663 fput(filp); 3664 } 3665 3666 int NV_API_CALL nv_get_event( 3667 nv_file_private_t *nvfp, 3668 nv_event_t *event, 3669 NvU32 *pending 3670 ) 3671 { 3672 nv_linux_file_private_t *nvlfp = nv_get_nvlfp_from_nvfp(nvfp); 3673 nvidia_event_t *nvet; 3674 unsigned long eflags; 3675 3676 NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags); 3677 3678 nvet = nvlfp->event_data_head; 3679 if (nvet == NULL) 3680 { 3681 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags); 3682 return NV_ERR_GENERIC; 3683 } 3684 3685 *event = nvet->event; 3686 3687 if (nvlfp->event_data_tail == nvet) 3688 nvlfp->event_data_tail = NULL; 3689 nvlfp->event_data_head = nvet->next; 3690 3691 *pending = (nvlfp->event_data_head != NULL); 3692 3693 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags); 3694 3695 NV_KFREE(nvet, sizeof(nvidia_event_t)); 3696 3697 return NV_OK; 3698 } 3699 3700 int NV_API_CALL nv_start_rc_timer( 3701 nv_state_t *nv 3702 ) 3703 { 3704 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3705 3706 if (nv->rc_timer_enabled) 3707 return -1; 3708 3709 nv_printf(NV_DBG_INFO, "NVRM: initializing rc timer\n"); 3710 3711 nv_timer_setup(&nvl->rc_timer, 
nvidia_rc_timer_callback); 3712 3713 nv->rc_timer_enabled = 1; 3714 3715 // set the timeout for 1 second in the future: 3716 mod_timer(&nvl->rc_timer.kernel_timer, jiffies + HZ); 3717 3718 nv_printf(NV_DBG_INFO, "NVRM: rc timer initialized\n"); 3719 3720 return 0; 3721 } 3722 3723 int NV_API_CALL nv_stop_rc_timer( 3724 nv_state_t *nv 3725 ) 3726 { 3727 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3728 3729 if (!nv->rc_timer_enabled) 3730 return -1; 3731 3732 nv_printf(NV_DBG_INFO, "NVRM: stopping rc timer\n"); 3733 nv->rc_timer_enabled = 0; 3734 del_timer_sync(&nvl->rc_timer.kernel_timer); 3735 nv_printf(NV_DBG_INFO, "NVRM: rc timer stopped\n"); 3736 3737 return 0; 3738 } 3739 3740 #define SNAPSHOT_TIMER_FREQ (jiffies + HZ / NV_SNAPSHOT_TIMER_HZ) 3741 3742 static void snapshot_timer_callback(struct nv_timer *timer) 3743 { 3744 nv_linux_state_t *nvl = &nv_ctl_device; 3745 nv_state_t *nv = NV_STATE_PTR(nvl); 3746 unsigned long flags; 3747 3748 NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags); 3749 if (nvl->snapshot_callback != NULL) 3750 { 3751 nvl->snapshot_callback(nv->profiler_context); 3752 mod_timer(&timer->kernel_timer, SNAPSHOT_TIMER_FREQ); 3753 } 3754 NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags); 3755 } 3756 3757 void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context)) 3758 { 3759 nv_linux_state_t *nvl = &nv_ctl_device; 3760 3761 nvl->snapshot_callback = snapshot_callback; 3762 nv_timer_setup(&nvl->snapshot_timer, snapshot_timer_callback); 3763 mod_timer(&nvl->snapshot_timer.kernel_timer, SNAPSHOT_TIMER_FREQ); 3764 } 3765 3766 void NV_API_CALL nv_stop_snapshot_timer(void) 3767 { 3768 nv_linux_state_t *nvl = &nv_ctl_device; 3769 NvBool timer_active; 3770 unsigned long flags; 3771 3772 NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags); 3773 timer_active = nvl->snapshot_callback != NULL; 3774 nvl->snapshot_callback = NULL; 3775 NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags); 3776 3777 if (timer_active) 3778 del_timer_sync(&nvl->snapshot_timer.kernel_timer); 3779 } 3780 3781 void NV_API_CALL nv_flush_snapshot_timer(void) 3782 { 3783 nv_linux_state_t *nvl = &nv_ctl_device; 3784 nv_state_t *nv = NV_STATE_PTR(nvl); 3785 unsigned long flags; 3786 3787 NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags); 3788 if (nvl->snapshot_callback != NULL) 3789 nvl->snapshot_callback(nv->profiler_context); 3790 NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags); 3791 } 3792 3793 static int __init 3794 nvos_count_devices(void) 3795 { 3796 int count; 3797 3798 count = nv_pci_count_devices(); 3799 3800 return count; 3801 } 3802 3803 NvBool nvos_is_chipset_io_coherent(void) 3804 { 3805 if (nv_chipset_is_io_coherent == NV_TRISTATE_INDETERMINATE) 3806 { 3807 nvidia_stack_t *sp = NULL; 3808 if (nv_kmem_cache_alloc_stack(&sp) != 0) 3809 { 3810 nv_printf(NV_DBG_ERRORS, 3811 "NVRM: cannot allocate stack for platform coherence check callback \n"); 3812 WARN_ON(1); 3813 return NV_FALSE; 3814 } 3815 3816 nv_chipset_is_io_coherent = rm_is_chipset_io_coherent(sp); 3817 3818 nv_kmem_cache_free_stack(sp); 3819 } 3820 3821 return nv_chipset_is_io_coherent; 3822 } 3823 3824 #if defined(CONFIG_PM) 3825 static NV_STATUS 3826 nv_power_management( 3827 nv_state_t *nv, 3828 nv_pm_action_t pm_action 3829 ) 3830 { 3831 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3832 int status = NV_OK; 3833 nvidia_stack_t *sp = NULL; 3834 3835 if (nv_kmem_cache_alloc_stack(&sp) != 0) 3836 { 3837 return NV_ERR_NO_MEMORY; 3838 } 3839 3840 status = 
nv_check_gpu_state(nv); 3841 if (status == NV_ERR_GPU_IS_LOST) 3842 { 3843 NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping PM event\n"); 3844 goto failure; 3845 } 3846 3847 switch (pm_action) 3848 { 3849 case NV_PM_ACTION_STANDBY: 3850 /* fall through */ 3851 case NV_PM_ACTION_HIBERNATE: 3852 { 3853 status = rm_power_management(sp, nv, pm_action); 3854 3855 nv_kthread_q_stop(&nvl->bottom_half_q); 3856 3857 nv_disable_pat_support(); 3858 break; 3859 } 3860 case NV_PM_ACTION_RESUME: 3861 { 3862 nv_enable_pat_support(); 3863 3864 nv_kthread_q_item_init(&nvl->bottom_half_q_item, 3865 nvidia_isr_bh_unlocked, (void *)nv); 3866 3867 status = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name); 3868 if (status != NV_OK) 3869 break; 3870 3871 status = rm_power_management(sp, nv, pm_action); 3872 break; 3873 } 3874 default: 3875 status = NV_ERR_INVALID_ARGUMENT; 3876 break; 3877 } 3878 3879 failure: 3880 nv_kmem_cache_free_stack(sp); 3881 3882 return status; 3883 } 3884 3885 static NV_STATUS 3886 nv_restore_user_channels( 3887 nv_state_t *nv 3888 ) 3889 { 3890 NV_STATUS status = NV_OK; 3891 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3892 nv_stack_t *sp = NULL; 3893 3894 if (nv_kmem_cache_alloc_stack(&sp) != 0) 3895 { 3896 return NV_ERR_NO_MEMORY; 3897 } 3898 3899 down(&nvl->ldata_lock); 3900 3901 if ((nv->flags & NV_FLAG_OPEN) == 0) 3902 { 3903 goto done; 3904 } 3905 3906 status = rm_restart_user_channels(sp, nv); 3907 WARN_ON(status != NV_OK); 3908 3909 down(&nvl->mmap_lock); 3910 3911 nv_set_safe_to_mmap_locked(nv, NV_TRUE); 3912 3913 up(&nvl->mmap_lock); 3914 3915 rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE); 3916 3917 done: 3918 up(&nvl->ldata_lock); 3919 3920 nv_kmem_cache_free_stack(sp); 3921 3922 return status; 3923 } 3924 3925 static NV_STATUS 3926 nv_preempt_user_channels( 3927 nv_state_t *nv 3928 ) 3929 { 3930 NV_STATUS status = NV_OK; 3931 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 3932 nv_stack_t *sp = NULL; 3933 3934 if (nv_kmem_cache_alloc_stack(&sp) != 0) 3935 { 3936 return NV_ERR_NO_MEMORY; 3937 } 3938 3939 down(&nvl->ldata_lock); 3940 3941 if ((nv->flags & NV_FLAG_OPEN) == 0) 3942 { 3943 goto done; 3944 } 3945 3946 status = rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE); 3947 WARN_ON(status != NV_OK); 3948 3949 down(&nvl->mmap_lock); 3950 3951 nv_set_safe_to_mmap_locked(nv, NV_FALSE); 3952 nv_revoke_gpu_mappings_locked(nv); 3953 3954 up(&nvl->mmap_lock); 3955 3956 status = rm_stop_user_channels(sp, nv); 3957 WARN_ON(status != NV_OK); 3958 3959 done: 3960 up(&nvl->ldata_lock); 3961 3962 nv_kmem_cache_free_stack(sp); 3963 3964 return status; 3965 } 3966 3967 static NV_STATUS 3968 nvidia_suspend( 3969 struct device *dev, 3970 nv_pm_action_t pm_action, 3971 NvBool is_procfs_suspend 3972 ) 3973 { 3974 NV_STATUS status = NV_OK; 3975 struct pci_dev *pci_dev = NULL; 3976 nv_linux_state_t *nvl; 3977 nv_state_t *nv; 3978 3979 if (dev_is_pci(dev)) 3980 { 3981 pci_dev = to_pci_dev(dev); 3982 nvl = pci_get_drvdata(pci_dev); 3983 } 3984 else 3985 { 3986 nvl = dev_get_drvdata(dev); 3987 } 3988 nv = NV_STATE_PTR(nvl); 3989 3990 down(&nvl->ldata_lock); 3991 3992 if (((nv->flags & NV_FLAG_OPEN) == 0) && 3993 ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) == 0)) 3994 { 3995 goto done; 3996 } 3997 3998 if ((nv->flags & NV_FLAG_SUSPENDED) != 0) 3999 { 4000 nvl->suspend_count++; 4001 goto pci_pm; 4002 } 4003 4004 if (nv->preserve_vidmem_allocations && !is_procfs_suspend) 4005 { 4006 NV_DEV_PRINTF(NV_DBG_ERRORS, nv, 4007 "PreserveVideoMemoryAllocations module parameter 
is set. " 4008 "System Power Management attempted without driver procfs suspend interface. " 4009 "Please refer to the 'Configuring Power Management Support' section in the driver README.\n"); 4010 status = NV_ERR_NOT_SUPPORTED; 4011 goto done; 4012 } 4013 4014 nvidia_modeset_suspend(nv->gpu_id); 4015 4016 status = nv_power_management(nv, pm_action); 4017 4018 if (status != NV_OK) 4019 { 4020 nvidia_modeset_resume(nv->gpu_id); 4021 goto done; 4022 } 4023 else 4024 { 4025 nv->flags |= NV_FLAG_SUSPENDED; 4026 } 4027 4028 pci_pm: 4029 /* 4030 * Check if PCI power state should be D0 during system suspend. The PCI PM 4031 * core will change the power state only if the driver has not saved the 4032 * state in its suspend callback. 4033 */ 4034 if ((nv->d0_state_in_suspend) && (pci_dev != NULL) && 4035 !is_procfs_suspend && (pm_action == NV_PM_ACTION_STANDBY)) 4036 { 4037 pci_save_state(pci_dev); 4038 } 4039 4040 done: 4041 up(&nvl->ldata_lock); 4042 4043 return status; 4044 } 4045 4046 static NV_STATUS 4047 nvidia_resume( 4048 struct device *dev, 4049 nv_pm_action_t pm_action 4050 ) 4051 { 4052 NV_STATUS status = NV_OK; 4053 struct pci_dev *pci_dev; 4054 nv_linux_state_t *nvl; 4055 nv_state_t *nv; 4056 4057 if (dev_is_pci(dev)) 4058 { 4059 pci_dev = to_pci_dev(dev); 4060 nvl = pci_get_drvdata(pci_dev); 4061 } 4062 else 4063 { 4064 nvl = dev_get_drvdata(dev); 4065 } 4066 nv = NV_STATE_PTR(nvl); 4067 4068 down(&nvl->ldata_lock); 4069 4070 if ((nv->flags & NV_FLAG_SUSPENDED) == 0) 4071 { 4072 goto done; 4073 } 4074 4075 if (nvl->suspend_count != 0) 4076 { 4077 nvl->suspend_count--; 4078 } 4079 else 4080 { 4081 status = nv_power_management(nv, pm_action); 4082 4083 if (status == NV_OK) 4084 { 4085 nvidia_modeset_resume(nv->gpu_id); 4086 nv->flags &= ~NV_FLAG_SUSPENDED; 4087 } 4088 } 4089 4090 done: 4091 up(&nvl->ldata_lock); 4092 4093 return status; 4094 } 4095 4096 static NV_STATUS 4097 nv_resume_devices( 4098 nv_pm_action_t pm_action, 4099 nv_pm_action_depth_t pm_action_depth 4100 ) 4101 { 4102 nv_linux_state_t *nvl; 4103 NvBool resume_devices = NV_TRUE; 4104 NV_STATUS status; 4105 4106 if (pm_action_depth == NV_PM_ACTION_DEPTH_MODESET) 4107 { 4108 goto resume_modeset; 4109 } 4110 4111 if (pm_action_depth == NV_PM_ACTION_DEPTH_UVM) 4112 { 4113 resume_devices = NV_FALSE; 4114 } 4115 4116 LOCK_NV_LINUX_DEVICES(); 4117 4118 for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next) 4119 { 4120 if (resume_devices) 4121 { 4122 status = nvidia_resume(nvl->dev, pm_action); 4123 WARN_ON(status != NV_OK); 4124 } 4125 } 4126 4127 UNLOCK_NV_LINUX_DEVICES(); 4128 4129 status = nv_uvm_resume(); 4130 WARN_ON(status != NV_OK); 4131 4132 LOCK_NV_LINUX_DEVICES(); 4133 4134 for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next) 4135 { 4136 status = nv_restore_user_channels(NV_STATE_PTR(nvl)); 4137 WARN_ON(status != NV_OK); 4138 } 4139 4140 UNLOCK_NV_LINUX_DEVICES(); 4141 4142 resume_modeset: 4143 nvidia_modeset_resume(0); 4144 4145 return NV_OK; 4146 } 4147 4148 static NV_STATUS 4149 nv_suspend_devices( 4150 nv_pm_action_t pm_action, 4151 nv_pm_action_depth_t pm_action_depth 4152 ) 4153 { 4154 nv_linux_state_t *nvl; 4155 NvBool resume_devices = NV_FALSE; 4156 NV_STATUS status = NV_OK; 4157 4158 nvidia_modeset_suspend(0); 4159 4160 if (pm_action_depth == NV_PM_ACTION_DEPTH_MODESET) 4161 { 4162 return NV_OK; 4163 } 4164 4165 LOCK_NV_LINUX_DEVICES(); 4166 4167 for (nvl = nv_linux_devices; nvl != NULL && status == NV_OK; nvl = nvl->next) 4168 { 4169 status = nv_preempt_user_channels(NV_STATE_PTR(nvl)); 4170
WARN_ON(status != NV_OK); 4171 } 4172 4173 UNLOCK_NV_LINUX_DEVICES(); 4174 4175 if (status == NV_OK) 4176 { 4177 status = nv_uvm_suspend(); 4178 WARN_ON(status != NV_OK); 4179 } 4180 if (status != NV_OK) 4181 { 4182 goto done; 4183 } 4184 4185 if (pm_action_depth == NV_PM_ACTION_DEPTH_UVM) 4186 { 4187 return NV_OK; 4188 } 4189 4190 LOCK_NV_LINUX_DEVICES(); 4191 4192 for (nvl = nv_linux_devices; nvl != NULL && status == NV_OK; nvl = nvl->next) 4193 { 4194 status = nvidia_suspend(nvl->dev, pm_action, NV_TRUE); 4195 WARN_ON(status != NV_OK); 4196 } 4197 if (status != NV_OK) 4198 { 4199 resume_devices = NV_TRUE; 4200 } 4201 4202 UNLOCK_NV_LINUX_DEVICES(); 4203 4204 done: 4205 if (status != NV_OK) 4206 { 4207 LOCK_NV_LINUX_DEVICES(); 4208 4209 for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next) 4210 { 4211 if (resume_devices) 4212 { 4213 nvidia_resume(nvl->dev, pm_action); 4214 } 4215 4216 nv_restore_user_channels(NV_STATE_PTR(nvl)); 4217 } 4218 4219 UNLOCK_NV_LINUX_DEVICES(); 4220 } 4221 4222 return status; 4223 } 4224 4225 NV_STATUS 4226 nv_set_system_power_state( 4227 nv_power_state_t power_state, 4228 nv_pm_action_depth_t pm_action_depth 4229 ) 4230 { 4231 NV_STATUS status; 4232 nv_pm_action_t pm_action; 4233 4234 switch (power_state) 4235 { 4236 case NV_POWER_STATE_IN_HIBERNATE: 4237 pm_action = NV_PM_ACTION_HIBERNATE; 4238 break; 4239 case NV_POWER_STATE_IN_STANDBY: 4240 pm_action = NV_PM_ACTION_STANDBY; 4241 break; 4242 case NV_POWER_STATE_RUNNING: 4243 pm_action = NV_PM_ACTION_RESUME; 4244 break; 4245 default: 4246 return NV_ERR_INVALID_ARGUMENT; 4247 } 4248 4249 down(&nv_system_power_state_lock); 4250 4251 if (nv_system_power_state == power_state) 4252 { 4253 status = NV_OK; 4254 goto done; 4255 } 4256 4257 if (power_state == NV_POWER_STATE_RUNNING) 4258 { 4259 status = nv_resume_devices(pm_action, nv_system_pm_action_depth); 4260 up_write(&nv_system_pm_lock); 4261 } 4262 else 4263 { 4264 if (nv_system_power_state != NV_POWER_STATE_RUNNING) 4265 { 4266 status = NV_ERR_INVALID_ARGUMENT; 4267 goto done; 4268 } 4269 4270 nv_system_pm_action_depth = pm_action_depth; 4271 4272 down_write(&nv_system_pm_lock); 4273 status = nv_suspend_devices(pm_action, nv_system_pm_action_depth); 4274 if (status != NV_OK) 4275 { 4276 up_write(&nv_system_pm_lock); 4277 goto done; 4278 } 4279 } 4280 4281 nv_system_power_state = power_state; 4282 4283 done: 4284 up(&nv_system_power_state_lock); 4285 4286 return status; 4287 } 4288 4289 int nv_pmops_suspend( 4290 struct device *dev 4291 ) 4292 { 4293 NV_STATUS status; 4294 4295 status = nvidia_suspend(dev, NV_PM_ACTION_STANDBY, NV_FALSE); 4296 return (status == NV_OK) ? 0 : -EIO; 4297 } 4298 4299 int nv_pmops_resume( 4300 struct device *dev 4301 ) 4302 { 4303 NV_STATUS status; 4304 4305 status = nvidia_resume(dev, NV_PM_ACTION_RESUME); 4306 return (status == NV_OK) ? 0 : -EIO; 4307 } 4308 4309 int nv_pmops_freeze( 4310 struct device *dev 4311 ) 4312 { 4313 NV_STATUS status; 4314 4315 status = nvidia_suspend(dev, NV_PM_ACTION_HIBERNATE, NV_FALSE); 4316 return (status == NV_OK) ? 0 : -EIO; 4317 } 4318 4319 int nv_pmops_thaw( 4320 struct device *dev 4321 ) 4322 { 4323 return 0; 4324 } 4325 4326 int nv_pmops_restore( 4327 struct device *dev 4328 ) 4329 { 4330 NV_STATUS status; 4331 4332 status = nvidia_resume(dev, NV_PM_ACTION_RESUME); 4333 return (status == NV_OK) ? 
0 : -EIO; 4334 } 4335 4336 int nv_pmops_poweroff( 4337 struct device *dev 4338 ) 4339 { 4340 return 0; 4341 } 4342 4343 static int 4344 nvidia_transition_dynamic_power( 4345 struct device *dev, 4346 NvBool enter 4347 ) 4348 { 4349 struct pci_dev *pci_dev = to_pci_dev(dev); 4350 nv_linux_state_t *nvl = pci_get_drvdata(pci_dev); 4351 nv_state_t *nv = NV_STATE_PTR(nvl); 4352 nvidia_stack_t *sp = NULL; 4353 NV_STATUS status; 4354 4355 if ((nv->flags & (NV_FLAG_OPEN | NV_FLAG_PERSISTENT_SW_STATE)) == 0) 4356 { 4357 return 0; 4358 } 4359 4360 if (nv_kmem_cache_alloc_stack(&sp) != 0) 4361 { 4362 return -ENOMEM; 4363 } 4364 4365 status = rm_transition_dynamic_power(sp, nv, enter); 4366 4367 nv_kmem_cache_free_stack(sp); 4368 4369 return (status == NV_OK) ? 0 : -EIO; 4370 } 4371 4372 int nv_pmops_runtime_suspend( 4373 struct device *dev 4374 ) 4375 { 4376 return nvidia_transition_dynamic_power(dev, NV_TRUE); 4377 } 4378 4379 int nv_pmops_runtime_resume( 4380 struct device *dev 4381 ) 4382 { 4383 return nvidia_transition_dynamic_power(dev, NV_FALSE); 4384 } 4385 #endif /* defined(CONFIG_PM) */ 4386 4387 nv_state_t* NV_API_CALL nv_get_adapter_state( 4388 NvU32 domain, 4389 NvU8 bus, 4390 NvU8 slot 4391 ) 4392 { 4393 nv_linux_state_t *nvl; 4394 4395 LOCK_NV_LINUX_DEVICES(); 4396 for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next) 4397 { 4398 nv_state_t *nv = NV_STATE_PTR(nvl); 4399 if (nv->pci_info.domain == domain && nv->pci_info.bus == bus 4400 && nv->pci_info.slot == slot) 4401 { 4402 UNLOCK_NV_LINUX_DEVICES(); 4403 return nv; 4404 } 4405 } 4406 UNLOCK_NV_LINUX_DEVICES(); 4407 4408 return NULL; 4409 } 4410 4411 nv_state_t* NV_API_CALL nv_get_ctl_state(void) 4412 { 4413 return NV_STATE_PTR(&nv_ctl_device); 4414 } 4415 4416 NV_STATUS NV_API_CALL nv_log_error( 4417 nv_state_t *nv, 4418 NvU32 error_number, 4419 const char *format, 4420 va_list ap 4421 ) 4422 { 4423 NV_STATUS status = NV_OK; 4424 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 4425 4426 nv_report_error(nvl->pci_dev, error_number, format, ap); 4427 #if defined(CONFIG_CRAY_XT) 4428 status = nvos_forward_error_to_cray(nvl->pci_dev, error_number, 4429 format, ap); 4430 #endif 4431 4432 return status; 4433 } 4434 4435 NvU64 NV_API_CALL nv_get_dma_start_address( 4436 nv_state_t *nv 4437 ) 4438 { 4439 #if defined(NVCPU_PPC64LE) 4440 struct pci_dev *pci_dev; 4441 dma_addr_t dma_addr; 4442 NvU64 saved_dma_mask; 4443 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 4444 4445 /* 4446 * If TCE bypass is disabled via a module parameter, then just return 4447 * the default (which is 0). 4448 * 4449 * Otherwise, the DMA start address only needs to be set once, and it 4450 * won't change afterward. Just return the cached value if asked again, 4451 * to avoid the kernel printing redundant messages to the kernel 4452 * log when we call pci_set_dma_mask(). 4453 */ 4454 if ((nv_tce_bypass_mode == NV_TCE_BYPASS_MODE_DISABLE) || 4455 (nvl->tce_bypass_enabled)) 4456 { 4457 return nvl->dma_dev.addressable_range.start; 4458 } 4459 4460 pci_dev = nvl->pci_dev; 4461 4462 /* 4463 * Linux on IBM POWER8 offers 2 different DMA set-ups, sometimes 4464 * referred to as "windows". 4465 * 4466 * The "default window" provides a 2GB region of PCI address space 4467 * located below the 32-bit line. The IOMMU is used to provide a 4468 * "rich" mapping--any page in system memory can be mapped at an 4469 * arbitrary address within this window. The mappings are dynamic 4470 * and pass in and out of being as pci_map*()/pci_unmap*() calls 4471 * are made. 
4472 * 4473 * Dynamic DMA Windows (sometimes "Huge DDW") provides a linear 4474 * mapping of the system's entire physical address space at some 4475 * fixed offset above the 59-bit line. IOMMU is still used, and 4476 * pci_map*()/pci_unmap*() are still required, but mappings are 4477 * static. They're effectively set up in advance, and any given 4478 * system page will always map to the same PCI bus address. I.e. 4479 * physical 0x00000000xxxxxxxx => PCI 0x08000000xxxxxxxx 4480 * 4481 * This driver does not support the 2G default window because 4482 * of its limited size, and for reasons having to do with UVM. 4483 * 4484 * Linux on POWER8 will only provide the DDW-style full linear 4485 * mapping when the driver claims support for 64-bit DMA addressing 4486 * (a pre-requisite because the PCI addresses used in this case will 4487 * be near the top of the 64-bit range). The linear mapping 4488 * is not available in all system configurations. 4489 * 4490 * Detect whether the linear mapping is present by claiming 4491 * 64-bit support and then mapping physical page 0. For historical 4492 * reasons, Linux on POWER8 will never map a page to PCI address 0x0. 4493 * In the "default window" case page 0 will be mapped to some 4494 * non-zero address below the 32-bit line. In the 4495 * DDW/linear-mapping case, it will be mapped to address 0 plus 4496 * some high-order offset. 4497 * 4498 * If the linear mapping is present and sane then return the offset 4499 * as the starting address for all DMA mappings. 4500 */ 4501 saved_dma_mask = pci_dev->dma_mask; 4502 if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64)) != 0) 4503 { 4504 goto done; 4505 } 4506 4507 dma_addr = pci_map_single(pci_dev, NULL, 1, DMA_BIDIRECTIONAL); 4508 if (pci_dma_mapping_error(pci_dev, dma_addr)) 4509 { 4510 pci_set_dma_mask(pci_dev, saved_dma_mask); 4511 goto done; 4512 } 4513 4514 pci_unmap_single(pci_dev, dma_addr, 1, DMA_BIDIRECTIONAL); 4515 4516 /* 4517 * From IBM: "For IODA2, native DMA bypass or KVM TCE-based implementation 4518 * of full 64-bit DMA support will establish a window in address-space 4519 * with the high 14 bits being constant and the bottom up-to-50 bits 4520 * varying with the mapping." 4521 * 4522 * Unfortunately, we don't have any good interfaces or definitions from 4523 * the kernel to get information about the DMA offset assigned by OS. 4524 * However, we have been told that the offset will be defined by the top 4525 * 14 bits of the address, and bits 40-49 will not vary for any DMA 4526 * mappings until 1TB of system memory is surpassed; this limitation is 4527 * essential for us to function properly since our current GPUs only 4528 * support 40 physical address bits. We are in a fragile place where we 4529 * need to tell the OS that we're capable of 64-bit addressing, while 4530 * relying on the assumption that the top 24 bits will not vary in this 4531 * case. 4532 * 4533 * The way we try to compute the window, then, is mask the trial mapping 4534 * against the DMA capabilities of the device. That way, devices with 4535 * greater addressing capabilities will only take the bits it needs to 4536 * define the window. 4537 */ 4538 if ((dma_addr & DMA_BIT_MASK(32)) != 0) 4539 { 4540 /* 4541 * Huge DDW not available - page 0 mapped to non-zero address below 4542 * the 32-bit line. 
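 *
 * Restore the saved DMA mask and fall back to the default start address.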
4543 */ 4544 nv_printf(NV_DBG_WARNINGS, 4545 "NVRM: DMA window limited by platform\n"); 4546 pci_set_dma_mask(pci_dev, saved_dma_mask); 4547 goto done; 4548 } 4549 else if ((dma_addr & saved_dma_mask) != 0) 4550 { 4551 NvU64 memory_size = os_get_num_phys_pages() * PAGE_SIZE; 4552 if ((dma_addr & ~saved_dma_mask) != 4553 ((dma_addr + memory_size) & ~saved_dma_mask)) 4554 { 4555 /* 4556 * The physical window straddles our addressing limit boundary, 4557 * e.g., for an adapter that can address up to 1TB, the window 4558 * crosses the 40-bit limit so that the lower end of the range 4559 * has different bits 63:40 than the higher end of the range. 4560 * We can only handle a single, static value for bits 63:40, so 4561 * we must fall back here. 4562 */ 4563 nv_printf(NV_DBG_WARNINGS, 4564 "NVRM: DMA window limited by memory size\n"); 4565 pci_set_dma_mask(pci_dev, saved_dma_mask); 4566 goto done; 4567 } 4568 } 4569 4570 nvl->tce_bypass_enabled = NV_TRUE; 4571 nvl->dma_dev.addressable_range.start = dma_addr & ~(saved_dma_mask); 4572 4573 /* Update the coherent mask to match */ 4574 dma_set_coherent_mask(&pci_dev->dev, pci_dev->dma_mask); 4575 4576 done: 4577 return nvl->dma_dev.addressable_range.start; 4578 #else 4579 return 0; 4580 #endif 4581 } 4582 4583 NV_STATUS NV_API_CALL nv_set_primary_vga_status( 4584 nv_state_t *nv 4585 ) 4586 { 4587 /* IORESOURCE_ROM_SHADOW wasn't added until 2.6.10 */ 4588 #if defined(IORESOURCE_ROM_SHADOW) 4589 nv_linux_state_t *nvl; 4590 struct pci_dev *pci_dev; 4591 4592 nvl = NV_GET_NVL_FROM_NV_STATE(nv); 4593 pci_dev = nvl->pci_dev; 4594 4595 nv->primary_vga = ((NV_PCI_RESOURCE_FLAGS(pci_dev, PCI_ROM_RESOURCE) & 4596 IORESOURCE_ROM_SHADOW) == IORESOURCE_ROM_SHADOW); 4597 return NV_OK; 4598 #else 4599 return NV_ERR_NOT_SUPPORTED; 4600 #endif 4601 } 4602 4603 NV_STATUS NV_API_CALL nv_pci_trigger_recovery( 4604 nv_state_t *nv 4605 ) 4606 { 4607 NV_STATUS status = NV_ERR_NOT_SUPPORTED; 4608 #if defined(NV_PCI_ERROR_RECOVERY) 4609 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 4610 4611 /* 4612 * Calling readl() on PPC64LE will allow the kernel to check its state for 4613 * the device and update it accordingly. This needs to be done before 4614 * checking if the PCI channel is offline, so that we don't check stale 4615 * state. 4616 * 4617 * This will also kick off the recovery process for the device. 4618 */ 4619 if (NV_PCI_ERROR_RECOVERY_ENABLED()) 4620 { 4621 if (readl(nv->regs->map) == 0xFFFFFFFF) 4622 { 4623 if (pci_channel_offline(nvl->pci_dev)) 4624 { 4625 NV_DEV_PRINTF(NV_DBG_ERRORS, nv, 4626 "PCI channel for the device is offline\n"); 4627 status = NV_OK; 4628 } 4629 } 4630 } 4631 #endif 4632 return status; 4633 } 4634 4635 NvBool NV_API_CALL nv_requires_dma_remap( 4636 nv_state_t *nv 4637 ) 4638 { 4639 NvBool dma_remap = NV_FALSE; 4640 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 4641 dma_remap = !nv_dma_maps_swiotlb(nvl->dev); 4642 return dma_remap; 4643 } 4644 4645 /* 4646 * Intended for use by external kernel modules to list nvidia gpu ids. 
4647 */ 4648 NvBool nvidia_get_gpuid_list(NvU32 *gpu_ids, NvU32 *gpu_count) 4649 { 4650 nv_linux_state_t *nvl; 4651 unsigned int count; 4652 NvBool ret = NV_TRUE; 4653 4654 LOCK_NV_LINUX_DEVICES(); 4655 4656 count = 0; 4657 for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next) 4658 count++; 4659 4660 if (*gpu_count == 0) 4661 { 4662 goto done; 4663 } 4664 else if ((*gpu_count) < count) 4665 { 4666 ret = NV_FALSE; 4667 goto done; 4668 } 4669 4670 count = 0; 4671 for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next) 4672 { 4673 nv_state_t *nv = NV_STATE_PTR(nvl); 4674 gpu_ids[count++] = nv->gpu_id; 4675 } 4676 4677 4678 done: 4679 4680 *gpu_count = count; 4681 4682 UNLOCK_NV_LINUX_DEVICES(); 4683 4684 return ret; 4685 } 4686 4687 /* 4688 * Kernel-level analog to nvidia_open, intended for use by external 4689 * kernel modules. This increments the ref count of the device with 4690 * the given gpu_id and makes sure the device has been initialized. 4691 * 4692 * Clients of this interface are counted by the RM reset path, to ensure a 4693 * GPU is not reset while the GPU is active. 4694 * 4695 * Returns -ENODEV if the given gpu_id does not exist. 4696 */ 4697 int nvidia_dev_get(NvU32 gpu_id, nvidia_stack_t *sp) 4698 { 4699 nv_linux_state_t *nvl; 4700 int rc; 4701 4702 /* Takes nvl->ldata_lock */ 4703 nvl = find_gpu_id(gpu_id); 4704 if (!nvl) 4705 return -ENODEV; 4706 4707 rc = nv_open_device(NV_STATE_PTR(nvl), sp); 4708 4709 if (rc == 0) 4710 WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_TRUE) != NV_OK); 4711 4712 up(&nvl->ldata_lock); 4713 return rc; 4714 } 4715 4716 /* 4717 * Kernel-level analog to nvidia_close, intended for use by external 4718 * kernel modules. This decrements the ref count of the device with 4719 * the given gpu_id, potentially tearing it down. 4720 */ 4721 void nvidia_dev_put(NvU32 gpu_id, nvidia_stack_t *sp) 4722 { 4723 nv_linux_state_t *nvl; 4724 4725 /* Takes nvl->ldata_lock */ 4726 nvl = find_gpu_id(gpu_id); 4727 if (!nvl) 4728 return; 4729 4730 nv_close_device(NV_STATE_PTR(nvl), sp); 4731 4732 WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_FALSE) != NV_OK); 4733 4734 up(&nvl->ldata_lock); 4735 } 4736 4737 /* 4738 * Like nvidia_dev_get but uses UUID instead of gpu_id. Note that this may 4739 * trigger initialization and teardown of unrelated devices to look up their 4740 * UUIDs. 4741 * 4742 * Clients of this interface are counted by the RM reset path, to ensure a 4743 * GPU is not reset while the GPU is active. 4744 */ 4745 int nvidia_dev_get_uuid(const NvU8 *uuid, nvidia_stack_t *sp) 4746 { 4747 nv_state_t *nv = NULL; 4748 nv_linux_state_t *nvl = NULL; 4749 const NvU8 *dev_uuid; 4750 int rc = 0; 4751 4752 /* Takes nvl->ldata_lock */ 4753 nvl = find_uuid_candidate(uuid); 4754 while (nvl) 4755 { 4756 nv = NV_STATE_PTR(nvl); 4757 4758 /* 4759 * If the device is missing its UUID, this call exists solely so 4760 * rm_get_gpu_uuid_raw will be called and we can inspect the UUID. 4761 */ 4762 rc = nv_open_device(nv, sp); 4763 if (rc != 0) 4764 goto out; 4765 4766 /* The UUID should always be present following nv_open_device */ 4767 dev_uuid = nv_get_cached_uuid(nv); 4768 WARN_ON(!dev_uuid); 4769 if (dev_uuid && memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0) 4770 break; 4771 4772 /* No match, try again. 
         */
        nv_close_device(nv, sp);
        up(&nvl->ldata_lock);
        nvl = find_uuid_candidate(uuid);
    }

    if (nvl)
    {
        rc = 0;
        WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_TRUE) != NV_OK);
    }
    else
        rc = -ENODEV;

out:
    if (nvl)
        up(&nvl->ldata_lock);
    return rc;
}

/*
 * Like nvidia_dev_put but uses UUID instead of gpu_id.
 */
void nvidia_dev_put_uuid(const NvU8 *uuid, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;

    /* Callers must already have called nvidia_dev_get_uuid() */

    /* Takes nvl->ldata_lock */
    nvl = find_uuid(uuid);
    if (!nvl)
        return;

    nv_close_device(NV_STATE_PTR(nvl), sp);

    WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_FALSE) != NV_OK);

    up(&nvl->ldata_lock);
}

int nvidia_dev_block_gc6(const NvU8 *uuid, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;

    /* Callers must already have called nvidia_dev_get_uuid() */

    /* Takes nvl->ldata_lock */
    nvl = find_uuid(uuid);
    if (!nvl)
        return -ENODEV;

    if (rm_ref_dynamic_power(sp, NV_STATE_PTR(nvl), NV_DYNAMIC_PM_FINE) != NV_OK)
    {
        up(&nvl->ldata_lock);
        return -EINVAL;
    }

    up(&nvl->ldata_lock);

    return 0;
}

int nvidia_dev_unblock_gc6(const NvU8 *uuid, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;

    /* Callers must already have called nvidia_dev_get_uuid() */

    /* Takes nvl->ldata_lock */
    nvl = find_uuid(uuid);
    if (!nvl)
        return -ENODEV;

    rm_unref_dynamic_power(sp, NV_STATE_PTR(nvl), NV_DYNAMIC_PM_FINE);

    up(&nvl->ldata_lock);

    return 0;
}

NV_STATUS NV_API_CALL nv_get_device_memory_config(
    nv_state_t *nv,
    NvU64 *compr_addr_sys_phys,
    NvU64 *addr_guest_phys,
    NvU32 *addr_width,
    NvS32 *node_id
)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;

    if (!nv_platform_supports_numa(nvl))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

#if defined(NVCPU_PPC64LE)
    nv_npu_numa_info_t *numa_info;

    numa_info = &nvl->npu->numa_info;

    if (node_id != NULL)
    {
        *node_id = nvl->numa_info.node_id;
    }

    if (compr_addr_sys_phys != NULL)
    {
        *compr_addr_sys_phys =
            numa_info->compr_sys_phys_addr;
    }

    if (addr_guest_phys != NULL)
    {
        *addr_guest_phys =
            numa_info->guest_phys_addr;
    }

    if (addr_width != NULL)
    {
        *addr_width = nv_volta_dma_addr_size - nv_volta_addr_space_width;
    }

    status = NV_OK;
#endif

    return status;
}

#if defined(NVCPU_PPC64LE)

NV_STATUS NV_API_CALL nv_get_nvlink_line_rate(
    nv_state_t *nvState,
    NvU32 *linerate
)
{
#if defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)

    nv_linux_state_t *nvl;
    struct pci_dev *npuDev;
    NvU32 *pSpeedPtr = NULL;
    NvU32 speed;
    int len;

    if (nvState != NULL)
        nvl = NV_GET_NVL_FROM_NV_STATE(nvState);
    else
        return NV_ERR_INVALID_ARGUMENT;

    if (!nvl->npu)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    npuDev = nvl->npu->devs[0];
    if (!npuDev->dev.of_node)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: %s: OF Node not found in IBM-NPU device node\n",
                  __FUNCTION__);
        return NV_ERR_NOT_SUPPORTED;
    }
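
    /*
     * Firmware publishes the link speed in the "ibm,nvlink-speed" device
     * tree property of the NPU device node.
     */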
    pSpeedPtr = (NvU32 *) of_get_property(npuDev->dev.of_node, "ibm,nvlink-speed", &len);

    if (pSpeedPtr)
    {
        speed = (NvU32) be32_to_cpup(pSpeedPtr);
    }
    else
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    if (!speed)
    {
        return NV_ERR_NOT_SUPPORTED;
    }
    else
    {
        *linerate = speed;
    }

    return NV_OK;

#endif

    return NV_ERR_NOT_SUPPORTED;
}

#endif

NV_STATUS NV_API_CALL nv_indicate_idle(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;
    struct file *file = nvl->sysfs_config_file;
    loff_t f_pos = 0;
    char buf;

    pm_runtime_put_noidle(dev);

#if defined(NV_SEQ_READ_ITER_PRESENT)
    {
        struct kernfs_open_file *of = ((struct seq_file *)file->private_data)->private;
        struct kernfs_node *kn;

        mutex_lock(&of->mutex);
        kn = of->kn;
        if (kn != NULL && atomic_inc_unless_negative(&kn->active))
        {
            if ((kn->attr.ops != NULL) && (kn->attr.ops->read != NULL))
            {
                kn->attr.ops->read(of, &buf, 1, f_pos);
            }
            atomic_dec(&kn->active);
        }
        mutex_unlock(&of->mutex);
    }
#else
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
    kernel_read(file, &buf, 1, &f_pos);
#else
    kernel_read(file, f_pos, &buf, 1);
#endif
#endif

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL nv_indicate_not_idle(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    pm_runtime_get_noresume(dev);

    nvl->is_forced_shutdown = NV_TRUE;
    pci_bus_type.shutdown(dev);

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

void NV_API_CALL nv_idle_holdoff(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    pm_runtime_get_noresume(dev);
#endif
}

NvBool NV_API_CALL nv_dynamic_power_available(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    return nvl->sysfs_config_file != NULL;
#else
    return NV_FALSE;
#endif
}

/* caller should hold nv_linux_devices_lock using LOCK_NV_LINUX_DEVICES */
void nv_linux_add_device_locked(nv_linux_state_t *nvl)
{
    if (nv_linux_devices == NULL) {
        nv_linux_devices = nvl;
    }
    else
    {
        nv_linux_state_t *tnvl;
        for (tnvl = nv_linux_devices; tnvl->next != NULL; tnvl = tnvl->next);
        tnvl->next = nvl;
    }
}

/* caller should hold nv_linux_devices_lock using LOCK_NV_LINUX_DEVICES */
void nv_linux_remove_device_locked(nv_linux_state_t *nvl)
{
    if (nvl == nv_linux_devices) {
        nv_linux_devices = nvl->next;
    }
    else
    {
        nv_linux_state_t *tnvl;
        for (tnvl = nv_linux_devices; tnvl->next != nvl; tnvl = tnvl->next);
        tnvl->next = nvl->next;
    }
}

void NV_API_CALL nv_control_soc_irqs(nv_state_t *nv, NvBool bEnable)
{
    int count;
    unsigned long flags;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
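
    /*
     * nv->current_soc_irq records the SoC interrupt currently being
     * serviced (-1 if none); skip enable/disable requests while one is
     * active.
     */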
    if (nv->current_soc_irq != -1)
        return;

    NV_SPIN_LOCK_IRQSAVE(&nvl->soc_isr_lock, flags);
    if (bEnable)
    {
        for (count = 0; count < nv->num_soc_irqs; count++)
        {
            if (nv->soc_irq_info[count].ref_count == 0)
            {
                nv->soc_irq_info[count].ref_count++;
                enable_irq(nv->soc_irq_info[count].irq_num);
            }
        }
    }
    else
    {
        for (count = 0; count < nv->num_soc_irqs; count++)
        {
            if (nv->soc_irq_info[count].ref_count == 1)
            {
                nv->soc_irq_info[count].ref_count--;
                disable_irq_nosync(nv->soc_irq_info[count].irq_num);
            }
        }
    }
    NV_SPIN_UNLOCK_IRQRESTORE(&nvl->soc_isr_lock, flags);
}

NvU32 NV_API_CALL nv_get_dev_minor(nv_state_t *nv)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    return nvl->minor_num;
}

NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap(int fd, int *duped_fd)
{
    *duped_fd = nvlink_cap_acquire(fd, NVLINK_CAP_FABRIC_MANAGEMENT);
    if (*duped_fd < 0)
    {
        return NV_ERR_INSUFFICIENT_PERMISSIONS;
    }

    return NV_OK;
}

/*
 * Wakes up the NVIDIA GPU HDA codec and controller by reading the
 * codec proc file.
 */
void NV_API_CALL nv_audio_dynamic_power(
    nv_state_t *nv
)
{
    /*
     * Runtime power management of the NVIDIA HDA controller became possible
     * with commit 07f4f97d7b4b ("vga_switcheroo: Use device link for HDA
     * controller"). That commit also moved the 'PCI_CLASS_MULTIMEDIA_HD_AUDIO'
     * macro from <sound/hdaudio.h> to <linux/pci_ids.h>.
     * If 'NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT' is not defined, then
     * this function is a stub.
     *
     * Also, check if runtime PM is enabled in the kernel (with
     * 'NV_PM_RUNTIME_AVAILABLE') and stub this function if it is disabled.
     * This function uses kernel fields only present when the kconfig has
     * runtime PM enabled.
     */
#if defined(NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT) && defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;
    struct pci_dev *audio_pci_dev, *pci_dev;
    struct snd_card *card;

    if (!dev_is_pci(dev))
        return;

    pci_dev = to_pci_dev(dev);

    /* PCI function 1 is the HDA audio function on NVIDIA GPUs */
    audio_pci_dev = os_pci_init_handle(NV_PCI_DOMAIN_NUMBER(pci_dev),
                                       NV_PCI_BUS_NUMBER(pci_dev),
                                       NV_PCI_SLOT_NUMBER(pci_dev),
                                       1, NULL, NULL);

    if (audio_pci_dev == NULL)
        return;

    /*
     * Check if the HDA controller is in a PM-suspended state. The HDA
     * controller cannot be runtime resumed if this API is called during
     * system suspend/resume time while the HDA controller is PM-suspended.
     */
    if (audio_pci_dev->dev.power.is_suspended)
        return;

    card = pci_get_drvdata(audio_pci_dev);
    if (card == NULL)
        return;

    /*
     * Commit be57bfffb7b5 ("ALSA: hda: move hda_codec.h to include/sound")
     * in v4.20-rc1 moved the "hda_codec.h" header file from the private
     * sound folder to include/sound.
     */
#if defined(NV_SOUND_HDA_CODEC_H_PRESENT)
    {
        struct list_head *p;
        struct hda_codec *codec = NULL;
        unsigned int cmd, res;

        /*
         * Traverse the list of devices maintained by the sound card and
         * search for the HDA codec controller.
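         * Entries of type SNDRV_DEV_CODEC store their struct hda_codec in
         * snd_device::device_data.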
5200 */ 5201 list_for_each_prev(p, &card->devices) 5202 { 5203 struct snd_device *pdev = list_entry(p, struct snd_device, list); 5204 5205 if (pdev->type == SNDRV_DEV_CODEC) 5206 { 5207 codec = pdev->device_data; 5208 5209 /* 5210 * NVIDIA HDA codec controller uses linux kernel HDA codec 5211 * driver. Commit 05852448690d ("ALSA: hda - Support indirect 5212 * execution of verbs") added support for overriding exec_verb. 5213 * This codec->core.exec_verb will be codec_exec_verb() for 5214 * NVIDIA HDA codec driver. 5215 */ 5216 if (codec->core.exec_verb == NULL) 5217 { 5218 return; 5219 } 5220 5221 break; 5222 } 5223 } 5224 5225 if (codec == NULL) 5226 { 5227 return; 5228 } 5229 5230 /* If HDA codec controller is already runtime active, then return */ 5231 if (snd_hdac_is_power_on(&codec->core)) 5232 { 5233 return; 5234 } 5235 5236 /* 5237 * Encode codec verb for getting vendor ID from root node. 5238 * Refer Intel High Definition Audio Specification for more details. 5239 */ 5240 cmd = (codec->addr << 28) | (AC_NODE_ROOT << 20) | 5241 (AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID; 5242 5243 /* 5244 * It will internally increment the runtime PM refcount, 5245 * wake-up the audio codec controller and send the HW 5246 * command for getting vendor ID. Once the vendor ID will be 5247 * returned back, then it will decrement the runtime PM refcount 5248 * and runtime suspend audio codec controller again (If refcount is 5249 * zero) once auto suspend counter expires. 5250 */ 5251 codec->core.exec_verb(&codec->core, cmd, 0, &res); 5252 } 5253 #else 5254 { 5255 int codec_addr; 5256 5257 /* 5258 * The filp_open() call below depends on the current task's fs_struct 5259 * (current->fs), which may already be NULL if this is called during 5260 * process teardown. 5261 */ 5262 if (current->fs == NULL) 5263 return; 5264 5265 /* If device is runtime active, then return */ 5266 if (audio_pci_dev->dev.power.runtime_status == RPM_ACTIVE) 5267 return; 5268 5269 for (codec_addr = 0; codec_addr < NV_HDA_MAX_CODECS; codec_addr++) 5270 { 5271 char filename[48]; 5272 NvU8 buf; 5273 int ret; 5274 5275 ret = snprintf(filename, sizeof(filename), 5276 "/proc/asound/card%d/codec#%d", 5277 card->number, codec_addr); 5278 5279 if (ret > 0 && ret < sizeof(filename) && 5280 (os_open_and_read_file(filename, &buf, 1) == NV_OK)) 5281 { 5282 break; 5283 } 5284 } 5285 } 5286 #endif 5287 #endif 5288 } 5289 5290 static int nv_match_dev_state(const void *data, struct file *filp, unsigned fd) 5291 { 5292 nv_linux_state_t *nvl = NULL; 5293 dev_t rdev = 0; 5294 5295 if (filp == NULL || 5296 filp->private_data == NULL || 5297 NV_FILE_INODE(filp) == NULL) 5298 return 0; 5299 5300 rdev = (NV_FILE_INODE(filp))->i_rdev; 5301 if (MAJOR(rdev) != NV_MAJOR_DEVICE_NUMBER) 5302 return 0; 5303 5304 nvl = NV_GET_NVL_FROM_FILEP(filp); 5305 if (nvl == NULL) 5306 return 0; 5307 5308 return (data == nvl); 5309 } 5310 5311 NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *nv, void *os_info) 5312 { 5313 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 5314 5315 return nv_match_dev_state(nvl, os_info, -1); 5316 } 5317 5318 NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *nv) 5319 { 5320 struct files_struct *files = current->files; 5321 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); 5322 5323 return !!iterate_fd(files, 0, nv_match_dev_state, nvl); 5324 } 5325 5326 NvBool NV_API_CALL nv_platform_supports_s0ix(void) 5327 { 5328 #if defined(CONFIG_ACPI) 5329 return (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) != 0; 5330 #else 5331 return 
NvBool NV_API_CALL nv_s2idle_pm_configured(void)
{
    NvU8 buf[8];

#if defined(NV_SEQ_READ_ITER_PRESENT)
    struct file *file;
    ssize_t num_read;
    struct kiocb kiocb;
    struct iov_iter iter;
    struct kvec iov = {
        .iov_base = &buf,
        .iov_len = sizeof(buf),
    };

    if (os_open_readonly_file("/sys/power/mem_sleep", (void **)&file) != NV_OK)
    {
        return NV_FALSE;
    }

    /*
     * From v5.20-rc1 onward, init_sync_kiocb() internally uses the
     * GPL-licensed __get_task_ioprio().
     */
#if defined(NV_GET_TASK_IOPRIO_PRESENT)
    memset(&kiocb, 0, sizeof(kiocb));
    kiocb.ki_filp = file;
    kiocb.ki_flags = iocb_flags(file);
    kiocb.ki_ioprio = IOPRIO_DEFAULT;
#else
    init_sync_kiocb(&kiocb, file);
#endif

    kiocb.ki_pos = 0;
    iov_iter_kvec(&iter, READ, &iov, 1, sizeof(buf));

    num_read = seq_read_iter(&kiocb, &iter);

    os_close_file((void *)file);

    if (num_read != sizeof(buf))
    {
        return NV_FALSE;
    }
#else
    if (os_open_and_read_file("/sys/power/mem_sleep", buf,
                              sizeof(buf)) != NV_OK)
    {
        return NV_FALSE;
    }
#endif

    return (memcmp(buf, "[s2idle]", 8) == 0);
}

/*
 * Queries the system chassis info to figure out whether the platform is a
 * Laptop or Notebook.
 * This function should be used when querying GPU form factor information
 * is not possible via core RM, or when both system and GPU form factor
 * information is needed.
 */
NvBool NV_API_CALL nv_is_chassis_notebook(void)
{
    const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);

    //
    // Return true only for Laptop & Notebook
    // As per the SMBIOS spec, Laptop = 9 and Notebook = 10
    //
    return (chassis_type && (!strcmp(chassis_type, "9") || !strcmp(chassis_type, "10")));
}

void NV_API_CALL nv_allow_runtime_suspend
(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    spin_lock_irq(&dev->power.lock);

    if (dev->power.runtime_auto == false)
    {
        dev->power.runtime_auto = true;
        atomic_add_unless(&dev->power.usage_count, -1, 0);
    }

    spin_unlock_irq(&dev->power.lock);
#endif
}

void NV_API_CALL nv_disallow_runtime_suspend
(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    spin_lock_irq(&dev->power.lock);

    if (dev->power.runtime_auto == true)
    {
        dev->power.runtime_auto = false;
        atomic_inc(&dev->power.usage_count);
    }

    spin_unlock_irq(&dev->power.lock);
#endif
}

NvU32 NV_API_CALL nv_get_os_type(void)
{
    return OS_TYPE_LINUX;
}

void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size)
{
#if NVCPU_IS_PPC64LE
    return nv_ibmnpu_cache_flush_range(nv, cpu_virtual, size);
#elif NVCPU_IS_AARCH64
    NvU64 va, cbsize;
    NvU64 end_cpu_virtual = cpu_virtual + size;

    nv_printf(NV_DBG_INFO,
              "Flushing CPU virtual range [0x%llx, 0x%llx)\n",
              cpu_virtual, end_cpu_virtual);

    cbsize = cache_line_size();
    // Align the start address to the cache line size
    cpu_virtual = NV_ALIGN_UP(cpu_virtual, cbsize);
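
    /*
     * "dc civac" cleans and invalidates the data cache line containing the
     * given virtual address to the point of coherency.
     */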
    // Force eviction of any cache lines from the NUMA-onlined region.
    for (va = cpu_virtual; va < end_cpu_virtual; va += cbsize)
    {
        asm volatile("dc civac, %0" : : "r" (va) : "memory");
        // Reschedule if necessary to avoid lockup warnings
        cond_resched();
    }
    asm volatile("dsb sy" : : : "memory");
#endif
}

/* Depth-first (preorder) walk of the resource tree */
static struct resource *nv_next_resource(struct resource *p)
{
    if (p->child != NULL)
        return p->child;

    while ((p->sibling == NULL) && (p->parent != NULL))
        p = p->parent;

    return p->sibling;
}

/*
 * Gets the correct PCI bus memory window that can be mapped in the real
 * mode emulator (emu). This function is called during initialization of
 * the emu, before it is remapped to the OS.
 */
void NV_API_CALL nv_get_updated_emu_seg(
    NvU32 *start,
    NvU32 *end
)
{
    struct resource *p;

    if (*start >= *end)
        return;

    for (p = iomem_resource.child; (p != NULL); p = nv_next_resource(p))
    {
        /* If we passed the resource we are looking for, stop */
        if (p->start > *end)
        {
            p = NULL;
            break;
        }

        /* Skip until we find a range that matches what we look for */
        if (p->end < *start)
            continue;

        if ((p->end > *end) && (p->child))
            continue;

        if ((p->flags & IORESOURCE_MEM) != IORESOURCE_MEM)
            continue;

        /* Found a match, break */
        break;
    }

    if (p != NULL)
    {
        *start = max((resource_size_t)*start, p->start);
        *end = min((resource_size_t)*end, p->end);
    }
}
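/*
 * Usage sketch (hypothetical values, not from the source): clamp a
 * candidate real-mode segment to the enclosing PCI memory window before
 * the emulator maps it:
 *
 *   NvU32 seg_start = 0x000C0000, seg_end = 0x000DFFFF;
 *
 *   nv_get_updated_emu_seg(&seg_start, &seg_end);
 *   // seg_start/seg_end now lie within a matching IORESOURCE_MEM range,
 *   // or are left unchanged if no matching resource was found.
 */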