/*
 * SPDX-FileCopyrightText: Copyright (c) 2001-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _NV_LINUX_H_
#define _NV_LINUX_H_

#include "nvstatus.h"
#include "nv.h"
#include "nv-ioctl-numa.h"
#include "conftest.h"

#include "nv-lock.h"
#include "nv-pgprot.h"
#include "nv-mm.h"
#include "os-interface.h"
#include "nv-timer.h"
#include "nv-time.h"
#include "nv-chardev-numbers.h"

#define NV_KERNEL_NAME "Linux"

#ifndef AUTOCONF_INCLUDED
#if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
#include <generated/autoconf.h>
#else
#include <linux/autoconf.h>
#endif
#endif

#if defined(NV_GENERATED_UTSRELEASE_H_PRESENT)
#include <generated/utsrelease.h>
#endif

#if defined(NV_GENERATED_COMPILE_H_PRESENT)
#include <generated/compile.h>
#endif

#include <linux/version.h>
#include <linux/utsname.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
#error "This driver does not support kernels older than 2.6.32!"
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 7, 0)
#  define KERNEL_2_6
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
#  define KERNEL_3
#else
#error "This driver does not support development kernels!"
#endif
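
/*
 * Example: version gates like the one above are consumed with ordinary
 * preprocessor checks elsewhere in the driver, e.g.:
 *
 *     #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
 *         // use an interface introduced in 5.0
 *     #else
 *         // fall back to the older interface
 *     #endif
 */
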
#if defined (CONFIG_SMP) && !defined (__SMP__)
#define __SMP__
#endif

#if defined (CONFIG_MODVERSIONS) && !defined (MODVERSIONS)
#  define MODVERSIONS
#endif

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kmod.h>
#include <asm/bug.h>

#include <linux/mm.h>

#if !defined(VM_RESERVED)
#define VM_RESERVED   0x00000000
#endif
#if !defined(VM_DONTEXPAND)
#define VM_DONTEXPAND 0x00000000
#endif
#if !defined(VM_DONTDUMP)
#define VM_DONTDUMP   0x00000000
#endif

#include <linux/init.h>         /* module_init, module_exit        */
#include <linux/types.h>        /* pid_t, size_t, __u32, etc       */
#include <linux/errno.h>        /* error codes                     */
#include <linux/list.h>         /* circular linked list            */
#include <linux/stddef.h>       /* NULL, offsetof                  */
#include <linux/wait.h>         /* wait queues                     */
#include <linux/string.h>       /* strchr(), strpbrk()             */

#include <linux/ctype.h>        /* isspace(), etc                  */
#include <linux/console.h>      /* acquire_console_sem(), etc      */
#include <linux/cpufreq.h>      /* cpufreq_get                     */

#include <linux/slab.h>         /* kmalloc, kfree, etc             */
#include <linux/vmalloc.h>      /* vmalloc, vfree, etc             */

#include <linux/poll.h>         /* poll_wait                       */
#include <linux/delay.h>        /* mdelay, udelay                  */

#include <linux/sched.h>        /* suser(), capable() replacement  */

#include <linux/random.h>       /* get_random_bytes()              */

#if defined(NV_LINUX_DMA_BUF_H_PRESENT)
#include <linux/dma-buf.h>
#endif

#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRM_DEVICE_H_PRESENT)
#include <drm/drm_device.h>
#endif

#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif

#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif

#if defined(NV_DRM_DRM_GEM_H_PRESENT)
#include <drm/drm_gem.h>
#endif
#endif /* NV_DRM_AVAILABLE */

/*
 * sched.h was refactored with this commit (as part of Linux 4.11):
 * 2017-03-03 1827adb11ad26b2290dc9fe2aaf54976b2439865
 */
#if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
#include <linux/sched/signal.h> /* task_lock(), task_unlock()      */
#endif

#if defined(NV_LINUX_SCHED_TASK_H_PRESENT)
#include <linux/sched/task.h>   /* task_lock(), task_unlock()      */
#endif

/* task and signal-related items, for kernels < 4.11: */
#include <linux/sched.h>        /* task_lock(), task_unlock()      */

#include <linux/moduleparam.h>  /* module_param()                  */
#include <asm/tlbflush.h>       /* flush_tlb(), flush_tlb_all()    */

#include <linux/pci.h>          /* pci_find_class, etc             */
#include <linux/interrupt.h>    /* tasklets, interrupt helpers     */
#include <linux/timer.h>
#include <linux/file.h>         /* fget(), fput()                  */
#include <linux/rbtree.h>
#include <linux/cpu.h>          /* CPU hotplug support             */

#include <linux/pm_runtime.h>   /* pm_runtime_*                    */
#include <linux/fdtable.h>      /* files_fdtable, etc              */

#include <asm/div64.h>          /* do_div()                        */
#if defined(NV_ASM_SYSTEM_H_PRESENT)
#include <asm/system.h>         /* cli, sli, save_flags            */
#endif
#include <asm/io.h>             /* ioremap, virt_to_phys           */
#include <asm/uaccess.h>        /* access_ok                       */
#include <asm/page.h>           /* PAGE_OFFSET                     */
#include <asm/pgtable.h>        /* pte bit definitions             */
#include <asm/bitops.h>         /* __set_bit()                     */

#if defined(NV_LINUX_TIME_H_PRESENT)
#include <linux/time.h>         /* FD_SET()                        */
#endif

#include "nv-list-helpers.h"
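
/*
 * Example: the zero fallbacks for VM_RESERVED, VM_DONTEXPAND and
 * VM_DONTDUMP above let callers set these flags unconditionally; on
 * kernels lacking a given flag the OR is a no-op:
 *
 *     vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 */
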
/*
 * Use current->cred->euid instead of calling current_euid(). The latter
 * can pull in the GPL-only debug_lockdep_rcu_enabled() symbol when
 * CONFIG_PROVE_RCU is enabled; that symbol is only used for debugging.
 *
 * The Linux kernel relies on the assumption that only the current process
 * is permitted to change its cred structure. Therefore, current_euid()
 * does not require the RCU read lock on current->cred.
 */
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))

#if defined(CONFIG_VGA_ARB)
#include <linux/vgaarb.h>
#endif

#include <linux/pagemap.h>
#include <linux/dma-mapping.h>

#if defined(NV_LINUX_DMA_MAP_OPS_H_PRESENT)
#include <linux/dma-map-ops.h>
#endif

#if defined(CONFIG_SWIOTLB) && defined(NVCPU_AARCH64)
#include <linux/swiotlb.h>
#endif

#include <linux/scatterlist.h>
#include <linux/completion.h>
#include <linux/highmem.h>

#include <linux/nodemask.h>
#include <linux/memory.h>

#include <linux/workqueue.h>    /* workqueue          */
#include "nv-kthread-q.h"       /* kthread based queue */

#if defined(NV_LINUX_EFI_H_PRESENT)
#include <linux/efi.h>          /* efi_enabled        */
#endif

#include <linux/fb.h>           /* fb_info struct     */
#include <linux/screen_info.h>  /* screen_info        */

#if !defined(CONFIG_PCI)
#warning "Attempting to build driver for a platform with no PCI support!"
#include <asm-generic/pci-dma-compat.h>
#endif

#if defined(CONFIG_CRAY_XT)
#include <cray/cray_nvidia.h>
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
        const char *, va_list);
#endif

#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
#include <asm/eeh.h>
#define NV_PCI_ERROR_RECOVERY_ENABLED() eeh_enabled()
#define NV_PCI_ERROR_RECOVERY
#endif

#if defined(NV_ASM_SET_MEMORY_H_PRESENT)
#include <asm/set_memory.h>
#endif

#if defined(NV_SET_MEMORY_UC_PRESENT)
#undef NV_SET_PAGES_UC_PRESENT
#endif

#if !defined(NVCPU_AARCH64) && !defined(NVCPU_PPC64LE) && !defined(NVCPU_RISCV64)
#if !defined(NV_SET_MEMORY_UC_PRESENT) && !defined(NV_SET_PAGES_UC_PRESENT)
#error "This driver requires the ability to change memory types!"
#endif
#endif
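
/*
 * Example: "changing memory types" above refers to interfaces such as
 * set_memory_uc()/set_memory_wb() (or the older set_pages_uc()), which
 * switch the kernel mapping of allocated pages between cached and
 * uncached:
 *
 *     set_memory_uc((unsigned long)page_address(page), 1);
 *     // ...use the page uncached...
 *     set_memory_wb((unsigned long)page_address(page), 1);
 */
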
/*
 * Traditionally, CONFIG_XEN indicated that the target kernel was
 * built exclusively for use under a Xen hypervisor, requiring
 * modifications to or disabling of a variety of NVIDIA graphics
 * driver code paths. As of the introduction of CONFIG_PARAVIRT
 * and support for Xen hypervisors within the CONFIG_PARAVIRT_GUEST
 * architecture, CONFIG_XEN merely indicates that the target
 * kernel can run under a Xen hypervisor, but not that it will.
 *
 * If CONFIG_XEN and CONFIG_PARAVIRT are defined, the old Xen-specific
 * code paths are disabled. If the target kernel executes stand-alone,
 * the NVIDIA graphics driver will work fine. If the kernel executes
 * under a Xen (or other) hypervisor, however, the NVIDIA graphics
 * driver has no way of knowing and is unlikely to work correctly.
 */
#if defined(CONFIG_XEN) && !defined(CONFIG_PARAVIRT)
#include <asm/maddr.h>
#include <xen/interface/memory.h>
#define NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL
#endif

#ifdef CONFIG_KDB
#include <linux/kdb.h>
#include <asm/kdb.h>
#endif

#if defined(CONFIG_X86_REMOTE_DEBUG)
#include <linux/gdb.h>
#endif

#if defined(DEBUG) && defined(CONFIG_KGDB) && \
    defined(NVCPU_AARCH64)
#include <asm/kgdb.h>
#endif

#if defined(NVCPU_X86_64) && !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
#define NV_ENABLE_PAT_SUPPORT
#endif

#define NV_PAT_MODE_DISABLED    0
#define NV_PAT_MODE_KERNEL      1
#define NV_PAT_MODE_BUILTIN     2

extern int nv_pat_mode;

#if defined(CONFIG_HOTPLUG_CPU)
#define NV_ENABLE_HOTPLUG_CPU
#include <linux/notifier.h>     /* struct notifier_block, etc */
#endif

#if (defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE))
#include <linux/i2c.h>
#endif

#if defined(CONFIG_ACPI)
#include <linux/acpi.h>
#define NV_LINUX_ACPI_EVENTS_SUPPORTED 1
#endif

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
#define NV_ACPI_WALK_NAMESPACE(type, start_object, max_depth, \
        user_function, args...)                               \
    acpi_walk_namespace(type, start_object, max_depth,        \
            user_function, NULL, args)
#endif

#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
#define NV_CONFIG_PREEMPT_RT 1
#endif

#if defined(NV_WRITE_CR4_PRESENT)
#define NV_READ_CR4()       read_cr4()
#define NV_WRITE_CR4(cr4)   write_cr4(cr4)
#else
#define NV_READ_CR4()       __read_cr4()
#define NV_WRITE_CR4(cr4)   __write_cr4(cr4)
#endif

#ifndef get_cpu
#define get_cpu() smp_processor_id()
#define put_cpu()
#endif

#if !defined(unregister_hotcpu_notifier)
#define unregister_hotcpu_notifier unregister_cpu_notifier
#endif
#if !defined(register_hotcpu_notifier)
#define register_hotcpu_notifier register_cpu_notifier
#endif

#if defined(NVCPU_X86_64)
#if !defined(pmd_large)
#define pmd_large(_pmd) \
    ((pmd_val(_pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
#endif
#endif /* defined(NVCPU_X86_64) */

#define NV_PAGE_COUNT(page) \
    ((unsigned int)page_count(page))
#define NV_GET_PAGE_COUNT(page_ptr) \
    (NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)))
#define NV_GET_PAGE_FLAGS(page_ptr) \
    (NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
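
/*
 * Example: the get_cpu()/put_cpu() fallbacks above let the usual pattern
 * compile on very old kernels that lack the real helpers (note the real
 * get_cpu() also disables preemption; the fallback does not):
 *
 *     int cpu = get_cpu();
 *     // ...per-CPU work...
 *     put_cpu();
 */
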
/*
 * Before the introduction of VM_PFNMAP, there was a VM_UNPAGED flag.
 * Drivers which wanted to call remap_pfn_range on normal pages had to use
 * this VM_UNPAGED flag *and* set PageReserved. With the introduction of
 * VM_PFNMAP, that restriction went away. This is described in commit
 *
 *   2005-10-28 6aab341e0a28aff100a09831c5300a2994b8b986
 *   ("mm: re-architect the VM_UNPAGED logic")
 *
 * which added VM_PFNMAP and vm_normal_page. Therefore, if VM_PFNMAP is
 * defined, then we do *not* need to mark a page as reserved in order to
 * call remap_pfn_range().
 */
#if !defined(VM_PFNMAP)
#define NV_MAYBE_RESERVE_PAGE(page_ptr) \
    SetPageReserved(NV_GET_PAGE_STRUCT(page_ptr->phys_addr))
#define NV_MAYBE_UNRESERVE_PAGE(page_ptr) \
    ClearPageReserved(NV_GET_PAGE_STRUCT(page_ptr->phys_addr))
#else
#define NV_MAYBE_RESERVE_PAGE(page_ptr)
#define NV_MAYBE_UNRESERVE_PAGE(page_ptr)
#endif /* defined(VM_PFNMAP) */

#if !defined(__GFP_COMP)
#define __GFP_COMP 0
#endif

#if !defined(DEBUG) && defined(__GFP_NOWARN)
#define NV_GFP_KERNEL (GFP_KERNEL | __GFP_NOWARN)
#define NV_GFP_ATOMIC (GFP_ATOMIC | __GFP_NOWARN)
#else
#define NV_GFP_KERNEL (GFP_KERNEL)
#define NV_GFP_ATOMIC (GFP_ATOMIC)
#endif

#if defined(GFP_DMA32)
/*
 * GFP_DMA32 is similar to GFP_DMA, but instructs the Linux zone
 * allocator to allocate memory from the first 4GB on platforms
 * such as Linux/x86-64; the alternative is to use an IOMMU such
 * as the one implemented with the K8 GART, if available.
 */
#define NV_GFP_DMA32 (NV_GFP_KERNEL | GFP_DMA32)
#else
#define NV_GFP_DMA32 (NV_GFP_KERNEL)
#endif

typedef enum
{
    NV_MEMORY_TYPE_SYSTEM,      /* Memory mapped for ROM, SBIOS and physical RAM. */
    NV_MEMORY_TYPE_REGISTERS,
    NV_MEMORY_TYPE_FRAMEBUFFER,
    NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred to by NVRM, e.g. BARs and MCFG of device */
} nv_memory_type_t;

#if defined(NVCPU_AARCH64) || defined(NVCPU_PPC64LE) || defined(NVCPU_RISCV64)
#define NV_ALLOW_WRITE_COMBINING(mt)    1
#elif defined(NVCPU_X86_64)
#if defined(NV_ENABLE_PAT_SUPPORT)
#define NV_ALLOW_WRITE_COMBINING(mt)          \
    ((nv_pat_mode != NV_PAT_MODE_DISABLED) && \
     ((mt) != NV_MEMORY_TYPE_REGISTERS))
#else
#define NV_ALLOW_WRITE_COMBINING(mt)    0
#endif
#endif

#if !defined(IRQF_SHARED)
#define IRQF_SHARED SA_SHIRQ
#endif

#define NV_MAX_RECURRING_WARNING_MESSAGES 10

/* various memory tracking/debugging techniques
 * disabled for retail builds, enabled for debug builds
 */

// allow an easy way to convert all debug printfs related to memory
// management back and forth between 'info' and 'errors'
#if defined(NV_DBG_MEM)
#define NV_DBG_MEMINFO NV_DBG_ERRORS
#else
#define NV_DBG_MEMINFO NV_DBG_INFO
#endif

#define NV_MEM_TRACKING_PAD_SIZE(size) \
    (size) = NV_ALIGN_UP((size + sizeof(void *)), sizeof(void *))

#define NV_MEM_TRACKING_HIDE_SIZE(ptr, size)          \
    if ((ptr != NULL) && (*(ptr) != NULL))            \
    {                                                 \
        NvU8 *__ptr;                                  \
        *(unsigned long *) *(ptr) = (size);           \
        __ptr = *(ptr); __ptr += sizeof(void *);      \
        *(ptr) = (void *) __ptr;                      \
    }
#define NV_MEM_TRACKING_RETRIEVE_SIZE(ptr, size)      \
    {                                                 \
        NvU8 *__ptr = (ptr); __ptr -= sizeof(void *); \
        (ptr) = (void *) __ptr;                       \
        (size) = *(unsigned long *) (ptr);            \
    }

/* keep track of memory usage */
#include "nv-memdbg.h"

static inline void *nv_vmalloc(unsigned long size)
{
#if defined(NV_VMALLOC_HAS_PGPROT_T_ARG)
    void *ptr = __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
#else
    void *ptr = __vmalloc(size, GFP_KERNEL);
#endif
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
}

static inline void nv_vfree(void *ptr, NvU64 size)
{
    NV_MEMDBG_REMOVE(ptr, size);
    vfree(ptr);
}
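
/*
 * Example (sketch of a hypothetical wrapper, not a helper defined here):
 * the NV_MEM_TRACKING_* macros above cooperate as pad -> hide -> retrieve,
 * stashing the allocation size just in front of the pointer handed out:
 *
 *     void *ptr;
 *     NvU64 size = req_size;
 *     NV_MEM_TRACKING_PAD_SIZE(size);
 *     ptr = kmalloc(size, NV_GFP_KERNEL);
 *     NV_MEM_TRACKING_HIDE_SIZE(&ptr, req_size);
 *     // ...later, on the free path:
 *     NV_MEM_TRACKING_RETRIEVE_SIZE(ptr, size);
 *     kfree(ptr);
 */
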
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
    void *ptr = ioremap_driver_hardened(phys, size);
#else
    void *ptr = ioremap(phys, size);
#endif
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
}

static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
{
    return nv_ioremap(phys, size);
}

static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
    void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
    ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
    ptr = ioremap_cache(phys, size);
#elif defined(NVCPU_PPC64LE)
    //
    // ioremap_cache() was only implemented correctly for ppc64le with
    // commit f855b2f544d6 in April 2017 (kernel 4.12+). Before the
    // referenced commit, the kernel does provide a default implementation
    // of ioremap_cache() that would be incorrect for our use (creating an
    // uncached mapping), but that implementation is not exported, the
    // NV_IOREMAP_CACHE_PRESENT conftest doesn't pick it up, and we end up
    // in this #elif branch.
    //
    // At the same time, ppc64le has supported ioremap_prot() since May 2011
    // (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
    // support on power.
    //
    ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
#else
    return nv_ioremap(phys, size);
#endif

    if (ptr)
        NV_MEMDBG_ADD(ptr, size);

    return ptr;
}

static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
{
    void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
    ptr = ioremap_driver_hardened_wc(phys, size);
#elif defined(NV_IOREMAP_WC_PRESENT)
    ptr = ioremap_wc(phys, size);
#else
    return nv_ioremap_nocache(phys, size);
#endif

    if (ptr)
        NV_MEMDBG_ADD(ptr, size);

    return ptr;
}

static inline void nv_iounmap(void *ptr, NvU64 size)
{
    NV_MEMDBG_REMOVE(ptr, size);
    iounmap(ptr);
}

static inline NvBool nv_numa_node_has_memory(int node_id)
{
    if (node_id < 0 || node_id >= MAX_NUMNODES)
        return NV_FALSE;
    return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
}
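
/*
 * Example: register apertures are mapped and unmapped with the wrappers
 * above; the same size must be passed to both calls so the nv-memdbg
 * accounting stays balanced:
 *
 *     void *regs = nv_ioremap_nocache(bar_phys, bar_size);
 *     if (regs == NULL)
 *         return -ENOMEM;
 *     // ...access registers...
 *     nv_iounmap(regs, bar_size);
 */
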
#define NV_KMALLOC(ptr, size)                  \
    {                                          \
        (ptr) = kmalloc(size, NV_GFP_KERNEL);  \
        if (ptr)                               \
            NV_MEMDBG_ADD(ptr, size);          \
    }

#define NV_KZALLOC(ptr, size)                  \
    {                                          \
        (ptr) = kzalloc(size, NV_GFP_KERNEL);  \
        if (ptr)                               \
            NV_MEMDBG_ADD(ptr, size);          \
    }

#define NV_KMALLOC_ATOMIC(ptr, size)           \
    {                                          \
        (ptr) = kmalloc(size, NV_GFP_ATOMIC);  \
        if (ptr)                               \
            NV_MEMDBG_ADD(ptr, size);          \
    }

#if defined(__GFP_RETRY_MAYFAIL)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_RETRY_MAYFAIL)
#elif defined(__GFP_NORETRY)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_NORETRY)
#else
#define NV_GFP_NO_OOM (NV_GFP_KERNEL)
#endif

#define NV_KMALLOC_NO_OOM(ptr, size)           \
    {                                          \
        (ptr) = kmalloc(size, NV_GFP_NO_OOM);  \
        if (ptr)                               \
            NV_MEMDBG_ADD(ptr, size);          \
    }

#define NV_KFREE(ptr, size)                    \
    {                                          \
        NV_MEMDBG_REMOVE(ptr, size);           \
        kfree((void *) (ptr));                 \
    }

#define NV_ALLOC_PAGES_NODE(ptr, nid, order, gfp_mask)                                 \
    {                                                                                  \
        (ptr) = (unsigned long)page_address(alloc_pages_node(nid, gfp_mask, order));  \
    }

#define NV_GET_FREE_PAGES(ptr, order, gfp_mask)    \
    {                                              \
        (ptr) = __get_free_pages(gfp_mask, order); \
    }

#define NV_FREE_PAGES(ptr, order)  \
    {                              \
        free_pages(ptr, order);    \
    }

static inline pgprot_t nv_sme_clr(pgprot_t prot)
{
#if defined(__sme_clr)
    return __pgprot(__sme_clr(pgprot_val(prot)));
#else
    return prot;
#endif // __sme_clr
}

static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);

#if defined(pgprot_decrypted)
    return pgprot_decrypted(prot);
#else
    return nv_sme_clr(prot);
#endif // pgprot_decrypted
}

#if defined(PAGE_KERNEL_NOENC)
#if defined(__pgprot_mask)
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)
#elif defined(default_pgprot)
#define NV_PAGE_KERNEL_NOCACHE_NOENC default_pgprot(__PAGE_KERNEL_NOCACHE)
#elif defined(__pgprot)
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot(__PAGE_KERNEL_NOCACHE)
#else
#error "Unsupported kernel!!!"
#endif
#endif
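
/*
 * Example: unlike plain kmalloc()/kfree(), the wrappers above are
 * statement macros, and the caller must remember the size for the free
 * (foo_t is a placeholder type):
 *
 *     foo_t *p;
 *     NV_KMALLOC(p, sizeof(*p));
 *     if (p == NULL)
 *         return NV_ERR_NO_MEMORY;
 *     // ...use p...
 *     NV_KFREE(p, sizeof(*p));
 */
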
static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
                             NvBool cached, NvBool unencrypted)
{
    void *ptr;
    pgprot_t prot = PAGE_KERNEL;
#if defined(NVCPU_X86_64)
#if defined(PAGE_KERNEL_NOENC)
    if (unencrypted)
    {
        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
    }
    else
#endif
    {
        prot = cached ? PAGE_KERNEL : PAGE_KERNEL_NOCACHE;
    }
#elif defined(NVCPU_AARCH64)
    prot = cached ? PAGE_KERNEL : NV_PGPROT_UNCACHED(PAGE_KERNEL);
#endif
    /* All memory is cached on PPC64LE; the 'cached' input can't be honored. */
    ptr = vmap(pages, page_count, VM_MAP, prot);
    if (ptr)
        NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
    return (NvUPtr)ptr;
}

static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
{
    vunmap((void *)vaddr);
    NV_MEMDBG_REMOVE((void *)vaddr, page_count * PAGE_SIZE);
}

#if defined(NV_GET_NUM_PHYSPAGES_PRESENT)
#define NV_NUM_PHYSPAGES get_num_physpages()
#else
#define NV_NUM_PHYSPAGES num_physpages
#endif
#define NV_GET_CURRENT_PROCESS()      current->tgid
#define NV_IN_ATOMIC()                in_atomic()
#define NV_LOCAL_BH_DISABLE()         local_bh_disable()
#define NV_LOCAL_BH_ENABLE()          local_bh_enable()
#define NV_COPY_TO_USER(to, from, n)  copy_to_user(to, from, n)
#define NV_COPY_FROM_USER(to, from, n) copy_from_user(to, from, n)

#define NV_IS_SUSER()                 capable(CAP_SYS_ADMIN)
#define NV_PCI_DEVICE_NAME(pci_dev)   ((pci_dev)->pretty_name)
#define NV_CLI()                      local_irq_disable()
#define NV_SAVE_FLAGS(eflags)         local_save_flags(eflags)
#define NV_RESTORE_FLAGS(eflags)      local_irq_restore(eflags)
#define NV_MAY_SLEEP()                (!irqs_disabled() && !in_interrupt() && !NV_IN_ATOMIC())
#define NV_MODULE_PARAMETER(x)        module_param(x, int, 0)
#define NV_MODULE_STRING_PARAMETER(x) module_param(x, charp, 0)
#undef  MODULE_PARM

#define NV_NUM_CPUS()                 num_possible_cpus()

static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
{
#if defined(NV_PHYS_TO_DMA_PRESENT)
    return phys_to_dma(dev, pa);
#elif defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
    return phys_to_machine(pa);
#else
    return (dma_addr_t)pa;
#endif
}

#define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
#define NV_VMA_PGOFF(vma)             ((vma)->vm_pgoff)
#define NV_VMA_SIZE(vma)              ((vma)->vm_end - (vma)->vm_start)
#define NV_VMA_OFFSET(vma)            (((NvU64)(vma)->vm_pgoff) << PAGE_SHIFT)
#define NV_VMA_PRIVATE(vma)           ((vma)->vm_private_data)
#define NV_VMA_FILE(vma)              ((vma)->vm_file)

#define NV_DEVICE_MINOR_NUMBER(x)     minor((x)->i_rdev)

#define NV_PCI_DISABLE_DEVICE(pci_dev)                           \
    {                                                            \
        NvU16 __cmd[2];                                          \
        pci_read_config_word((pci_dev), PCI_COMMAND, &__cmd[0]); \
        pci_disable_device(pci_dev);                             \
        pci_read_config_word((pci_dev), PCI_COMMAND, &__cmd[1]); \
        __cmd[1] |= PCI_COMMAND_MEMORY;                          \
        pci_write_config_word((pci_dev), PCI_COMMAND,            \
                (__cmd[1] | (__cmd[0] & PCI_COMMAND_IO)));       \
    }

#define NV_PCI_RESOURCE_START(pci_dev, bar) pci_resource_start(pci_dev, (bar))
#define NV_PCI_RESOURCE_SIZE(pci_dev, bar)  pci_resource_len(pci_dev, (bar))
#define NV_PCI_RESOURCE_FLAGS(pci_dev, bar) pci_resource_flags(pci_dev, (bar))

#define NV_PCI_RESOURCE_VALID(pci_dev, bar)        \
    ((NV_PCI_RESOURCE_START(pci_dev, bar) != 0) && \
     (NV_PCI_RESOURCE_SIZE(pci_dev, bar) != 0))

#define NV_PCI_DOMAIN_NUMBER(pci_dev) (NvU32)pci_domain_nr(pci_dev->bus)
#define NV_PCI_BUS_NUMBER(pci_dev)    (pci_dev)->bus->number
#define NV_PCI_DEVFN(pci_dev)         (pci_dev)->devfn
#define NV_PCI_SLOT_NUMBER(pci_dev)   PCI_SLOT(NV_PCI_DEVFN(pci_dev))
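
/*
 * Example: probing code can validate a BAR with the helpers above before
 * attempting to map it:
 *
 *     if (!NV_PCI_RESOURCE_VALID(pci_dev, bar))
 *         return -ENXIO;
 *     regs = nv_ioremap_nocache(NV_PCI_RESOURCE_START(pci_dev, bar),
 *                               NV_PCI_RESOURCE_SIZE(pci_dev, bar));
 */
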
#if defined(CONFIG_X86_UV) && defined(NV_CONFIG_X86_UV)
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain, bus, devfn)                  \
   ({                                                                   \
        struct pci_dev *__dev = NULL;                                   \
        while ((__dev = pci_get_device(PCI_VENDOR_ID_NVIDIA,            \
                    PCI_ANY_ID, __dev)) != NULL)                        \
        {                                                               \
            if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&              \
                (NV_PCI_BUS_NUMBER(__dev) == bus) &&                    \
                (NV_PCI_DEVFN(__dev) == devfn))                         \
            {                                                           \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (__dev == NULL)                                              \
        {                                                               \
            while ((__dev = pci_get_class((PCI_CLASS_BRIDGE_HOST << 8), \
                        __dev)) != NULL)                                \
            {                                                           \
                if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&          \
                    (NV_PCI_BUS_NUMBER(__dev) == bus) &&                \
                    (NV_PCI_DEVFN(__dev) == devfn))                     \
                {                                                       \
                    break;                                              \
                }                                                       \
            }                                                           \
        }                                                               \
        if (__dev == NULL)                                              \
        {                                                               \
            while ((__dev = pci_get_class((PCI_CLASS_BRIDGE_PCI << 8),  \
                        __dev)) != NULL)                                \
            {                                                           \
                if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&          \
                    (NV_PCI_BUS_NUMBER(__dev) == bus) &&                \
                    (NV_PCI_DEVFN(__dev) == devfn))                     \
                {                                                       \
                    break;                                              \
                }                                                       \
            }                                                           \
        }                                                               \
        if (__dev == NULL)                                              \
        {                                                               \
            while ((__dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID,      \
                        __dev)) != NULL)                                \
            {                                                           \
                if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&          \
                    (NV_PCI_BUS_NUMBER(__dev) == bus) &&                \
                    (NV_PCI_DEVFN(__dev) == devfn))                     \
                {                                                       \
                    break;                                              \
                }                                                       \
            }                                                           \
        }                                                               \
        __dev;                                                          \
    })
#elif defined(NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT)
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain, bus, devfn) \
    pci_get_domain_bus_and_slot(domain, bus, devfn)
#else
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain, bus, devfn)         \
   ({                                                          \
        struct pci_dev *__dev = NULL;                          \
        while ((__dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, \
                    __dev)) != NULL)                           \
        {                                                      \
            if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&     \
                (NV_PCI_BUS_NUMBER(__dev) == bus) &&           \
                (NV_PCI_DEVFN(__dev) == devfn))                \
            {                                                  \
                break;                                         \
            }                                                  \
        }                                                      \
        __dev;                                                 \
    })
#endif

#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
#endif

#define NV_PRINT_AT(nv_debug_level, at)                                        \
    {                                                                          \
        nv_printf(nv_debug_level,                                              \
            "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, "  \
            "page_table = 0x%p\n", __FUNCTION__, __LINE__, at,                 \
            at->num_pages, NV_ATOMIC_READ(at->usage_count),                    \
            at->flags, at->page_table);                                        \
    }

#define NV_PRINT_VMA(nv_debug_level, vma)                                           \
    {                                                                               \
        nv_printf(nv_debug_level,                                                   \
            "NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08x bytes @ 0x%016llx, 0x%p, 0x%p\n", \
            __FUNCTION__, __LINE__, vma->vm_start, vma->vm_end, NV_VMA_SIZE(vma),   \
            NV_VMA_OFFSET(vma), NV_VMA_PRIVATE(vma), NV_VMA_FILE(vma));             \
    }

#ifndef minor
# define minor(x) MINOR(x)
#endif

#if defined(cpu_relax)
#define NV_CPU_RELAX() cpu_relax()
#else
#define NV_CPU_RELAX() barrier()
#endif

#ifndef IRQ_RETVAL
typedef void irqreturn_t;
#define IRQ_RETVAL(a)
#endif

#if !defined(PCI_COMMAND_SERR)
#define PCI_COMMAND_SERR 0x100
#endif
#if !defined(PCI_COMMAND_INTX_DISABLE)
#define PCI_COMMAND_INTX_DISABLE 0x400
#endif

#ifndef PCI_CAP_ID_EXP
#define PCI_CAP_ID_EXP 0x10
#endif

/*
 * On Linux on PPC64LE, enable basic support for Linux PCI error recovery
 * (see Documentation/PCI/pci-error-recovery.txt). Currently, RM only
 * supports error notification and data collection, not actual recovery
 * of the device.
 */
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
#include <asm/eeh.h>
#define NV_PCI_ERROR_RECOVERY
#endif
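
/*
 * Example: all NV_GET_DOMAIN_BUS_AND_SLOT() variants above return a
 * referenced struct pci_dev (pci_get_*() semantics), so the caller must
 * drop the reference when done:
 *
 *     struct pci_dev *dev = NV_GET_DOMAIN_BUS_AND_SLOT(domain, bus, devfn);
 *     if (dev != NULL)
 *     {
 *         // ...use dev...
 *         pci_dev_put(dev);
 *     }
 */
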
/*
 * If the host OS has page sizes larger than 4KB, we may have a security
 * problem. Registers are typically grouped in 4KB pages, but if there are
 * larger pages, then the smallest userspace mapping possible (e.g., a page)
 * may give more access than intended to the user.
 */
#define NV_4K_PAGE_ISOLATION_REQUIRED(addr, size)    \
    ((PAGE_SIZE > NV_RM_PAGE_SIZE) &&                \
     ((size) <= NV_RM_PAGE_SIZE) &&                  \
     (((addr) >> NV_RM_PAGE_SHIFT) ==                \
      (((addr) + (size) - 1) >> NV_RM_PAGE_SHIFT)))

/*
 * The kernel may have a workaround for this, by providing a method to isolate
 * a single 4K page in a given mapping.
 */
#if (PAGE_SIZE > NV_RM_PAGE_SIZE) && defined(NVCPU_PPC64LE) && defined(NV_PAGE_4K_PFN)
#define NV_4K_PAGE_ISOLATION_PRESENT
#define NV_4K_PAGE_ISOLATION_MMAP_ADDR(addr)             \
    ((NvP64)((void*)(((addr) >> NV_RM_PAGE_SHIFT) << PAGE_SHIFT)))
#define NV_4K_PAGE_ISOLATION_MMAP_LEN(size)      PAGE_SIZE
#define NV_4K_PAGE_ISOLATION_ACCESS_START(addr)          \
    ((NvP64)((void*)((addr) & ~NV_RM_PAGE_MASK)))
#define NV_4K_PAGE_ISOLATION_ACCESS_LEN(addr, size)      \
    ((((addr) & NV_RM_PAGE_MASK) + size + NV_RM_PAGE_MASK) & \
     ~NV_RM_PAGE_MASK)
#define NV_PROT_4K_PAGE_ISOLATION NV_PAGE_4K_PFN
#endif

static inline int nv_remap_page_range(struct vm_area_struct *vma,
    unsigned long virt_addr, NvU64 phys_addr, NvU64 size, pgprot_t prot)
{
    int ret = -1;

#if defined(NV_4K_PAGE_ISOLATION_PRESENT) && defined(NV_PROT_4K_PAGE_ISOLATION)
    if ((size == PAGE_SIZE) &&
        ((pgprot_val(prot) & NV_PROT_4K_PAGE_ISOLATION) != 0))
    {
        /*
         * remap_4k_pfn() hardcodes the length to a single OS page, and checks
         * whether applying the page isolation workaround will cause PTE
         * corruption (in which case it will fail, and this is an unsupported
         * configuration).
         */
#if defined(NV_HASH__REMAP_4K_PFN_PRESENT)
        ret = hash__remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
#else
        ret = remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
#endif
    }
    else
#endif
    {
        ret = remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
                prot);
    }

    return ret;
}

static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
    NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
{
    int ret = -1;
#if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
    ret = nv_remap_page_range(vma, vma->vm_start, phys_addr, size,
            nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
#else
    ret = io_remap_pfn_range(vma, vma->vm_start, (phys_addr >> PAGE_SHIFT),
            size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
#endif
    return ret;
}
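
/*
 * Example (sketch; example_mmap and the offset lookup are hypothetical):
 * an mmap() file operation would typically call the helper above as:
 *
 *     static int example_mmap(struct file *filp, struct vm_area_struct *vma)
 *     {
 *         NvU64 phys = ...;   // resolved from NV_VMA_OFFSET(vma)
 *         if (nv_io_remap_page_range(vma, phys, NV_VMA_SIZE(vma), 0) != 0)
 *             return -EAGAIN;
 *         return 0;
 *     }
 */
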
static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
    NvU64 virt_addr, NvU64 pfn, NvU32 extra_prot)
{
    /*
     * vm_insert_pfn{,_prot} were replaced with vmf_insert_pfn{,_prot} in
     * Linux 4.20.
     */
#if defined(NV_VMF_INSERT_PFN_PROT_PRESENT)
    return vmf_insert_pfn_prot(vma, virt_addr, pfn,
             __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
#else
    int ret = -EINVAL;
    /*
     * Only PPC64LE (NV_4K_PAGE_ISOLATION_PRESENT) requires extra_prot to be
     * used when remapping.
     *
     * vm_insert_pfn_prot() was added in Linux 4.4, whereas POWER9 support
     * was added in Linux 4.8.
     *
     * Rather than tampering with the vma to make use of extra_prot with
     * vm_insert_pfn() on older kernels, for now, just fail in this case, as
     * it's not expected to be used currently.
     */
#if defined(NV_VM_INSERT_PFN_PROT_PRESENT)
    ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
            __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
#elif !defined(NV_4K_PAGE_ISOLATION_PRESENT)
    ret = vm_insert_pfn(vma, virt_addr, pfn);
#endif
    switch (ret)
    {
        case 0:
        case -EBUSY:
            /*
             * EBUSY indicates that another thread already handled
             * the faulted range.
             */
            return VM_FAULT_NOPAGE;
        case -ENOMEM:
            return VM_FAULT_OOM;
        default:
            break;
    }
#endif /* defined(NV_VMF_INSERT_PFN_PROT_PRESENT) */
    return VM_FAULT_SIGBUS;
}

/* Converts a BAR index to the Linux-specific PCI BAR index */
static inline NvU8 nv_bar_index_to_os_bar_index
(
    struct pci_dev *dev,
    NvU8 nv_bar_index
)
{
    NvU8 bar_index = 0;
    NvU8 i;

    BUG_ON(nv_bar_index >= NV_GPU_NUM_BARS);

    for (i = 0; i < nv_bar_index; i++)
    {
        if (NV_PCI_RESOURCE_FLAGS(dev, bar_index) & PCI_BASE_ADDRESS_MEM_TYPE_64)
        {
            bar_index += 2;
        }
        else
        {
            bar_index++;
        }
    }

    return bar_index;
}

#define NV_PAGE_MASK (NvU64)(long)PAGE_MASK

extern void *nvidia_stack_t_cache;
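
/*
 * Example (sketch; example_fault and the pfn lookup are hypothetical, and
 * the vm_fault layout shown assumes a 4.10+ kernel): nv_insert_pfn() above
 * is shaped for use from a vm_operations_struct fault handler:
 *
 *     static vm_fault_t example_fault(struct vm_fault *vmf)
 *     {
 *         NvU64 pfn = ...;   // pfn backing vmf->address
 *         return nv_insert_pfn(vmf->vma, vmf->address, pfn, 0);
 *     }
 */
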
/*
 * On Linux, when a kmem cache is created, a new sysfs entry is created for
 * it unless it's merged with an existing cache. Upstream Linux kernel commit
 * 3b7b314053d021601940c50b07f5f1423ae67e21 (version 4.12+) made cache
 * destruction asynchronous, which creates a race between cache destroy and
 * create. A new cache created with the same attributes as a previous cache
 * that is scheduled for destruction can try to create a sysfs entry with the
 * same conflicting name. Upstream Linux kernel commit
 * d50d82faa0c964e31f7a946ba8aba7c715ca7ab0 (4.18) fixes this issue by cleaning
 * up the sysfs entry within slab_mutex, so the entry is deleted before a cache
 * with the same attributes could be created.
 *
 * To work around this kernel issue, we take two steps:
 * - Create unmergeable caches: a kmem_cache with a constructor is unmergeable.
 *   So, we define an empty constructor for the same. Creating an unmergeable
 *   cache ensures that the kernel doesn't generate an internal name and always
 *   uses our name instead.
 *
 * - Generate a unique cache name by appending the current timestamp (ns). We
 *   wait for the timestamp to increment by at least one to ensure that we do
 *   not hit a name conflict in a cache create -> destroy (async) -> create
 *   cycle.
 */
#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
static inline void nv_kmem_ctor_dummy(void *arg)
{
    (void)arg;
}
#else
#define nv_kmem_ctor_dummy NULL
#endif

#define NV_KMEM_CACHE_CREATE(name, type) \
    nv_kmem_cache_create(name, sizeof(type), 0)

/* The NULL pointer check is required for kernels older than 4.3 */
#define NV_KMEM_CACHE_DESTROY(kmem_cache)  \
    if (kmem_cache != NULL)                \
    {                                      \
        kmem_cache_destroy(kmem_cache);    \
    }

#define NV_KMEM_CACHE_ALLOC(kmem_cache)    \
    kmem_cache_alloc(kmem_cache, GFP_KERNEL)
#define NV_KMEM_CACHE_FREE(ptr, kmem_cache) \
    kmem_cache_free(kmem_cache, ptr)

static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
    /*
     * We cannot call kmem_cache_zalloc directly, as it adds the __GFP_ZERO
     * flag. This flag together with the presence of a slab constructor is
     * flagged as a potential bug by the Linux kernel, since it is the role
     * of a constructor to fill an allocated object with the desired
     * pattern. In our case, we specified a (dummy) constructor as a
     * workaround for a bug and not to zero-initialize objects. So, we take
     * the pain here to memset allocated objects ourselves.
     */
    void *object = kmem_cache_alloc(k, flags);
    if (object)
        memset(object, 0, kmem_cache_size(k));
    return object;
#else
    return kmem_cache_zalloc(k, flags);
#endif
}

static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
{
    nvidia_stack_t *sp = NULL;
#if defined(NVCPU_X86_64)
    if (rm_is_altstack_in_use())
    {
        sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
        if (sp == NULL)
            return -ENOMEM;
        sp->size = sizeof(sp->stack);
        sp->top = sp->stack + sp->size;
    }
#endif
    *stack = sp;
    return 0;
}

static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
{
#if defined(NVCPU_X86_64)
    if (stack != NULL && rm_is_altstack_in_use())
    {
        NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
    }
#endif
}
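
/*
 * Example: callers pair the two helpers above; note that on non-x86_64
 * platforms (and when the altstack is not in use) *stack is legitimately
 * left NULL on success:
 *
 *     nvidia_stack_t *sp = NULL;
 *     if (nv_kmem_cache_alloc_stack(&sp) != 0)
 *         return -ENOMEM;
 *     // ...use sp...
 *     nv_kmem_cache_free_stack(sp);
 */
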
#if defined(NVCPU_X86_64)
/*
 * RAM is cached on Linux by default, so we can assume there's
 * nothing to be done here. This is not the case for the
 * other memory spaces: we will have made an attempt to add
 * a WC MTRR for the frame buffer.
 *
 * If a WC MTRR is present, we can't satisfy the WB mapping
 * attempt here, since the achievable effective memory
 * types in that case are WC and UC; if not, it's typically
 * UC (MTRRdefType is UC). We could only satisfy WB mapping
 * requests with a WB MTRR.
 */
#define NV_ALLOW_CACHING(mt)  ((mt) == NV_MEMORY_TYPE_SYSTEM)
#else
#define NV_ALLOW_CACHING(mt)  ((mt) != NV_MEMORY_TYPE_REGISTERS)
#endif

typedef struct nvidia_pte_s {
    NvU64           phys_addr;
    unsigned long   virt_addr;
    NvU64           dma_addr;
#ifdef CONFIG_XEN
    unsigned int    guest_pfn;
#endif
    unsigned int    page_count;
} nvidia_pte_t;

typedef struct nv_alloc_s {
    struct nv_alloc_s *next;
    struct device     *dev;
    atomic_t       usage_count;
    struct {
        NvBool contig      : 1;
        NvBool guest       : 1;
        NvBool zeroed      : 1;
        NvBool aliased     : 1;
        NvBool user        : 1;
        NvBool node        : 1;
        NvBool peer_io     : 1;
        NvBool physical    : 1;
        NvBool unencrypted : 1;
        NvBool coherent    : 1;
    } flags;
    unsigned int   cache_type;
    unsigned int   num_pages;
    unsigned int   order;
    unsigned int   size;
    nvidia_pte_t **page_table;   /* list of physical pages allocated */
    unsigned int   pid;
    struct page  **user_pages;
    NvU64          guest_id;     /* id of guest VM */
    NvS32          node_id;      /* Node id for memory allocation when node is set in flags */
    void          *import_priv;
    struct sg_table *import_sgt;
} nv_alloc_t;

/**
 * nv_is_dma_direct - return true if dma_direct is in use
 *
 * Starting with the 5.0 kernel, SWIOTLB is merged into
 * dma_direct, so systems without an IOMMU use dma_direct. We
 * need to know if this is the case, so that we can use a
 * different check for SWIOTLB enablement.
 */
static inline NvBool nv_is_dma_direct(struct device *dev)
{
    NvBool is_direct = NV_FALSE;

#if defined(NV_DMA_IS_DIRECT_PRESENT)
    if (dma_is_direct(get_dma_ops(dev)))
        is_direct = NV_TRUE;
#endif

    return is_direct;
}

/**
 * nv_dma_maps_swiotlb - return NV_TRUE if swiotlb is enabled
 *
 * SWIOTLB creates bounce buffers for the DMA mapping layer to
 * use if a driver asks the kernel to map a DMA buffer that is
 * outside of the device's addressable range. The driver does
 * not function correctly if bounce buffers are enabled for the
 * device. So if SWIOTLB is enabled, we should avoid making
 * mapping calls.
 */
static inline NvBool
nv_dma_maps_swiotlb(struct device *dev)
{
    NvBool swiotlb_in_use = NV_FALSE;
#if defined(CONFIG_SWIOTLB)
#if defined(NV_DMA_OPS_PRESENT) || defined(NV_GET_DMA_OPS_PRESENT) || \
    defined(NV_SWIOTLB_DMA_OPS_PRESENT)
    /*
     * We only use the 'dma_ops' symbol on older x86_64 kernels; later kernels,
     * including those for other architectures, have converged on the
     * get_dma_ops() interface.
     */
#if defined(NV_GET_DMA_OPS_PRESENT)
    /*
     * The __attribute__ ((unused)) is necessary because in at least one
     * case, *none* of the preprocessor branches below are taken, and
     * so the ops variable ends up never being referred to at all. This can
     * happen with the (NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs == 1)
     * case.
     */
    const struct dma_map_ops *ops __attribute__ ((unused)) = get_dma_ops(dev);
#else
    const struct dma_mapping_ops *ops __attribute__ ((unused)) = dma_ops;
#endif

    /*
     * The switch from dma_mapping_ops -> dma_map_ops coincided with the
     * switch from swiotlb_map_sg -> swiotlb_map_sg_attrs.
     */
#if defined(NVCPU_AARCH64) && \
    defined(NV_NONCOHERENT_SWIOTLB_DMA_OPS_PRESENT)
    /* AArch64 exports these symbols directly */
    swiotlb_in_use = ((ops == &noncoherent_swiotlb_dma_ops) ||
                      (ops == &coherent_swiotlb_dma_ops));
#elif NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs != 0
    swiotlb_in_use = (ops->map_sg == swiotlb_map_sg_attrs);
#elif NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_dma_ops != 0
    swiotlb_in_use = (ops == &swiotlb_dma_ops);
#endif
    /*
     * The "else" case that is not shown
     * (for NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs == 0 ||
     * NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_dma_ops == 0) does
     * nothing, and ends up dropping us out to the last line of this function,
     * effectively returning false. The nearly-human-readable version of that
     * case is "struct swiotlb_dma_ops is present (NV_SWIOTLB_DMA_OPS_PRESENT
     * is defined) but neither swiotlb_map_sg_attrs nor swiotlb_dma_ops is
     * present".
     *
     * That can happen on kernels that fall within the range below:
     *
     * 2017-12-24 4bd89ed39b2ab8dc4ac4b6c59b07d420b0213bec
     *     ("swiotlb: remove various exports")
     * 2018-06-28 210d0797c97d0e8f3b1a932a0dc143f4c57008a3
     *     ("swiotlb: export swiotlb_dma_ops")
     *
     * Related to this: between the above two commits, this driver has no way
     * of detecting whether or not the SWIOTLB is in use. Furthermore, the
     * driver cannot support DMA remapping. That leads to the following
     * point: "swiotlb=force" is not supported for kernels falling in the
     * above range.
     *
     * The other "else" case that is not shown:
     * Starting with the 5.0 kernel, swiotlb is integrated into dma_direct,
     * which is used when there's no IOMMU. In these kernels, ops == NULL,
     * swiotlb_dma_ops no longer exists, and we do not support swiotlb=force
     * (doing so would require detecting when swiotlb=force is enabled and
     * then returning NV_TRUE even when dma_direct is in use). So for now,
     * we just return NV_FALSE, and in nv_compute_gfp_mask() we check for
     * whether swiotlb could possibly be used (outside of swiotlb=force).
     */
#endif

    /*
     * Commit 2017-11-07 d7b417fa08d ("x86/mm: Add DMA support for
     * SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
     * is active in all cases.
     */
    if (os_cc_enabled)
        swiotlb_in_use = NV_TRUE;
#endif

    return swiotlb_in_use;
}

/*
 * TODO: Bug 1522381 will allow us to move these mapping relationships into
 * common code.
 */
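
/*
 * Example: DMA setup paths can use the check above to refuse
 * configurations that would bounce-buffer GPU DMA:
 *
 *     if (nv_dma_maps_swiotlb(dev))
 *     {
 *         nv_printf(NV_DBG_ERRORS,
 *             "NVRM: GPU DMA would go through SWIOTLB bounce buffers\n");
 *         return NV_ERR_NOT_SUPPORTED;
 *     }
 */
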
/*
 * Bug 1606851: the Linux kernel scatterlist code doesn't work for regions
 * greater than or equal to 4GB, due to regular use of unsigned int
 * throughout. So we need to split our mappings into 4GB-minus-1-page-or-less
 * chunks and manage them separately.
 */
typedef struct nv_dma_submap_s {
    NvU32 page_count;
    NvU32 sg_map_count;
    struct sg_table sgt;
    NvBool imported;
} nv_dma_submap_t;

typedef struct nv_dma_map_s {
    struct page **pages;
    NvU64 page_count;
    NvBool contiguous;
    NvU32 cache_type;
    struct sg_table *import_sgt;

    union
    {
        struct
        {
            NvU32 submap_count;
            nv_dma_submap_t *submaps;
        } discontig;

        struct
        {
            NvU64 dma_addr;
        } contig;
    } mapping;

    struct device *dev;
} nv_dma_map_t;

#define NV_FOR_EACH_DMA_SUBMAP(dm, sm, i)               \
    for (i = 0, sm = &dm->mapping.discontig.submaps[0]; \
         i < dm->mapping.discontig.submap_count;        \
         i++, sm = &dm->mapping.discontig.submaps[i])

/*
 * On 4K ARM kernels, use a max submap size that is a multiple of 64K to keep
 * nv-p2p happy. Despite 4K OS pages, we still use 64K P2P pages, due to
 * dependent modules still using 64K. Instead of (4G-4K), use (4G-64K) as the
 * max submap size, since the mapped IOVA range must be aligned at a 64K
 * boundary.
 */
#if defined(CONFIG_ARM64_4K_PAGES)
#define NV_DMA_U32_MAX_4K_PAGES ((NvU32)((NV_U32_MAX >> PAGE_SHIFT) + 1))
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 16))
#else
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_U32_MAX >> PAGE_SHIFT))
#endif

#define NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(s)  ((s) * NV_DMA_SUBMAP_MAX_PAGES)

/*
 * DO NOT use sg_alloc_table_from_pages on Xen Server, even if it's available.
 * This will glom multiple pages into a single sg element, which
 * xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
 * single-page sg elements on Xen Server.
 */
#if !defined(NV_DOM0_KERNEL_PRESENT)
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i)                 \
    ((sg_alloc_table_from_pages(&sm->sgt,                          \
        &dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)],              \
        sm->page_count, 0,                                         \
        sm->page_count * PAGE_SIZE, NV_GFP_KERNEL) == 0) ? NV_OK : \
        NV_ERR_OPERATING_SYSTEM)
#else
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i)                 \
    ((sg_alloc_table(&sm->sgt, sm->page_count, NV_GFP_KERNEL)) ==  \
        0 ? NV_OK : NV_ERR_OPERATING_SYSTEM)
#endif
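
/*
 * Example: a discontiguous allocation of page_count pages splits into
 * ceil(page_count / NV_DMA_SUBMAP_MAX_PAGES) submaps, each small enough
 * for the 32-bit scatterlist limits described above:
 *
 *     NvU64 count = (dm->page_count + NV_DMA_SUBMAP_MAX_PAGES - 1) /
 *                   NV_DMA_SUBMAP_MAX_PAGES;
 *     dm->mapping.discontig.submap_count = (NvU32)count;
 */
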

typedef struct nv_ibmnpu_info nv_ibmnpu_info_t;

typedef struct nv_work_s {
    struct work_struct task;
    void *data;
} nv_work_t;

#define NV_MAX_REGISTRY_KEYS_LENGTH   512

typedef enum
{
    NV_DEV_STACK_TIMER,
    NV_DEV_STACK_ISR,
    NV_DEV_STACK_ISR_BH,
    NV_DEV_STACK_ISR_BH_UNLOCKED,
    NV_DEV_STACK_GPU_WAKEUP,
    NV_DEV_STACK_COUNT
} nvidia_linux_dev_stack_t;

/* Linux version of the opaque type used for os_queue_work_item() */
struct os_work_queue {
    nv_kthread_q_t nvk;
};

/* Linux version of the opaque type used for os_wait_*() */
struct os_wait_queue {
    struct completion q;
};

/*
 * To report an error in MSI/MSI-X when the unhandled count reaches a
 * threshold.
 */
typedef struct nv_irq_count_info_s
{
    int    irq;
    NvU64  unhandled;
    NvU64  total;
    NvU64  last_unhandled;
} nv_irq_count_info_t;

/* Linux-specific version of nv_dma_device_t */
struct nv_dma_device {
    struct {
        NvU64 start;
        NvU64 limit;
    } addressable_range;

    struct device *dev;
    NvBool nvlink;
};

/* Properties of the coherent link */
typedef struct coherent_link_info_s {
    /* Physical address of the GPU memory in the SOC AMAP. In a bare-metal
     * OS environment it is the System Physical Address (SPA); in a
     * virtualized OS environment it is the Intermediate Physical Address
     * (IPA). */
    NvU64 gpu_mem_pa;

    /* Physical address of the reserved portion of the GPU memory, applicable
     * only on the Grace Hopper self-hosted passthrough virtualization
     * platform. */
    NvU64 rsvd_mem_pa;

    /* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
     * available for adding GPU memory to the kernel as system RAM. */
    DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
} coherent_link_info_t;

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
/*
 * acpi data storage structure
 *
 * This structure retains the pointer to the device,
 * and any other baggage we want to carry along
 */
typedef struct
{
    nvidia_stack_t *sp;
    struct acpi_device *device;
    struct acpi_handle *handle;
    void *notifier_data;
    int notify_handler_installed;
} nv_acpi_t;
#endif

/* linux-specific version of old nv_state_t */
/* this is a general os-specific state structure. the first element *must* be
   the general state structure, for the generic unix-based code */
typedef struct nv_linux_state_s {
    nv_state_t nv_state;

    atomic_t usage_count;
    NvU32    suspend_count;

    struct device  *dev;
    struct pci_dev *pci_dev;

    /* IBM-NPU info associated with this GPU */
    nv_ibmnpu_info_t *npu;

    /* coherent link information */
    coherent_link_info_t coherent_link_info;

    /* Dedicated queue to be used for removing FB memory which is onlined
     * to the kernel as a NUMA node. Refer to bug 3879845. */
    nv_kthread_q_t remove_numa_memory_q;

    /* NUMA node information for the platforms where GPU memory is presented
     * as a NUMA node to the kernel */
    struct {
        /* NUMA node id >= 0 when the platform supports GPU memory as a NUMA
         * node; otherwise it holds the value of NUMA_NO_NODE */
        NvS32 node_id;

        /* NUMA online/offline status for platforms that support GPU memory
         * as a NUMA node */
        atomic_t status;
        NvBool use_auto_online;
    } numa_info;

    nvidia_stack_t *sp[NV_DEV_STACK_COUNT];

    char registry_keys[NV_MAX_REGISTRY_KEYS_LENGTH];

    nv_work_t work;

    /* get a timer callback every second */
    struct nv_timer rc_timer;

    /* lock for linux-specific data, not used by core rm */
    struct semaphore ldata_lock;

    /* proc directory information */
    struct proc_dir_entry *proc_dir;

    NvU32 minor_num;
    struct nv_linux_state_s *next;

    /* DRM private information */
    struct drm_device *drm;

    /* kthread based bottom half servicing queue and elements */
    nv_kthread_q_t bottom_half_q;
    nv_kthread_q_item_t bottom_half_q_item;

    /* Lock for unlocked bottom half protecting common allocated stack */
    void *isr_bh_unlocked_mutex;

    NvBool tce_bypass_enabled;

    NvU32 num_intr;

    /* Lock serializing ISRs for different MSI-X vectors */
    nv_spinlock_t msix_isr_lock;

    /* Lock serializing bottom halves for different MSI-X vectors */
    void *msix_bh_mutex;

    struct msix_entry *msix_entries;

    NvU64 numa_memblock_size;

    struct {
        struct backlight_device *dev;
        NvU32 displayId;
        const char *device_name;
    } backlight;

    /*
     * file handle for the pci sysfs config file
     * (/sys/bus/pci/devices/.../config), which will be opened during
     * device probe
     */
    struct file *sysfs_config_file;

    /* Per-GPU queue */
    struct os_work_queue queue;

    /* GPU user mapping revocation/remapping (only for non-CTL device) */
    struct semaphore mmap_lock; /* Protects all fields in this category */
    struct list_head open_files;
    NvBool all_mappings_revoked;
    NvBool safe_to_mmap;
    NvBool gpu_wakeup_callback_needed;

    /* Per-device notifier block for ACPI events */
    struct notifier_block acpi_nb;

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
    nv_acpi_t* nv_acpi_object;
#endif

    /* Lock serializing ISRs for different SOC vectors */
    nv_spinlock_t soc_isr_lock;
    void *soc_bh_mutex;

    struct nv_timer snapshot_timer;
    nv_spinlock_t snapshot_timer_lock;
    void (*snapshot_callback)(void *context);

    /* count for unhandled, total and timestamp of irq */
    nv_irq_count_info_t *irq_count;

    /* Max number of irqs triggered and being tracked */
    NvU16 current_num_irq_tracked;

    NvBool is_forced_shutdown;

    struct nv_dma_device dma_dev;
    struct nv_dma_device niso_dma_dev;

    /*
     * Background kthread for handling deferred open operations
     * (e.g. from O_NONBLOCK).
     *
     * Adding to open_q and reading/writing is_accepting_opens
     * are protected by nvl->open_q_lock (not nvl->ldata_lock).
     * This allows new deferred open operations to be enqueued without
     * blocking behind previous ones (which hold nvl->ldata_lock).
     *
     * Adding to open_q is only safe if is_accepting_opens is true.
     * This prevents open operations from racing with device removal.
     *
     * Stopping open_q is only safe after setting is_accepting_opens to false.
     * This ensures that the open_q (and the larger nvl structure) will
     * outlive any of the open operations enqueued.
     */
    nv_kthread_q_t open_q;
    NvBool is_accepting_opens;
    struct semaphore open_q_lock;
} nv_linux_state_t;

extern nv_linux_state_t *nv_linux_devices;

/*
 * Macros to protect operations on the nv_linux_devices list.
 * Lock acquisition order while using the nv_linux_devices list:
 * 1. LOCK_NV_LINUX_DEVICES()
 * 2. Traverse the list
 *    If the list is traversed to search for an element, say nvl,
 *    acquire the nvl->ldata_lock before step 3
 * 3. UNLOCK_NV_LINUX_DEVICES()
 * 4. Release nvl->ldata_lock after any read/write access to the
 *    nvl element is complete
 */
extern struct semaphore nv_linux_devices_lock;
#define LOCK_NV_LINUX_DEVICES()     down(&nv_linux_devices_lock)
#define UNLOCK_NV_LINUX_DEVICES()   up(&nv_linux_devices_lock)

/*
 * Lock to synchronize system power management transitions,
 * and to protect the global system PM state. The procfs power
 * management interface acquires this lock in write mode for
 * the duration of the sleep operation, while any other paths
 * accessing device state must acquire the lock in read mode.
 */
extern struct rw_semaphore nv_system_pm_lock;

extern NvBool nv_ats_supported;

/*
 * file-private data
 * hide a pointer to our data structures in a file-private ptr
 * there are times we need to grab this data back from the file
 * data structure.
 */
typedef struct nvidia_event
{
    struct nvidia_event *next;
    nv_event_t event;
} nvidia_event_t;

typedef struct
{
    nv_file_private_t nvfp;

    nvidia_stack_t *sp;
    nv_alloc_t *free_list;
    nv_linux_state_t *nvptr;
    nvidia_event_t *event_data_head, *event_data_tail;
    NvBool dataless_event_pending;
    nv_spinlock_t fp_lock;
    wait_queue_head_t waitqueue;
    nv_kthread_q_item_t deferred_close_q_item;
    NvU32 *attached_gpus;
    size_t num_attached_gpus;
    nv_alloc_mapping_context_t mmap_context;
    struct address_space mapping;

    nv_kthread_q_item_t open_q_item;
    struct completion open_complete;
    nv_linux_state_t *deferred_open_nvl;
    int open_rc;
    NV_STATUS adapter_status;

    struct list_head entry;
} nv_linux_file_private_t;

static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t *nvfp)
{
    return container_of(nvfp, nv_linux_file_private_t, nvfp);
}

static inline int nv_wait_open_complete_interruptible(nv_linux_file_private_t *nvlfp)
{
    return wait_for_completion_interruptible(&nvlfp->open_complete);
}

static inline void nv_wait_open_complete(nv_linux_file_private_t *nvlfp)
{
    wait_for_completion(&nvlfp->open_complete);
}

static inline NvBool nv_is_open_complete(nv_linux_file_private_t *nvlfp)
{
    return completion_done(&nvlfp->open_complete);
}

#define NV_SET_FILE_PRIVATE(filep, data) ((filep)->private_data = (data))
#define NV_GET_LINUX_FILE_PRIVATE(filep) ((nv_linux_file_private_t *)(filep)->private_data)
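
/*
 * Example: a blocking path that must observe the result of a deferred
 * open can wait on the completion via the helper above:
 *
 *     if (nv_wait_open_complete_interruptible(nvlfp) != 0)
 *         return -ERESTARTSYS;   // wait was interrupted by a signal
 */
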
/* for the card devices */
#define NV_GET_NVL_FROM_FILEP(filep)  (NV_GET_LINUX_FILE_PRIVATE(filep)->nvptr)
#define NV_GET_NVL_FROM_NV_STATE(nv)  ((nv_linux_state_t *)nv->os_state)

#define NV_STATE_PTR(nvl)   &(((nv_linux_state_t *)(nvl))->nv_state)

#define NV_ATOMIC_READ(data)          atomic_read(&(data))
#define NV_ATOMIC_SET(data, val)      atomic_set(&(data), (val))
#define NV_ATOMIC_INC(data)           atomic_inc(&(data))
#define NV_ATOMIC_DEC(data)           atomic_dec(&(data))
#define NV_ATOMIC_DEC_AND_TEST(data)  atomic_dec_and_test(&(data))

static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned int size,
                                                      unsigned int align)
{
    char *name_unique;
    struct kmem_cache *cache;

#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
    size_t len;
    NvU64 tm_ns = nv_ktime_get_raw_ns();

    /*
     * Wait for the timer to change at least once. This ensures
     * that the name generated below is always unique.
     */
    while (tm_ns == nv_ktime_get_raw_ns());
    tm_ns = nv_ktime_get_raw_ns();

    /* 20 is the max length of a 64-bit integer printed in decimal */
    len = strlen(name) + 20 + 1;
    name_unique = kzalloc(len, GFP_KERNEL);
    if (!name_unique)
        return NULL;

    if (snprintf(name_unique, len, "%s-%llu", name, tm_ns) >= len)
    {
        WARN(1, "kmem cache name too long: %s\n", name);
        kfree(name_unique);
        return NULL;
    }
#else
    name_unique = (char *)name;
#endif
    cache = kmem_cache_create(name_unique, size, align, 0, nv_kmem_ctor_dummy);
    if (name_unique != name)
        kfree(name_unique);

    return cache;
}

#if defined(CONFIG_PCI_IOV)
#define NV_PCI_SRIOV_SUPPORT
#endif /* CONFIG_PCI_IOV */

#define NV_PCIE_CFG_MAX_OFFSET 0x1000

#include "nv-proto.h"

/*
 * Check if the GPU is present on the bus by checking the
 * NV_FLAG_IN_SURPRISE_REMOVAL flag (set when an eGPU is removed from TB3).
 */
static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
{
#if !defined(NVCPU_PPC64LE)
    if (NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv))
    {
        return NV_ERR_GPU_IS_LOST;
    }
#endif

    return NV_OK;
}

extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_EnableResizableBar;
extern NvU32 NVreg_EnableNonblockingOpen;

extern NvU32 num_probed_nv_devices;
extern NvU32 num_nv_devices;

#define NV_FILE_INODE(file) (file)->f_inode

static inline int nv_is_control_device(struct inode *inode)
{
    return (minor((inode)->i_rdev) == NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE);
}

#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
#define NV_VGX_HYPER
#if defined(NV_XEN_IOEMU_INJECT_MSI)
#include <xen/ioemu.h>
#endif
#endif

static inline NvU64 nv_pci_bus_address(struct pci_dev *dev, NvU8 bar_index)
{
    NvU64 bus_addr = 0;
#if defined(NV_PCI_BUS_ADDRESS_PRESENT)
    bus_addr = pci_bus_address(dev, bar_index);
#elif defined(CONFIG_PCI)
    struct pci_bus_region region;

    pcibios_resource_to_bus(dev, &region, &dev->resource[bar_index]);
    bus_addr = region.start;
#endif
    return bus_addr;
}
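
/*
 * Example: NV_KMEM_CACHE_CREATE() above pairs with the alloc/free/destroy
 * macros defined earlier (struct foo_s is a placeholder type):
 *
 *     struct kmem_cache *cache = NV_KMEM_CACHE_CREATE("nv_foo", struct foo_s);
 *     struct foo_s *obj = NV_KMEM_CACHE_ALLOC(cache);
 *     // ...use obj...
 *     NV_KMEM_CACHE_FREE(obj, cache);
 *     NV_KMEM_CACHE_DESTROY(cache);
 */
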

/*
 * Decrements the usage count of the allocation, and moves the allocation to
 * the given nvlfp's free list if the usage count drops to zero.
 *
 * Returns NV_TRUE if the allocation is moved to the nvlfp's free list.
 */
static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t *at)
{
    NV_PRINT_AT(NV_DBG_MEMINFO, at);

    if (NV_ATOMIC_DEC_AND_TEST(at->usage_count))
    {
        NV_ATOMIC_INC(at->usage_count);

        at->next = nvlfp->free_list;
        nvlfp->free_list = at;
        return NV_TRUE;
    }

    return NV_FALSE;
}

/*
 * RB_EMPTY_ROOT was added in 2.6.18 by this commit:
 * 2006-06-21  dd67d051529387f6e44d22d1d5540ef281965fdd
 */
#if !defined(RB_EMPTY_ROOT)
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
#endif

/*
 * Starting on Power9 systems, DMA addresses for NVLink are no longer
 * the same as used over PCIe.
 *
 * Power9 supports a 56-bit Real Address. This address range is compressed
 * when accessed over NVLink to allow the GPU to access all of memory using
 * its 47-bit Physical address.
 *
 * If there is an NPU device present on the system, it implies that NVLink
 * sysmem links are present and we need to apply the required address
 * conversion for NVLink within the driver.
 *
 * See Bug 1920398 for further background and details.
 *
 * Note, a deviation from the documented compression scheme is that the
 * upper address bits (i.e. bits 56-63), instead of being set to zero, are
 * preserved during NVLink address compression so the original PCIe DMA
 * address can be reconstructed on expansion. These bits can be safely
 * ignored on NVLink since they are truncated by the GPU.
 *
 * Bug 1968345: As a performance enhancement, it is the responsibility of
 * the caller on PowerPC platforms to check for the presence of an NPU
 * device before the address transformation is applied.
 */
static inline NvU64 nv_compress_nvlink_addr(NvU64 addr)
{
    NvU64 addr47 = addr;

#if defined(NVCPU_PPC64LE)
    addr47 = addr & ((1ULL << 43) - 1);
    addr47 |= (addr & (0x3ULL << 45)) >> 2;
    WARN_ON(addr47 & (1ULL << 44));
    addr47 |= (addr & (0x3ULL << 49)) >> 4;
    addr47 |= addr & ~((1ULL << 56) - 1);
#endif

    return addr47;
}

static inline NvU64 nv_expand_nvlink_addr(NvU64 addr47)
{
    NvU64 addr = addr47;

#if defined(NVCPU_PPC64LE)
    addr = addr47 & ((1ULL << 43) - 1);
    addr |= (addr47 & (3ULL << 43)) << 2;
    addr |= (addr47 & (3ULL << 45)) << 4;
    addr |= addr47 & ~((1ULL << 56) - 1);
#endif

    return addr;
}
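
/*
 * Illustrative sketch only (not part of this header): the two transforms
 * above are inverses on PPC64LE for addresses whose set bits lie entirely
 * within the ranges the compression scheme carries (bits 0-42, 45-46,
 * 49-50, and 56-63). The self-test below is hypothetical; compiled out
 * via #if 0.
 */
#if 0
static void nv_example_nvlink_addr_roundtrip(void)
{
    /* bits 56-63, one bit each from ranges 49-50 and 45-46, low bits 0-42 */
    NvU64 addr = (0xABULL << 56) | (1ULL << 49) | (1ULL << 45) | 0x12345ULL;

    /* Expansion reconstructs the original PCIe DMA address. */
    WARN_ON(nv_expand_nvlink_addr(nv_compress_nvlink_addr(addr)) != addr);
}
#endif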

/* Default flags for ISRs */
static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
{
    NvU32 flags = 0;

    /*
     * Request IRQs to be disabled in our ISRs to keep consistency across the
     * supported kernel versions.
     *
     * IRQF_DISABLED was made the default in 2.6.35 with commit e58aa3d2d0cc
     * from March 2010, and was later removed entirely in 4.1 with commit
     * d8bf368d0631 from March 2015. Add it to our flags if it is defined, to
     * get the same behaviour on pre-2.6.35 kernels as on recent ones.
     */
#if defined(IRQF_DISABLED)
    flags |= IRQF_DISABLED;
#endif

    /*
     * For legacy interrupts, also allow sharing. Sharing doesn't make sense
     * for MSI(-X), as on Linux those are never shared across different
     * devices and we only register one ISR today.
     */
    if ((nv->flags & (NV_FLAG_USES_MSI | NV_FLAG_USES_MSIX)) == 0)
        flags |= IRQF_SHARED;

    return flags;
}

/*
 * As of v3.7-rc1, the kernel stopped exporting get_unused_fd() and started
 * exporting get_unused_fd_flags(), with this commit:
 * 2012-09-26 1a7bd2265fc ("make get_unused_fd_flags() a function")
 */
#if NV_IS_EXPORT_SYMBOL_PRESENT_get_unused_fd
#define NV_GET_UNUSED_FD()  get_unused_fd()
#else
#define NV_GET_UNUSED_FD()  get_unused_fd_flags(0)
#endif

#if NV_IS_EXPORT_SYMBOL_PRESENT_get_unused_fd_flags
#define NV_GET_UNUSED_FD_FLAGS(flags)   get_unused_fd_flags(flags)
#else
#define NV_GET_UNUSED_FD_FLAGS(flags)   (-1)
#endif

#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING

NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);

static inline void nv_mutex_destroy(struct mutex *lock)
{
    mutex_destroy(lock);
}

static inline NvBool nv_platform_supports_numa(nv_linux_state_t *nvl)
{
    return nvl->numa_info.node_id != NUMA_NO_NODE;
}

static inline int nv_get_numa_status(nv_linux_state_t *nvl)
{
    if (!nv_platform_supports_numa(nvl))
    {
        return NV_IOCTL_NUMA_STATUS_DISABLED;
    }

    return NV_ATOMIC_READ(nvl->numa_info.status);
}

static inline int nv_set_numa_status(nv_linux_state_t *nvl, int status)
{
    if (!nv_platform_supports_numa(nvl))
    {
        return -EINVAL;
    }

    NV_ATOMIC_SET(nvl->numa_info.status, status);
    return 0;
}

static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
{
    return nvl->numa_info.use_auto_online;
}

typedef struct {
    NvU64 base;
    NvU64 size;
    NvU32 nodeId;
    int ret;
} remove_numa_memory_info_t;

static void offline_numa_memory_callback
(
    void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
                                               pNumaInfo->base,
                                               pNumaInfo->size);
#else
    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
                                               pNumaInfo->size);
#endif
#endif
}

typedef enum
{
    NV_NUMA_STATUS_DISABLED             = 0,
    NV_NUMA_STATUS_OFFLINE              = 1,
    NV_NUMA_STATUS_ONLINE_IN_PROGRESS   = 2,
    NV_NUMA_STATUS_ONLINE               = 3,
    NV_NUMA_STATUS_ONLINE_FAILED        = 4,
    NV_NUMA_STATUS_OFFLINE_IN_PROGRESS  = 5,
    NV_NUMA_STATUS_OFFLINE_FAILED       = 6,
    NV_NUMA_STATUS_COUNT
} nv_numa_status_t;
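
/*
 * Illustrative usage sketch only (not part of this header) for
 * offline_numa_memory_callback() above: the callback expects a
 * remove_numa_memory_info_t passed through its void *args parameter.
 * Its signature suggests it is meant to be run from a kthread queue;
 * here it is simply called directly. The wrapper below is hypothetical;
 * compiled out via #if 0.
 */
#if 0
static int nv_example_offline_numa_memory(NvU64 base, NvU64 size, NvU32 node_id)
{
    remove_numa_memory_info_t info;

    info.base   = base;
    info.size   = size;
    info.nodeId = node_id;
    info.ret    = 0;

    offline_numa_memory_callback(&info);

    /* 0 on success, or the error from offline_and_remove_memory() */
    return info.ret;
}
#endif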

#if defined(NV_LINUX_PLATFORM_DEVICE_H_PRESENT)
#include <linux/platform_device.h>
#endif

#if defined(NV_LINUX_MUTEX_H_PRESENT)
#include <linux/mutex.h>
#endif

#if defined(NV_LINUX_RESET_H_PRESENT)
#include <linux/reset.h>
#endif

#if defined(NV_LINUX_DMA_BUF_H_PRESENT)
#include <linux/dma-buf.h>
#endif

#if defined(NV_LINUX_GPIO_H_PRESENT)
#include <linux/gpio.h>
#endif

#if defined(NV_LINUX_OF_GPIO_H_PRESENT)
#include <linux/of_gpio.h>
#endif

#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif

#if defined(NV_LINUX_OF_PLATFORM_H_PRESENT)
#include <linux/of_platform.h>
#endif

#if defined(NV_LINUX_INTERCONNECT_H_PRESENT)
#include <linux/interconnect.h>
#endif

#if defined(NV_LINUX_PM_RUNTIME_H_PRESENT)
#include <linux/pm_runtime.h>
#endif

#if defined(NV_LINUX_CLK_H_PRESENT)
#include <linux/clk.h>
#endif

#if defined(NV_LINUX_CLK_PROVIDER_H_PRESENT)
#include <linux/clk-provider.h>
#endif

#define NV_EXPORT_SYMBOL(symbol)        EXPORT_SYMBOL_GPL(symbol)
#define NV_CHECK_EXPORT_SYMBOL(symbol)  NV_IS_EXPORT_SYMBOL_PRESENT_##symbol

#endif /* _NV_LINUX_H_ */