/*
 * SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _NV_LINUX_H_
#define _NV_LINUX_H_

#include "nvstatus.h"
#include "nv.h"
#include "nv-ioctl-numa.h"
#include "conftest.h"

#include "nv-lock.h"
#include "nv-pgprot.h"
#include "nv-mm.h"
#include "os-interface.h"
#include "nv-timer.h"
#include "nv-time.h"

#define NV_KERNEL_NAME "Linux"

#ifndef AUTOCONF_INCLUDED
#if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
#include <generated/autoconf.h>
#else
#include <linux/autoconf.h>
#endif
#endif

#if defined(NV_GENERATED_UTSRELEASE_H_PRESENT)
#include <generated/utsrelease.h>
#endif

#if defined(NV_GENERATED_COMPILE_H_PRESENT)
#include <generated/compile.h>
#endif

#include <linux/version.h>
#include <linux/utsname.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
#error "This driver does not support kernels older than 2.6.32!"
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 7, 0)
#  define KERNEL_2_6
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
#  define KERNEL_3
#else
#error "This driver does not support development kernels!"
#endif
#if defined (CONFIG_SMP) && !defined (__SMP__)
#define __SMP__
#endif

#if defined (CONFIG_MODVERSIONS) && !defined (MODVERSIONS)
#  define MODVERSIONS
#endif

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kmod.h>
#include <asm/bug.h>

#include <linux/mm.h>

#if !defined(VM_RESERVED)
#define VM_RESERVED    0x00000000
#endif
#if !defined(VM_DONTEXPAND)
#define VM_DONTEXPAND  0x00000000
#endif
#if !defined(VM_DONTDUMP)
#define VM_DONTDUMP    0x00000000
#endif

#include <linux/init.h>             /* module_init, module_exit         */
#include <linux/types.h>            /* pid_t, size_t, __u32, etc        */
#include <linux/errno.h>            /* error codes                      */
#include <linux/list.h>             /* circular linked list             */
#include <linux/stddef.h>           /* NULL, offsetof                   */
#include <linux/wait.h>             /* wait queues                      */
#include <linux/string.h>           /* strchr(), strpbrk()              */

#include <linux/ctype.h>            /* isspace(), etc                   */
#include <linux/console.h>          /* acquire_console_sem(), etc       */
#include <linux/cpufreq.h>          /* cpufreq_get                      */

#include <linux/slab.h>             /* kmalloc, kfree, etc              */
#include <linux/vmalloc.h>          /* vmalloc, vfree, etc              */

#include <linux/poll.h>             /* poll_wait                        */
#include <linux/delay.h>            /* mdelay, udelay                   */

#include <linux/sched.h>            /* suser(), capable() replacement   */

#include <linux/random.h>           /* get_random_bytes()               */

#if defined(NV_LINUX_DMA_BUF_H_PRESENT)
#include <linux/dma-buf.h>
#endif

#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRM_DEVICE_H_PRESENT)
#include <drm/drm_device.h>
#endif

#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif

#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif

#if defined(NV_DRM_DRM_GEM_H_PRESENT)
#include <drm/drm_gem.h>
#endif
#endif /* NV_DRM_AVAILABLE */

/*
 * sched.h was refactored with this commit (as part of Linux 4.11)
 * 2017-03-03  1827adb11ad26b2290dc9fe2aaf54976b2439865
 */
#if defined(NV_LINUX_SCHED_SIGNAL_H_PRESENT)
#include <linux/sched/signal.h>     /* task_lock(), task_unlock()       */
#endif

#if defined(NV_LINUX_SCHED_TASK_H_PRESENT)
#include <linux/sched/task.h>       /* task_lock(), task_unlock()       */
#endif

/* task and signal-related items, for kernels < 4.11: */
#include <linux/sched.h>            /* task_lock(), task_unlock()       */

#include <linux/moduleparam.h>      /* module_param()                   */
#include <asm/tlbflush.h>           /* flush_tlb(), flush_tlb_all()     */

#include <linux/pci.h>              /* pci_find_class, etc              */
#include <linux/interrupt.h>        /* tasklets, interrupt helpers      */
#include <linux/timer.h>
#include <linux/file.h>             /* fget(), fput()                   */
#include <linux/rbtree.h>
#include <linux/cpu.h>              /* CPU hotplug support              */

#include <linux/pm_runtime.h>       /* pm_runtime_*                     */
#include <linux/fdtable.h>          /* files_fdtable, etc               */

#include <asm/div64.h>              /* do_div()                         */
#if defined(NV_ASM_SYSTEM_H_PRESENT)
#include <asm/system.h>             /* cli, sli, save_flags             */
#endif
#include <asm/io.h>                 /* ioremap, virt_to_phys            */
#include <asm/uaccess.h>            /* access_ok                        */
#include <asm/page.h>               /* PAGE_OFFSET                      */
#include <asm/pgtable.h>            /* pte bit definitions              */
#include <asm/bitops.h>             /* __set_bit()                      */

#if defined(NV_LINUX_TIME_H_PRESENT)
#include <linux/time.h>             /* FD_SET()                         */
#endif

#include "nv-list-helpers.h"

/*
 * Use current->cred->euid, instead of calling current_euid().
 * The latter can pull in the GPL-only debug_lockdep_rcu_enabled()
 * symbol when CONFIG_PROVE_RCU is defined. That is only used for debugging.
 *
 * The Linux kernel relies on the assumption that only the current process
 * is permitted to change its cred structure. Therefore, current_euid()
 * does not require the RCU's read lock on current->cred.
 */
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))
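/*
 * Minimal usage sketch for NV_CURRENT_EUID() (a hypothetical root-only
 * check in an entry point, shown for illustration only):
 *
 *     if (NV_CURRENT_EUID() != 0)
 *         return -EPERM;
 */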
#if defined(CONFIG_VGA_ARB)
#include <linux/vgaarb.h>
#endif

#include <linux/pagemap.h>
#include <linux/dma-mapping.h>

#if defined(NV_LINUX_DMA_MAP_OPS_H_PRESENT)
#include <linux/dma-map-ops.h>
#endif

#if defined(CONFIG_SWIOTLB) && defined(NVCPU_AARCH64)
#include <linux/swiotlb.h>
#endif

#include <linux/scatterlist.h>
#include <linux/completion.h>
#include <linux/highmem.h>

#include <linux/nodemask.h>
#include <linux/memory.h>

#include <linux/workqueue.h>        /* workqueue                        */
#include "nv-kthread-q.h"           /* kthread based queue              */

#if defined(NV_LINUX_EFI_H_PRESENT)
#include <linux/efi.h>              /* efi_enabled                      */
#endif

#include <linux/fb.h>               /* fb_info struct                   */
#include <linux/screen_info.h>      /* screen_info                      */

#if !defined(CONFIG_PCI)
#warning "Attempting to build driver for a platform with no PCI support!"
#include <asm-generic/pci-dma-compat.h>
#endif

#if defined(CONFIG_CRAY_XT)
#include <cray/cray_nvidia.h>
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
        const char *, va_list);
#endif

#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
#include <asm/eeh.h>
#define NV_PCI_ERROR_RECOVERY_ENABLED() eeh_enabled()
#define NV_PCI_ERROR_RECOVERY
#endif

#if defined(NV_ASM_SET_MEMORY_H_PRESENT)
#include <asm/set_memory.h>
#endif

#if defined(NV_SET_MEMORY_UC_PRESENT)
#undef NV_SET_PAGES_UC_PRESENT
#endif

#if !defined(NVCPU_AARCH64) && !defined(NVCPU_PPC64LE)
#if !defined(NV_SET_MEMORY_UC_PRESENT) && !defined(NV_SET_PAGES_UC_PRESENT)
#error "This driver requires the ability to change memory types!"
#endif
#endif

/*
 * Traditionally, CONFIG_XEN indicated that the target kernel was
 * built exclusively for use under a Xen hypervisor, requiring
 * modifications to or disabling of a variety of NVIDIA graphics
 * driver code paths. As of the introduction of CONFIG_PARAVIRT
 * and support for Xen hypervisors within the CONFIG_PARAVIRT_GUEST
 * architecture, CONFIG_XEN merely indicates that the target
 * kernel can run under a Xen hypervisor, but not that it will.
 *
 * If CONFIG_XEN and CONFIG_PARAVIRT are defined, the old Xen
 * specific code paths are disabled. If the target kernel executes
 * stand-alone, the NVIDIA graphics driver will work fine. If the
 * kernel executes under a Xen (or other) hypervisor, however, the
 * NVIDIA graphics driver has no way of knowing and is unlikely
 * to work correctly.
 */
#if defined(CONFIG_XEN) && !defined(CONFIG_PARAVIRT)
#include <asm/maddr.h>
#include <xen/interface/memory.h>
#define NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL
#endif
#ifdef CONFIG_KDB
#include <linux/kdb.h>
#include <asm/kdb.h>
#endif

#if defined(CONFIG_X86_REMOTE_DEBUG)
#include <linux/gdb.h>
#endif

#if defined(DEBUG) && defined(CONFIG_KGDB) && \
    defined(NVCPU_AARCH64)
#include <asm/kgdb.h>
#endif

#if defined(NVCPU_X86_64) && !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
#define NV_ENABLE_PAT_SUPPORT
#endif

#define NV_PAT_MODE_DISABLED    0
#define NV_PAT_MODE_KERNEL      1
#define NV_PAT_MODE_BUILTIN     2

extern int nv_pat_mode;

#if defined(CONFIG_HOTPLUG_CPU)
#define NV_ENABLE_HOTPLUG_CPU
#include <linux/notifier.h>         /* struct notifier_block, etc       */
#endif

#if (defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE))
#include <linux/i2c.h>
#endif

#if defined(CONFIG_ACPI)
#include <linux/acpi.h>
#define NV_LINUX_ACPI_EVENTS_SUPPORTED 1
#endif

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
#define NV_ACPI_WALK_NAMESPACE(type, start_object, max_depth, \
        user_function, args...) \
    acpi_walk_namespace(type, start_object, max_depth, \
            user_function, NULL, args)
#endif

#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
#define NV_CONFIG_PREEMPT_RT 1
#endif

#if defined(NV_WRITE_CR4_PRESENT)
#define NV_READ_CR4()       read_cr4()
#define NV_WRITE_CR4(cr4)   write_cr4(cr4)
#else
#define NV_READ_CR4()       __read_cr4()
#define NV_WRITE_CR4(cr4)   __write_cr4(cr4)
#endif

#ifndef get_cpu
#define get_cpu() smp_processor_id()
#define put_cpu()
#endif

#if !defined(unregister_hotcpu_notifier)
#define unregister_hotcpu_notifier unregister_cpu_notifier
#endif
#if !defined(register_hotcpu_notifier)
#define register_hotcpu_notifier register_cpu_notifier
#endif

#if defined(NVCPU_X86_64)
#if !defined(pmd_large)
#define pmd_large(_pmd) \
    ((pmd_val(_pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
#endif
#endif /* defined(NVCPU_X86_64) */

#define NV_PAGE_COUNT(page) \
    ((unsigned int)page_count(page))
#define NV_GET_PAGE_COUNT(page_ptr) \
    (NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)))
#define NV_GET_PAGE_FLAGS(page_ptr) \
    (NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)

/*
 * Before the introduction of VM_PFNMAP, there was a VM_UNPAGED flag.
 * Drivers which wanted to call remap_pfn_range on normal pages had to use this
 * VM_UNPAGED flag *and* set PageReserved. With the introduction of VM_PFNMAP,
 * that restriction went away. This is described in commit
 *
 *   2005-10-28 6aab341e0a28aff100a09831c5300a2994b8b986
 *   ("mm: re-architect the VM_UNPAGED logic")
 *
 * , which added VM_PFNMAP and vm_normal_page. Therefore, if VM_PFNMAP is
 * defined, then we do *not* need to mark a page as reserved, in order to
 * call remap_pfn_range().
 */
#if !defined(VM_PFNMAP)
#define NV_MAYBE_RESERVE_PAGE(page_ptr) \
    SetPageReserved(NV_GET_PAGE_STRUCT(page_ptr->phys_addr))
#define NV_MAYBE_UNRESERVE_PAGE(page_ptr) \
    ClearPageReserved(NV_GET_PAGE_STRUCT(page_ptr->phys_addr))
#else
#define NV_MAYBE_RESERVE_PAGE(page_ptr)
#define NV_MAYBE_UNRESERVE_PAGE(page_ptr)
#endif /* defined(VM_PFNMAP) */
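/*
 * Sketch of the intended pairing, assuming a nvidia_pte_t *page_ptr for a
 * normal page about to be remapped (a hypothetical call site, shown only to
 * illustrate the no-op/SetPageReserved duality described above):
 *
 *     NV_MAYBE_RESERVE_PAGE(page_ptr);
 *     // ... remap_pfn_range(vma, start, pfn, size, prot) ...
 *     NV_MAYBE_UNRESERVE_PAGE(page_ptr);
 */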
#if !defined(__GFP_COMP)
#define __GFP_COMP 0
#endif

#if !defined(DEBUG) && defined(__GFP_NOWARN)
#define NV_GFP_KERNEL (GFP_KERNEL | __GFP_NOWARN)
#define NV_GFP_ATOMIC (GFP_ATOMIC | __GFP_NOWARN)
#else
#define NV_GFP_KERNEL (GFP_KERNEL)
#define NV_GFP_ATOMIC (GFP_ATOMIC)
#endif

#if defined(GFP_DMA32)
/*
 * GFP_DMA32 is similar to GFP_DMA, but instructs the Linux zone
 * allocator to allocate memory from the first 4GB on platforms
 * such as Linux/x86-64; the alternative is to use an IOMMU such
 * as the one implemented with the K8 GART, if available.
 */
#define NV_GFP_DMA32 (NV_GFP_KERNEL | GFP_DMA32)
#else
#define NV_GFP_DMA32 (NV_GFP_KERNEL)
#endif

extern NvBool nvos_is_chipset_io_coherent(void);

#if defined(NVCPU_X86_64)
#define CACHE_FLUSH()            asm volatile("wbinvd":::"memory")
#define WRITE_COMBINE_FLUSH()    asm volatile("sfence":::"memory")
#elif defined(NVCPU_AARCH64)
    static inline void nv_flush_cache_cpu(void *info)
    {
        if (!nvos_is_chipset_io_coherent())
        {
#if defined(NV_FLUSH_CACHE_ALL_PRESENT)
            flush_cache_all();
#else
            WARN_ONCE(0, "NVRM: kernel does not support flush_cache_all()\n");
#endif
        }
    }
#define CACHE_FLUSH()            nv_flush_cache_cpu(NULL)
#define CACHE_FLUSH_ALL()        on_each_cpu(nv_flush_cache_cpu, NULL, 1)
#define WRITE_COMBINE_FLUSH()    mb()
#elif defined(NVCPU_PPC64LE)
#define CACHE_FLUSH()            asm volatile("sync;  \n" \
                                              "isync; \n" ::: "memory")
#define WRITE_COMBINE_FLUSH()    CACHE_FLUSH()
#endif
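/*
 * Typical use, as a sketch: after the CPU writes through a write-combined
 * mapping, flush the WC buffers before expecting the device to observe the
 * data ('wc_mapping', 'data' and 'len' are placeholders):
 *
 *     memcpy_toio(wc_mapping, data, len);
 *     WRITE_COMBINE_FLUSH();
 */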
typedef enum
{
    NV_MEMORY_TYPE_SYSTEM,      /* Memory mapped for ROM, SBIOS and physical RAM. */
    NV_MEMORY_TYPE_REGISTERS,
    NV_MEMORY_TYPE_FRAMEBUFFER,
    NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred by NVRM e.g. BARs and MCFG of device */
} nv_memory_type_t;

#if defined(NVCPU_AARCH64) || defined(NVCPU_PPC64LE)
#define NV_ALLOW_WRITE_COMBINING(mt)    1
#elif defined(NVCPU_X86_64)
#if defined(NV_ENABLE_PAT_SUPPORT)
#define NV_ALLOW_WRITE_COMBINING(mt)    \
    ((nv_pat_mode != NV_PAT_MODE_DISABLED) && \
     ((mt) != NV_MEMORY_TYPE_REGISTERS))
#else
#define NV_ALLOW_WRITE_COMBINING(mt)    0
#endif
#endif

#if !defined(IRQF_SHARED)
#define IRQF_SHARED SA_SHIRQ
#endif

#define NV_MAX_RECURRING_WARNING_MESSAGES 10

/* various memory tracking/debugging techniques
 * disabled for retail builds, enabled for debug builds
 */

// allow an easy way to convert all debug printfs related to memory
// management back and forth between 'info' and 'errors'
#if defined(NV_DBG_MEM)
#define NV_DBG_MEMINFO NV_DBG_ERRORS
#else
#define NV_DBG_MEMINFO NV_DBG_INFO
#endif

#define NV_MEM_TRACKING_PAD_SIZE(size) \
    (size) = NV_ALIGN_UP((size + sizeof(void *)), sizeof(void *))

#define NV_MEM_TRACKING_HIDE_SIZE(ptr, size)            \
    if ((ptr != NULL) && (*(ptr) != NULL))              \
    {                                                   \
        NvU8 *__ptr;                                    \
        *(unsigned long *) *(ptr) = (size);             \
        __ptr = *(ptr); __ptr += sizeof(void *);        \
        *(ptr) = (void *) __ptr;                        \
    }
#define NV_MEM_TRACKING_RETRIEVE_SIZE(ptr, size)        \
    {                                                   \
        NvU8 *__ptr = (ptr); __ptr -= sizeof(void *);   \
        (ptr) = (void *) __ptr;                         \
        (size) = *(unsigned long *) (ptr);              \
    }
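/*
 * Worked example of the size round-trip (LP64, sizeof(void *) == 8): a
 * request of 120 bytes is padded to 128, the size is stored in the first
 * word, and the caller's pointer is advanced past it; retrieval reverses
 * both steps.
 *
 *     unsigned long size = 120;
 *     void *ptr;
 *     NV_MEM_TRACKING_PAD_SIZE(size);           // size == 128
 *     // ... allocate 'size' bytes into ptr ...
 *     NV_MEM_TRACKING_HIDE_SIZE(&ptr, size);    // stores 128, ptr += 8
 *     // ... caller uses ptr ...
 *     NV_MEM_TRACKING_RETRIEVE_SIZE(ptr, size); // ptr -= 8, size == 128
 */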
/* keep track of memory usage */
#include "nv-memdbg.h"

static inline void *nv_vmalloc(unsigned long size)
{
#if defined(NV_VMALLOC_HAS_PGPROT_T_ARG)
    void *ptr = __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
#else
    void *ptr = __vmalloc(size, GFP_KERNEL);
#endif
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
}

static inline void nv_vfree(void *ptr, NvU64 size)
{
    NV_MEMDBG_REMOVE(ptr, size);
    vfree(ptr);
}

static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
    void *ptr = ioremap(phys, size);
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
}

static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
{
    return nv_ioremap(phys, size);
}

static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_CACHE_PRESENT)
    void *ptr = ioremap_cache(phys, size);
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
#elif defined(NVCPU_PPC64LE)
    //
    // ioremap_cache() has only been implemented correctly for ppc64le with
    // commit f855b2f544d6 in April 2017 (kernel 4.12+). Internally, the kernel
    // does provide a default implementation of ioremap_cache() that would be
    // incorrect for our use (creating an uncached mapping) before the
    // referenced commit, but that implementation is not exported and the
    // NV_IOREMAP_CACHE_PRESENT conftest doesn't pick it up, and we end up in
    // this #elif branch.
    //
    // At the same time, ppc64le has supported ioremap_prot() since May 2011
    // (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
    // support on power.
    //
    void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
#else
    return nv_ioremap(phys, size);
#endif
}

static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_WC_PRESENT)
    void *ptr = ioremap_wc(phys, size);
    if (ptr)
        NV_MEMDBG_ADD(ptr, size);
    return ptr;
#else
    return nv_ioremap_nocache(phys, size);
#endif
}

static inline void nv_iounmap(void *ptr, NvU64 size)
{
    NV_MEMDBG_REMOVE(ptr, size);
    iounmap(ptr);
}
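/*
 * Usage sketch for the nv_ioremap*() family ('phys' and 'size' are
 * placeholders): each successful mapping is tracked via NV_MEMDBG_ADD(),
 * so the unmap must pass the same size back for the accounting to balance.
 *
 *     void *regs = nv_ioremap_nocache(phys, size);
 *     if (regs != NULL)
 *     {
 *         // ... MMIO accesses ...
 *         nv_iounmap(regs, size);
 *     }
 */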
static inline NvBool nv_numa_node_has_memory(int node_id)
{
    if (node_id < 0 || node_id >= MAX_NUMNODES)
        return NV_FALSE;
    return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
}

#define NV_KMALLOC(ptr, size) \
    { \
        (ptr) = kmalloc(size, NV_GFP_KERNEL); \
        if (ptr) \
            NV_MEMDBG_ADD(ptr, size); \
    }

#define NV_KZALLOC(ptr, size) \
    { \
        (ptr) = kzalloc(size, NV_GFP_KERNEL); \
        if (ptr) \
            NV_MEMDBG_ADD(ptr, size); \
    }

#define NV_KMALLOC_ATOMIC(ptr, size) \
    { \
        (ptr) = kmalloc(size, NV_GFP_ATOMIC); \
        if (ptr) \
            NV_MEMDBG_ADD(ptr, size); \
    }

#if defined(__GFP_RETRY_MAYFAIL)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_RETRY_MAYFAIL)
#elif defined(__GFP_NORETRY)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_NORETRY)
#else
#define NV_GFP_NO_OOM (NV_GFP_KERNEL)
#endif

#define NV_KMALLOC_NO_OOM(ptr, size) \
    { \
        (ptr) = kmalloc(size, NV_GFP_NO_OOM); \
        if (ptr) \
            NV_MEMDBG_ADD(ptr, size); \
    }

#define NV_KFREE(ptr, size) \
    { \
        NV_MEMDBG_REMOVE(ptr, size); \
        kfree((void *) (ptr)); \
    }

#define NV_ALLOC_PAGES_NODE(ptr, nid, order, gfp_mask) \
    { \
        (ptr) = (unsigned long)page_address(alloc_pages_node(nid, gfp_mask, order)); \
    }

#define NV_GET_FREE_PAGES(ptr, order, gfp_mask) \
    { \
        (ptr) = __get_free_pages(gfp_mask, order); \
    }

#define NV_FREE_PAGES(ptr, order) \
    { \
        free_pages(ptr, order); \
    }
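/*
 * Usage sketch for the allocation macros: they are statements, not
 * expressions, and the original size must be passed back to NV_KFREE so the
 * leak accounting balances.
 *
 *     void *buf = NULL;
 *     NV_KMALLOC(buf, 128);
 *     if (buf == NULL)
 *         return NV_ERR_NO_MEMORY;
 *     // ... use buf ...
 *     NV_KFREE(buf, 128);
 */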
extern NvU64 nv_shared_gpa_boundary;

static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
    /*
     * When AMD memory encryption is enabled, device memory mappings with the
     * C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
     *
     * If cc_mkdec() is present, then pgprot_decrypted() can't be used.
     */
#if defined(NV_CC_MKDEC_PRESENT)
    if (nv_shared_gpa_boundary != 0)
    {
        /*
         * By design, a VM using vTOM doesn't see the SEV setting and
         * for AMD with vTOM, *set* means decrypted.
         */
        prot = __pgprot(nv_shared_gpa_boundary | (pgprot_val(vm_prot)));
    }
    else
    {
        prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
    }
#else
    prot = pgprot_decrypted(prot);
#endif
#endif

    return prot;
}

#if defined(PAGE_KERNEL_NOENC)
#if defined(__pgprot_mask)
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)
#elif defined(default_pgprot)
#define NV_PAGE_KERNEL_NOCACHE_NOENC default_pgprot(__PAGE_KERNEL_NOCACHE)
#elif defined( __pgprot)
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot(__PAGE_KERNEL_NOCACHE)
#else
#error "Unsupported kernel!!!"
#endif
#endif

static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
                             NvBool cached, NvBool unencrypted)
{
    void *ptr;
    pgprot_t prot = PAGE_KERNEL;
#if defined(NVCPU_X86_64)
#if defined(PAGE_KERNEL_NOENC)
    if (unencrypted)
    {
        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
    }
    else
#endif
    {
        prot = cached ? PAGE_KERNEL : PAGE_KERNEL_NOCACHE;
    }
#elif defined(NVCPU_AARCH64)
    prot = cached ? PAGE_KERNEL : NV_PGPROT_UNCACHED(PAGE_KERNEL);
#endif
    /* All memory cached in PPC64LE; can't honor 'cached' input. */
    ptr = vmap(pages, page_count, VM_MAP, prot);
    if (ptr)
        NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
    return (NvUPtr)ptr;
}

static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
{
    vunmap((void *)vaddr);
    NV_MEMDBG_REMOVE((void *)vaddr, page_count * PAGE_SIZE);
}
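/*
 * Usage sketch for nv_vmap()/nv_vunmap(), assuming an already-populated
 * struct page array; the page_count passed to nv_vunmap() must match the
 * mapping for the NV_MEMDBG accounting to balance.
 *
 *     NvUPtr va = nv_vmap(pages, page_count, NV_TRUE, NV_FALSE);
 *     // cached == NV_TRUE, unencrypted == NV_FALSE
 *     if (va != 0)
 *     {
 *         // ... CPU accesses through (void *)va ...
 *         nv_vunmap(va, page_count);
 *     }
 */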
#if defined(NV_GET_NUM_PHYSPAGES_PRESENT)
#define NV_NUM_PHYSPAGES                get_num_physpages()
#else
#define NV_NUM_PHYSPAGES                num_physpages
#endif
#define NV_GET_CURRENT_PROCESS()        current->tgid
#define NV_IN_ATOMIC()                  in_atomic()
#define NV_LOCAL_BH_DISABLE()           local_bh_disable()
#define NV_LOCAL_BH_ENABLE()            local_bh_enable()
#define NV_COPY_TO_USER(to, from, n)    copy_to_user(to, from, n)
#define NV_COPY_FROM_USER(to, from, n)  copy_from_user(to, from, n)

#define NV_IS_SUSER()                   capable(CAP_SYS_ADMIN)
#define NV_PCI_DEVICE_NAME(pci_dev)     ((pci_dev)->pretty_name)
#define NV_CLI()                        local_irq_disable()
#define NV_SAVE_FLAGS(eflags)           local_save_flags(eflags)
#define NV_RESTORE_FLAGS(eflags)        local_irq_restore(eflags)
#define NV_MAY_SLEEP()                  (!irqs_disabled() && !in_interrupt() && !NV_IN_ATOMIC())
#define NV_MODULE_PARAMETER(x)          module_param(x, int, 0)
#define NV_MODULE_STRING_PARAMETER(x)   module_param(x, charp, 0)
#undef  MODULE_PARM

#define NV_NUM_CPUS()                   num_possible_cpus()

static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
{
#if defined(NV_PHYS_TO_DMA_PRESENT)
    return phys_to_dma(dev, pa);
#elif defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
    return phys_to_machine(pa);
#else
    return (dma_addr_t)pa;
#endif
}

#define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
#define NV_VMA_PGOFF(vma)             ((vma)->vm_pgoff)
#define NV_VMA_SIZE(vma)              ((vma)->vm_end - (vma)->vm_start)
#define NV_VMA_OFFSET(vma)            (((NvU64)(vma)->vm_pgoff) << PAGE_SHIFT)
#define NV_VMA_PRIVATE(vma)           ((vma)->vm_private_data)
#define NV_VMA_FILE(vma)              ((vma)->vm_file)

#define NV_DEVICE_MINOR_NUMBER(x)     minor((x)->i_rdev)
#define NV_CONTROL_DEVICE_MINOR       255

#define NV_PCI_DISABLE_DEVICE(pci_dev)                           \
    {                                                            \
        NvU16 __cmd[2];                                          \
        pci_read_config_word((pci_dev), PCI_COMMAND, &__cmd[0]); \
        pci_disable_device(pci_dev);                             \
        pci_read_config_word((pci_dev), PCI_COMMAND, &__cmd[1]); \
        __cmd[1] |= PCI_COMMAND_MEMORY;                          \
        pci_write_config_word((pci_dev), PCI_COMMAND,            \
                (__cmd[1] | (__cmd[0] & PCI_COMMAND_IO)));       \
    }

#define NV_PCI_RESOURCE_START(pci_dev, bar) pci_resource_start(pci_dev, (bar))
#define NV_PCI_RESOURCE_SIZE(pci_dev, bar)  pci_resource_len(pci_dev, (bar))
#define NV_PCI_RESOURCE_FLAGS(pci_dev, bar) pci_resource_flags(pci_dev, (bar))

#define NV_PCI_RESOURCE_VALID(pci_dev, bar)             \
    ((NV_PCI_RESOURCE_START(pci_dev, bar) != 0) &&      \
     (NV_PCI_RESOURCE_SIZE(pci_dev, bar) != 0))

#define NV_PCI_DOMAIN_NUMBER(pci_dev) (NvU32)pci_domain_nr(pci_dev->bus)
#define NV_PCI_BUS_NUMBER(pci_dev)    (pci_dev)->bus->number
#define NV_PCI_DEVFN(pci_dev)         (pci_dev)->devfn
#define NV_PCI_SLOT_NUMBER(pci_dev)   PCI_SLOT(NV_PCI_DEVFN(pci_dev))

#if defined(CONFIG_X86_UV) && defined(NV_CONFIG_X86_UV)
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain, bus, devfn)                      \
   ({                                                                       \
        struct pci_dev *__dev = NULL;                                       \
        while ((__dev = pci_get_device(PCI_VENDOR_ID_NVIDIA,                \
                    PCI_ANY_ID, __dev)) != NULL)                            \
        {                                                                   \
            if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&                  \
                (NV_PCI_BUS_NUMBER(__dev) == bus) &&                        \
                (NV_PCI_DEVFN(__dev) == devfn))                             \
            {                                                               \
                break;                                                      \
            }                                                               \
        }                                                                   \
        if (__dev == NULL)                                                  \
        {                                                                   \
            while ((__dev = pci_get_class((PCI_CLASS_BRIDGE_HOST << 8),     \
                        __dev)) != NULL)                                    \
            {                                                               \
                if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&              \
                    (NV_PCI_BUS_NUMBER(__dev) == bus) &&                    \
                    (NV_PCI_DEVFN(__dev) == devfn))                         \
                {                                                           \
                    break;                                                  \
                }                                                           \
            }                                                               \
        }                                                                   \
        if (__dev == NULL)                                                  \
        {                                                                   \
            while ((__dev = pci_get_class((PCI_CLASS_BRIDGE_PCI << 8),      \
                        __dev)) != NULL)                                    \
            {                                                               \
                if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&              \
                    (NV_PCI_BUS_NUMBER(__dev) == bus) &&                    \
                    (NV_PCI_DEVFN(__dev) == devfn))                         \
                {                                                           \
                    break;                                                  \
                }                                                           \
            }                                                               \
        }                                                                   \
        if (__dev == NULL)                                                  \
        {                                                                   \
            while ((__dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID,          \
                        __dev)) != NULL)                                    \
            {                                                               \
                if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&              \
                    (NV_PCI_BUS_NUMBER(__dev) == bus) &&                    \
                    (NV_PCI_DEVFN(__dev) == devfn))                         \
                {                                                           \
                    break;                                                  \
                }                                                           \
            }                                                               \
        }                                                                   \
        __dev;                                                              \
    })
#elif defined(NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT)
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain, bus, devfn) \
    pci_get_domain_bus_and_slot(domain, bus, devfn)
#else
#define NV_GET_DOMAIN_BUS_AND_SLOT(domain, bus, devfn)              \
   ({                                                               \
        struct pci_dev *__dev = NULL;                               \
        while ((__dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID,      \
                    __dev)) != NULL)                                \
        {                                                           \
            if ((NV_PCI_DOMAIN_NUMBER(__dev) == domain) &&          \
                (NV_PCI_BUS_NUMBER(__dev) == bus) &&                \
                (NV_PCI_DEVFN(__dev) == devfn))                     \
            {                                                       \
                break;                                              \
            }                                                       \
        }                                                           \
        __dev;                                                      \
    })
#endif
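/*
 * Usage sketch: all NV_GET_DOMAIN_BUS_AND_SLOT() variants return a
 * referenced struct pci_dev, so the caller must drop the reference when
 * done. The domain/bus/devfn values here are placeholders.
 *
 *     struct pci_dev *dev = NV_GET_DOMAIN_BUS_AND_SLOT(0, 1, PCI_DEVFN(0, 0));
 *     if (dev != NULL)
 *     {
 *         // ... use dev ...
 *         pci_dev_put(dev);
 *     }
 */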
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
#endif

#define NV_PRINT_AT(nv_debug_level, at)                                      \
    {                                                                        \
        nv_printf(nv_debug_level,                                            \
            "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, " \
            "page_table = 0x%p\n",  __FUNCTION__, __LINE__, at,              \
            at->num_pages, NV_ATOMIC_READ(at->usage_count),                  \
            at->flags, at->page_table);                                      \
    }

#define NV_PRINT_VMA(nv_debug_level, vma)                                    \
    {                                                                        \
        nv_printf(nv_debug_level,                                            \
            "NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08x bytes @ 0x%016llx, 0x%p, 0x%p\n", \
            __FUNCTION__, __LINE__, vma->vm_start, vma->vm_end, NV_VMA_SIZE(vma), \
            NV_VMA_OFFSET(vma), NV_VMA_PRIVATE(vma), NV_VMA_FILE(vma));      \
    }

#ifndef minor
# define minor(x) MINOR(x)
#endif

#if defined(cpu_relax)
#define NV_CPU_RELAX() cpu_relax()
#else
#define NV_CPU_RELAX() barrier()
#endif

#ifndef IRQ_RETVAL
typedef void irqreturn_t;
#define IRQ_RETVAL(a)
#endif

#if !defined(PCI_COMMAND_SERR)
#define PCI_COMMAND_SERR         0x100
#endif
#if !defined(PCI_COMMAND_INTX_DISABLE)
#define PCI_COMMAND_INTX_DISABLE 0x400
#endif

#ifndef PCI_CAP_ID_EXP
#define PCI_CAP_ID_EXP 0x10
#endif

/*
 * On Linux on PPC64LE enable basic support for Linux PCI error recovery (see
 * Documentation/PCI/pci-error-recovery.txt). Currently RM only supports error
 * notification and data collection, not actual recovery of the device.
 */
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
#include <asm/eeh.h>
#define NV_PCI_ERROR_RECOVERY
#endif

/*
 * If the host OS has page sizes larger than 4KB, we may have a security
 * problem. Registers are typically grouped in 4KB pages, but if there are
 * larger pages, then the smallest userspace mapping possible (e.g., a page)
 * may give more access than intended to the user.
 */
#define NV_4K_PAGE_ISOLATION_REQUIRED(addr, size)       \
    ((PAGE_SIZE > NV_RM_PAGE_SIZE) &&                   \
     ((size) <= NV_RM_PAGE_SIZE) &&                     \
     (((addr) >> NV_RM_PAGE_SHIFT) ==                   \
      (((addr) + (size) - 1) >> NV_RM_PAGE_SHIFT)))

/*
 * The kernel may have a workaround for this, by providing a method to isolate
 * a single 4K page in a given mapping.
 */
#if (PAGE_SIZE > NV_RM_PAGE_SIZE) && defined(NVCPU_PPC64LE) && defined(NV_PAGE_4K_PFN)
#define NV_4K_PAGE_ISOLATION_PRESENT
#define NV_4K_PAGE_ISOLATION_MMAP_ADDR(addr)            \
    ((NvP64)((void*)(((addr) >> NV_RM_PAGE_SHIFT) << PAGE_SHIFT)))
#define NV_4K_PAGE_ISOLATION_MMAP_LEN(size)     PAGE_SIZE
#define NV_4K_PAGE_ISOLATION_ACCESS_START(addr)         \
    ((NvP64)((void*)((addr) & ~NV_RM_PAGE_MASK)))
#define NV_4K_PAGE_ISOLATION_ACCESS_LEN(addr, size)     \
    ((((addr) & NV_RM_PAGE_MASK) + size + NV_RM_PAGE_MASK) & \
     ~NV_RM_PAGE_MASK)
#define NV_PROT_4K_PAGE_ISOLATION NV_PAGE_4K_PFN
#endif
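/*
 * Worked example, assuming a 64 KB kernel PAGE_SIZE and 4 KB RM pages
 * (NV_RM_PAGE_SHIFT == 12): a 4 KB register window at 0x1000 lies entirely
 * within one RM page, so isolation is required; an 8 KB window exceeds
 * NV_RM_PAGE_SIZE, so the predicate is false.
 *
 *     NV_4K_PAGE_ISOLATION_REQUIRED(0x1000, 0x1000)   // true
 *     NV_4K_PAGE_ISOLATION_REQUIRED(0x1000, 0x2000)   // false
 */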
static inline int nv_remap_page_range(struct vm_area_struct *vma,
    unsigned long virt_addr, NvU64 phys_addr, NvU64 size, pgprot_t prot)
{
    int ret = -1;

#if defined(NV_4K_PAGE_ISOLATION_PRESENT) && defined(NV_PROT_4K_PAGE_ISOLATION)
    if ((size == PAGE_SIZE) &&
        ((pgprot_val(prot) & NV_PROT_4K_PAGE_ISOLATION) != 0))
    {
        /*
         * remap_4k_pfn() hardcodes the length to a single OS page, and checks
         * whether applying the page isolation workaround will cause PTE
         * corruption (in which case it will fail, and this is an unsupported
         * configuration).
         */
#if defined(NV_HASH__REMAP_4K_PFN_PRESENT)
        ret = hash__remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
#else
        ret = remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
#endif
    }
    else
#endif
    {
        ret = remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
            prot);
    }

    return ret;
}

static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
    NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
{
    int ret = -1;
#if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
    ret = nv_remap_page_range(vma, vma->vm_start, phys_addr, size,
        nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
#else
    ret = io_remap_pfn_range(vma, vma->vm_start, (phys_addr >> PAGE_SHIFT),
        size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
#endif
    return ret;
}
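/*
 * Sketch of a hypothetical mmap() handler built on nv_io_remap_page_range();
 * 'bar_phys_addr' is a placeholder for the physical address being exposed.
 *
 *     static int nv_example_mmap(struct file *file, struct vm_area_struct *vma)
 *     {
 *         if (nv_io_remap_page_range(vma, bar_phys_addr,
 *                                    NV_VMA_SIZE(vma), 0) != 0)
 *             return -EAGAIN;
 *         return 0;
 *     }
 */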
static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
    NvU64 virt_addr, NvU64 pfn, NvU32 extra_prot)
{
    /*
     * vm_insert_pfn{,_prot} replaced with vmf_insert_pfn{,_prot} in Linux 4.20
     */
#if defined(NV_VMF_INSERT_PFN_PROT_PRESENT)
    return vmf_insert_pfn_prot(vma, virt_addr, pfn,
             __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
#else
    int ret = -EINVAL;
    /*
     * Only PPC64LE (NV_4K_PAGE_ISOLATION_PRESENT) requires extra_prot to be
     * used when remapping.
     *
     * vm_insert_pfn_prot() was added in Linux 4.4, whereas POWER9 support
     * was added in Linux 4.8.
     *
     * Rather than tampering with the vma to make use of extra_prot with
     * vm_insert_pfn() on older kernels, for now, just fail in this case, as
     * it's not expected to be used currently.
     */
#if defined(NV_VM_INSERT_PFN_PROT_PRESENT)
    ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
            __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
#elif !defined(NV_4K_PAGE_ISOLATION_PRESENT)
    ret = vm_insert_pfn(vma, virt_addr, pfn);
#endif
    switch (ret)
    {
        case 0:
        case -EBUSY:
            /*
             * EBUSY indicates that another thread already handled
             * the faulted range.
             */
            return VM_FAULT_NOPAGE;
        case -ENOMEM:
            return VM_FAULT_OOM;
        default:
            break;
    }
#endif /* defined(NV_VMF_INSERT_PFN_PROT_PRESENT) */
    return VM_FAULT_SIGBUS;
}
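/*
 * Sketch of a hypothetical vm_ops->fault handler using nv_insert_pfn(), for
 * kernels where struct vm_fault carries the vma and faulting address:
 *
 *     static vm_fault_t nv_example_fault(struct vm_fault *vmf)
 *     {
 *         NvU64 pfn = 0; // resolve vmf->pgoff to a physical page here
 *         return nv_insert_pfn(vmf->vma, vmf->address, pfn, 0);
 *     }
 */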
/* Converts BAR index to Linux specific PCI BAR index */
static inline NvU8 nv_bar_index_to_os_bar_index
(
    struct pci_dev *dev,
    NvU8 nv_bar_index
)
{
    NvU8 bar_index = 0;
    NvU8 i;

    BUG_ON(nv_bar_index >= NV_GPU_NUM_BARS);

    for (i = 0; i < nv_bar_index; i++)
    {
        if (NV_PCI_RESOURCE_FLAGS(dev, bar_index) & PCI_BASE_ADDRESS_MEM_TYPE_64)
        {
            bar_index += 2;
        }
        else
        {
            bar_index++;
        }
    }

    return bar_index;
}

#define NV_PAGE_MASK (NvU64)(long)PAGE_MASK

extern void *nvidia_stack_t_cache;

/*
 * On Linux, when a kmem cache is created, a new sysfs entry is created for the
 * same unless it's merged with an existing cache. Upstream Linux kernel commit
 * 3b7b314053d021601940c50b07f5f1423ae67e21 (version 4.12+) made cache
 * destruction asynchronous, which creates a race between cache destroy and
 * create. A new cache created with the same attributes as a previous cache,
 * which is scheduled for destruction, can try to create a sysfs entry with the
 * same conflicting name. Upstream Linux kernel commit
 * d50d82faa0c964e31f7a946ba8aba7c715ca7ab0 (4.18) fixes this issue by cleaning
 * up the sysfs entry within slab_mutex, so the entry is deleted before a cache
 * with the same attributes could be created.
 *
 * To work around this kernel issue, we take two steps:
 * - Create unmergeable caches: a kmem_cache with a constructor is unmergeable.
 *   So, we define an empty constructor for the same. Creating an unmergeable
 *   cache ensures that the kernel doesn't generate an internal name and always
 *   uses our name instead.
 *
 * - Generate a unique cache name by appending the current timestamp (ns). We
 *   wait for the timestamp to increment by at least one to ensure that we do
 *   not hit a name conflict in a cache create -> destroy (async) -> create
 *   cycle.
 */
#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
static inline void nv_kmem_ctor_dummy(void *arg)
{
    (void)arg;
}
#else
#define nv_kmem_ctor_dummy NULL
#endif

#define NV_KMEM_CACHE_CREATE(name, type)    \
    nv_kmem_cache_create(name, sizeof(type), 0)

/* The NULL pointer check is required for kernels older than 4.3 */
#define NV_KMEM_CACHE_DESTROY(kmem_cache)   \
    if (kmem_cache != NULL)                 \
    {                                       \
        kmem_cache_destroy(kmem_cache);     \
    }

#define NV_KMEM_CACHE_ALLOC(kmem_cache)     \
    kmem_cache_alloc(kmem_cache, GFP_KERNEL)
#define NV_KMEM_CACHE_FREE(ptr, kmem_cache) \
    kmem_cache_free(kmem_cache, ptr)
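/*
 * Lifecycle sketch for the cache macros ('nv_foo_t' is a hypothetical type);
 * NV_KMEM_CACHE_DESTROY() already tolerates a NULL cache.
 *
 *     struct kmem_cache *cache = NV_KMEM_CACHE_CREATE("nv_foo_t", nv_foo_t);
 *     nv_foo_t *obj = (cache != NULL) ? NV_KMEM_CACHE_ALLOC(cache) : NULL;
 *     if (obj != NULL)
 *         NV_KMEM_CACHE_FREE(obj, cache);
 *     NV_KMEM_CACHE_DESTROY(cache);
 */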
static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
    /*
     * We cannot call kmem_cache_zalloc directly as it adds the __GFP_ZERO
     * flag. This flag together with the presence of a slab constructor is
     * flagged as a potential bug by the Linux kernel since it is the role
     * of a constructor to fill an allocated object with the desired
     * pattern. In our case, we specified a (dummy) constructor as a
     * workaround for a bug and not to zero-initialize objects. So, we take
     * the pain here to memset allocated objects ourselves.
     */
    void *object = kmem_cache_alloc(k, flags);
    if (object)
        memset(object, 0, kmem_cache_size(k));
    return object;
#else
    return kmem_cache_zalloc(k, flags);
#endif
}

static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
{
    nvidia_stack_t *sp = NULL;
#if defined(NVCPU_X86_64)
    if (rm_is_altstack_in_use())
    {
        sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
        if (sp == NULL)
            return -ENOMEM;
        sp->size = sizeof(sp->stack);
        sp->top = sp->stack + sp->size;
    }
#endif
    *stack = sp;
    return 0;
}

static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
{
#if defined(NVCPU_X86_64)
    if (stack != NULL && rm_is_altstack_in_use())
    {
        NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
    }
#endif
}
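/*
 * Usage sketch: on x86-64 with the alternate stack enabled these helpers
 * hand out a real stack; elsewhere they hand back NULL, which callers are
 * expected to tolerate.
 *
 *     nvidia_stack_t *sp = NULL;
 *     if (nv_kmem_cache_alloc_stack(&sp) != 0)
 *         return -ENOMEM;
 *     // ... pass sp to RM entry points ...
 *     nv_kmem_cache_free_stack(sp);
 */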
#if defined(NVCPU_X86_64)
/*
 * RAM is cached on Linux by default, we can assume there's
 * nothing to be done here. This is not the case for the
 * other memory spaces: we will have made an attempt to add
 * a WC MTRR for the frame buffer.
 *
 * If a WC MTRR is present, we can't satisfy the WB mapping
 * attempt here, since the achievable effective memory
 * types in that case are WC and UC, if not it's typically
 * UC (MTRRdefType is UC); we could only satisfy WB mapping
 * requests with a WB MTRR.
 */
#define NV_ALLOW_CACHING(mt)    ((mt) == NV_MEMORY_TYPE_SYSTEM)
#else
#define NV_ALLOW_CACHING(mt)    ((mt) != NV_MEMORY_TYPE_REGISTERS)
#endif

typedef struct nvidia_pte_s {
    NvU64           phys_addr;
    unsigned long   virt_addr;
    NvU64           dma_addr;
#ifdef CONFIG_XEN
    unsigned int    guest_pfn;
#endif
    unsigned int    page_count;
} nvidia_pte_t;

typedef struct nv_alloc_s {
    struct nv_alloc_s *next;
    struct device     *dev;
    atomic_t       usage_count;
    struct {
        NvBool contig      : 1;
        NvBool guest       : 1;
        NvBool zeroed      : 1;
        NvBool aliased     : 1;
        NvBool user        : 1;
        NvBool node        : 1;
        NvBool peer_io     : 1;
        NvBool physical    : 1;
        NvBool unencrypted : 1;
        NvBool coherent    : 1;
    } flags;
    unsigned int   cache_type;
    unsigned int   num_pages;
    unsigned int   order;
    unsigned int   size;
    nvidia_pte_t **page_table;          /* list of physical pages allocated */
    unsigned int   pid;
    struct page  **user_pages;
    NvU64          guest_id;            /* id of guest VM */
    NvS32          node_id;             /* Node id for memory allocation when node is set in flags */
    void          *import_priv;
    struct sg_table *import_sgt;
} nv_alloc_t;

/**
 * nv_is_dma_direct - return true if direct_dma is enabled
 *
 * Starting with the 5.0 kernel, SWIOTLB is merged into
 * direct_dma, so systems without an IOMMU use direct_dma. We
 * need to know if this is the case, so that we can use a
 * different check for SWIOTLB enablement.
 */
static inline NvBool nv_is_dma_direct(struct device *dev)
{
    NvBool is_direct = NV_FALSE;

#if defined(NV_DMA_IS_DIRECT_PRESENT)
    if (dma_is_direct(get_dma_ops(dev)))
        is_direct = NV_TRUE;
#endif

    return is_direct;
}

/**
 * nv_dma_maps_swiotlb - return NV_TRUE if swiotlb is enabled
 *
 * SWIOTLB creates bounce buffers for the DMA mapping layer to
 * use if a driver asks the kernel to map a DMA buffer that is
 * outside of the device's addressable range. The driver does
 * not function correctly if bounce buffers are enabled for the
 * device. So if SWIOTLB is enabled, we should avoid making
 * mapping calls.
 */
static inline NvBool
nv_dma_maps_swiotlb(struct device *dev)
{
    NvBool swiotlb_in_use = NV_FALSE;
#if defined(CONFIG_SWIOTLB)
  #if defined(NV_DMA_OPS_PRESENT) || defined(NV_GET_DMA_OPS_PRESENT) || \
      defined(NV_SWIOTLB_DMA_OPS_PRESENT)
    /*
     * We only use the 'dma_ops' symbol on older x86_64 kernels; later kernels,
     * including those for other architectures, have converged on the
     * get_dma_ops() interface.
     */
    #if defined(NV_GET_DMA_OPS_PRESENT)
    /*
     * The __attribute__ ((unused)) is necessary because in at least one
     * case, *none* of the preprocessor branches below are taken, and
     * so the ops variable ends up never being referred to at all. This can
     * happen with the (NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs == 1)
     * case.
     */
    const struct dma_map_ops *ops __attribute__ ((unused)) = get_dma_ops(dev);
    #else
    const struct dma_mapping_ops *ops __attribute__ ((unused)) = dma_ops;
    #endif

    /*
     * The switch from dma_mapping_ops -> dma_map_ops coincided with the
     * switch from swiotlb_map_sg -> swiotlb_map_sg_attrs.
     */
    #if defined(NVCPU_AARCH64) && \
        defined(NV_NONCOHERENT_SWIOTLB_DMA_OPS_PRESENT)
    /* AArch64 exports these symbols directly */
    swiotlb_in_use = ((ops == &noncoherent_swiotlb_dma_ops) ||
                      (ops == &coherent_swiotlb_dma_ops));
    #elif NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs != 0
    swiotlb_in_use = (ops->map_sg == swiotlb_map_sg_attrs);
    #elif NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_dma_ops != 0
    swiotlb_in_use = (ops == &swiotlb_dma_ops);
    #endif
    /*
     * The "else" case that is not shown
     * (for NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_map_sg_attrs == 0 ||
     * NV_IS_EXPORT_SYMBOL_PRESENT_swiotlb_dma_ops == 0) does
     * nothing, and ends up dropping us out to the last line of this function,
     * effectively returning false. The nearly-human-readable version of that
     * case is "struct swiotlb_dma_ops is present (NV_SWIOTLB_DMA_OPS_PRESENT
     * is defined) but neither swiotlb_map_sg_attrs nor swiotlb_dma_ops is
     * present".
     *
     * That can happen on kernels that fall within the range below:
     *
     *   2017-12-24  4bd89ed39b2ab8dc4ac4b6c59b07d420b0213bec
     *               ("swiotlb: remove various exports")
     *   2018-06-28  210d0797c97d0e8f3b1a932a0dc143f4c57008a3
     *               ("swiotlb: export swiotlb_dma_ops")
     *
     * Related to this: Between the above two commits, this driver has no way
     * of detecting whether or not the SWIOTLB is in use. Furthermore, the
     * driver cannot support DMA remapping. That leads to the following
     * point: "swiotlb=force" is not supported for kernels falling in that
     * range.
     *
     * The other "else" case that is not shown:
     * Starting with the 5.0 kernel, swiotlb is integrated into dma_direct,
     * which is used when there's no IOMMU. In these kernels, ops == NULL,
     * swiotlb_dma_ops no longer exists, and we do not support swiotlb=force
     * (doing so would require detecting when swiotlb=force is enabled and
     * then returning NV_TRUE even when dma_direct is in use). So for now,
     * we just return NV_FALSE and in nv_compute_gfp_mask() we check for
     * whether swiotlb could possibly be used (outside of swiotlb=force).
     */
  #endif

    /*
     * Commit 2017-11-07 d7b417fa08d ("x86/mm: Add DMA support for
     * SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
     * is active in all cases.
     */
    if (os_sev_enabled)
        swiotlb_in_use = NV_TRUE;
#endif

    return swiotlb_in_use;
}

/*
 * TODO: Bug 1522381 will allow us to move these mapping relationships into
 * common code.
 */

/*
 * Bug 1606851: the Linux kernel scatterlist code doesn't work for regions
 * greater than or equal to 4GB, due to regular use of unsigned int
 * throughout. So we need to split our mappings into 4GB-minus-1-page-or-less
 * chunks and manage them separately.
 */
typedef struct nv_dma_submap_s {
    NvU32 page_count;
    NvU32 sg_map_count;
    struct sg_table sgt;
    NvBool imported;
} nv_dma_submap_t;

typedef struct nv_dma_map_s {
    struct page **pages;
    NvU64 page_count;
    NvBool contiguous;
    NvU32 cache_type;
    struct sg_table *import_sgt;

    union
    {
        struct
        {
            NvU32 submap_count;
            nv_dma_submap_t *submaps;
        } discontig;

        struct
        {
            NvU64 dma_addr;
        } contig;
    } mapping;

    struct device *dev;
} nv_dma_map_t;

#define NV_FOR_EACH_DMA_SUBMAP(dm, sm, i)                                     \
    for (i = 0, sm = &dm->mapping.discontig.submaps[0];                       \
         i < dm->mapping.discontig.submap_count;                              \
         i++, sm = &dm->mapping.discontig.submaps[i])

#define NV_DMA_SUBMAP_MAX_PAGES           ((NvU32)(NV_U32_MAX >> PAGE_SHIFT))
#define NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(s)  (s * NV_DMA_SUBMAP_MAX_PAGES)

/*
 * DO NOT use sg_alloc_table_from_pages on Xen Server, even if it's available.
 * This will glom multiple pages into a single sg element, which
 * xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
 * single-page sg elements on Xen Server.
 */
#if !defined(NV_DOM0_KERNEL_PRESENT)
    #define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i)                        \
        ((sg_alloc_table_from_pages(&sm->sgt,                                 \
            &dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)],                     \
            sm->page_count, 0,                                                \
            sm->page_count * PAGE_SIZE, NV_GFP_KERNEL) == 0) ? NV_OK :        \
                NV_ERR_OPERATING_SYSTEM)
#else
    #define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i)                        \
        ((sg_alloc_table(&sm->sgt, sm->page_count, NV_GFP_KERNEL)) ==         \
            0 ? NV_OK : NV_ERR_OPERATING_SYSTEM)
#endif
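/*
 * One way the 4GB split described above could be computed (a sketch, not
 * the driver's actual allocation path): ceil-divide the total page count by
 * NV_DMA_SUBMAP_MAX_PAGES to size the submap array.
 *
 *     NvU64 submap_count = (dm->page_count + NV_DMA_SUBMAP_MAX_PAGES - 1) /
 *                          NV_DMA_SUBMAP_MAX_PAGES;
 */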
typedef struct nv_ibmnpu_info nv_ibmnpu_info_t;

typedef struct nv_work_s {
    struct work_struct task;
    void *data;
} nv_work_t;

#define NV_MAX_REGISTRY_KEYS_LENGTH   512

typedef enum
{
    NV_DEV_STACK_TIMER,
    NV_DEV_STACK_ISR,
    NV_DEV_STACK_ISR_BH,
    NV_DEV_STACK_ISR_BH_UNLOCKED,
    NV_DEV_STACK_GPU_WAKEUP,
    NV_DEV_STACK_COUNT
} nvidia_linux_dev_stack_t;

/* Linux version of the opaque type used for os_queue_work_item() */
struct os_work_queue {
    nv_kthread_q_t nvk;
};

/* Linux version of the opaque type used for os_wait_*() */
struct os_wait_queue {
    struct completion q;
};

/*
 * To report an error in msi/msix when the unhandled count reaches a threshold
 */
typedef struct nv_irq_count_info_s
{
    int    irq;
    NvU64  unhandled;
    NvU64  total;
    NvU64  last_unhandled;
} nv_irq_count_info_t;

/* Linux-specific version of nv_dma_device_t */
struct nv_dma_device {
    struct {
        NvU64 start;
        NvU64 limit;
    } addressable_range;

    struct device *dev;
    NvBool nvlink;
};

/* Properties of the coherent link */
typedef struct coherent_link_info_s {
    /* Physical address of the GPU memory in the SOC AMAP. In a bare-metal
     * OS environment it is a System Physical Address (SPA), and in a
     * virtualized OS environment it is an Intermediate Physical Address
     * (IPA). */
    NvU64 gpu_mem_pa;
    /* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
     * available for adding GPU memory to the kernel as system RAM */
    DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
} coherent_link_info_t;

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
/*
 * acpi data storage structure
 *
 * This structure retains the pointer to the device,
 * and any other baggage we want to carry along
 */
typedef struct
{
    nvidia_stack_t *sp;
    struct acpi_device *device;
    struct acpi_handle *handle;
    void *notifier_data;
    int notify_handler_installed;
} nv_acpi_t;
#endif

/* linux-specific version of old nv_state_t */
/* this is a general os-specific state structure. the first element *must* be
   the general state structure, for the generic unix-based code */
typedef struct nv_linux_state_s {
    nv_state_t nv_state;

    atomic_t usage_count;
    NvU32    suspend_count;

    struct device  *dev;
    struct pci_dev *pci_dev;

    /* IBM-NPU info associated with this GPU */
    nv_ibmnpu_info_t *npu;

    /* coherent link information */
    coherent_link_info_t coherent_link_info;

    /* Dedicated queue to be used for removing FB memory which is onlined
     * to the kernel as a NUMA node. Refer to bug 3879845. */
    nv_kthread_q_t remove_numa_memory_q;

    /* NUMA node information for the platforms where GPU memory is presented
     * as a NUMA node to the kernel */
    struct {
        /* NUMA node id >= 0 when the platform supports GPU memory as a NUMA
         * node; otherwise it holds the value of NUMA_NO_NODE */
        NvS32 node_id;

        /* NUMA online/offline status for platforms that support GPU memory
         * as a NUMA node */
        atomic_t status;
        NvBool use_auto_online;
    } numa_info;

    nvidia_stack_t *sp[NV_DEV_STACK_COUNT];

    char registry_keys[NV_MAX_REGISTRY_KEYS_LENGTH];

    nv_work_t work;

    /* get a timer callback every second */
    struct nv_timer rc_timer;

    /* lock for linux-specific data, not used by core rm */
    struct semaphore ldata_lock;

    /* proc directory information */
    struct proc_dir_entry *proc_dir;

    NvU32 minor_num;
    struct nv_linux_state_s *next;

    /* DRM private information */
    struct drm_device *drm;

    /* kthread based bottom half servicing queue and elements */
    nv_kthread_q_t bottom_half_q;
    nv_kthread_q_item_t bottom_half_q_item;

    /* Lock for unlocked bottom half protecting common allocated stack */
    void *isr_bh_unlocked_mutex;

    NvBool tce_bypass_enabled;

    NvU32 num_intr;

    /* Lock serializing ISRs for different MSI-X vectors */
    nv_spinlock_t msix_isr_lock;

    /* Lock serializing bottom halves for different MSI-X vectors */
    void *msix_bh_mutex;

    struct msix_entry *msix_entries;

    NvU64 numa_memblock_size;

    struct {
        struct backlight_device *dev;
        NvU32 displayId;
        const char *device_name;
    } backlight;

    /*
     * file handle for pci sysfs config file (/sys/bus/pci/devices/.../config)
     * which will be opened during device probe
     */
    struct file *sysfs_config_file;

    /* Per-GPU queue */
    struct os_work_queue queue;

    /* GPU user mapping revocation/remapping (only for non-CTL device) */
    struct semaphore mmap_lock; /* Protects all fields in this category */
    struct list_head open_files;
    NvBool all_mappings_revoked;
    NvBool safe_to_mmap;
    NvBool gpu_wakeup_callback_needed;

    /* Per-device notifier block for ACPI events */
    struct notifier_block acpi_nb;

#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
    nv_acpi_t *nv_acpi_object;
#endif

    /* Lock serializing ISRs for different SOC vectors */
    nv_spinlock_t soc_isr_lock;
    void *soc_bh_mutex;

    struct nv_timer snapshot_timer;
    nv_spinlock_t snapshot_timer_lock;
    void (*snapshot_callback)(void *context);

    /* count for unhandled, total and timestamp of irq */
    nv_irq_count_info_t *irq_count;

    /* Max number of IRQs triggered and being tracked */
    NvU16 current_num_irq_tracked;

    NvBool is_forced_shutdown;

    struct nv_dma_device dma_dev;
    struct nv_dma_device niso_dma_dev;
} nv_linux_state_t;

extern nv_linux_state_t *nv_linux_devices;

/*
 * Macros to protect operations on the nv_linux_devices list.
 * Lock acquisition order while using the nv_linux_devices list:
 * 1. LOCK_NV_LINUX_DEVICES()
 * 2. Traverse the list
 *    If the list is traversed to search for an element, say nvl,
 *    acquire the nvl->ldata_lock before step 3
 * 3. UNLOCK_NV_LINUX_DEVICES()
 * 4. Release nvl->ldata_lock after any read/write access to the
 *    nvl element is complete
 */
extern struct semaphore nv_linux_devices_lock;
#define LOCK_NV_LINUX_DEVICES()     down(&nv_linux_devices_lock)
#define UNLOCK_NV_LINUX_DEVICES()   up(&nv_linux_devices_lock)
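/*
 * Sketch of the locking order documented above ('target_minor' is a
 * placeholder for whatever identifies the element being searched for):
 *
 *     nv_linux_state_t *nvl;
 *     LOCK_NV_LINUX_DEVICES();                                    // step 1
 *     for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)  // step 2
 *     {
 *         if (nvl->minor_num == target_minor)
 *             break;
 *     }
 *     if (nvl != NULL)
 *         down(&nvl->ldata_lock);       // before step 3
 *     UNLOCK_NV_LINUX_DEVICES();        // step 3
 *     if (nvl != NULL)
 *     {
 *         // ... read/write nvl state ...
 *         up(&nvl->ldata_lock);         // step 4
 *     }
 */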
/*
 * Lock to synchronize system power management transitions,
 * and to protect the global system PM state. The procfs power
 * management interface acquires this lock in write mode for
 * the duration of the sleep operation; any other paths accessing
 * device state must acquire the lock in read mode.
 */
extern struct rw_semaphore nv_system_pm_lock;

extern NvBool nv_ats_supported;

/*
 * File-private data: a pointer to our data structures is hidden in the
 * file-private pointer, so that it can be retrieved from the file
 * data structure later.
 */
typedef struct nvidia_event
{
    struct nvidia_event *next;
    nv_event_t event;
} nvidia_event_t;

typedef enum
{
    NV_FOPS_STACK_INDEX_MMAP,
    NV_FOPS_STACK_INDEX_IOCTL,
    NV_FOPS_STACK_INDEX_COUNT
} nvidia_entry_point_index_t;

typedef struct
{
    nv_file_private_t nvfp;

    nvidia_stack_t *sp;
    nvidia_stack_t *fops_sp[NV_FOPS_STACK_INDEX_COUNT];
    struct semaphore fops_sp_lock[NV_FOPS_STACK_INDEX_COUNT];
    nv_alloc_t *free_list;
    void *nvptr;
    nvidia_event_t *event_data_head, *event_data_tail;
    NvBool dataless_event_pending;
    nv_spinlock_t fp_lock;
    wait_queue_head_t waitqueue;
    nv_kthread_q_item_t deferred_close_q_item;
    NvU32 *attached_gpus;
    size_t num_attached_gpus;
    nv_alloc_mapping_context_t mmap_context;
    struct address_space mapping;

    struct list_head entry;
} nv_linux_file_private_t;

static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t *nvfp)
{
    return container_of(nvfp, nv_linux_file_private_t, nvfp);
}

#define NV_SET_FILE_PRIVATE(filep,data) ((filep)->private_data = (data))
#define NV_GET_LINUX_FILE_PRIVATE(filep) ((nv_linux_file_private_t *)(filep)->private_data)

/* for the card devices */
#define NV_GET_NVL_FROM_FILEP(filep)    (NV_GET_LINUX_FILE_PRIVATE(filep)->nvptr)
#define NV_GET_NVL_FROM_NV_STATE(nv)    ((nv_linux_state_t *)nv->os_state)

#define NV_STATE_PTR(nvl)   &(((nv_linux_state_t *)(nvl))->nv_state)

static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
    if (rm_is_altstack_in_use())
    {
        down(&nvlfp->fops_sp_lock[which]);
        return nvlfp->fops_sp[which];
    }
#endif
    return NULL;
}

static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
    if (rm_is_altstack_in_use())
    {
        up(&nvlfp->fops_sp_lock[which]);
    }
#endif
}
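/*
 * Usage sketch for the per-fd alternate stacks: get/put must be paired, and
 * the returned pointer may be NULL when the altstack is not in use.
 *
 *     nvidia_stack_t *sp = nv_nvlfp_get_sp(nvlfp, NV_FOPS_STACK_INDEX_IOCTL);
 *     // ... entry point body ...
 *     nv_nvlfp_put_sp(nvlfp, NV_FOPS_STACK_INDEX_IOCTL);
 */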
#define NV_ATOMIC_READ(data)            atomic_read(&(data))
#define NV_ATOMIC_SET(data,val)         atomic_set(&(data), (val))
#define NV_ATOMIC_INC(data)             atomic_inc(&(data))
#define NV_ATOMIC_DEC(data)             atomic_dec(&(data))
#define NV_ATOMIC_DEC_AND_TEST(data)    atomic_dec_and_test(&(data))

static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned int size,
                                                      unsigned int align)
{
    char *name_unique;
    struct kmem_cache *cache;

#if defined(NV_KMEM_CACHE_HAS_KOBJ_REMOVE_WORK) && !defined(NV_SYSFS_SLAB_UNLINK_PRESENT)
    size_t len;
    NvU64 tm_ns = nv_ktime_get_raw_ns();

    /*
     * Wait for the timer to change at least once. This ensures
     * that the name generated below is always unique.
     */
    while (tm_ns == nv_ktime_get_raw_ns());
    tm_ns = nv_ktime_get_raw_ns();

    /* 20 is the max length of a 64-bit integer printed in decimal */
    len = strlen(name) + 20 + 1;
    name_unique = kzalloc(len, GFP_KERNEL);
    if (!name_unique)
        return NULL;

    if (snprintf(name_unique, len, "%s-%llu", name, tm_ns) >= len)
    {
        WARN(1, "kmem cache name too long: %s\n", name);
        kfree(name_unique);
        return NULL;
    }
#else
    name_unique = (char *)name;
#endif
    cache = kmem_cache_create(name_unique, size, align, 0, nv_kmem_ctor_dummy);
    if (name_unique != name)
        kfree(name_unique);

    return cache;
}

#if defined(CONFIG_PCI_IOV)
#define NV_PCI_SRIOV_SUPPORT
#endif /* CONFIG_PCI_IOV */

#define NV_PCIE_CFG_MAX_OFFSET 0x1000

#include "nv-proto.h"

/*
 * Check if the GPU is present on the bus by checking the
 * NV_FLAG_IN_SURPRISE_REMOVAL flag (set when an eGPU is removed from TB3).
 */
static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
{
#if !defined(NVCPU_PPC64LE)
    if (NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv))
    {
        return NV_ERR_GPU_IS_LOST;
    }
#endif

    return NV_OK;
}

extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_EnableResizableBar;

extern NvU32 num_probed_nv_devices;
extern NvU32 num_nv_devices;

#define NV_FILE_INODE(file) (file)->f_inode

#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
#define NV_VGX_HYPER
#if defined(NV_XEN_IOEMU_INJECT_MSI)
#include <xen/ioemu.h>
#endif
#endif

static inline NvU64 nv_pci_bus_address(struct pci_dev *dev, NvU8 bar_index)
{
    NvU64 bus_addr = 0;
#if defined(NV_PCI_BUS_ADDRESS_PRESENT)
    bus_addr = pci_bus_address(dev, bar_index);
#elif defined(CONFIG_PCI)
    struct pci_bus_region region;

    pcibios_resource_to_bus(dev, &region, &dev->resource[bar_index]);
    bus_addr = region.start;
#endif
    return bus_addr;
}

/*
 * Decrements the usage count of the allocation, and moves the allocation to
 * the given nvlfp's free list if the usage count drops to zero.
 *
 * Returns NV_TRUE if the allocation is moved to the nvlfp's free list.
 */
static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t *at)
{
    NV_PRINT_AT(NV_DBG_MEMINFO, at);

    if (NV_ATOMIC_DEC_AND_TEST(at->usage_count))
    {
        NV_ATOMIC_INC(at->usage_count);

        at->next = nvlfp->free_list;
        nvlfp->free_list = at;
        return NV_TRUE;
    }

    return NV_FALSE;
}
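/*
 * Caller's-eye sketch: when nv_alloc_release() returns NV_TRUE, the
 * allocation has been parked on the nvlfp free list with one reference
 * re-taken, and actual teardown happens when that list is drained.
 *
 *     if (nv_alloc_release(nvlfp, at))
 *     {
 *         // 'at' is now owned by nvlfp->free_list
 *     }
 */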
/*
 * RB_EMPTY_ROOT was added in 2.6.18 by this commit:
 * 2006-06-21 dd67d051529387f6e44d22d1d5540ef281965fdd
 */
#if !defined(RB_EMPTY_ROOT)
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
#endif

/*
 * Starting with Power9 systems, DMA addresses for NVLink are no longer the
 * same as those used over PCIe.
 *
 * Power9 supports a 56-bit Real Address. This address range is compressed
 * when accessed over NVLink to allow the GPU to access all of memory using
 * its 47-bit physical address.
 *
 * If there is an NPU device present on the system, it implies that NVLink
 * sysmem links are present and we need to apply the required address
 * conversion for NVLink within the driver.
 *
 * See Bug 1920398 for further background and details.
 *
 * Note: one deviation from the documented compression scheme is that the
 * upper address bits (i.e. bits 56-63) are preserved during NVLink address
 * compression, rather than being set to zero, so that the original PCIe DMA
 * address can be reconstructed on expansion. These bits can be safely
 * ignored on NVLink, since they are truncated by the GPU.
 *
 * Bug 1968345: As a performance enhancement, it is the caller's
 * responsibility on PowerPC platforms to check for the presence of an NPU
 * device before the address transformation is applied.
 */
static inline NvU64 nv_compress_nvlink_addr(NvU64 addr)
{
    NvU64 addr47 = addr;

#if defined(NVCPU_PPC64LE)
    addr47 = addr & ((1ULL << 43) - 1);
    addr47 |= (addr & (0x3ULL << 45)) >> 2;
    WARN_ON(addr47 & (1ULL << 44));
    addr47 |= (addr & (0x3ULL << 49)) >> 4;
    addr47 |= addr & ~((1ULL << 56) - 1);
#endif

    return addr47;
}

static inline NvU64 nv_expand_nvlink_addr(NvU64 addr47)
{
    NvU64 addr = addr47;

#if defined(NVCPU_PPC64LE)
    addr = addr47 & ((1ULL << 43) - 1);
    addr |= (addr47 & (3ULL << 43)) << 2;
    addr |= (addr47 & (3ULL << 45)) << 4;
    addr |= addr47 & ~((1ULL << 56) - 1);
#endif

    return addr;
}
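/*
 * Worked example (illustrative, not compiled): round-tripping a DMA address
 * through the NVLink compression scheme above. The sample address is
 * arbitrary. Per the shifts in nv_compress_nvlink_addr(), bits 0-42 pass
 * through unchanged, original bits 45-46 land in compressed bits 43-44,
 * original bits 49-50 land in compressed bits 45-46, and bits 56-63 are
 * preserved as-is.
 */
#if 0
static void example_nvlink_addr_roundtrip(void)
{
    /* Bit 50, bit 45, and a low offset: all fall in recoverable fields */
    NvU64 pcie_addr   = (1ULL << 50) | (1ULL << 45) | 0x1000ULL;
    NvU64 nvlink_addr = nv_compress_nvlink_addr(pcie_addr);

    /* Expansion reconstructs the original PCIe DMA address */
    WARN_ON(nv_expand_nvlink_addr(nvlink_addr) != pcie_addr);
}
#endif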
/* Default flags for ISRs */
static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
{
    NvU32 flags = 0;

    /*
     * Request that IRQs be disabled in our ISRs, to keep behavior consistent
     * across the supported kernel versions.
     *
     * IRQF_DISABLED became the default in 2.6.35 with commit e58aa3d2d0cc
     * from March 2010, and was later removed entirely in 4.1 with commit
     * d8bf368d0631 from March 2015. Add it to our flags if it is defined, to
     * get the same behavior on pre-2.6.35 kernels as on recent ones.
     */
#if defined(IRQF_DISABLED)
    flags |= IRQF_DISABLED;
#endif

    /*
     * For legacy interrupts, also allow sharing. Sharing doesn't make sense
     * for MSI(-X), as on Linux those are never shared across different
     * devices and we only register one ISR today.
     */
    if ((nv->flags & (NV_FLAG_USES_MSI | NV_FLAG_USES_MSIX)) == 0)
        flags |= IRQF_SHARED;

    return flags;
}

/*
 * Starting with v3.7-rc1, the kernel stopped exporting get_unused_fd() and
 * started exporting get_unused_fd_flags(), as of this commit:
 * 2012-09-26 1a7bd2265fc ("make get_unused_fd_flags() a function")
 */
#if NV_IS_EXPORT_SYMBOL_PRESENT_get_unused_fd
#define NV_GET_UNUSED_FD()            get_unused_fd()
#else
#define NV_GET_UNUSED_FD()            get_unused_fd_flags(0)
#endif

#if NV_IS_EXPORT_SYMBOL_PRESENT_get_unused_fd_flags
#define NV_GET_UNUSED_FD_FLAGS(flags) get_unused_fd_flags(flags)
#else
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
#endif

#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING

NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);

static inline void nv_mutex_destroy(struct mutex *lock)
{
    mutex_destroy(lock);
}

static inline NvBool nv_platform_supports_numa(nv_linux_state_t *nvl)
{
    return nvl->numa_info.node_id != NUMA_NO_NODE;
}

static inline int nv_get_numa_status(nv_linux_state_t *nvl)
{
    if (!nv_platform_supports_numa(nvl))
    {
        return NV_IOCTL_NUMA_STATUS_DISABLED;
    }

    return NV_ATOMIC_READ(nvl->numa_info.status);
}

static inline int nv_set_numa_status(nv_linux_state_t *nvl, int status)
{
    if (!nv_platform_supports_numa(nvl))
    {
        return -EINVAL;
    }

    NV_ATOMIC_SET(nvl->numa_info.status, status);
    return 0;
}

static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
{
    return nvl->numa_info.use_auto_online;
}

typedef struct {
    NvU64 base;
    NvU64 size;
    NvU32 nodeId;
    int ret;
} remove_numa_memory_info_t;

/*
 * Offline and remove the given NUMA memory range, recording the result in
 * the callback argument. Compiles to a no-op on kernels without
 * offline_and_remove_memory().
 */
static void offline_numa_memory_callback(void *args)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
                                               pNumaInfo->base,
                                               pNumaInfo->size);
#else
    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
                                               pNumaInfo->size);
#endif
#endif
}

typedef enum
{
    NV_NUMA_STATUS_DISABLED            = 0,
    NV_NUMA_STATUS_OFFLINE             = 1,
    NV_NUMA_STATUS_ONLINE_IN_PROGRESS  = 2,
    NV_NUMA_STATUS_ONLINE              = 3,
    NV_NUMA_STATUS_ONLINE_FAILED       = 4,
    NV_NUMA_STATUS_OFFLINE_IN_PROGRESS = 5,
    NV_NUMA_STATUS_OFFLINE_FAILED      = 6,
    NV_NUMA_STATUS_COUNT
} nv_numa_status_t;
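/*
 * Illustrative sketch (not compiled): how a caller might drive the memory
 * offline path with the helpers above. The function name is hypothetical and
 * error handling is elided; in the driver, offline_numa_memory_callback() is
 * presumably dispatched via a kernel thread queue item, which is why it
 * takes a void * argument, but it is called directly here for brevity.
 */
#if 0
static int example_offline_numa_memory(nv_linux_state_t *nvl,
                                       NvU64 base, NvU64 size)
{
    remove_numa_memory_info_t info =
    {
        .base   = base,
        .size   = size,
        .nodeId = nvl->numa_info.node_id,
        .ret    = 0,
    };

    /* Fails with -EINVAL on platforms without a GPU NUMA node */
    if (nv_set_numa_status(nvl, NV_NUMA_STATUS_OFFLINE_IN_PROGRESS) != 0)
        return -EINVAL;

    offline_numa_memory_callback(&info);

    nv_set_numa_status(nvl, (info.ret == 0) ? NV_NUMA_STATUS_OFFLINE
                                            : NV_NUMA_STATUS_OFFLINE_FAILED);
    return info.ret;
}
#endif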
#if defined(NV_LINUX_OF_PLATFORM_H_PRESENT)
#include <linux/of_platform.h>
#endif

#if defined(NV_LINUX_INTERCONNECT_H_PRESENT)
#include <linux/interconnect.h>
#endif

#if defined(NV_LINUX_PM_RUNTIME_H_PRESENT)
#include <linux/pm_runtime.h>
#endif

#if defined(NV_LINUX_CLK_H_PRESENT)
#include <linux/clk.h>
#endif

#if defined(NV_LINUX_CLK_PROVIDER_H_PRESENT)
#include <linux/clk-provider.h>
#endif

#endif /* _NV_LINUX_H_ */