xref: /open-nvidia-gpu/kernel-open/nvidia/nv.c (revision 3bf16b89)
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include <linux/module.h>  // for MODULE_FIRMWARE
25 
26 // must precede "nv.h" and "nv-firmware.h" includes
27 #define NV_FIRMWARE_PATH_FOR_FILENAME(filename)  "nvidia/" NV_VERSION_STRING "/" filename
28 #define NV_FIRMWARE_DECLARE_GSP_FILENAME(filename) \
29     MODULE_FIRMWARE(NV_FIRMWARE_PATH_FOR_FILENAME(filename));
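//
// Sketch of the intended expansion (the actual firmware filenames come from
// nv-firmware.h; "gsp_ga10x.bin" is only illustrative):
//   NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
// expands to
//   MODULE_FIRMWARE("nvidia/" NV_VERSION_STRING "/gsp_ga10x.bin");
// which is what lets initramfs tooling discover the GSP firmware dependency.
//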
30 #include "nv-firmware.h"
31 
32 #include "nvmisc.h"
33 #include "os-interface.h"
34 #include "nv-linux.h"
35 #include "nv-p2p.h"
36 #include "nv-reg.h"
37 #include "nv-msi.h"
38 #include "nv-pci-table.h"
39 #include "nv-chardev-numbers.h"
40 
41 #if defined(NV_UVM_ENABLE)
42 #include "nv_uvm_interface.h"
43 #endif
44 
45 #if defined(NV_VGPU_KVM_BUILD)
46 #include "nv-vgpu-vfio-interface.h"
47 #endif
48 
49 #include "nvlink_proto.h"
50 #include "nvlink_caps.h"
51 
52 #include "nv-hypervisor.h"
53 #include "nv-ibmnpu.h"
54 #include "nv-rsync.h"
55 #include "nv-kthread-q.h"
56 #include "nv-pat.h"
57 #include "nv-dmabuf.h"
58 #include "nv-caps-imex.h"
59 
60 /*
61  * Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
62  * CONFIG_MITIGATION_RETPOLINE") in v6.8 renamed CONFIG_RETPOLINE.
63  */
64 #if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
65 #include "nv-retpoline.h"
66 #endif
67 
68 #include <linux/firmware.h>
69 #include <linux/cdev.h>
70 
71 #include <sound/core.h>             /* HDA struct snd_card */
72 
73 #include <asm/cache.h>
74 
75 #if defined(NV_SOUND_HDAUDIO_H_PRESENT)
76 #include "sound/hdaudio.h"
77 #endif
78 
79 #if defined(NV_SOUND_HDA_CODEC_H_PRESENT)
80 #include <sound/core.h>
81 #include <sound/hda_codec.h>
82 #include <sound/hda_verbs.h>
83 #endif
84 
85 #if defined(NV_SEQ_READ_ITER_PRESENT)
86 #include <linux/uio.h>
87 #include <linux/seq_file.h>
88 #include <linux/kernfs.h>
89 #endif
90 
91 #include <linux/dmi.h>              /* System DMI info */
92 
93 #include <linux/ioport.h>
94 
95 #if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
96 #include <linux/cc_platform.h>
97 #endif
98 
99 #if defined(NV_ASM_CPUFEATURE_H_PRESENT)
100 #include <asm/cpufeature.h>
101 #endif
102 
103 #include "conftest/patches.h"
104 
105 #include "detect-self-hosted.h"
106 
107 #define RM_THRESHOLD_TOTAL_IRQ_COUNT     100000
108 #define RM_THRESHOLD_UNAHNDLED_IRQ_COUNT 99900
109 #define RM_UNHANDLED_TIMEOUT_US          100000
110 
111 MODULE_LICENSE("Dual MIT/GPL");
112 
113 MODULE_INFO(supported, "external");
114 MODULE_VERSION(NV_VERSION_STRING);
115 MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER);
116 
117 /*
118  * MODULE_IMPORT_NS() is added by commit id 8651ec01daeda
119  * ("module: add support for symbol namespaces") in 5.4
120  */
121 #if defined(MODULE_IMPORT_NS)
122 /*
123  * DMA_BUF namespace is added by commit id 16b0314aa746
124  * ("dma-buf: move dma-buf symbols into the DMA_BUF module namespace") in 5.16
125  */
126 MODULE_IMPORT_NS(DMA_BUF);
127 #endif  // defined(MODULE_IMPORT_NS)
128 
129 const NvBool nv_is_rm_firmware_supported_os = NV_TRUE;
130 
131 // Deprecated, use NV_REG_ENABLE_GPU_FIRMWARE instead
132 char *rm_firmware_active = NULL;
133 NV_MODULE_STRING_PARAMETER(rm_firmware_active);
134 
135 /*
136  * Global NVIDIA capability state, for GPU driver
137  */
138 nv_cap_t *nvidia_caps_root = NULL;
139 
140 /*
141  * Global counts for tracking if all devices were initialized properly
142  */
143 NvU32 num_nv_devices = 0;
144 NvU32 num_probed_nv_devices = 0;
145 
146 /*
147  * Global list and table of per-device state
148  * note: both nv_linux_devices and nv_linux_minor_num_table
149  *       are protected by nv_linux_devices_lock
150  */
151 nv_linux_state_t *nv_linux_devices;
152 static nv_linux_state_t *nv_linux_minor_num_table[NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1];
153 
154 // Global state for the control device
155 nv_linux_state_t nv_ctl_device = { { 0 } };
156 
157 // cdev covering the region of regular (non-control) devices
158 static struct cdev nv_linux_devices_cdev;
159 
160 // cdev covering the control device
161 static struct cdev nv_linux_control_device_cdev;
162 
163 extern NvU32 nv_dma_remap_peer_mmio;
164 
165 nv_kthread_q_t nv_kthread_q;
166 nv_kthread_q_t nv_deferred_close_kthread_q;
167 
168 struct rw_semaphore nv_system_pm_lock;
169 
170 #if defined(CONFIG_PM)
171 static nv_power_state_t nv_system_power_state;
172 static nv_pm_action_depth_t nv_system_pm_action_depth;
173 struct semaphore nv_system_power_state_lock;
174 #endif
175 
176 void *nvidia_p2p_page_t_cache;
177 static void *nvidia_pte_t_cache;
178 void *nvidia_stack_t_cache;
179 static nvidia_stack_t *__nv_init_sp;
180 
181 static int nv_tce_bypass_mode = NV_TCE_BYPASS_MODE_DEFAULT;
182 
183 struct semaphore nv_linux_devices_lock;
184 
185 // True if all the successfully probed devices support ATS
186 // Assigned at device probe (module init) time
187 NvBool nv_ats_supported = NVCPU_IS_PPC64LE
188 #if defined(NV_PCI_DEV_HAS_ATS_ENABLED)
189                           || NV_TRUE
190 #endif
191 ;
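// (That is: the initial value is true on PPC64LE, or on kernels where
// struct pci_dev exposes ats_enabled (NV_PCI_DEV_HAS_ATS_ENABLED); device
// probe may later clear it if a probed GPU does not support ATS.)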
192 
193 // allow an easy way to convert all debug printfs related to events
194 // back and forth between 'info' and 'errors'
195 #if defined(NV_DBG_EVENTS)
196 #define NV_DBG_EVENTINFO NV_DBG_ERRORS
197 #else
198 #define NV_DBG_EVENTINFO NV_DBG_INFO
199 #endif
200 
201 #if defined(HDA_MAX_CODECS)
202 #define NV_HDA_MAX_CODECS HDA_MAX_CODECS
203 #else
204 #define NV_HDA_MAX_CODECS 8
205 #endif
206 
207 /***
208  *** STATIC functions, only in this file
209  ***/
210 
211 /* nvos_ functions do not take a device state parameter */
212 static int      nvos_count_devices(void);
213 
214 static nv_alloc_t  *nvos_create_alloc(struct device *, NvU64);
215 static int          nvos_free_alloc(nv_alloc_t *);
216 
217 /***
218  *** EXPORTS to Linux Kernel
219  ***/
220 
221 static irqreturn_t   nvidia_isr_common_bh   (void *);
222 static void          nvidia_isr_bh_unlocked (void *);
223 static int           nvidia_ctl_open        (struct inode *, struct file *);
224 static int           nvidia_ctl_close       (struct inode *, struct file *);
225 
226 const char *nv_device_name = MODULE_NAME;
227 static const char *nvidia_stack_cache_name = MODULE_NAME "_stack_cache";
228 static const char *nvidia_pte_cache_name = MODULE_NAME "_pte_cache";
229 static const char *nvidia_p2p_page_cache_name = MODULE_NAME "_p2p_page_cache";
230 
231 static int           nvidia_open           (struct inode *, struct file *);
232 static int           nvidia_close          (struct inode *, struct file *);
233 static unsigned int  nvidia_poll           (struct file *, poll_table *);
234 static int           nvidia_ioctl          (struct inode *, struct file *, unsigned int, unsigned long);
235 static long          nvidia_unlocked_ioctl (struct file *, unsigned int, unsigned long);
236 
237 /* character device entry points */
238 static struct file_operations nvidia_fops = {
239     .owner     = THIS_MODULE,
240     .poll      = nvidia_poll,
241     .unlocked_ioctl = nvidia_unlocked_ioctl,
242 #if NVCPU_IS_X86_64 || NVCPU_IS_AARCH64
243     .compat_ioctl = nvidia_unlocked_ioctl,
244 #endif
245     .mmap      = nvidia_mmap,
246     .open      = nvidia_open,
247     .release   = nvidia_close,
248 };
249 
250 #if defined(CONFIG_PM)
251 static int           nv_pmops_suspend          (struct device *dev);
252 static int           nv_pmops_resume           (struct device *dev);
253 static int           nv_pmops_freeze           (struct device *dev);
254 static int           nv_pmops_thaw             (struct device *dev);
255 static int           nv_pmops_restore          (struct device *dev);
256 static int           nv_pmops_poweroff         (struct device *dev);
257 static int           nv_pmops_runtime_suspend  (struct device *dev);
258 static int           nv_pmops_runtime_resume   (struct device *dev);
259 
260 struct dev_pm_ops nv_pm_ops = {
261     .suspend         = nv_pmops_suspend,
262     .resume          = nv_pmops_resume,
263     .freeze          = nv_pmops_freeze,
264     .thaw            = nv_pmops_thaw,
265     .poweroff        = nv_pmops_poweroff,
266     .restore         = nv_pmops_restore,
267     .runtime_suspend = nv_pmops_runtime_suspend,
268     .runtime_resume  = nv_pmops_runtime_resume,
269 };
270 #endif
271 
272 /***
273  *** see nv.h for functions exported to other parts of resman
274  ***/
275 
276 /***
277  *** STATIC functions
278  ***/
279 
280 static
281 void nv_detect_conf_compute_platform(
282     void
283 )
284 {
285 #if defined(NV_CC_PLATFORM_PRESENT)
286     os_cc_enabled = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
287 
288 #if defined(X86_FEATURE_TDX_GUEST)
289     if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
290     {
291         os_cc_tdx_enabled = NV_TRUE;
292     }
293 #endif
294 #else
295     os_cc_enabled = NV_FALSE;
296     os_cc_tdx_enabled = NV_FALSE;
297 #endif
298 }
299 
300 static
301 nv_alloc_t *nvos_create_alloc(
302     struct device *dev,
303     NvU64          num_pages
304 )
305 {
306     nv_alloc_t  *at;
307     NvU64        pt_size;
308     unsigned int i;
309 
310     NV_KZALLOC(at, sizeof(nv_alloc_t));
311     if (at == NULL)
312     {
313         nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate alloc info\n");
314         return NULL;
315     }
316 
317     at->dev = dev;
318     pt_size = num_pages *  sizeof(nvidia_pte_t *);
319     //
320     // Check for multiplication overflow and check whether num_pages value can fit in at->num_pages.
321     //
322     if ((num_pages != 0) && ((pt_size / num_pages) != sizeof(nvidia_pte_t*)))
323     {
324         nv_printf(NV_DBG_ERRORS, "NVRM: Invalid page table allocation - Number of pages exceeds max value.\n");
325         NV_KFREE(at, sizeof(nv_alloc_t));
326         return NULL;
327     }
328 
329     at->num_pages = num_pages;
330     if (at->num_pages != num_pages)
331     {
332         nv_printf(NV_DBG_ERRORS, "NVRM: Invalid page table allocation - requested size overflows.\n");
333         NV_KFREE(at, sizeof(nv_alloc_t));
334         return NULL;
335     }
336 
337     if (os_alloc_mem((void **)&at->page_table, pt_size) != NV_OK)
338     {
339         nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate page table\n");
340         NV_KFREE(at, sizeof(nv_alloc_t));
341         return NULL;
342     }
343 
344     memset(at->page_table, 0, pt_size);
345     NV_ATOMIC_SET(at->usage_count, 0);
346 
347     for (i = 0; i < at->num_pages; i++)
348     {
349         at->page_table[i] = NV_KMEM_CACHE_ALLOC(nvidia_pte_t_cache);
350         if (at->page_table[i] == NULL)
351         {
352             nv_printf(NV_DBG_ERRORS,
353                       "NVRM: failed to allocate page table entry\n");
354             nvos_free_alloc(at);
355             return NULL;
356         }
357         memset(at->page_table[i], 0, sizeof(nvidia_pte_t));
358     }
359 
360     at->pid = os_get_current_process();
361 
362     return at;
363 }
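/*
 * Note on the checks in nvos_create_alloc() above: the division test
 * ((pt_size / num_pages) != sizeof(nvidia_pte_t *)) catches NvU64
 * multiplication overflow, and the read-back comparison
 * (at->num_pages != num_pages) catches truncation in case at->num_pages
 * is a narrower type than NvU64.
 */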
364 
365 static
366 int nvos_free_alloc(
367     nv_alloc_t *at
368 )
369 {
370     unsigned int i;
371 
372     if (at == NULL)
373         return -1;
374 
375     if (NV_ATOMIC_READ(at->usage_count))
376         return 1;
377 
378     for (i = 0; i < at->num_pages; i++)
379     {
380         if (at->page_table[i] != NULL)
381             NV_KMEM_CACHE_FREE(at->page_table[i], nvidia_pte_t_cache);
382     }
383     os_free_mem(at->page_table);
384 
385     NV_KFREE(at, sizeof(nv_alloc_t));
386 
387     return 0;
388 }
389 
390 static void
391 nv_module_resources_exit(nv_stack_t *sp)
392 {
393     nv_kmem_cache_free_stack(sp);
394 
395     NV_KMEM_CACHE_DESTROY(nvidia_p2p_page_t_cache);
396     NV_KMEM_CACHE_DESTROY(nvidia_pte_t_cache);
397     NV_KMEM_CACHE_DESTROY(nvidia_stack_t_cache);
398 }
399 
400 static int __init
401 nv_module_resources_init(nv_stack_t **sp)
402 {
403     int rc = -ENOMEM;
404 
405     nvidia_stack_t_cache = NV_KMEM_CACHE_CREATE(nvidia_stack_cache_name,
406                                                 nvidia_stack_t);
407     if (nvidia_stack_t_cache == NULL)
408     {
409         nv_printf(NV_DBG_ERRORS,
410                   "NVRM: nvidia_stack_t cache allocation failed.\n");
411         goto exit;
412     }
413 
414     nvidia_pte_t_cache = NV_KMEM_CACHE_CREATE(nvidia_pte_cache_name,
415                                               nvidia_pte_t);
416     if (nvidia_pte_t_cache == NULL)
417     {
418         nv_printf(NV_DBG_ERRORS,
419                   "NVRM: nvidia_pte_t cache allocation failed.\n");
420         goto exit;
421     }
422 
423     nvidia_p2p_page_t_cache = NV_KMEM_CACHE_CREATE(nvidia_p2p_page_cache_name,
424                                                    nvidia_p2p_page_t);
425     if (nvidia_p2p_page_t_cache == NULL)
426     {
427         nv_printf(NV_DBG_ERRORS,
428                   "NVRM: nvidia_p2p_page_t cache allocation failed.\n");
429         goto exit;
430     }
431 
432     rc = nv_kmem_cache_alloc_stack(sp);
433     if (rc < 0)
434     {
435         goto exit;
436     }
437 
438 exit:
439     if (rc < 0)
440     {
441         nv_kmem_cache_free_stack(*sp);
442 
443         NV_KMEM_CACHE_DESTROY(nvidia_p2p_page_t_cache);
444         NV_KMEM_CACHE_DESTROY(nvidia_pte_t_cache);
445         NV_KMEM_CACHE_DESTROY(nvidia_stack_t_cache);
446     }
447 
448     return rc;
449 }
450 
451 static void
452 nvlink_drivers_exit(void)
453 {
454 #if NVCPU_IS_64_BITS
455     nvswitch_exit();
456 #endif
457 
458 #if defined(NVCPU_PPC64LE)
459     ibmnpu_exit();
460 #endif
461 
462     nvlink_core_exit();
463 }
464 
465 static int __init
466 nvlink_drivers_init(void)
467 {
468     int rc = 0;
469 
470     rc = nvlink_core_init();
471     if (rc < 0)
472     {
473         nv_printf(NV_DBG_INFO, "NVRM: NVLink core init failed.\n");
474         return rc;
475     }
476 
477 #if defined(NVCPU_PPC64LE)
478     rc = ibmnpu_init();
479     if (rc < 0)
480     {
481         nv_printf(NV_DBG_INFO, "NVRM: IBM NPU init failed.\n");
482         nvlink_core_exit();
483         return rc;
484     }
485 #endif
486 
487 #if NVCPU_IS_64_BITS
488     rc = nvswitch_init();
489     if (rc < 0)
490     {
491         nv_printf(NV_DBG_INFO, "NVRM: NVSwitch init failed.\n");
492 #if defined(NVCPU_PPC64LE)
493         ibmnpu_exit();
494 #endif
495         nvlink_core_exit();
496     }
497 #endif
498 
499     return rc;
500 }
501 
502 static void
503 nv_module_state_exit(nv_stack_t *sp)
504 {
505     nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);
506 
507     nv_teardown_pat_support();
508 
509     nv_kthread_q_stop(&nv_deferred_close_kthread_q);
510     nv_kthread_q_stop(&nv_kthread_q);
511 
512     nv_lock_destroy_locks(sp, nv);
513 }
514 
515 static int
516 nv_module_state_init(nv_stack_t *sp)
517 {
518     int rc;
519     nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);
520 
521     nv->os_state = (void *)&nv_ctl_device;
522 
523     if (!nv_lock_init_locks(sp, nv))
524     {
525         return -ENOMEM;
526     }
527 
528     rc = nv_kthread_q_init(&nv_kthread_q, "nv_queue");
529     if (rc != 0)
530     {
531         goto exit;
532     }
533 
534     rc = nv_kthread_q_init(&nv_deferred_close_kthread_q, "nv_queue");
535     if (rc != 0)
536     {
537         nv_kthread_q_stop(&nv_kthread_q);
538         goto exit;
539     }
540 
541     rc = nv_init_pat_support(sp);
542     if (rc < 0)
543     {
544         nv_kthread_q_stop(&nv_deferred_close_kthread_q);
545         nv_kthread_q_stop(&nv_kthread_q);
546         goto exit;
547     }
548 
549     nv_linux_devices = NULL;
550     memset(nv_linux_minor_num_table, 0, sizeof(nv_linux_minor_num_table));
551     NV_INIT_MUTEX(&nv_linux_devices_lock);
552     init_rwsem(&nv_system_pm_lock);
553 
554 #if defined(CONFIG_PM)
555     NV_INIT_MUTEX(&nv_system_power_state_lock);
556     nv_system_power_state = NV_POWER_STATE_RUNNING;
557     nv_system_pm_action_depth = NV_PM_ACTION_DEPTH_DEFAULT;
558 #endif
559 
560     NV_SPIN_LOCK_INIT(&nv_ctl_device.snapshot_timer_lock);
561 
562 exit:
563     if (rc < 0)
564     {
565         nv_lock_destroy_locks(sp, nv);
566     }
567 
568     return rc;
569 }
570 
571 static void __init
572 nv_registry_keys_init(nv_stack_t *sp)
573 {
574     NV_STATUS status;
575     nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);
576     NvU32 data;
577 
578     /*
579      * Determine the TCE bypass mode here so it can be used during
580      * device probe.  Also determine whether we should allow
581      * user-mode NUMA onlining of device memory.
582      */
583     if (NVCPU_IS_PPC64LE)
584     {
585         status = rm_read_registry_dword(sp, nv,
586                                         NV_REG_TCE_BYPASS_MODE,
587                                         &data);
588         if ((status == NV_OK) && ((int)data != NV_TCE_BYPASS_MODE_DEFAULT))
589         {
590             nv_tce_bypass_mode = data;
591         }
592 
593         if (NVreg_EnableUserNUMAManagement)
594         {
595             /* Force on the core RM registry key to match. */
596             status = rm_write_registry_dword(sp, nv, "RMNumaOnlining", 1);
597             WARN_ON(status != NV_OK);
598         }
599     }
600 
601     status = rm_read_registry_dword(sp, nv, NV_DMA_REMAP_PEER_MMIO, &data);
602     if (status == NV_OK)
603     {
604         nv_dma_remap_peer_mmio = data;
605     }
606 }
607 
608 static void __init
609 nv_report_applied_patches(void)
610 {
611     unsigned i;
612 
613     for (i = 0; __nv_patches[i].short_description; i++)
614     {
615         if (i == 0)
616         {
617             nv_printf(NV_DBG_ERRORS, "NVRM: Applied patches:\n");
618         }
619 
620         nv_printf(NV_DBG_ERRORS,
621             "NVRM:    Patch #%d: %s\n", i + 1, __nv_patches[i].short_description);
622     }
623 }
624 
625 static void
626 nv_drivers_exit(void)
627 {
628     nv_pci_unregister_driver();
629 }
630 
631 static int __init
632 nv_drivers_init(void)
633 {
634     int rc;
635 
636     rc = nv_pci_register_driver();
637     if (rc < 0)
638     {
639         nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA PCI devices found.\n");
640         rc = -ENODEV;
641         goto exit;
642     }
643 
644 exit:
645     return rc;
646 }
647 
648 static void
649 nv_module_exit(nv_stack_t *sp)
650 {
651     nv_module_state_exit(sp);
652 
653     rm_shutdown_rm(sp);
654 
655     nv_destroy_rsync_info();
656     nvlink_drivers_exit();
657 
658     nv_cap_drv_exit();
659 
660     nv_module_resources_exit(sp);
661 }
662 
663 static int __init
664 nv_module_init(nv_stack_t **sp)
665 {
666     int rc;
667 
668     rc = nv_module_resources_init(sp);
669     if (rc < 0)
670     {
671         return rc;
672     }
673 
674     rc = nv_cap_drv_init();
675     if (rc < 0)
676     {
677         nv_printf(NV_DBG_ERRORS, "NVRM: nv-cap-drv init failed.\n");
678         goto cap_drv_exit;
679     }
680 
681     rc = nvlink_drivers_init();
682     if (rc < 0)
683     {
684         goto cap_drv_exit;
685     }
686 
687     nv_init_rsync_info();
688     nv_detect_conf_compute_platform();
689 
690     if (!rm_init_rm(*sp))
691     {
692         nv_printf(NV_DBG_ERRORS, "NVRM: rm_init_rm() failed!\n");
693         rc = -EIO;
694         goto nvlink_exit;
695     }
696 
697     rc = nv_module_state_init(*sp);
698     if (rc < 0)
699     {
700         goto init_rm_exit;
701     }
702 
703     return rc;
704 
705 init_rm_exit:
706     rm_shutdown_rm(*sp);
707 
708 nvlink_exit:
709     nv_destroy_rsync_info();
710     nvlink_drivers_exit();
711 
712 cap_drv_exit:
713     nv_cap_drv_exit();
714     nv_module_resources_exit(*sp);
715 
716     return rc;
717 }
718 
719 /*
720  * In this function we check for the cases where GPU exclusion is not
721  * honored, and issue a warning.
722  *
723  * Only GPUs that support a mechanism to query UUID prior to
724  * initializing the GPU can be excluded, so that we can detect and
725  * exclude them during device probe.  This function checks that an
726  * initialized GPU was not specified in the exclusion list, and issues a
727  * warning if so.
728  */
729 static void
730 nv_assert_not_in_gpu_exclusion_list(
731     nvidia_stack_t *sp,
732     nv_state_t *nv
733 )
734 {
735     char *uuid = rm_get_gpu_uuid(sp, nv);
736 
737     if (uuid == NULL)
738     {
739         NV_DEV_PRINTF(NV_DBG_INFO, nv, "Unable to read UUID");
740         return;
741     }
742 
743     if (nv_is_uuid_in_gpu_exclusion_list(uuid))
744     {
745         NV_DEV_PRINTF(NV_DBG_WARNINGS, nv,
746                       "Could not exclude GPU %s because PBI is not supported\n",
747                       uuid);
748         WARN_ON(1);
749     }
750 
751     os_free_mem(uuid);
752 
753     return;
754 }
755 
756 static int __init nv_caps_root_init(void)
757 {
758     nvidia_caps_root = os_nv_cap_init("driver/" MODULE_NAME);
759 
760     return (nvidia_caps_root == NULL) ? -ENOENT : 0;
761 }
762 
763 static void nv_caps_root_exit(void)
764 {
765     os_nv_cap_destroy_entry(nvidia_caps_root);
766     nvidia_caps_root = NULL;
767 }
768 
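/*
 * Reserves the character device region [minor, minor + count) under
 * NV_MAJOR_DEVICE_NUMBER and binds it to the given file_operations.  On
 * failure the region is released again, so a successful call only needs to
 * be balanced by nv_unregister_chrdev().
 */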
769 static int nv_register_chrdev(
770     unsigned int minor,
771     unsigned int count,
772     struct cdev *cdev,
773     const char *name,
774     struct file_operations *fops
775 )
776 {
777     int rc;
778 
779     rc = register_chrdev_region(MKDEV(NV_MAJOR_DEVICE_NUMBER, minor),
780             count, name);
781     if (rc < 0)
782     {
783         nv_printf(NV_DBG_ERRORS,
784             "NVRM: register_chrdev_region() failed for %s!\n", name);
785         return rc;
786     }
787 
788     cdev_init(cdev, fops);
789     rc = cdev_add(cdev, MKDEV(NV_MAJOR_DEVICE_NUMBER, minor), count);
790     if (rc < 0)
791     {
792         nv_printf(NV_DBG_ERRORS, "NVRM: cdev_add() failed for %s!\n", name);
793         unregister_chrdev_region(MKDEV(NV_MAJOR_DEVICE_NUMBER, minor), count);
794         return rc;
795     }
796 
797     return rc;
798 }
799 
800 static void nv_unregister_chrdev(
801     unsigned int minor,
802     unsigned int count,
803     struct cdev *cdev
804 )
805 {
806     cdev_del(cdev);
807     unregister_chrdev_region(MKDEV(NV_MAJOR_DEVICE_NUMBER, minor), count);
808 }
809 
810 static int __init nvidia_init_module(void)
811 {
812     int rc;
813     NvU32 count;
814     nvidia_stack_t *sp = NULL;
815     const NvBool is_nvswitch_present = os_is_nvswitch_present();
816 
817     nv_memdbg_init();
818 
819     rc = nv_procfs_init();
820     if (rc < 0)
821     {
822         nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize procfs.\n");
823         return rc;
824     }
825 
826     rc = nv_caps_root_init();
827     if (rc < 0)
828     {
829         nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize capabilities.\n");
830         goto procfs_exit;
831     }
832 
833     rc = nv_caps_imex_init();
834     if (rc < 0)
835     {
836         nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize IMEX channels.\n");
837         goto caps_root_exit;
838     }
839 
840     rc = nv_module_init(&sp);
841     if (rc < 0)
842     {
843         nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
844         goto caps_imex_exit;
845     }
846 
847     count = nvos_count_devices();
848     if ((count == 0) && (!is_nvswitch_present))
849     {
850         nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA GPU found.\n");
851         rc = -ENODEV;
852         goto module_exit;
853     }
854 
855     rc = nv_drivers_init();
856     if (rc < 0)
857     {
858         goto module_exit;
859     }
860 
861     if (num_probed_nv_devices != count)
862     {
863         nv_printf(NV_DBG_ERRORS,
864             "NVRM: The NVIDIA probe routine was not called for %d device(s).\n",
865             count - num_probed_nv_devices);
866         nv_printf(NV_DBG_ERRORS,
867             "NVRM: This can occur when another driver was loaded and \n"
868             "NVRM: obtained ownership of the NVIDIA device(s).\n");
869         nv_printf(NV_DBG_ERRORS,
870             "NVRM: Try unloading the conflicting kernel module (and/or\n"
871             "NVRM: reconfigure your kernel without the conflicting\n"
872             "NVRM: driver(s)), then try loading the NVIDIA kernel module\n"
873             "NVRM: again.\n");
874     }
875 
876     if ((num_probed_nv_devices == 0) && (!is_nvswitch_present))
877     {
878         rc = -ENODEV;
879         nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA devices probed.\n");
880         goto drivers_exit;
881     }
882 
883     if (num_probed_nv_devices != num_nv_devices)
884     {
885         nv_printf(NV_DBG_ERRORS,
886             "NVRM: The NVIDIA probe routine failed for %d device(s).\n",
887             num_probed_nv_devices - num_nv_devices);
888     }
889 
890     if ((num_nv_devices == 0) && (!is_nvswitch_present))
891     {
892         rc = -ENODEV;
893         nv_printf(NV_DBG_ERRORS,
894             "NVRM: None of the NVIDIA devices were initialized.\n");
895         goto drivers_exit;
896     }
897 
898     /*
899      * Initialize registry keys after PCI driver registration has
900      * completed successfully to support per-device module
901      * parameters.
902      */
903     nv_registry_keys_init(sp);
904 
905     nv_report_applied_patches();
906 
907     nv_printf(NV_DBG_ERRORS, "NVRM: loading %s\n", pNVRM_ID);
908 
909 #if defined(NV_UVM_ENABLE)
910     rc = nv_uvm_init();
911     if (rc != 0)
912     {
913         goto drivers_exit;
914     }
915 #endif
916 
917     /*
918      * Register char devices for both the region of regular devices
919      * as well as the control device.
920      *
921      * NOTE: THIS SHOULD BE DONE LAST.
922      */
923     rc = nv_register_chrdev(0, NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1,
924             &nv_linux_devices_cdev, "nvidia", &nvidia_fops);
925     if (rc < 0)
926     {
927         goto no_chrdev_exit;
928     }
929 
930     rc = nv_register_chrdev(NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE, 1,
931             &nv_linux_control_device_cdev, "nvidiactl", &nvidia_fops);
932     if (rc < 0)
933     {
934         goto partial_chrdev_exit;
935     }
936 
937     __nv_init_sp = sp;
938 
939     return 0;
940 
941 partial_chrdev_exit:
942     nv_unregister_chrdev(0, NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1,
943         &nv_linux_devices_cdev);
944 
945 no_chrdev_exit:
946 #if defined(NV_UVM_ENABLE)
947     nv_uvm_exit();
948 #endif
949 
950 drivers_exit:
951     nv_drivers_exit();
952 
953 module_exit:
954     nv_module_exit(sp);
955 
956 caps_imex_exit:
957     nv_caps_imex_exit();
958 
959 caps_root_exit:
960     nv_caps_root_exit();
961 
962 procfs_exit:
963     nv_procfs_exit();
964 
965     return rc;
966 }
967 
968 static void __exit nvidia_exit_module(void)
969 {
970     nvidia_stack_t *sp = __nv_init_sp;
971 
972     nv_unregister_chrdev(NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE, 1,
973         &nv_linux_control_device_cdev);
974     nv_unregister_chrdev(0, NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1,
975         &nv_linux_devices_cdev);
976 
977 #if defined(NV_UVM_ENABLE)
978     nv_uvm_exit();
979 #endif
980 
981     nv_drivers_exit();
982 
983     nv_module_exit(sp);
984 
985     nv_caps_imex_exit();
986 
987     nv_caps_root_exit();
988 
989     nv_procfs_exit();
990 
991     nv_memdbg_exit();
992 }
993 
994 static void *nv_alloc_file_private(void)
995 {
996     nv_linux_file_private_t *nvlfp;
997 
998     NV_KZALLOC(nvlfp, sizeof(nv_linux_file_private_t));
999     if (!nvlfp)
1000         return NULL;
1001 
1002     init_waitqueue_head(&nvlfp->waitqueue);
1003     NV_SPIN_LOCK_INIT(&nvlfp->fp_lock);
1004 
1005     return nvlfp;
1006 }
1007 
1008 static void nv_free_file_private(nv_linux_file_private_t *nvlfp)
1009 {
1010     nvidia_event_t *nvet;
1011 
1012     if (nvlfp == NULL)
1013         return;
1014 
1015     for (nvet = nvlfp->event_data_head; nvet != NULL; nvet = nvlfp->event_data_head)
1016     {
1017         nvlfp->event_data_head = nvlfp->event_data_head->next;
1018         NV_KFREE(nvet, sizeof(nvidia_event_t));
1019     }
1020 
1021     if (nvlfp->mmap_context.page_array != NULL)
1022     {
1023         os_free_mem(nvlfp->mmap_context.page_array);
1024     }
1025 
1026     NV_KFREE(nvlfp, sizeof(nv_linux_file_private_t));
1027 }
1028 
1029 /*
1030  * Find the nv device with the given minor device number in the minor number
1031  * table. Caller should hold nv_linux_devices_lock using
1032  * LOCK_NV_LINUX_DEVICES. This function does not automatically take
1033  * nvl->ldata_lock, so the caller must do that if required.
1034  */
1035 static nv_linux_state_t *find_minor_locked(NvU32 minor)
1036 {
1037     nv_linux_state_t *nvl;
1038 
1039     if (minor > NV_MINOR_DEVICE_NUMBER_REGULAR_MAX)
1040         return NULL;
1041 
1042     nvl = nv_linux_minor_num_table[minor];
1043     if (nvl == NULL)
1044     {
1045         // there isn't actually a GPU present for nv_linux_minor_num_table[minor]
1046     }
1047     else if (nvl->minor_num != minor)
1048     {
1049         // nv_linux_minor_num_table out of sync -- this shouldn't happen
1050         WARN_ON(1);
1051         nvl = NULL;
1052     }
1053 
1054     return nvl;
1055 }
1056 
1057 /*
1058  * Find the nv device with the given minor device number in the minor number
1059  * table. If found, nvl is returned with nvl->ldata_lock taken.
1060  */
1061 static nv_linux_state_t *find_minor(NvU32 minor)
1062 {
1063     nv_linux_state_t *nvl;
1064 
1065     if (minor > NV_MINOR_DEVICE_NUMBER_REGULAR_MAX)
1066         return NULL;
1067 
1068     LOCK_NV_LINUX_DEVICES();
1069 
1070     nvl = find_minor_locked(minor);
1071     if (nvl != NULL)
1072     {
1073         down(&nvl->ldata_lock);
1074     }
1075 
1076     UNLOCK_NV_LINUX_DEVICES();
1077     return nvl;
1078 }
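/*
 * Typical caller pattern (sketch): the per-device lock returned held by
 * find_minor() must be dropped by the caller, e.g.
 *
 *     nv_linux_state_t *nvl = find_minor(minor);
 *     if (nvl != NULL)
 *     {
 *         // ... use NV_STATE_PTR(nvl) ...
 *         up(&nvl->ldata_lock);
 *     }
 */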
1079 
1080 /*
1081  * Search the global list of nv devices for the one with the given gpu_id.
1082  * If found, nvl is returned with nvl->ldata_lock taken.
1083  */
1084 static nv_linux_state_t *find_gpu_id(NvU32 gpu_id)
1085 {
1086     nv_linux_state_t *nvl;
1087 
1088     LOCK_NV_LINUX_DEVICES();
1089     nvl = nv_linux_devices;
1090     while (nvl != NULL)
1091     {
1092         nv_state_t *nv = NV_STATE_PTR(nvl);
1093         if (nv->gpu_id == gpu_id)
1094         {
1095             down(&nvl->ldata_lock);
1096             break;
1097         }
1098         nvl = nvl->next;
1099     }
1100 
1101     UNLOCK_NV_LINUX_DEVICES();
1102     return nvl;
1103 }
1104 
1105 /*
1106  * Search the global list of nv devices for the one with the given UUID. Devices
1107  * with missing UUID information are ignored. If found, nvl is returned with
1108  * nvl->ldata_lock taken.
1109  */
1110 nv_linux_state_t *find_uuid(const NvU8 *uuid)
1111 {
1112     nv_linux_state_t *nvl = NULL;
1113     nv_state_t *nv;
1114     const NvU8 *dev_uuid;
1115 
1116     LOCK_NV_LINUX_DEVICES();
1117 
1118     for (nvl = nv_linux_devices; nvl; nvl = nvl->next)
1119     {
1120         nv = NV_STATE_PTR(nvl);
1121         down(&nvl->ldata_lock);
1122         dev_uuid = nv_get_cached_uuid(nv);
1123         if (dev_uuid && memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
1124             goto out;
1125         up(&nvl->ldata_lock);
1126     }
1127 
1128 out:
1129     UNLOCK_NV_LINUX_DEVICES();
1130     return nvl;
1131 }
1132 
1133 /*
1134  * Search the global list of nv devices. The search logic is:
1135  *
1136  * 1) If any device has the given UUID, return it
1137  *
1138  * 2) If no device has the given UUID but at least one device is missing
1139  *    its UUID (for example because rm_init_adapter has not run on it yet),
1140  *    return that device.
1141  *
1142  * 3) If no device has the given UUID and all UUIDs are present, return NULL.
1143  *
1144  * In cases 1 and 2, nvl is returned with nvl->ldata_lock taken.
1145  *
1146  * The reason for this weird logic is because UUIDs aren't always available. See
1147  * bug 1642200.
1148  */
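/*
 * Worked example of the above: with devices A (UUID cached) and B (UUID not
 * yet cached, e.g. rm_init_adapter has not run on it), a lookup for a UUID
 * that does not match A returns B locked on the second pass, on the
 * assumption that B may turn out to own that UUID once it is initialized.
 */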
1149 static nv_linux_state_t *find_uuid_candidate(const NvU8 *uuid)
1150 {
1151     nv_linux_state_t *nvl = NULL;
1152     nv_state_t *nv;
1153     const NvU8 *dev_uuid;
1154     int use_missing;
1155     int has_missing = 0;
1156 
1157     LOCK_NV_LINUX_DEVICES();
1158 
1159     /*
1160      * Take two passes through the list. The first pass just looks for the UUID.
1161      * The second looks for the target or missing UUIDs. It would be nice if
1162      * this could be done in a single pass by remembering which nvls are missing
1163      * UUIDs, but we have to hold the nvl lock after we check for the UUID.
1164      */
1165     for (use_missing = 0; use_missing <= 1; use_missing++)
1166     {
1167         for (nvl = nv_linux_devices; nvl; nvl = nvl->next)
1168         {
1169             nv = NV_STATE_PTR(nvl);
1170             down(&nvl->ldata_lock);
1171             dev_uuid = nv_get_cached_uuid(nv);
1172             if (dev_uuid)
1173             {
1174                 /* Case 1: If a device has the given UUID, return it */
1175                 if (memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
1176                     goto out;
1177             }
1178             else
1179             {
1180                 /* Case 2: If no device has the given UUID but at least one
1181                  * device is missing its UUID, return that device. */
1182                 if (use_missing)
1183                     goto out;
1184                 has_missing = 1;
1185             }
1186             up(&nvl->ldata_lock);
1187         }
1188 
1189         /* Case 3: If no device has the given UUID and all UUIDs are present,
1190          * return NULL. */
1191         if (!has_missing)
1192             break;
1193     }
1194 
1195 out:
1196     UNLOCK_NV_LINUX_DEVICES();
1197     return nvl;
1198 }
1199 
1200 void nv_dev_free_stacks(nv_linux_state_t *nvl)
1201 {
1202     NvU32 i;
1203     for (i = 0; i < NV_DEV_STACK_COUNT; i++)
1204     {
1205         if (nvl->sp[i])
1206         {
1207             nv_kmem_cache_free_stack(nvl->sp[i]);
1208             nvl->sp[i] = NULL;
1209         }
1210     }
1211 }
1212 
1213 static int nv_dev_alloc_stacks(nv_linux_state_t *nvl)
1214 {
1215     NvU32 i;
1216     int rc;
1217 
1218     for (i = 0; i < NV_DEV_STACK_COUNT; i++)
1219     {
1220         rc = nv_kmem_cache_alloc_stack(&nvl->sp[i]);
1221         if (rc != 0)
1222         {
1223             nv_dev_free_stacks(nvl);
1224             return rc;
1225         }
1226     }
1227 
1228     return 0;
1229 }
1230 
1231 static int validate_numa_start_state(nv_linux_state_t *nvl)
1232 {
1233     int rc = 0;
1234     int numa_status = nv_get_numa_status(nvl);
1235 
1236     if (numa_status != NV_IOCTL_NUMA_STATUS_DISABLED)
1237     {
1238         if (nv_ctl_device.numa_memblock_size == 0)
1239         {
1240             nv_printf(NV_DBG_ERRORS, "NVRM: numa memblock size of zero "
1241                       "found during device start\n");
1242             rc = -EINVAL;
1243         }
1244         else
1245         {
1246             /* Keep the individual devices consistent with the control device */
1247             nvl->numa_memblock_size = nv_ctl_device.numa_memblock_size;
1248         }
1249     }
1250 
1251     return rc;
1252 }
1253 
1254 NV_STATUS NV_API_CALL nv_get_num_dpaux_instances(nv_state_t *nv, NvU32 *num_instances)
1255 {
1256     *num_instances = nv->num_dpaux_instance;
1257     return NV_OK;
1258 }
1259 
1260 void NV_API_CALL
1261 nv_schedule_uvm_isr(nv_state_t *nv)
1262 {
1263 #if defined(NV_UVM_ENABLE)
1264     nv_uvm_event_interrupt(nv_get_cached_uuid(nv));
1265 #endif
1266 }
1267 
1268 /*
1269  * Brings up the device on the first file open. Assumes nvl->ldata_lock is held.
1270  */
1271 static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
1272 {
1273     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
1274 #if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
1275     NvU32 msi_config = 0;
1276 #endif
1277     int rc = 0;
1278     NvBool kthread_init = NV_FALSE;
1279     NvBool remove_numa_memory_kthread_init = NV_FALSE;
1280     NvBool power_ref = NV_FALSE;
1281 
1282     rc = nv_get_rsync_info();
1283     if (rc != 0)
1284     {
1285         return rc;
1286     }
1287 
1288     rc = validate_numa_start_state(nvl);
1289     if (rc != 0)
1290     {
1291         goto failed;
1292     }
1293 
1294     if (dev_is_pci(nvl->dev) && (nv->pci_info.device_id == 0))
1295     {
1296         nv_printf(NV_DBG_ERRORS, "NVRM: open of non-existent GPU with minor number %d\n", nvl->minor_num);
1297         rc = -ENXIO;
1298         goto failed;
1299     }
1300 
1301     if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1302     {
1303         if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE) != NV_OK)
1304         {
1305             rc = -EINVAL;
1306             goto failed;
1307         }
1308         power_ref = NV_TRUE;
1309     }
1310     else
1311     {
1312         if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE) != NV_OK)
1313         {
1314             rc = -EINVAL;
1315             goto failed;
1316         }
1317         power_ref = NV_TRUE;
1318     }
1319 
1320     rc = nv_init_ibmnpu_devices(nv);
1321     if (rc != 0)
1322     {
1323         nv_printf(NV_DBG_ERRORS,
1324             "NVRM: failed to initialize ibmnpu devices attached to GPU with minor number %d\n",
1325             nvl->minor_num);
1326         goto failed;
1327     }
1328 
1329     if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1330     {
1331         rc = nv_dev_alloc_stacks(nvl);
1332         if (rc != 0)
1333             goto failed;
1334     }
1335 
1336 #if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
1337     if (dev_is_pci(nvl->dev))
1338     {
1339         if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1340         {
1341             rm_read_registry_dword(sp, nv, NV_REG_ENABLE_MSI, &msi_config);
1342             if (msi_config == 1)
1343             {
1344                 if (nvl->pci_dev->msix_cap && rm_is_msix_allowed(sp, nv))
1345                 {
1346                     nv_init_msix(nv);
1347                 }
1348                 if (nvl->pci_dev->msi_cap && !(nv->flags & NV_FLAG_USES_MSIX))
1349                 {
1350                     nv_init_msi(nv);
1351                 }
1352             }
1353         }
1354     }
1355 #endif
1356 
1357     if (((!(nv->flags & NV_FLAG_USES_MSI)) && (!(nv->flags & NV_FLAG_USES_MSIX)))
1358         && (nv->interrupt_line == 0) && !(nv->flags & NV_FLAG_SOC_DISPLAY)
1359         && !(nv->flags & NV_FLAG_SOC_IGPU))
1360     {
1361         NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
1362                       "No interrupts of any type are available. Cannot use this GPU.\n");
1363         rc = -EIO;
1364         goto failed;
1365     }
1366 
1367     rc = 0;
1368     if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1369     {
1370         if (nv->flags & NV_FLAG_SOC_DISPLAY)
1371         {
1372         }
1373         else if (!(nv->flags & NV_FLAG_USES_MSIX))
1374         {
1375             rc = request_threaded_irq(nv->interrupt_line, nvidia_isr,
1376                                   nvidia_isr_kthread_bh, nv_default_irq_flags(nv),
1377                                   nv_device_name, (void *)nvl);
1378         }
1379 #if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
1380         else
1381         {
1382             rc = nv_request_msix_irq(nvl);
1383         }
1384 #endif
1385     }
1386     if (rc != 0)
1387     {
1388         if ((nv->interrupt_line != 0) && (rc == -EBUSY))
1389         {
1390             NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
1391                 "Tried to get IRQ %d, but another driver\n",
1392                 (unsigned int) nv->interrupt_line);
1393             nv_printf(NV_DBG_ERRORS, "NVRM: has it and is not sharing it.\n");
1394             nv_printf(NV_DBG_ERRORS, "NVRM: You may want to verify that no audio driver");
1395             nv_printf(NV_DBG_ERRORS, " is using the IRQ.\n");
1396         }
1397         NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "request_irq() failed (%d)\n", rc);
1398         goto failed;
1399     }
1400 
1401     if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1402     {
1403         rc = os_alloc_mutex(&nvl->isr_bh_unlocked_mutex);
1404         if (rc != 0)
1405             goto failed;
1406         nv_kthread_q_item_init(&nvl->bottom_half_q_item, nvidia_isr_bh_unlocked, (void *)nv);
1407         rc = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name);
1408         if (rc != 0)
1409             goto failed;
1410         kthread_init = NV_TRUE;
1411 
1412         rc = nv_kthread_q_init(&nvl->queue.nvk, "nv_queue");
1413         if (rc)
1414             goto failed;
1415         nv->queue = &nvl->queue;
1416 
1417         if (nv_platform_use_auto_online(nvl))
1418         {
1419             rc = nv_kthread_q_init(&nvl->remove_numa_memory_q,
1420                                    "nv_remove_numa_memory");
1421             if (rc)
1422                 goto failed;
1423             remove_numa_memory_kthread_init = NV_TRUE;
1424         }
1425     }
1426 
1427     if (!rm_init_adapter(sp, nv))
1428     {
1429         if (!(nv->flags & NV_FLAG_USES_MSIX) &&
1430             !(nv->flags & NV_FLAG_SOC_DISPLAY) &&
1431             !(nv->flags & NV_FLAG_SOC_IGPU))
1432         {
1433             free_irq(nv->interrupt_line, (void *) nvl);
1434         }
1435         else if (nv->flags & NV_FLAG_SOC_DISPLAY)
1436         {
1437         }
1438 #if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
1439         else
1440         {
1441             nv_free_msix_irq(nvl);
1442         }
1443 #endif
1444         NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
1445                       "rm_init_adapter failed, device minor number %d\n",
1446                       nvl->minor_num);
1447         rc = -EIO;
1448         goto failed;
1449     }
1450 
1451     {
1452         const NvU8 *uuid = rm_get_gpu_uuid_raw(sp, nv);
1453 
1454         if (uuid != NULL)
1455         {
1456 #if defined(NV_UVM_ENABLE)
1457             nv_uvm_notify_start_device(uuid);
1458 #endif
1459         }
1460     }
1461 
1462     if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1463     {
1464         nv_acpi_register_notifier(nvl);
1465     }
1466 
1467     nv->flags |= NV_FLAG_OPEN;
1468 
1469     rm_request_dnotifier_state(sp, nv);
1470 
1471     /*
1472      * Now that RM init is done, allow dynamic power to control the GPU in FINE
1473      * mode, if enabled.  (If the mode is COARSE, this unref will do nothing
1474      * which will cause the GPU to remain powered up.)
1475      * This is balanced by a FINE ref increment at the beginning of
1476      * nv_stop_device().
1477      */
1478     rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
1479 
1480     return 0;
1481 
1482 failed:
1483 #if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
1484     if (nv->flags & NV_FLAG_USES_MSI)
1485     {
1486         nv->flags &= ~NV_FLAG_USES_MSI;
1487         NV_PCI_DISABLE_MSI(nvl->pci_dev);
1488         if(nvl->irq_count)
1489             NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
1490     }
1491     else if (nv->flags & NV_FLAG_USES_MSIX)
1492     {
1493         nv->flags &= ~NV_FLAG_USES_MSIX;
1494         pci_disable_msix(nvl->pci_dev);
1495         NV_KFREE(nvl->irq_count, nvl->num_intr*sizeof(nv_irq_count_info_t));
1496         NV_KFREE(nvl->msix_entries, nvl->num_intr*sizeof(struct msix_entry));
1497     }
1498 
1499     if (nvl->msix_bh_mutex)
1500     {
1501         os_free_mutex(nvl->msix_bh_mutex);
1502         nvl->msix_bh_mutex = NULL;
1503     }
1504 #endif
1505 
1506     if (nv->queue && !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1507     {
1508         nv->queue = NULL;
1509         nv_kthread_q_stop(&nvl->queue.nvk);
1510     }
1511 
1512     if (kthread_init && !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1513         nv_kthread_q_stop(&nvl->bottom_half_q);
1514 
1515     if (remove_numa_memory_kthread_init &&
1516         !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1517     {
1518         nv_kthread_q_stop(&nvl->remove_numa_memory_q);
1519     }
1520 
1521     if (nvl->isr_bh_unlocked_mutex)
1522     {
1523         os_free_mutex(nvl->isr_bh_unlocked_mutex);
1524         nvl->isr_bh_unlocked_mutex = NULL;
1525     }
1526 
1527     nv_dev_free_stacks(nvl);
1528 
1529     nv_unregister_ibmnpu_devices(nv);
1530 
1531     if (power_ref)
1532     {
1533         rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE);
1534     }
1535 
1536     nv_put_rsync_info();
1537 
1538     return rc;
1539 }
1540 
1541 /*
1542  * Makes sure the device is ready for operations and increases nvl->usage_count.
1543  * Assumes nvl->ldata_lock is held.
1544  */
1545 static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
1546 {
1547     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
1548     int rc;
1549     NV_STATUS status;
1550 
1551     if ((nv->flags & NV_FLAG_EXCLUDE) != 0)
1552     {
1553         char *uuid = rm_get_gpu_uuid(sp, nv);
1554         NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
1555                       "open() not permitted for excluded %s\n",
1556                       (uuid != NULL) ? uuid : "GPU");
1557         if (uuid != NULL)
1558             os_free_mem(uuid);
1559         return -EPERM;
1560     }
1561 
1562     if (os_is_vgx_hyper())
1563     {
1564         /* fail open if GPU is being unbound */
1565         if (nv->flags & NV_FLAG_UNBIND_LOCK)
1566         {
1567             NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
1568                           "Open failed as GPU is locked for unbind operation\n");
1569             return -ENODEV;
1570         }
1571     }
1572 
1573     NV_DEV_PRINTF(NV_DBG_INFO, nv, "Opening GPU with minor number %d\n",
1574                   nvl->minor_num);
1575 
1576     status = nv_check_gpu_state(nv);
1577     if (status == NV_ERR_GPU_IS_LOST)
1578     {
1579         NV_DEV_PRINTF(NV_DBG_INFO, nv, "Device in removal process\n");
1580         return -ENODEV;
1581     }
1582 
1583     if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX))
1584         return -EMFILE;
1585 
1586     if ( ! (nv->flags & NV_FLAG_OPEN))
1587     {
1588         /* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */
1589         if (NV_ATOMIC_READ(nvl->usage_count) != 0)
1590         {
1591             NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
1592                           "Minor device %u is referenced without being open!\n",
1593                           nvl->minor_num);
1594             WARN_ON(1);
1595             return -EBUSY;
1596         }
1597 
1598         rc = nv_start_device(nv, sp);
1599         if (rc != 0)
1600             return rc;
1601     }
1602     else if (rm_is_device_sequestered(sp, nv))
1603     {
1604         /* Do not increment the usage count of sequestered devices. */
1605         NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "Device is currently unavailable\n");
1606         return -EBUSY;
1607     }
1608 
1609     nv_assert_not_in_gpu_exclusion_list(sp, nv);
1610 
1611     NV_ATOMIC_INC(nvl->usage_count);
1612     return 0;
1613 }
1614 
1615 static void nv_init_mapping_revocation(nv_linux_state_t *nvl,
1616                                        struct file *file,
1617                                        nv_linux_file_private_t *nvlfp,
1618                                        struct inode *inode)
1619 {
1620     down(&nvl->mmap_lock);
1621 
1622     /* Set up struct address_space for use with unmap_mapping_range() */
1623     address_space_init_once(&nvlfp->mapping);
1624     nvlfp->mapping.host = inode;
1625     nvlfp->mapping.a_ops = inode->i_mapping->a_ops;
1626 #if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
1627     nvlfp->mapping.backing_dev_info = inode->i_mapping->backing_dev_info;
1628 #endif
1629     file->f_mapping = &nvlfp->mapping;
1630 
1631     /* Add nvlfp to list of open files in nvl for mapping revocation */
1632     list_add(&nvlfp->entry, &nvl->open_files);
1633 
1634     up(&nvl->mmap_lock);
1635 }
1636 
1637 /*
1638  * Like nv_open_device but stores rc and adapter status in the given nvlfp.
1639  * Assumes nvl->ldata_lock is held.
1640  */
1641 static int nv_open_device_for_nvlfp(
1642     nv_state_t *nv,
1643     nvidia_stack_t *sp,
1644     nv_linux_file_private_t *nvlfp
1645 )
1646 {
1647     nvlfp->open_rc = nv_open_device(nv, sp);
1648 
1649     if (nvlfp->open_rc == 0)
1650     {
1651         nvlfp->adapter_status = NV_OK;
1652     }
1653     else
1654     {
1655         nvlfp->adapter_status = rm_get_adapter_status_external(sp, nv);
1656     }
1657 
1658     return nvlfp->open_rc;
1659 }
1660 
1661 static void nvidia_open_deferred(void *nvlfp_raw)
1662 {
1663     nv_linux_file_private_t *nvlfp = (nv_linux_file_private_t *) nvlfp_raw;
1664     nv_linux_state_t *nvl = nvlfp->deferred_open_nvl;
1665     int rc;
1666 
1667     /*
1668      * Deferred opens and device removal are synchronized via
1669      * nvl->is_accepting_opens and nvl->open_q flushes so that nvl is
1670      * guaranteed to outlive any pending open operation.
1671      *
1672      * So, it is safe to take nvl->ldata_lock here without holding
1673      * any refcount or larger lock.
1674      *
1675      * Deferred opens and system suspend are synchronized by an explicit
1676      * nvl->open_q flush before suspending.
1677      *
1678      * So, it is safe to proceed without nv_system_pm_lock here (in fact, it
1679      * must not be taken to ensure nvl->open_q can make forward progress).
1680      */
1681     down(&nvl->ldata_lock);
1682     rc = nv_open_device_for_nvlfp(NV_STATE_PTR(nvl), nvlfp->sp, nvlfp);
1683     up(&nvl->ldata_lock);
1684 
1685     /* Set nvptr only upon success (where nvl->usage_count is incremented) */
1686     if (rc == 0)
1687         nvlfp->nvptr = nvl;
1688 
1689     complete_all(&nvlfp->open_complete);
1690 }
1691 
1692 /*
1693  * Tries to prepare (by taking nvl->ldata_lock) for an open in the foreground
1694  * for the given file and device.
1695  *
1696  * This succeeds if:
1697  * - O_NONBLOCK is not passed (or non-blocking opens are disabled), or
1698  * - O_NONBLOCK is passed, but we are able to determine (without blocking)
1699  *   that the device is already initialized
1700  *
1701  * Returns 0 with nvl->ldata_lock taken if open can occur in the foreground.
1702  * Otherwise, returns non-zero (without nvl->ldata_lock taken).
1703  */
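/*
 * In other words: with NVreg_EnableNonblockingOpen set and O_NONBLOCK passed,
 * an already-initialized device is opened in the foreground as usual, while
 * an uninitialized device causes nvidia_open() to queue the real work onto
 * nvl->open_q and return without blocking; completion is then signalled via
 * nvlfp->open_complete.
 */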
1704 static int nv_try_lock_foreground_open(
1705     struct file *file,
1706     nv_linux_state_t *nvl
1707 )
1708 {
1709     nv_state_t *nv = NV_STATE_PTR(nvl);
1710 
1711     if (NVreg_EnableNonblockingOpen && (file->f_flags & O_NONBLOCK))
1712     {
1713         if (down_trylock(&nvl->ldata_lock) == 0)
1714         {
1715             if (nv->flags & NV_FLAG_OPEN)
1716             {
1717                 /* device already initialized */
1718                 return 0;
1719             }
1720             else
1721             {
1722                 /* device not initialized yet */
1723                 up(&nvl->ldata_lock);
1724                 return -EWOULDBLOCK;
1725             }
1726         }
1727         else
1728         {
1729             /* unable to check nv->flags safely without blocking */
1730             return -EWOULDBLOCK;
1731         }
1732     }
1733 
1734     /* O_NONBLOCK not passed or non-blocking opens are disabled */
1735     down(&nvl->ldata_lock);
1736     return 0;
1737 }
1738 
1739 /*
1740 ** nvidia_open
1741 **
1742 ** nv driver open entry point.  Sessions are created here.
1743 */
1744 int
1745 nvidia_open(
1746     struct inode *inode,
1747     struct file *file
1748 )
1749 {
1750     nv_state_t *nv = NULL;
1751     nv_linux_state_t *nvl = NULL;
1752     int rc = 0;
1753     nv_linux_file_private_t *nvlfp = NULL;
1754     nvidia_stack_t *sp = NULL;
1755 
1756     nv_printf(NV_DBG_INFO, "NVRM: nvidia_open...\n");
1757 
1758     nvlfp = nv_alloc_file_private();
1759     if (nvlfp == NULL)
1760     {
1761         nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate file private!\n");
1762         return -ENOMEM;
1763     }
1764 
1765     rc = nv_kmem_cache_alloc_stack(&sp);
1766     if (rc != 0)
1767     {
1768         nv_free_file_private(nvlfp);
1769         return rc;
1770     }
1771 
1772     NV_SET_FILE_PRIVATE(file, nvlfp);
1773     nvlfp->sp = sp;
1774 
1775     /* for control device, just jump to its open routine */
1776     /* after setting up the private data */
1777     if (nv_is_control_device(inode))
1778     {
1779         rc = nvidia_ctl_open(inode, file);
1780         if (rc != 0)
1781             goto failed;
1782         return rc;
1783     }
1784 
1785     rc = nv_down_read_interruptible(&nv_system_pm_lock);
1786     if (rc < 0)
1787         goto failed;
1788 
1789     /* nvptr will get set to actual nvl upon successful open */
1790     nvlfp->nvptr = NULL;
1791 
1792     init_completion(&nvlfp->open_complete);
1793 
1794     LOCK_NV_LINUX_DEVICES();
1795 
1796     nvl = find_minor_locked(NV_DEVICE_MINOR_NUMBER(inode));
1797     if (nvl == NULL)
1798     {
1799         rc = -ENODEV;
1800         UNLOCK_NV_LINUX_DEVICES();
1801         up_read(&nv_system_pm_lock);
1802         goto failed;
1803     }
1804 
1805     nv = NV_STATE_PTR(nvl);
1806 
1807     if (nv_try_lock_foreground_open(file, nvl) == 0)
1808     {
1809         /* Proceed in foreground */
1810         /* nvl->ldata_lock is already taken at this point */
1811 
1812         UNLOCK_NV_LINUX_DEVICES();
1813 
1814         rc = nv_open_device_for_nvlfp(nv, nvlfp->sp, nvlfp);
1815 
1816         up(&nvl->ldata_lock);
1817 
1818         /* Set nvptr only upon success (where nvl->usage_count is incremented) */
1819         if (rc == 0)
1820             nvlfp->nvptr = nvl;
1821 
1822         complete_all(&nvlfp->open_complete);
1823     }
1824     else
1825     {
1826         /* Defer to background kthread */
1827         int item_scheduled = 0;
1828 
1829         /*
1830          * Take nvl->open_q_lock in order to check nvl->is_accepting_opens and
1831          * schedule work items on nvl->open_q.
1832          *
1833          * Continue holding nv_linux_devices_lock (LOCK_NV_LINUX_DEVICES)
1834          * until the work item gets onto nvl->open_q in order to ensure the
1835          * lifetime of nvl.
1836          */
1837         down(&nvl->open_q_lock);
1838 
1839         if (!nvl->is_accepting_opens)
1840         {
1841             /* Background kthread is not accepting opens, bail! */
1842             rc = -EBUSY;
1843             goto nonblock_end;
1844         }
1845 
1846         nvlfp->deferred_open_nvl = nvl;
1847         nv_kthread_q_item_init(&nvlfp->open_q_item,
1848                                nvidia_open_deferred,
1849                                nvlfp);
1850 
1851         item_scheduled = nv_kthread_q_schedule_q_item(
1852                 &nvl->open_q, &nvlfp->open_q_item);
1853 
1854         if (!item_scheduled)
1855         {
1856             WARN_ON(!item_scheduled);
1857             rc = -EBUSY;
1858         }
1859 
1860 nonblock_end:
1861         up(&nvl->open_q_lock);
1862         UNLOCK_NV_LINUX_DEVICES();
1863     }
1864 
1865     up_read(&nv_system_pm_lock);
1866 failed:
1867     if (rc != 0)
1868     {
1869         if (nvlfp != NULL)
1870         {
1871             nv_free_file_private(nvlfp);
1872             NV_SET_FILE_PRIVATE(file, NULL);
1873         }
1874     }
1875     else
1876     {
1877         nv_init_mapping_revocation(nvl, file, nvlfp, inode);
1878     }
1879 
1880     return rc;
1881 }
1882 
1883 static void validate_numa_shutdown_state(nv_linux_state_t *nvl)
1884 {
1885     int numa_status = nv_get_numa_status(nvl);
1886     WARN_ON((numa_status != NV_IOCTL_NUMA_STATUS_OFFLINE) &&
1887             (numa_status != NV_IOCTL_NUMA_STATUS_DISABLED));
1888 }
1889 
1890 void nv_shutdown_adapter(nvidia_stack_t *sp,
1891                          nv_state_t *nv,
1892                          nv_linux_state_t *nvl)
1893 {
1894 #if defined(NVCPU_PPC64LE)
1895     validate_numa_shutdown_state(nvl);
1896 #endif
1897 
1898     rm_disable_adapter(sp, nv);
1899 
1900     // It's safe to call nv_kthread_q_stop even if queue is not initialized
1901     nv_kthread_q_stop(&nvl->bottom_half_q);
1902 
1903     if (nv->queue != NULL)
1904     {
1905         nv->queue = NULL;
1906         nv_kthread_q_stop(&nvl->queue.nvk);
1907     }
1908 
1909     if (nvl->isr_bh_unlocked_mutex)
1910     {
1911         os_free_mutex(nvl->isr_bh_unlocked_mutex);
1912         nvl->isr_bh_unlocked_mutex = NULL;
1913     }
1914 
1915     if (!(nv->flags & NV_FLAG_USES_MSIX) &&
1916         !(nv->flags & NV_FLAG_SOC_DISPLAY) &&
1917         !(nv->flags & NV_FLAG_SOC_IGPU))
1918     {
1919         free_irq(nv->interrupt_line, (void *)nvl);
1920         if (nv->flags & NV_FLAG_USES_MSI)
1921         {
1922             NV_PCI_DISABLE_MSI(nvl->pci_dev);
1923             if (nvl->irq_count)
1924                 NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
1925         }
1926     }
1927     else if (nv->flags & NV_FLAG_SOC_DISPLAY)
1928     {
1929     }
1930 #if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
1931     else
1932     {
1933         nv_free_msix_irq(nvl);
1934         pci_disable_msix(nvl->pci_dev);
1935         nv->flags &= ~NV_FLAG_USES_MSIX;
1936         NV_KFREE(nvl->msix_entries, nvl->num_intr*sizeof(struct msix_entry));
1937         NV_KFREE(nvl->irq_count, nvl->num_intr*sizeof(nv_irq_count_info_t));
1938     }
1939 #endif
1940 
1941     if (nvl->msix_bh_mutex)
1942     {
1943         os_free_mutex(nvl->msix_bh_mutex);
1944         nvl->msix_bh_mutex = NULL;
1945     }
1946 
1947     rm_shutdown_adapter(sp, nv);
1948 
1949     if (nv_platform_use_auto_online(nvl))
1950         nv_kthread_q_stop(&nvl->remove_numa_memory_q);
1951 }
1952 
1953 /*
1954  * Tears down the device on the last file close. Assumes nvl->ldata_lock is
1955  * held.
1956  */
1957 static void nv_stop_device(nv_state_t *nv, nvidia_stack_t *sp)
1958 {
1959     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
1960     static int persistence_mode_notice_logged;
1961 
1962     /*
1963      * The GPU needs to be powered on to go through the teardown sequence.
1964      * This balances the FINE unref at the end of nv_start_device().
1965      */
1966     rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
1967 
1968 #if defined(NV_UVM_ENABLE)
1969     {
1970         const NvU8* uuid;
1971         // Inform UVM before disabling adapter. Use cached copy
1972         uuid = nv_get_cached_uuid(nv);
1973         if (uuid != NULL)
1974         {
1975             // this function cannot fail
1976             nv_uvm_notify_stop_device(uuid);
1977         }
1978     }
1979 #endif
1980     /* Adapter is already shutdown as part of nvidia_pci_remove */
1981     if (!nv->removed)
1982     {
1983         if (nv->flags & NV_FLAG_PERSISTENT_SW_STATE)
1984         {
1985             rm_disable_adapter(sp, nv);
1986         }
1987         else
1988         {
1989             nv_acpi_unregister_notifier(nvl);
1990             nv_shutdown_adapter(sp, nv, nvl);
1991         }
1992     }
1993 
1994     if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
1995     {
1996         nv_dev_free_stacks(nvl);
1997     }
1998 
1999     if ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) &&
2000         (!persistence_mode_notice_logged) && (!os_is_vgx_hyper()))
2001     {
2002         nv_printf(NV_DBG_ERRORS, "NVRM: Persistence mode is deprecated and"
2003                   " will be removed in a future release. Please use"
2004                   " nvidia-persistenced instead.\n");
2005         persistence_mode_notice_logged  = 1;
2006     }
2007 
2008     /* leave INIT flag alone so we don't reinit every time */
2009     nv->flags &= ~NV_FLAG_OPEN;
2010 
2011     nv_unregister_ibmnpu_devices(nv);
2012 
2013     if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
2014     {
2015         rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE);
2016     }
2017     else
2018     {
2019         /* If in legacy persistence mode, only unref FINE refcount. */
2020         rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
2021     }
2022 
2023     nv_put_rsync_info();
2024 }
2025 
2026 /*
2027  * Decreases nvl->usage_count, stopping the device when it reaches 0. Assumes
2028  * nvl->ldata_lock is held.
2029  */
2030 static void nv_close_device(nv_state_t *nv, nvidia_stack_t *sp)
2031 {
2032     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
2033 
2034     if (NV_ATOMIC_READ(nvl->usage_count) == 0)
2035     {
2036         nv_printf(NV_DBG_ERRORS,
2037                   "NVRM: Attempting to close unopened minor device %u!\n",
2038                   nvl->minor_num);
2039         WARN_ON(1);
2040         return;
2041     }
2042 
2043     if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count))
2044         nv_stop_device(nv, sp);
2045 }
2046 
2047 /*
2048 ** nvidia_close
2049 **
2050 ** Primary driver close entry point.
2051 */
2052 
2053 static void
2054 nvidia_close_callback(
2055    nv_linux_file_private_t *nvlfp
2056 )
2057 {
2058     nv_linux_state_t *nvl;
2059     nv_state_t *nv;
2060     nvidia_stack_t *sp = nvlfp->sp;
2061     NvBool bRemove = NV_FALSE;
2062 
2063     nvl = nvlfp->nvptr;
2064     if (nvl == NULL)
2065     {
2066         /*
2067          * If nvlfp has no associated nvl device (meaning the open operation
2068          * failed), then there is no state outside of nvlfp to cleanup.
2069          */
2070 
2071         nv_free_file_private(nvlfp);
2072         nv_kmem_cache_free_stack(sp);
2073         return;
2074     }
2075 
2076     nv = NV_STATE_PTR(nvl);
2077 
2078     rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
2079 
2080     down(&nvl->mmap_lock);
2081     list_del(&nvlfp->entry);
2082     up(&nvl->mmap_lock);
2083 
2084     down(&nvl->ldata_lock);
2085     nv_close_device(nv, sp);
2086 
2087     bRemove = (!NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv)) &&
2088               (NV_ATOMIC_READ(nvl->usage_count) == 0) &&
2089               rm_get_device_remove_flag(sp, nv->gpu_id);
2090 
2091     nv_free_file_private(nvlfp);
2092 
2093     /*
2094      * On surprise removal of the device there are two cases:
2095      *
2096      * 1> nvidia_pci_remove runs before nvidia_close.
2097      * nvidia_pci_remove does not destroy the Linux-layer locks or the nv
2098      * Linux state struct; it only sets nv->removed for nvidia_close.
2099      * Once all clients have closed, the last nvidia_close cleans up the
2100      * Linux-layer locks and the nv Linux state struct.
2101      *
2102      * 2> nvidia_close runs before nvidia_pci_remove.
2103      * This is the normal working case: nvidia_close does not clean up the
2104      * Linux-layer locks or the nv Linux state struct;
2105      * nvidia_pci_remove performs the necessary cleanup when it runs.
2106      */
2107     if ((NV_ATOMIC_READ(nvl->usage_count) == 0) && nv->removed)
2108     {
2109         nv_lock_destroy_locks(sp, nv);
2110         NV_KFREE(nvl, sizeof(nv_linux_state_t));
2111     }
2112     else
2113     {
2114         up(&nvl->ldata_lock);
2115 
2116 #if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE)
2117         if (bRemove)
2118         {
2119             NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(nvl->pci_dev);
2120         }
2121 #endif
2122     }
2123 
2124     nv_kmem_cache_free_stack(sp);
2125 }
2126 
2127 static void nvidia_close_deferred(void *data)
2128 {
2129     nv_linux_file_private_t *nvlfp = data;
2130 
2131     nv_wait_open_complete(nvlfp);
2132 
2133     down_read(&nv_system_pm_lock);
2134 
2135     nvidia_close_callback(nvlfp);
2136 
2137     up_read(&nv_system_pm_lock);
2138 }
2139 
2140 int
2141 nvidia_close(
2142     struct inode *inode,
2143     struct file *file
2144 )
2145 {
2146     int rc;
2147     nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2148 
2149     nv_printf(NV_DBG_INFO,
2150               "NVRM: nvidia_close on GPU with minor number %d\n",
2151               NV_DEVICE_MINOR_NUMBER(inode));
2152 
2153     if (nv_is_control_device(inode))
2154     {
2155         return nvidia_ctl_close(inode, file);
2156     }
2157 
2158     NV_SET_FILE_PRIVATE(file, NULL);
2159 
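         /*
          * A close must not run before its (possibly deferred) open has
          * completed.  If waiting for the open or taking the PM lock is
          * interrupted, the close itself is deferred to a kthread below
          * instead of blocking here.
          */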
2160     rc = nv_wait_open_complete_interruptible(nvlfp);
2161     if (rc == 0)
2162     {
2163         rc = nv_down_read_interruptible(&nv_system_pm_lock);
2164     }
2165 
2166     if (rc == 0)
2167     {
2168         nvidia_close_callback(nvlfp);
2169         up_read(&nv_system_pm_lock);
2170     }
2171     else
2172     {
2173         nv_kthread_q_item_init(&nvlfp->deferred_close_q_item,
2174                                nvidia_close_deferred,
2175                                nvlfp);
2176         rc = nv_kthread_q_schedule_q_item(&nv_deferred_close_kthread_q,
2177                                           &nvlfp->deferred_close_q_item);
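             /* nv_kthread_q_schedule_q_item() returns nonzero when the item is
              * queued; zero here means the deferred close could not be scheduled. */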
2178         WARN_ON(rc == 0);
2179     }
2180 
2181     return 0;
2182 }
2183 
2184 unsigned int
2185 nvidia_poll(
2186     struct file *file,
2187     poll_table  *wait
2188 )
2189 {
2190     unsigned int mask = 0;
2191     nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2192     unsigned long eflags;
2193     nv_linux_state_t *nvl;
2194     nv_state_t *nv;
2195     NV_STATUS status;
2196 
2197     if (!nv_is_control_device(NV_FILE_INODE(file)))
2198     {
2199         if (!nv_is_open_complete(nvlfp))
2200         {
2201             return POLLERR;
2202         }
2203     }
2204 
2205     nvl = nvlfp->nvptr;
2206     if (nvl == NULL)
2207     {
2208         return POLLERR;
2209     }
2210 
2211     nv = NV_STATE_PTR(nvl);
2212 
2213     status = nv_check_gpu_state(nv);
2214     if (status == NV_ERR_GPU_IS_LOST)
2215     {
2216         NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping nvidia_poll\n");
2217         return POLLHUP;
2218     }
2219 
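         /* Only blocking callers are added to the wait queue; O_NONBLOCK
          * callers just receive the currently pending event state. */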
2220     if ((file->f_flags & O_NONBLOCK) == 0)
2221         poll_wait(file, &nvlfp->waitqueue, wait);
2222 
2223     NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);
2224 
2225     if ((nvlfp->event_data_head != NULL) || nvlfp->dataless_event_pending)
2226     {
2227         mask = (POLLPRI | POLLIN);
2228         nvlfp->dataless_event_pending = NV_FALSE;
2229     }
2230 
2231     NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
2232 
2233     return mask;
2234 }
2235 
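     /*
      * These helpers reject ioctls issued against the wrong device node
      * (control node vs. per-GPU node) by jumping to the local 'done' label
      * with -EINVAL.
      */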
2236 #define NV_CTL_DEVICE_ONLY(nv)                 \
2237 {                                              \
2238     if (((nv)->flags & NV_FLAG_CONTROL) == 0)  \
2239     {                                          \
2240         status = -EINVAL;                      \
2241         goto done;                             \
2242     }                                          \
2243 }
2244 
2245 #define NV_ACTUAL_DEVICE_ONLY(nv)              \
2246 {                                              \
2247     if (((nv)->flags & NV_FLAG_CONTROL) != 0)  \
2248     {                                          \
2249         status = -EINVAL;                      \
2250         goto done;                             \
2251     }                                          \
2252 }
2253 
2254 /*
2255  * Fills the ci array with the state of num_entries devices. Returns -EINVAL if
2256  * num_entries isn't big enough to hold all available devices.
2257  */
2258 static int nvidia_read_card_info(nv_ioctl_card_info_t *ci, size_t num_entries)
2259 {
2260     nv_state_t *nv;
2261     nv_linux_state_t *nvl;
2262     size_t i = 0;
2263     int rc = 0;
2264 
2265     /* Clear each card's flags field the lazy way */
2266     memset(ci, 0, num_entries * sizeof(ci[0]));
2267 
2268     LOCK_NV_LINUX_DEVICES();
2269 
2270     if (num_entries < num_nv_devices)
2271     {
2272         rc = -EINVAL;
2273         goto out;
2274     }
2275 
2276     for (nvl = nv_linux_devices; nvl && i < num_entries; nvl = nvl->next)
2277     {
2278         nv = NV_STATE_PTR(nvl);
2279 
2280         /* We do not include excluded GPUs in the list... */
2281         if ((nv->flags & NV_FLAG_EXCLUDE) != 0)
2282             continue;
2283 
2284         ci[i].valid              = NV_TRUE;
2285         ci[i].pci_info.domain    = nv->pci_info.domain;
2286         ci[i].pci_info.bus       = nv->pci_info.bus;
2287         ci[i].pci_info.slot      = nv->pci_info.slot;
2288         ci[i].pci_info.vendor_id = nv->pci_info.vendor_id;
2289         ci[i].pci_info.device_id = nv->pci_info.device_id;
2290         ci[i].gpu_id             = nv->gpu_id;
2291         ci[i].interrupt_line     = nv->interrupt_line;
2292         ci[i].reg_address        = nv->regs->cpu_address;
2293         ci[i].reg_size           = nv->regs->size;
2294         ci[i].minor_number       = nvl->minor_num;
2295         if (dev_is_pci(nvl->dev))
2296         {
2297             ci[i].fb_address         = nv->fb->cpu_address;
2298             ci[i].fb_size            = nv->fb->size;
2299         }
2300         i++;
2301     }
2302 
2303 out:
2304     UNLOCK_NV_LINUX_DEVICES();
2305     return rc;
2306 }
2307 
2308 int
2309 nvidia_ioctl(
2310     struct inode *inode,
2311     struct file *file,
2312     unsigned int cmd,
2313     unsigned long i_arg)
2314 {
2315     NV_STATUS rmStatus;
2316     int status = 0;
2317     nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2318     nv_linux_state_t *nvl;
2319     nv_state_t *nv;
2320     nvidia_stack_t *sp = NULL;
2321     nv_ioctl_xfer_t ioc_xfer;
2322     void *arg_ptr = (void *) i_arg;
2323     void *arg_copy = NULL;
2324     size_t arg_size = 0;
2325     int arg_cmd;
2326 
2327     nv_printf(NV_DBG_INFO, "NVRM: ioctl(0x%x, 0x%x, 0x%x)\n",
2328         _IOC_NR(cmd), (unsigned int) i_arg, _IOC_SIZE(cmd));
2329 
2330     if (!nv_is_control_device(inode))
2331     {
2332         status = nv_wait_open_complete_interruptible(nvlfp);
2333         if (status != 0)
2334             goto done_early;
2335     }
2336 
2337     arg_size = _IOC_SIZE(cmd);
2338     arg_cmd  = _IOC_NR(cmd);
2339 
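         /*
          * NV_ESC_IOCTL_XFER_CMD wraps another escape whose payload cannot be
          * described directly by the ioctl command; the real command, size and
          * user pointer are taken from the nv_ioctl_xfer_t descriptor below.
          */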
2340     if (arg_cmd == NV_ESC_IOCTL_XFER_CMD)
2341     {
2342         if (arg_size != sizeof(nv_ioctl_xfer_t))
2343         {
2344             nv_printf(NV_DBG_ERRORS,
2345                     "NVRM: invalid ioctl XFER structure size!\n");
2346             status = -EINVAL;
2347             goto done_early;
2348         }
2349 
2350         if (NV_COPY_FROM_USER(&ioc_xfer, arg_ptr, sizeof(ioc_xfer)))
2351         {
2352             nv_printf(NV_DBG_ERRORS,
2353                     "NVRM: failed to copy in ioctl XFER data!\n");
2354             status = -EFAULT;
2355             goto done_early;
2356         }
2357 
2358         arg_cmd  = ioc_xfer.cmd;
2359         arg_size = ioc_xfer.size;
2360         arg_ptr  = NvP64_VALUE(ioc_xfer.ptr);
2361 
2362         if (arg_size > NV_ABSOLUTE_MAX_IOCTL_SIZE)
2363         {
2364             nv_printf(NV_DBG_ERRORS, "NVRM: invalid ioctl XFER size!\n");
2365             status = -EINVAL;
2366             goto done_early;
2367         }
2368     }
2369 
2370     NV_KMALLOC(arg_copy, arg_size);
2371     if (arg_copy == NULL)
2372     {
2373         nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate ioctl memory\n");
2374         status = -ENOMEM;
2375         goto done_early;
2376     }
2377 
2378     if (NV_COPY_FROM_USER(arg_copy, arg_ptr, arg_size))
2379     {
2380         nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy in ioctl data!\n");
2381         status = -EFAULT;
2382         goto done_early;
2383     }
2384 
2385     /*
2386      * Handle NV_ESC_WAIT_OPEN_COMPLETE early as it is allowed to work
2387      * with or without nvl.
2388      */
2389     if (arg_cmd == NV_ESC_WAIT_OPEN_COMPLETE)
2390     {
2391         nv_ioctl_wait_open_complete_t *params = arg_copy;
2392         params->rc = nvlfp->open_rc;
2393         params->adapterStatus = nvlfp->adapter_status;
2394         goto done_early;
2395     }
2396 
2397     nvl = nvlfp->nvptr;
2398     if (nvl == NULL)
2399     {
2400         status = -EIO;
2401         goto done_early;
2402     }
2403 
2404     nv = NV_STATE_PTR(nvl);
2405 
2406     status = nv_down_read_interruptible(&nv_system_pm_lock);
2407     if (status < 0)
2408     {
2409         goto done_early;
2410     }
2411 
2412     status = nv_kmem_cache_alloc_stack(&sp);
2413     if (status != 0)
2414     {
2415         nv_printf(NV_DBG_ERRORS, "NVRM: Unable to allocate altstack for ioctl\n");
2416         goto done_pm_unlock;
2417     }
2418 
2419     rmStatus = nv_check_gpu_state(nv);
2420     if (rmStatus == NV_ERR_GPU_IS_LOST)
2421     {
2422         nv_printf(NV_DBG_INFO, "NVRM: GPU is lost, skipping nvidia_ioctl\n");
2423         status = -EINVAL;
2424         goto done;
2425     }
2426 
2427     switch (arg_cmd)
2428     {
2429         case NV_ESC_QUERY_DEVICE_INTR:
2430         {
2431             nv_ioctl_query_device_intr *query_intr = arg_copy;
2432 
2433             NV_ACTUAL_DEVICE_ONLY(nv);
2434 
2435             if ((arg_size < sizeof(*query_intr)) ||
2436                 (!nv->regs->map))
2437             {
2438                 status = -EINVAL;
2439                 goto done;
2440             }
2441 
2442             query_intr->intrStatus =
2443                 *(nv->regs->map + (NV_RM_DEVICE_INTR_ADDRESS >> 2));
2444             query_intr->status = NV_OK;
2445             break;
2446         }
2447 
2448         /* pass out info about the card */
2449         case NV_ESC_CARD_INFO:
2450         {
2451             size_t num_arg_devices = arg_size / sizeof(nv_ioctl_card_info_t);
2452 
2453             NV_CTL_DEVICE_ONLY(nv);
2454 
2455             status = nvidia_read_card_info(arg_copy, num_arg_devices);
2456             break;
2457         }
2458 
2459         case NV_ESC_ATTACH_GPUS_TO_FD:
2460         {
2461             size_t num_arg_gpus = arg_size / sizeof(NvU32);
2462             size_t i;
2463 
2464             NV_CTL_DEVICE_ONLY(nv);
2465 
2466             if (num_arg_gpus == 0 || nvlfp->num_attached_gpus != 0 ||
2467                 arg_size % sizeof(NvU32) != 0)
2468             {
2469                 status = -EINVAL;
2470                 goto done;
2471             }
2472 
2473             NV_KMALLOC(nvlfp->attached_gpus, arg_size);
2474             if (nvlfp->attached_gpus == NULL)
2475             {
2476                 status = -ENOMEM;
2477                 goto done;
2478             }
2479             memcpy(nvlfp->attached_gpus, arg_copy, arg_size);
2480             nvlfp->num_attached_gpus = num_arg_gpus;
2481 
2482             for (i = 0; i < nvlfp->num_attached_gpus; i++)
2483             {
2484                 if (nvlfp->attached_gpus[i] == 0)
2485                 {
2486                     continue;
2487                 }
2488 
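                 /* Take a reference on each listed GPU; on failure, drop the
                  * references already taken and abandon the attach. */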
2489                 if (nvidia_dev_get(nvlfp->attached_gpus[i], sp))
2490                 {
2491                     while (i--)
2492                     {
2493                         if (nvlfp->attached_gpus[i] != 0)
2494                             nvidia_dev_put(nvlfp->attached_gpus[i], sp);
2495                     }
2496                     NV_KFREE(nvlfp->attached_gpus, arg_size);
2497                     nvlfp->num_attached_gpus = 0;
2498 
2499                     status = -EINVAL;
2500                     break;
2501                 }
2502             }
2503 
2504             break;
2505         }
2506 
2507         case NV_ESC_CHECK_VERSION_STR:
2508         {
2509             NV_CTL_DEVICE_ONLY(nv);
2510 
2511             rmStatus = rm_perform_version_check(sp, arg_copy, arg_size);
2512             status = ((rmStatus == NV_OK) ? 0 : -EINVAL);
2513             break;
2514         }
2515 
2516         case NV_ESC_SYS_PARAMS:
2517         {
2518             nv_ioctl_sys_params_t *api = arg_copy;
2519 
2520             NV_CTL_DEVICE_ONLY(nv);
2521 
2522             if (arg_size != sizeof(nv_ioctl_sys_params_t))
2523             {
2524                 status = -EINVAL;
2525                 goto done;
2526             }
2527 
2528             /* numa_memblock_size should only be set once */
2529             if (nvl->numa_memblock_size == 0)
2530             {
2531                 nvl->numa_memblock_size = api->memblock_size;
2532             }
2533             else
2534             {
2535                 status = (nvl->numa_memblock_size == api->memblock_size) ?
2536                     0 : -EBUSY;
2537                 goto done;
2538             }
2539             break;
2540         }
2541 
2542         case NV_ESC_NUMA_INFO:
2543         {
2544             nv_ioctl_numa_info_t *api = arg_copy;
2545             rmStatus = NV_OK;
2546 
2547             NV_ACTUAL_DEVICE_ONLY(nv);
2548 
2549             if (arg_size != sizeof(nv_ioctl_numa_info_t))
2550             {
2551                 status = -EINVAL;
2552                 goto done;
2553             }
2554 
2555             rmStatus = rm_get_gpu_numa_info(sp, nv, api);
2556             if (rmStatus != NV_OK)
2557             {
2558                 status = -EBUSY;
2559                 goto done;
2560             }
2561 
2562             api->status = nv_get_numa_status(nvl);
2563             api->use_auto_online = nv_platform_use_auto_online(nvl);
2564             api->memblock_size = nv_ctl_device.numa_memblock_size;
2565             break;
2566         }
2567 
2568         case NV_ESC_SET_NUMA_STATUS:
2569         {
2570             nv_ioctl_set_numa_status_t *api = arg_copy;
2571             rmStatus = NV_OK;
2572 
2573             if (!NV_IS_SUSER())
2574             {
2575                 status = -EACCES;
2576                 goto done;
2577             }
2578 
2579             NV_ACTUAL_DEVICE_ONLY(nv);
2580 
2581             if (arg_size != sizeof(nv_ioctl_set_numa_status_t))
2582             {
2583                 status = -EINVAL;
2584                 goto done;
2585             }
2586 
2587             /*
2588              * The nv_linux_state_t for the device needs to be locked
2589              * in order to prevent additional open()/close() calls from
2590              * manipulating the usage count for the device while we
2591              * determine if NUMA state can be changed.
2592              */
2593             down(&nvl->ldata_lock);
2594 
2595             if (nv_get_numa_status(nvl) != api->status)
2596             {
2597                 if (api->status == NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS)
2598                 {
2599                     /*
2600                      * Only the current client should have an open file
2601                      * descriptor for the device, to allow safe offlining.
2602                      */
2603                     if (NV_ATOMIC_READ(nvl->usage_count) > 1)
2604                     {
2605                         status = -EBUSY;
2606                         goto unlock;
2607                     }
2608                     else
2609                     {
2610                         /*
2611                          * If this call fails, it indicates that RM
2612                          * is not ready to offline memory, and we should keep
2613                          * the current NUMA status of ONLINE.
2614                          */
2615                         rmStatus = rm_gpu_numa_offline(sp, nv);
2616                         if (rmStatus != NV_OK)
2617                         {
2618                             status = -EBUSY;
2619                             goto unlock;
2620                         }
2621                     }
2622                 }
2623 
2624                 status = nv_set_numa_status(nvl, api->status);
2625                 if (status < 0)
2626                 {
2627                     if (api->status == NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS)
2628                         (void) rm_gpu_numa_online(sp, nv);
2629                     goto unlock;
2630                 }
2631 
2632                 if (api->status == NV_IOCTL_NUMA_STATUS_ONLINE)
2633                 {
2634                     rmStatus = rm_gpu_numa_online(sp, nv);
2635                     if (rmStatus != NV_OK)
2636                     {
2637                         status = -EBUSY;
2638                         goto unlock;
2639                     }
2640                 }
2641             }
2642 
2643 unlock:
2644             up(&nvl->ldata_lock);
2645 
2646             break;
2647         }
2648 
2649         case NV_ESC_EXPORT_TO_DMABUF_FD:
2650         {
2651             nv_ioctl_export_to_dma_buf_fd_t *params = arg_copy;
2652 
2653             if (arg_size != sizeof(nv_ioctl_export_to_dma_buf_fd_t))
2654             {
2655                 status = -EINVAL;
2656                 goto done;
2657             }
2658 
2659             NV_ACTUAL_DEVICE_ONLY(nv);
2660 
2661             params->status = nv_dma_buf_export(nv, params);
2662 
2663             break;
2664         }
2665 
2666         default:
2667             rmStatus = rm_ioctl(sp, nv, &nvlfp->nvfp, arg_cmd, arg_copy, arg_size);
2668             status = ((rmStatus == NV_OK) ? 0 : -EINVAL);
2669             break;
2670     }
2671 
2672 done:
2673     nv_kmem_cache_free_stack(sp);
2674 
2675 done_pm_unlock:
2676     up_read(&nv_system_pm_lock);
2677 
2678 done_early:
2679     if (arg_copy != NULL)
2680     {
2681         if (status != -EFAULT)
2682         {
2683             if (NV_COPY_TO_USER(arg_ptr, arg_copy, arg_size))
2684             {
2685                 nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy out ioctl data\n");
2686                 status = -EFAULT;
2687             }
2688         }
2689         NV_KFREE(arg_copy, arg_size);
2690     }
2691 
2692     return status;
2693 }
2694 
2695 long nvidia_unlocked_ioctl(
2696     struct file *file,
2697     unsigned int cmd,
2698     unsigned long i_arg
2699 )
2700 {
2701     return nvidia_ioctl(NV_FILE_INODE(file), file, cmd, i_arg);
2702 }
2703 
2704 irqreturn_t
2705 nvidia_isr_msix(
2706     int   irq,
2707     void *arg
2708 )
2709 {
2710     irqreturn_t ret;
2711     nv_linux_state_t *nvl = (void *) arg;
2712 
2713     // nvidia_isr_msix() is called for each of the MSI-X vectors and they can
2714     // run in parallel on different CPUs (cores), but this is not currently
2715     // supported by nvidia_isr() and its children. As a big hammer fix just
2716     // spinlock around the nvidia_isr() call to serialize them.
2717     //
2718     // At this point interrupts are disabled on the CPU running our ISR (see
2719     // comments for nv_default_irq_flags()) so a plain spinlock is enough.
2720     NV_SPIN_LOCK(&nvl->msix_isr_lock);
2721 
2722     ret = nvidia_isr(irq, arg);
2723 
2724     NV_SPIN_UNLOCK(&nvl->msix_isr_lock);
2725 
2726     return ret;
2727 }
2728 
2729 /*
2730  * driver receives an interrupt
2731  *    if someone waiting, then hand it off.
2732  */
2733 irqreturn_t
2734 nvidia_isr(
2735     int   irq,
2736     void *arg
2737 )
2738 {
2739     nv_linux_state_t *nvl = (void *) arg;
2740     nv_state_t *nv = NV_STATE_PTR(nvl);
2741     NvU32 need_to_run_bottom_half_gpu_lock_held = 0;
2742     NvBool rm_handled = NV_FALSE, uvm_handled = NV_FALSE, rm_fault_handling_needed = NV_FALSE;
2743     NvU32 rm_serviceable_fault_cnt = 0;
2744     NvU32 sec, usec;
2745     NvU16 index = 0;
2746     NvU64 currentTime = 0;
2747     NvBool found_irq = NV_FALSE;
2748 
2749     rm_gpu_handle_mmu_faults(nvl->sp[NV_DEV_STACK_ISR], nv, &rm_serviceable_fault_cnt);
2750     rm_fault_handling_needed = (rm_serviceable_fault_cnt != 0);
2751 
2752 #if defined (NV_UVM_ENABLE)
2753     //
2754     // Returns NV_OK if the UVM driver handled the interrupt
2755     //
2756     // Returns NV_ERR_NO_INTR_PENDING if the interrupt is not for
2757     // the UVM driver.
2758     //
2759     // Returns NV_WARN_MORE_PROCESSING_REQUIRED if the UVM top-half ISR was
2760     // unable to get its lock(s), due to other (UVM) threads holding them.
2761     //
2762     // RM can normally treat NV_WARN_MORE_PROCESSING_REQUIRED the same as
2763     // NV_ERR_NO_INTR_PENDING, but in some cases the extra information may
2764     // be helpful.
2765     //
2766     if (nv_uvm_event_interrupt(nv_get_cached_uuid(nv)) == NV_OK)
2767         uvm_handled = NV_TRUE;
2768 #endif
2769 
2770     rm_handled = rm_isr(nvl->sp[NV_DEV_STACK_ISR], nv,
2771                         &need_to_run_bottom_half_gpu_lock_held);
2772 
2773     /* Replicate the Linux kernel's logic for tracking unhandled interrupts that cross a threshold. */
2774     if ((nv->flags & NV_FLAG_USES_MSI) || (nv->flags & NV_FLAG_USES_MSIX))
2775     {
2776         if (nvl->irq_count != NULL)
2777         {
2778             for (index = 0; index < nvl->current_num_irq_tracked; index++)
2779             {
2780                 if (nvl->irq_count[index].irq == irq)
2781                 {
2782                     found_irq = NV_TRUE;
2783                     break;
2784                 }
2785 
2786                 found_irq = NV_FALSE;
2787             }
2788 
2789             if (!found_irq && nvl->current_num_irq_tracked < nvl->num_intr)
2790             {
2791                 index = nvl->current_num_irq_tracked;
2792                 nvl->irq_count[index].irq = irq;
2793                 nvl->current_num_irq_tracked++;
2794                 found_irq = NV_TRUE;
2795             }
2796 
2797             if (found_irq)
2798             {
2799                 nvl->irq_count[index].total++;
2800 
2801                 if (rm_handled == NV_FALSE)
2802                 {
2803                     os_get_current_time(&sec, &usec);
2804                     currentTime = ((NvU64)sec) * 1000000 + (NvU64)usec;
2805 
2806                     /* Reset unhandled count if it's been more than 0.1 seconds since the last unhandled IRQ */
2807                     if ((currentTime - nvl->irq_count[index].last_unhandled) > RM_UNHANDLED_TIMEOUT_US)
2808                         nvl->irq_count[index].unhandled = 1;
2809                     else
2810                         nvl->irq_count[index].unhandled++;
2811 
2812                     nvl->irq_count[index].last_unhandled = currentTime;
2813                     rm_handled = NV_TRUE;
2814                 }
2815 
2816                 if (nvl->irq_count[index].total >= RM_THRESHOLD_TOTAL_IRQ_COUNT)
2817                 {
2818                     if (nvl->irq_count[index].unhandled > RM_THRESHOLD_UNAHNDLED_IRQ_COUNT)
2819                         nv_printf(NV_DBG_ERRORS,"NVRM: Going over RM unhandled interrupt threshold for irq %d\n", irq);
2820 
2821                     nvl->irq_count[index].total = 0;
2822                     nvl->irq_count[index].unhandled = 0;
2823                     nvl->irq_count[index].last_unhandled = 0;
2824                 }
2825             }
2826             else
2827                 nv_printf(NV_DBG_ERRORS,"NVRM: IRQ number out of valid range\n");
2828         }
2829     }
2830 
2831     if (need_to_run_bottom_half_gpu_lock_held)
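         /*
          * IRQ_WAKE_THREAD wakes the threaded handler registered for this
          * interrupt; fault work that must take the GPU lock is instead queued
          * to the kthread-based bottom half below.
          */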
2832     {
2833         return IRQ_WAKE_THREAD;
2834     }
2835     else
2836     {
2837         //
2838         // If rm_isr does not need to run a bottom half and mmu_faults_copied
2839         // indicates that bottom half is needed, then we enqueue a kthread based
2840         // bottom half, as this specific bottom_half will acquire the GPU lock
2841         //
2842         if (rm_fault_handling_needed)
2843             nv_kthread_q_schedule_q_item(&nvl->bottom_half_q, &nvl->bottom_half_q_item);
2844     }
2845 
2846     return IRQ_RETVAL(rm_handled || uvm_handled || rm_fault_handling_needed);
2847 }
2848 
2849 irqreturn_t
2850 nvidia_isr_kthread_bh(
2851     int irq,
2852     void *data
2853 )
2854 {
2855     return nvidia_isr_common_bh(data);
2856 }
2857 
2858 irqreturn_t
2859 nvidia_isr_msix_kthread_bh(
2860     int irq,
2861     void *data
2862 )
2863 {
2864     NV_STATUS status;
2865     irqreturn_t ret;
2866     nv_state_t *nv = (nv_state_t *) data;
2867     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
2868 
2869     //
2870     // Synchronize kthreads servicing bottom halves for different MSI-X vectors
2871     // as they share the same pre-allocated alt-stack.
2872     //
2873     status = os_acquire_mutex(nvl->msix_bh_mutex);
2874     // os_acquire_mutex() can only fail if the caller cannot sleep, and this bottom half can.
2875     WARN_ON(status != NV_OK);
2876 
2877     ret = nvidia_isr_common_bh(data);
2878 
2879     os_release_mutex(nvl->msix_bh_mutex);
2880 
2881     return ret;
2882 }
2883 
2884 static irqreturn_t
2885 nvidia_isr_common_bh(
2886     void *data
2887 )
2888 {
2889     nv_state_t *nv = (nv_state_t *) data;
2890     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
2891     nvidia_stack_t *sp = nvl->sp[NV_DEV_STACK_ISR_BH];
2892     NV_STATUS status;
2893 
2894     status = nv_check_gpu_state(nv);
2895     if (status == NV_ERR_GPU_IS_LOST)
2896     {
2897         nv_printf(NV_DBG_INFO, "NVRM: GPU is lost, skipping ISR bottom half\n");
2898     }
2899     else
2900     {
2901         rm_isr_bh(sp, nv);
2902     }
2903 
2904     return IRQ_HANDLED;
2905 }
2906 
2907 static void
2908 nvidia_isr_bh_unlocked(
2909     void * args
2910 )
2911 {
2912     nv_state_t *nv = (nv_state_t *) args;
2913     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
2914     nvidia_stack_t *sp;
2915     NV_STATUS status;
2916 
2917     //
2918     // Synchronize kthreads servicing the unlocked bottom half, as they
2919     // share the same pre-allocated alt-stack.
2920     //
2921     status = os_acquire_mutex(nvl->isr_bh_unlocked_mutex);
2922     if (status != NV_OK)
2923     {
2924         nv_printf(NV_DBG_ERRORS, "NVRM: %s: Unable to take bottom_half mutex!\n",
2925                   __FUNCTION__);
2926         WARN_ON(1);
2927     }
2928 
2929     sp = nvl->sp[NV_DEV_STACK_ISR_BH_UNLOCKED];
2930 
2931     status = nv_check_gpu_state(nv);
2932     if (status == NV_ERR_GPU_IS_LOST)
2933     {
2934         nv_printf(NV_DBG_INFO,
2935             "NVRM: GPU is lost, skipping unlocked ISR bottom half\n");
2936     }
2937     else
2938     {
2939         rm_isr_bh_unlocked(sp, nv);
2940     }
2941 
2942     os_release_mutex(nvl->isr_bh_unlocked_mutex);
2943 }
2944 
2945 static void
2946 nvidia_rc_timer_callback(
2947     struct nv_timer *nv_timer
2948 )
2949 {
2950     nv_linux_state_t *nvl = container_of(nv_timer, nv_linux_state_t, rc_timer);
2951     nv_state_t *nv = NV_STATE_PTR(nvl);
2952     nvidia_stack_t *sp = nvl->sp[NV_DEV_STACK_TIMER];
2953     NV_STATUS status;
2954 
2955     status = nv_check_gpu_state(nv);
2956     if (status == NV_ERR_GPU_IS_LOST)
2957     {
2958         nv_printf(NV_DBG_INFO,
2959             "NVRM: GPU is lost, skipping device timer callbacks\n");
2960         return;
2961     }
2962 
2963     if (rm_run_rc_callback(sp, nv) == NV_OK)
2964     {
2965         // set another timeout 1 sec in the future:
2966         mod_timer(&nvl->rc_timer.kernel_timer, jiffies + HZ);
2967     }
2968 }
2969 
2970 /*
2971 ** nvidia_ctl_open
2972 **
2973 ** nv control driver open entry point.  Sessions are created here.
2974 */
2975 static int
2976 nvidia_ctl_open(
2977     struct inode *inode,
2978     struct file *file
2979 )
2980 {
2981     nv_linux_state_t *nvl = &nv_ctl_device;
2982     nv_state_t *nv = NV_STATE_PTR(nvl);
2983     nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2984 
2985     nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_open\n");
2986 
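         /*
          * All opens of the control node share the single nv_ctl_device state:
          * the OPEN/CONTROL flags are set on the first reference and
          * usage_count tracks the number of open file descriptors.
          */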
2987     down(&nvl->ldata_lock);
2988 
2989     /* save the nv away in file->private_data */
2990     nvlfp->nvptr = nvl;
2991 
2992     if (NV_ATOMIC_READ(nvl->usage_count) == 0)
2993     {
2994         nv->flags |= (NV_FLAG_OPEN | NV_FLAG_CONTROL);
2995     }
2996 
2997     NV_ATOMIC_INC(nvl->usage_count);
2998     up(&nvl->ldata_lock);
2999 
3000     return 0;
3001 }
3002 
3003 
3004 /*
3005 ** nvidia_ctl_close
3006 */
3007 static int
3008 nvidia_ctl_close(
3009     struct inode *inode,
3010     struct file *file
3011 )
3012 {
3013     nv_alloc_t *at, *next;
3014     nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file);
3015     nv_state_t *nv = NV_STATE_PTR(nvl);
3016     nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
3017     nvidia_stack_t *sp = nvlfp->sp;
3018 
3019     nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_close\n");
3020 
3021     down(&nvl->ldata_lock);
3022     if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count))
3023     {
3024         nv->flags &= ~NV_FLAG_OPEN;
3025     }
3026     up(&nvl->ldata_lock);
3027 
3028     rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
3029 
3030     if (nvlfp->free_list != NULL)
3031     {
3032         at = nvlfp->free_list;
3033         while (at != NULL)
3034         {
3035             next = at->next;
3036             if (at->pid == os_get_current_process())
3037                 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3038             nv_free_pages(nv, at->num_pages,
3039                           at->flags.contig,
3040                           at->cache_type,
3041                           (void *)at);
3042             at = next;
3043         }
3044     }
3045 
3046     if (nvlfp->num_attached_gpus != 0)
3047     {
3048         size_t i;
3049 
3050         for (i = 0; i < nvlfp->num_attached_gpus; i++)
3051         {
3052             if (nvlfp->attached_gpus[i] != 0)
3053                 nvidia_dev_put(nvlfp->attached_gpus[i], sp);
3054         }
3055 
3056         NV_KFREE(nvlfp->attached_gpus, sizeof(NvU32) * nvlfp->num_attached_gpus);
3057         nvlfp->num_attached_gpus = 0;
3058     }
3059 
3060     nv_free_file_private(nvlfp);
3061     NV_SET_FILE_PRIVATE(file, NULL);
3062 
3063     nv_kmem_cache_free_stack(sp);
3064 
3065     return 0;
3066 }
3067 
3068 
3069 void NV_API_CALL
3070 nv_set_dma_address_size(
3071     nv_state_t  *nv,
3072     NvU32       phys_addr_bits
3073 )
3074 {
3075     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3076     NvU64 start_addr = nv_get_dma_start_address(nv);
3077     NvU64 new_mask = (((NvU64)1) << phys_addr_bits) - 1;
3078 
3079     nvl->dma_dev.addressable_range.limit = start_addr + new_mask;
3080 
3081     /*
3082      * The only scenario in which we definitely should not update the DMA mask
3083      * is on POWER, when using TCE bypass mode (see nv_get_dma_start_address()
3084      * for details), since the meaning of the DMA mask is overloaded in that
3085      * case.
3086      */
3087     if (!nvl->tce_bypass_enabled)
3088     {
3089         dma_set_mask(&nvl->pci_dev->dev, new_mask);
3090         /* Certain kernels have a bug that causes pci_set_consistent_dma_mask
3091          * to call the GPL-only sme_active symbol. The bug has been fixed in a
3092          * minor release update, but detect the failure scenario here to avoid
3093          * an installation regression. */
3094 #if !NV_IS_EXPORT_SYMBOL_GPL_sme_active
3095         dma_set_coherent_mask(&nvl->pci_dev->dev, new_mask);
3096 #endif
3097     }
3098 }
3099 
3100 static NvUPtr
3101 nv_map_guest_pages(nv_alloc_t *at,
3102                    NvU64 address,
3103                    NvU32 page_count,
3104                    NvU32 page_idx)
3105 {
3106     struct page **pages;
3107     NvU32 j;
3108     NvUPtr virt_addr;
3109 
3110     NV_KMALLOC(pages, sizeof(struct page *) * page_count);
3111     if (pages == NULL)
3112     {
3113         nv_printf(NV_DBG_ERRORS,
3114                   "NVRM: failed to allocate vmap() page descriptor table!\n");
3115         return 0;
3116     }
3117 
3118     for (j = 0; j < page_count; j++)
3119     {
3120         pages[j] = NV_GET_PAGE_STRUCT(at->page_table[page_idx+j]->phys_addr);
3121     }
3122 
3123     virt_addr = nv_vm_map_pages(pages, page_count,
3124         at->cache_type == NV_MEMORY_CACHED, at->flags.unencrypted);
3125     NV_KFREE(pages, sizeof(struct page *) * page_count);
3126 
3127     return virt_addr;
3128 }
3129 
3130 NV_STATUS NV_API_CALL
3131 nv_alias_pages(
3132     nv_state_t *nv,
3133     NvU32 page_cnt,
3134     NvU32 contiguous,
3135     NvU32 cache_type,
3136     NvU64 guest_id,
3137     NvU64 *pte_array,
3138     void **priv_data
3139 )
3140 {
3141     nv_alloc_t *at;
3142     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3143     NvU32 i=0;
3144     nvidia_pte_t *page_ptr = NULL;
3145 
3146     at = nvos_create_alloc(nvl->dev, page_cnt);
3147 
3148     if (at == NULL)
3149     {
3150         return NV_ERR_NO_MEMORY;
3151     }
3152 
3153     at->cache_type = cache_type;
3154     if (contiguous)
3155         at->flags.contig = NV_TRUE;
3156 #if defined(NVCPU_AARCH64)
3157     if (at->cache_type != NV_MEMORY_CACHED)
3158         at->flags.aliased = NV_TRUE;
3159 #endif
3160 
3161     at->flags.guest = NV_TRUE;
3162 
3163     at->order = get_order(at->num_pages * PAGE_SIZE);
3164 
3165     for (i=0; i < at->num_pages; ++i)
3166     {
3167         page_ptr = at->page_table[i];
3168 
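             /* For contiguous allocations only the first PTE is supplied; later
              * pages sit at fixed PAGE_SIZE offsets from it. */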
3169         if (contiguous && i>0)
3170         {
3171             page_ptr->dma_addr = pte_array[0] + (i << PAGE_SHIFT);
3172         }
3173         else
3174         {
3175             page_ptr->dma_addr  = pte_array[i];
3176         }
3177 
3178         page_ptr->phys_addr = page_ptr->dma_addr;
3179 
3180         /* aliased pages will be mapped on demand. */
3181         page_ptr->virt_addr = 0x0;
3182     }
3183 
3184     at->guest_id = guest_id;
3185     *priv_data = at;
3186     NV_ATOMIC_INC(at->usage_count);
3187 
3188     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3189 
3190     return NV_OK;
3191 }
3192 
3193 /*
3194  *   This creates a dummy nv_alloc_t for peer IO mem, so that it can
3195  *   be mapped using NvRmMapMemory.
3196  */
3197 NV_STATUS NV_API_CALL nv_register_peer_io_mem(
3198     nv_state_t *nv,
3199     NvU64      *phys_addr,
3200     NvU64       page_count,
3201     void      **priv_data
3202 )
3203 {
3204     nv_alloc_t *at;
3205     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3206     NvU64 i;
3207     NvU64 addr;
3208 
3209     at = nvos_create_alloc(nvl->dev, page_count);
3210 
3211     if (at == NULL)
3212         return NV_ERR_NO_MEMORY;
3213 
3214     // IO regions should be uncached and contiguous
3215     at->cache_type = NV_MEMORY_UNCACHED;
3216     at->flags.contig = NV_TRUE;
3217 #if defined(NVCPU_AARCH64)
3218     at->flags.aliased = NV_TRUE;
3219 #endif
3220     at->flags.peer_io = NV_TRUE;
3221 
3222     at->order = get_order(at->num_pages * PAGE_SIZE);
3223 
3224     addr = phys_addr[0];
3225 
3226     for (i = 0; i < page_count; i++)
3227     {
3228         at->page_table[i]->phys_addr = addr;
3229         addr += PAGE_SIZE;
3230     }
3231 
3232     // No struct page array exists for this memory.
3233     at->user_pages = NULL;
3234 
3235     *priv_data = at;
3236 
3237     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3238 
3239     return NV_OK;
3240 }
3241 
3242 void NV_API_CALL nv_unregister_peer_io_mem(
3243     nv_state_t *nv,
3244     void       *priv_data
3245 )
3246 {
3247     nv_alloc_t *at = priv_data;
3248 
3249     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3250 
3251     nvos_free_alloc(at);
3252 }
3253 
3254 /*
3255  * By registering user pages, we create a dummy nv_alloc_t for it, so that the
3256  * rest of the RM can treat it like any other alloc.
3257  *
3258  * This also converts the page array to an array of physical addresses.
3259  */
3260 NV_STATUS NV_API_CALL nv_register_user_pages(
3261     nv_state_t *nv,
3262     NvU64       page_count,
3263     NvU64      *phys_addr,
3264     void       *import_priv,
3265     void      **priv_data
3266 )
3267 {
3268     nv_alloc_t *at;
3269     NvU64 i;
3270     struct page **user_pages;
3271     nv_linux_state_t *nvl;
3272     nvidia_pte_t *page_ptr;
3273 
3274     nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_register_user_pages: 0x%x\n", page_count);
3275     user_pages = *priv_data;
3276     nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3277 
3278     at = nvos_create_alloc(nvl->dev, page_count);
3279 
3280     if (at == NULL)
3281     {
3282         return NV_ERR_NO_MEMORY;
3283     }
3284 
3285     /*
3286      * Anonymous memory currently must be write-back cacheable, and we can't
3287      * enforce contiguity.
3288      */
3289     at->cache_type = NV_MEMORY_UNCACHED;
3290 #if defined(NVCPU_AARCH64)
3291     at->flags.aliased = NV_TRUE;
3292 #endif
3293 
3294     at->flags.user = NV_TRUE;
3295 
3296     at->order = get_order(at->num_pages * PAGE_SIZE);
3297 
3298     for (i = 0; i < page_count; i++)
3299     {
3300         /*
3301          * We only assign the physical address and not the DMA address, since
3302          * this allocation hasn't been DMA-mapped yet.
3303          */
3304         page_ptr = at->page_table[i];
3305         page_ptr->phys_addr = page_to_phys(user_pages[i]);
3306 
3307         phys_addr[i] = page_ptr->phys_addr;
3308     }
3309 
3310     /* Save off the user pages array to be restored later */
3311     at->user_pages = user_pages;
3312 
3313     /* Save off the import private data to be returned later */
3314     if (import_priv != NULL)
3315     {
3316         at->import_priv = import_priv;
3317     }
3318 
3319     *priv_data = at;
3320 
3321     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3322 
3323     return NV_OK;
3324 }
3325 
3326 void NV_API_CALL nv_unregister_user_pages(
3327     nv_state_t *nv,
3328     NvU64       page_count,
3329     void      **import_priv,
3330     void      **priv_data
3331 )
3332 {
3333     nv_alloc_t *at = *priv_data;
3334 
3335     nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_user_pages: 0x%x\n", page_count);
3336 
3337     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3338 
3339     WARN_ON(!at->flags.user);
3340 
3341     /* Restore the user pages array for the caller to handle */
3342     *priv_data = at->user_pages;
3343 
3344     /* Return the import private data for the caller to handle */
3345     if (import_priv != NULL)
3346     {
3347         *import_priv = at->import_priv;
3348     }
3349 
3350     nvos_free_alloc(at);
3351 }
3352 
3353 /*
3354  * This creates a dummy nv_alloc_t for existing physical allocations, so
3355  * that it can be mapped using NvRmMapMemory and BAR2 code path.
3356  */
3357 NV_STATUS NV_API_CALL nv_register_phys_pages(
3358     nv_state_t *nv,
3359     NvU64      *phys_addr,
3360     NvU64       page_count,
3361     NvU32       cache_type,
3362     void      **priv_data
3363 )
3364 {
3365     nv_alloc_t *at;
3366     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3367     NvU64 i;
3369 
3370     at = nvos_create_alloc(nvl->dev, page_count);
3371 
3372     if (at == NULL)
3373         return NV_ERR_NO_MEMORY;
3374     /*
3375      * Use the caller-supplied cache type; the memory is treated as discontiguous.
3376      */
3377     at->cache_type = cache_type;
3378 
3379     /*
3380      * Only physical address is available so we don't try to reuse existing
3381      * mappings
3382      */
3383     at->flags.physical = NV_TRUE;
3384 
3385     at->order = get_order(at->num_pages * PAGE_SIZE);
3386 
3387     for (i = 0; i < page_count; i++)
3388     {
3389         at->page_table[i]->phys_addr = phys_addr[i];
3390     }
3391 
3392     at->user_pages = NULL;
3393     *priv_data = at;
3394 
3395     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3396 
3397     return NV_OK;
3398 }
3399 
3400 NV_STATUS NV_API_CALL nv_register_sgt(
3401     nv_state_t *nv,
3402     NvU64      *phys_addr,
3403     NvU64       page_count,
3404     NvU32       cache_type,
3405     void      **priv_data,
3406     struct sg_table *import_sgt,
3407     void       *import_priv
3408 )
3409 {
3410     nv_alloc_t *at;
3411     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3412 
3413     unsigned int i, j = 0;
3414     NvU64 sg_addr, sg_off, sg_len;
3415     struct scatterlist *sg;
3416 
3417     at = nvos_create_alloc(nvl->dev, page_count);
3418 
3419     if (at == NULL)
3420         return NV_ERR_NO_MEMORY;
3421 
3422     /* Populate phys addrs with DMA addrs from SGT */
3423     for_each_sg(import_sgt->sgl, sg, import_sgt->nents, i)
3424     {
3425         /*
3426          * It is possible for dma_map_sg() to merge scatterlist entries, so
3427          * make sure we account for that here.
3428          */
3429         for (sg_addr = sg_dma_address(sg), sg_len = sg_dma_len(sg), sg_off = 0;
3430              (sg_off < sg_len) && (j < page_count);
3431              sg_off += PAGE_SIZE, j++)
3432         {
3433             phys_addr[j] = sg_addr + sg_off;
3434         }
3435     }
3436 
3437     /*
3438      * Use the caller-supplied cache type; the memory is treated as discontiguous.
3439      */
3440     at->cache_type = cache_type;
3441 
3442     at->import_sgt = import_sgt;
3443 
3444     /* Save off the import private data to be returned later */
3445     if (import_priv != NULL)
3446     {
3447         at->import_priv = import_priv;
3448     }
3449 
3450     at->order = get_order(at->num_pages * PAGE_SIZE);
3451 
3452     *priv_data = at;
3453 
3454     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3455 
3456     return NV_OK;
3457 }
3458 
3459 void NV_API_CALL nv_unregister_sgt(
3460     nv_state_t *nv,
3461     struct sg_table **import_sgt,
3462     void **import_priv,
3463     void  *priv_data
3464 )
3465 {
3466     nv_alloc_t *at = priv_data;
3467 
3468     nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_sgt\n");
3469 
3470     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3471 
3472     /* Restore the imported SGT for the caller to handle */
3473     *import_sgt = at->import_sgt;
3474 
3475     /* Return the import private data for the caller to handle */
3476     if (import_priv != NULL)
3477     {
3478         *import_priv = at->import_priv;
3479     }
3480 
3481     nvos_free_alloc(at);
3482 }
3483 
3484 void NV_API_CALL nv_unregister_phys_pages(
3485     nv_state_t *nv,
3486     void       *priv_data
3487 )
3488 {
3489     nv_alloc_t *at = priv_data;
3490     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3491 
3492     nvos_free_alloc(at);
3493 }
3494 
3495 NV_STATUS NV_API_CALL nv_get_num_phys_pages(
3496     void    *pAllocPrivate,
3497     NvU32   *pNumPages
3498 )
3499 {
3500     nv_alloc_t *at = pAllocPrivate;
3501 
3502     if (!pNumPages) {
3503         return NV_ERR_INVALID_ARGUMENT;
3504     }
3505 
3506     *pNumPages = at->num_pages;
3507 
3508     return NV_OK;
3509 }
3510 
3511 NV_STATUS NV_API_CALL nv_get_phys_pages(
3512     void    *pAllocPrivate,
3513     void    *pPages,
3514     NvU32   *pNumPages
3515 )
3516 {
3517     nv_alloc_t *at = pAllocPrivate;
3518     struct page **pages = (struct page **)pPages;
3519     NvU32 page_count;
3520     int i;
3521 
3522     if (!pNumPages || !pPages) {
3523         return NV_ERR_INVALID_ARGUMENT;
3524     }
3525 
3526     page_count = NV_MIN(*pNumPages, at->num_pages);
3527 
3528     for (i = 0; i < page_count; i++) {
3529         pages[i] = NV_GET_PAGE_STRUCT(at->page_table[i]->phys_addr);
3530     }
3531 
3532     *pNumPages = page_count;
3533 
3534     return NV_OK;
3535 }
3536 
3537 void nv_get_disp_smmu_stream_ids
3538 (
3539     nv_state_t *nv,
3540     NvU32 *dispIsoStreamId,
3541     NvU32 *dispNisoStreamId)
3542 {
3543     *dispIsoStreamId = nv->iommus.dispIsoStreamId;
3544     *dispNisoStreamId = nv->iommus.dispNisoStreamId;
3545 }
3546 
3547 void* NV_API_CALL nv_alloc_kernel_mapping(
3548     nv_state_t *nv,
3549     void       *pAllocPrivate,
3550     NvU64       pageIndex,
3551     NvU32       pageOffset,
3552     NvU64       size,
3553     void      **pPrivate
3554 )
3555 {
3556     nv_alloc_t *at = pAllocPrivate;
3557     NvU32 j, page_count;
3558     NvUPtr virt_addr;
3559     struct page **pages;
3560     NvBool isUserAllocatedMem;
3561 
3562     //
3563     // For User allocated memory (like ErrorNotifier's) which is NOT allocated
3564     // nor owned by RM, the RM driver just stores the physical address
3565     // corresponding to that memory and does not map it until required.
3566     // In that case, in page tables the virt_addr == 0, so first we need to map
3567     // those pages to obtain virtual address.
3568     //
3569     isUserAllocatedMem = at->flags.user &&
3570                         !at->page_table[pageIndex]->virt_addr &&
3571                          at->page_table[pageIndex]->phys_addr;
3572 
3573     //
3574     // User memory may NOT have kernel VA. So check this and fallback to else
3575     // case to create one.
3576     //
3577     if (((size + pageOffset) <= PAGE_SIZE) &&
3578          !at->flags.guest && !at->flags.aliased &&
3579          !isUserAllocatedMem && !at->flags.physical)
3580     {
3581         *pPrivate = NULL;
3582         return (void *)(at->page_table[pageIndex]->virt_addr + pageOffset);
3583     }
3584     else
3585     {
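             /* Map enough whole pages to cover pageOffset + size. */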
3586         size += pageOffset;
3587         page_count = (size >> PAGE_SHIFT) + ((size & ~NV_PAGE_MASK) ? 1 : 0);
3588 
3589         if (at->flags.guest)
3590         {
3591             virt_addr = nv_map_guest_pages(at,
3592                                            nv->bars[NV_GPU_BAR_INDEX_REGS].cpu_address,
3593                                            page_count, pageIndex);
3594         }
3595         else
3596         {
3597             NV_KMALLOC(pages, sizeof(struct page *) * page_count);
3598             if (pages == NULL)
3599             {
3600                 nv_printf(NV_DBG_ERRORS,
3601                           "NVRM: failed to allocate vmap() page descriptor table!\n");
3602                 return NULL;
3603             }
3604 
3605             for (j = 0; j < page_count; j++)
3606                 pages[j] = NV_GET_PAGE_STRUCT(at->page_table[pageIndex+j]->phys_addr);
3607 
3608             virt_addr = nv_vm_map_pages(pages, page_count,
3609                 at->cache_type == NV_MEMORY_CACHED, at->flags.unencrypted);
3610             NV_KFREE(pages, sizeof(struct page *) * page_count);
3611         }
3612 
3613         if (virt_addr == 0)
3614         {
3615             nv_printf(NV_DBG_ERRORS, "NVRM: failed to map pages!\n");
3616             return NULL;
3617         }
3618 
3619         *pPrivate = (void *)(NvUPtr)page_count;
3620         return (void *)(virt_addr + pageOffset);
3621     }
3622 
3623     return NULL;
3624 }
3625 
3626 NV_STATUS NV_API_CALL nv_free_kernel_mapping(
3627     nv_state_t *nv,
3628     void       *pAllocPrivate,
3629     void       *address,
3630     void       *pPrivate
3631 )
3632 {
3633     nv_alloc_t *at = pAllocPrivate;
3634     NvUPtr virt_addr;
3635     NvU32 page_count;
3636 
3637     virt_addr = ((NvUPtr)address & NV_PAGE_MASK);
3638     page_count = (NvUPtr)pPrivate;
3639 
3640     if (at->flags.guest)
3641     {
3642         nv_iounmap((void *)virt_addr, (page_count * PAGE_SIZE));
3643     }
3644     else if (pPrivate != NULL)
3645     {
3646         nv_vm_unmap_pages(virt_addr, page_count);
3647     }
3648 
3649     return NV_OK;
3650 }
3651 
3652 NV_STATUS NV_API_CALL nv_alloc_pages(
3653     nv_state_t *nv,
3654     NvU32       page_count,
3655     NvU64       page_size,
3656     NvBool      contiguous,
3657     NvU32       cache_type,
3658     NvBool      zeroed,
3659     NvBool      unencrypted,
3660     NvS32       node_id,
3661     NvU64      *pte_array,
3662     void      **priv_data
3663 )
3664 {
3665     nv_alloc_t *at;
3666     NV_STATUS status = NV_ERR_NO_MEMORY;
3667     nv_linux_state_t *nvl = NULL;
3668     NvBool will_remap = NV_FALSE;
3669     NvU32 i;
3670     struct device *dev = NULL;
3671 
3672     nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_alloc_pages: %d pages, nodeid %d\n", page_count, node_id);
3673     nv_printf(NV_DBG_MEMINFO, "NVRM: VM:    contig %d  cache_type %d\n",
3674         contiguous, cache_type);
3675 
3676     //
3677     // A system memory allocation can be associated with a client instead of
3678     // a GPU; handle the case where the per-device state is NULL.
3679     //
3680     if(nv)
3681     {
3682        nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3683        will_remap = nv_requires_dma_remap(nv);
3684        dev = nvl->dev;
3685     }
3686 
3687     if (nv_encode_caching(NULL, cache_type, NV_MEMORY_TYPE_SYSTEM))
3688         return NV_ERR_NOT_SUPPORTED;
3689 
3690     at = nvos_create_alloc(dev, page_count);
3691     if (at == NULL)
3692         return NV_ERR_NO_MEMORY;
3693 
3694     at->cache_type = cache_type;
3695 
3696     if (contiguous)
3697         at->flags.contig = NV_TRUE;
3698     if (zeroed)
3699         at->flags.zeroed = NV_TRUE;
3700 #if defined(NVCPU_AARCH64)
3701     if (at->cache_type != NV_MEMORY_CACHED)
3702         at->flags.aliased = NV_TRUE;
3703 #endif
3704     if (unencrypted)
3705         at->flags.unencrypted = NV_TRUE;
3706 
3707 #if defined(NVCPU_PPC64LE)
3708     /*
3709      * Starting on Power9 systems, DMA addresses for NVLink are no longer the
3710      * same as used over PCIe. There is an address compression scheme required
3711      * for NVLink ONLY which impacts the upper address bits of the DMA address.
3712      *
3713      * This divergence between PCIe and NVLink DMA mappings breaks assumptions
3714      * in the driver where during initialization we allocate system memory
3715      * for the GPU to access over PCIe before NVLink is trained -- and some of
3716      * these mappings persist on the GPU. If these persistent mappings are not
3717      * equivalent they will cause invalid DMA accesses from the GPU once we
3718      * switch to NVLink.
3719      *
3720      * To work around this we limit all system memory allocations from the driver
3721      * during the period before NVLink is enabled to be from NUMA node 0 (CPU 0)
3722      * which has a CPU real address with the upper address bits (above bit 42)
3723      * set to 0. Effectively making the PCIe and NVLink DMA mappings equivalent
3724      * allowing persistent system memory mappings already programmed on the GPU
3725      * to remain valid after NVLink is enabled.
3726      *
3727      * See Bug 1920398 for more details.
3728      */
3729     if (nv && nvl->npu && !nvl->dma_dev.nvlink)
3730     {
3731         at->flags.node = NV_TRUE;
3732         at->node_id = 0;
3733     }
3734 #endif
3735 
3736     if (node_id != NUMA_NO_NODE)
3737     {
3738         at->flags.node = NV_TRUE;
3739         at->node_id = node_id;
3740     }
3741 
3742     if (at->flags.contig)
3743     {
3744         status = nv_alloc_contig_pages(nv, at);
3745     }
3746     else
3747     {
3748         if (page_size == 0)
3749         {
3750             status = NV_ERR_INVALID_ARGUMENT;
3751             goto failed;
3752         }
3753         at->order = get_order(page_size);
3754         status = nv_alloc_system_pages(nv, at);
3755     }
3756 
3757     if (status != NV_OK)
3758         goto failed;
3759 
3760     for (i = 0; i < ((contiguous) ? 1 : page_count); i++)
3761     {
3762         /*
3763          * The contents of the pte_array[] depend on whether or not this device
3764          * requires DMA-remapping. If it does, it should be the phys addresses
3765          * used by the DMA-remapping paths, otherwise it should be the actual
3766          * address that the device should use for DMA (which, confusingly, may
3767          * be different than the CPU physical address, due to a static DMA
3768          * offset).
3769          */
3770         if ((nv == NULL) || will_remap)
3771         {
3772             pte_array[i] = at->page_table[i]->phys_addr;
3773         }
3774         else
3775         {
3776             pte_array[i] = nv_phys_to_dma(dev,
3777                 at->page_table[i]->phys_addr);
3778         }
3779     }
3780 
3781     *priv_data = at;
3782     NV_ATOMIC_INC(at->usage_count);
3783 
3784     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3785 
3786     return NV_OK;
3787 
3788 failed:
3789     nvos_free_alloc(at);
3790 
3791     return status;
3792 }
3793 
3794 NV_STATUS NV_API_CALL nv_free_pages(
3795     nv_state_t *nv,
3796     NvU32 page_count,
3797     NvBool contiguous,
3798     NvU32 cache_type,
3799     void *priv_data
3800 )
3801 {
3802     NV_STATUS rmStatus = NV_OK;
3803     nv_alloc_t *at = priv_data;
3804 
3805     nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_free_pages: 0x%x\n", page_count);
3806 
3807     NV_PRINT_AT(NV_DBG_MEMINFO, at);
3808 
3809     /*
3810      * If the 'at' usage count doesn't drop to zero here, not all of
3811      * the user mappings have been torn down in time - we can't
3812      * safely free the memory. We report success back to the RM, but
3813      * defer the actual free operation until later.
3814      *
3815      * This is described in greater detail in the comments above the
3816      * nvidia_vma_(open|release)() callbacks in nv-mmap.c.
3817      */
3818     if (!NV_ATOMIC_DEC_AND_TEST(at->usage_count))
3819         return NV_OK;
3820 
3821     if (!at->flags.guest)
3822     {
3823         if (at->flags.contig)
3824             nv_free_contig_pages(at);
3825         else
3826             nv_free_system_pages(at);
3827     }
3828 
3829     nvos_free_alloc(at);
3830 
3831     return rmStatus;
3832 }
3833 
3834 NvBool nv_lock_init_locks
3835 (
3836     nvidia_stack_t *sp,
3837     nv_state_t *nv
3838 )
3839 {
3840     nv_linux_state_t *nvl;
3841     nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3842 
3843     NV_INIT_MUTEX(&nvl->ldata_lock);
3844     NV_INIT_MUTEX(&nvl->mmap_lock);
3845     NV_INIT_MUTEX(&nvl->open_q_lock);
3846 
3847     NV_ATOMIC_SET(nvl->usage_count, 0);
3848 
3849     if (!rm_init_event_locks(sp, nv))
3850         return NV_FALSE;
3851 
3852     return NV_TRUE;
3853 }
3854 
3855 void nv_lock_destroy_locks
3856 (
3857     nvidia_stack_t *sp,
3858     nv_state_t *nv
3859 )
3860 {
3861     rm_destroy_event_locks(sp, nv);
3862 }
3863 
3864 void NV_API_CALL nv_post_event(
3865     nv_event_t *event,
3866     NvHandle    handle,
3867     NvU32       index,
3868     NvU32       info32,
3869     NvU16       info16,
3870     NvBool      data_valid
3871 )
3872 {
3873     nv_linux_file_private_t *nvlfp = nv_get_nvlfp_from_nvfp(event->nvfp);
3874     unsigned long eflags;
3875     nvidia_event_t *nvet;
3876 
3877     NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);
3878 
3879     if (data_valid)
3880     {
3881         NV_KMALLOC_ATOMIC(nvet, sizeof(nvidia_event_t));
3882         if (nvet == NULL)
3883         {
3884             NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
3885             return;
3886         }
3887 
3888         if (nvlfp->event_data_tail != NULL)
3889             nvlfp->event_data_tail->next = nvet;
3890         if (nvlfp->event_data_head == NULL)
3891             nvlfp->event_data_head = nvet;
3892         nvlfp->event_data_tail = nvet;
3893         nvet->next = NULL;
3894 
3895         nvet->event = *event;
3896         nvet->event.hObject = handle;
3897         nvet->event.index = index;
3898         nvet->event.info32 = info32;
3899         nvet->event.info16 = info16;
3900     }
3901     //
3902     // 'dataless_event_pending' is interpreted by nvidia_poll() and nv_get_event()
3903     // to mean that an event without data is pending. Therefore, only set it to
3904     // true here if the newly posted event is dataless.
3905     //
3906     else
3907     {
3908         nvlfp->dataless_event_pending = NV_TRUE;
3909     }
3910 
3911     NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
3912 
3913     wake_up_interruptible(&nvlfp->waitqueue);
3914 }
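/*
 * Events carrying data are appended to the nvlfp->event_data_head/tail
 * singly linked list under fp_lock and are consumed in FIFO order by
 * nv_get_event(); dataless events only set nvlfp->dataless_event_pending.
 * In either case, wake_up_interruptible() wakes any waiters blocked on
 * nvlfp->waitqueue.
 */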
3915 
3916 NvBool NV_API_CALL nv_is_rm_firmware_active(
3917     nv_state_t *nv
3918 )
3919 {
3920     if (rm_firmware_active)
3921     {
3922         // "all" here means all GPUs
3923         if (strcmp(rm_firmware_active, "all") == 0)
3924             return NV_TRUE;
3925     }
3926     return NV_FALSE;
3927 }
3928 
3929 const void* NV_API_CALL nv_get_firmware(
3930     nv_state_t *nv,
3931     nv_firmware_type_t fw_type,
3932     nv_firmware_chip_family_t fw_chip_family,
3933     const void **fw_buf,
3934     NvU32 *fw_size
3935 )
3936 {
3937     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3938     const struct firmware *fw;
3939 
3940     // path is relative to /lib/firmware
3941     // if this fails it will print an error to dmesg
3942     if (request_firmware(&fw, nv_firmware_path(fw_type, fw_chip_family), nvl->dev) != 0)
3943         return NULL;
3944 
3945     *fw_size = fw->size;
3946     *fw_buf = fw->data;
3947 
3948     return fw;
3949 }
3950 
3951 void NV_API_CALL nv_put_firmware(
3952     const void *fw_handle
3953 )
3954 {
3955     release_firmware(fw_handle);
3956 }
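/*
 * The opaque handle returned by nv_get_firmware() is the struct firmware
 * pointer itself; *fw_buf and *fw_size remain valid until the handle is
 * released with nv_put_firmware(), which simply calls release_firmware().
 */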
3957 
3958 nv_file_private_t* NV_API_CALL nv_get_file_private(
3959     NvS32 fd,
3960     NvBool ctl,
3961     void **os_private
3962 )
3963 {
3964     struct file *filp = NULL;
3965     nv_linux_file_private_t *nvlfp = NULL;
3966     dev_t rdev = 0;
3967 
3968     filp = fget(fd);
3969 
3970     if (filp == NULL || !NV_FILE_INODE(filp))
3971     {
3972         goto fail;
3973     }
3974 
3975     rdev = (NV_FILE_INODE(filp))->i_rdev;
3976 
3977     if (MAJOR(rdev) != NV_MAJOR_DEVICE_NUMBER)
3978     {
3979         goto fail;
3980     }
3981 
3982     if (ctl)
3983     {
3984         if (MINOR(rdev) != NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE)
3985             goto fail;
3986     }
3987     else
3988     {
3989         NvBool found = NV_FALSE;
3990         int i;
3991 
3992         for (i = 0; i <= NV_MINOR_DEVICE_NUMBER_REGULAR_MAX; i++)
3993         {
3994             if ((nv_linux_minor_num_table[i] != NULL) && (MINOR(rdev) == i))
3995             {
3996                 found = NV_TRUE;
3997                 break;
3998             }
3999         }
4000 
4001         if (!found)
4002             goto fail;
4003     }
4004 
4005     nvlfp = NV_GET_LINUX_FILE_PRIVATE(filp);
4006 
4007     *os_private = filp;
4008 
4009     return &nvlfp->nvfp;
4010 
4011 fail:
4012 
4013     if (filp != NULL)
4014     {
4015         fput(filp);
4016     }
4017 
4018     return NULL;
4019 }
4020 
4021 void NV_API_CALL nv_put_file_private(
4022     void *os_private
4023 )
4024 {
4025     struct file *filp = os_private;
4026     fput(filp);
4027 }
4028 
4029 int NV_API_CALL nv_get_event(
4030     nv_file_private_t  *nvfp,
4031     nv_event_t         *event,
4032     NvU32              *pending
4033 )
4034 {
4035     nv_linux_file_private_t *nvlfp = nv_get_nvlfp_from_nvfp(nvfp);
4036     nvidia_event_t *nvet;
4037     unsigned long eflags;
4038 
4039     NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);
4040 
4041     nvet = nvlfp->event_data_head;
4042     if (nvet == NULL)
4043     {
4044         NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
4045         return NV_ERR_GENERIC;
4046     }
4047 
4048     *event = nvet->event;
4049 
4050     if (nvlfp->event_data_tail == nvet)
4051         nvlfp->event_data_tail = NULL;
4052     nvlfp->event_data_head = nvet->next;
4053 
4054     *pending = (nvlfp->event_data_head != NULL);
4055 
4056     NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
4057 
4058     NV_KFREE(nvet, sizeof(nvidia_event_t));
4059 
4060     return NV_OK;
4061 }
4062 
4063 int NV_API_CALL nv_start_rc_timer(
4064     nv_state_t *nv
4065 )
4066 {
4067     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4068 
4069     if (nv->rc_timer_enabled)
4070         return -1;
4071 
4072     nv_printf(NV_DBG_INFO, "NVRM: initializing rc timer\n");
4073 
4074     nv_timer_setup(&nvl->rc_timer, nvidia_rc_timer_callback);
4075 
4076     nv->rc_timer_enabled = 1;
4077 
4078     // set the timeout for 1 second in the future:
4079     mod_timer(&nvl->rc_timer.kernel_timer, jiffies + HZ);
4080 
4081     nv_printf(NV_DBG_INFO, "NVRM: rc timer initialized\n");
4082 
4083     return 0;
4084 }
4085 
4086 int NV_API_CALL nv_stop_rc_timer(
4087     nv_state_t *nv
4088 )
4089 {
4090     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4091 
4092     if (!nv->rc_timer_enabled)
4093         return -1;
4094 
4095     nv_printf(NV_DBG_INFO, "NVRM: stopping rc timer\n");
4096     nv->rc_timer_enabled = 0;
4097     del_timer_sync(&nvl->rc_timer.kernel_timer);
4098     nv_printf(NV_DBG_INFO, "NVRM: rc timer stopped\n");
4099 
4100     return 0;
4101 }
4102 
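/*
 * Next expiry for the self-rearming snapshot timer: NV_SNAPSHOT_TIMER_HZ
 * ticks per second (i.e. HZ / NV_SNAPSHOT_TIMER_HZ jiffies from now).
 */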
4103 #define SNAPSHOT_TIMER_FREQ (jiffies + HZ / NV_SNAPSHOT_TIMER_HZ)
4104 
4105 static void snapshot_timer_callback(struct nv_timer *timer)
4106 {
4107     nv_linux_state_t *nvl = &nv_ctl_device;
4108     nv_state_t *nv = NV_STATE_PTR(nvl);
4109     unsigned long flags;
4110 
4111     NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags);
4112     if (nvl->snapshot_callback != NULL)
4113     {
4114         nvl->snapshot_callback(nv->profiler_context);
4115         mod_timer(&timer->kernel_timer, SNAPSHOT_TIMER_FREQ);
4116     }
4117     NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags);
4118 }
4119 
4120 void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context))
4121 {
4122     nv_linux_state_t *nvl = &nv_ctl_device;
4123 
4124     nvl->snapshot_callback = snapshot_callback;
4125     nv_timer_setup(&nvl->snapshot_timer, snapshot_timer_callback);
4126     mod_timer(&nvl->snapshot_timer.kernel_timer, SNAPSHOT_TIMER_FREQ);
4127 }
4128 
4129 void NV_API_CALL nv_stop_snapshot_timer(void)
4130 {
4131     nv_linux_state_t *nvl = &nv_ctl_device;
4132     NvBool timer_active;
4133     unsigned long flags;
4134 
4135     NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags);
4136     timer_active = nvl->snapshot_callback != NULL;
4137     nvl->snapshot_callback = NULL;
4138     NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags);
4139 
4140     if (timer_active)
4141         del_timer_sync(&nvl->snapshot_timer.kernel_timer);
4142 }
4143 
4144 void NV_API_CALL nv_flush_snapshot_timer(void)
4145 {
4146     nv_linux_state_t *nvl = &nv_ctl_device;
4147     nv_state_t *nv = NV_STATE_PTR(nvl);
4148     unsigned long flags;
4149 
4150     NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags);
4151     if (nvl->snapshot_callback != NULL)
4152         nvl->snapshot_callback(nv->profiler_context);
4153     NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags);
4154 }
4155 
4156 static int __init
4157 nvos_count_devices(void)
4158 {
4159     int count;
4160 
4161     count = nv_pci_count_devices();
4162 
4163     return count;
4164 }
4165 
4166 #if NVCPU_IS_AARCH64
4167 NvBool nvos_is_chipset_io_coherent(void)
4168 {
4169     static NvTristate nv_chipset_is_io_coherent = NV_TRISTATE_INDETERMINATE;
4170 
4171     if (nv_chipset_is_io_coherent == NV_TRISTATE_INDETERMINATE)
4172     {
4173         nvidia_stack_t *sp = NULL;
4174         if (nv_kmem_cache_alloc_stack(&sp) != 0)
4175         {
4176             nv_printf(NV_DBG_ERRORS,
4177               "NVRM: cannot allocate stack for platform coherence check callback \n");
4178             WARN_ON(1);
4179             return NV_FALSE;
4180         }
4181 
4182         nv_chipset_is_io_coherent = rm_is_chipset_io_coherent(sp);
4183 
4184         nv_kmem_cache_free_stack(sp);
4185     }
4186 
4187     return nv_chipset_is_io_coherent;
4188 }
4189 #endif // NVCPU_IS_AARCH64
4190 
4191 #if defined(CONFIG_PM)
4192 static NV_STATUS
4193 nv_power_management(
4194     nv_state_t *nv,
4195     nv_pm_action_t pm_action
4196 )
4197 {
4198     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4199     int status = NV_OK;
4200     nvidia_stack_t *sp = NULL;
4201 
4202     if (nv_kmem_cache_alloc_stack(&sp) != 0)
4203     {
4204         return NV_ERR_NO_MEMORY;
4205     }
4206 
4207     status = nv_check_gpu_state(nv);
4208     if (status == NV_ERR_GPU_IS_LOST)
4209     {
4210         NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping PM event\n");
4211         goto failure;
4212     }
4213 
4214     switch (pm_action)
4215     {
4216         case NV_PM_ACTION_STANDBY:
4217             /* fall through */
4218         case NV_PM_ACTION_HIBERNATE:
4219         {
4220             /*
4221              * Flush nvl->open_q before suspend/hibernate to ensure deferred
4222              * opens do not get attempted during the PM transition.
4223              *
4224              * Note: user space is either frozen by the kernel or locked out
4225              * by nv_system_pm_lock, so no further deferred opens can be
4226              * enqueued before resume (meaning we do not need to unset
4227              * nvl->is_accepting_opens).
4228              */
4229             nv_kthread_q_flush(&nvl->open_q);
4230 
4231             status = rm_power_management(sp, nv, pm_action);
4232 
4233             nv_kthread_q_stop(&nvl->bottom_half_q);
4234 
4235             nv_disable_pat_support();
4236             break;
4237         }
4238         case NV_PM_ACTION_RESUME:
4239         {
4240             nv_enable_pat_support();
4241 
4242             nv_kthread_q_item_init(&nvl->bottom_half_q_item,
4243                                    nvidia_isr_bh_unlocked, (void *)nv);
4244 
4245             status = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name);
4246             if (status != NV_OK)
4247                 break;
4248 
4249             status = rm_power_management(sp, nv, pm_action);
4250             break;
4251         }
4252         default:
4253             status = NV_ERR_INVALID_ARGUMENT;
4254             break;
4255     }
4256 
4257 failure:
4258     nv_kmem_cache_free_stack(sp);
4259 
4260     return status;
4261 }
4262 
4263 static NV_STATUS
4264 nv_restore_user_channels(
4265     nv_state_t *nv
4266 )
4267 {
4268     NV_STATUS status = NV_OK;
4269     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4270     nv_stack_t *sp = NULL;
4271 
4272     if (nv_kmem_cache_alloc_stack(&sp) != 0)
4273     {
4274         return NV_ERR_NO_MEMORY;
4275     }
4276 
4277     down(&nvl->ldata_lock);
4278 
4279     if ((nv->flags & NV_FLAG_OPEN) == 0)
4280     {
4281         goto done;
4282     }
4283 
4284     status = rm_restart_user_channels(sp, nv);
4285     WARN_ON(status != NV_OK);
4286 
4287     down(&nvl->mmap_lock);
4288 
4289     nv_set_safe_to_mmap_locked(nv, NV_TRUE);
4290 
4291     up(&nvl->mmap_lock);
4292 
4293     rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
4294 
4295 done:
4296     up(&nvl->ldata_lock);
4297 
4298     nv_kmem_cache_free_stack(sp);
4299 
4300     return status;
4301 }
4302 
4303 static NV_STATUS
4304 nv_preempt_user_channels(
4305     nv_state_t *nv
4306 )
4307 {
4308     NV_STATUS status = NV_OK;
4309     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4310     nv_stack_t *sp = NULL;
4311 
4312     if (nv_kmem_cache_alloc_stack(&sp) != 0)
4313     {
4314         return NV_ERR_NO_MEMORY;
4315     }
4316 
4317     down(&nvl->ldata_lock);
4318 
4319     if ((nv->flags & NV_FLAG_OPEN) == 0)
4320     {
4321         goto done;
4322     }
4323 
4324     status = rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
4325     WARN_ON(status != NV_OK);
4326 
4327     down(&nvl->mmap_lock);
4328 
4329     nv_set_safe_to_mmap_locked(nv, NV_FALSE);
4330     nv_revoke_gpu_mappings_locked(nv);
4331 
4332     up(&nvl->mmap_lock);
4333 
4334     status = rm_stop_user_channels(sp, nv);
4335     WARN_ON(status != NV_OK);
4336 
4337 done:
4338     up(&nvl->ldata_lock);
4339 
4340     nv_kmem_cache_free_stack(sp);
4341 
4342     return status;
4343 }
4344 
4345 static NV_STATUS
4346 nvidia_suspend(
4347     struct device *dev,
4348     nv_pm_action_t pm_action,
4349     NvBool is_procfs_suspend
4350 )
4351 {
4352     NV_STATUS status = NV_OK;
4353     struct pci_dev *pci_dev = NULL;
4354     nv_linux_state_t *nvl;
4355     nv_state_t *nv;
4356 
4357     if (dev_is_pci(dev))
4358     {
4359         pci_dev = to_pci_dev(dev);
4360         nvl = pci_get_drvdata(pci_dev);
4361     }
4362     else
4363     {
4364         nvl = dev_get_drvdata(dev);
4365     }
4366     nv = NV_STATE_PTR(nvl);
4367 
4368     down(&nvl->ldata_lock);
4369 
4370     if (((nv->flags & NV_FLAG_OPEN) == 0) &&
4371         ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) == 0))
4372     {
4373         goto done;
4374     }
4375 
4376     if ((nv->flags & NV_FLAG_SUSPENDED) != 0)
4377     {
4378         nvl->suspend_count++;
4379         goto pci_pm;
4380     }
4381 
4382     if (nv->preserve_vidmem_allocations && !is_procfs_suspend)
4383     {
4384         NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
4385                       "PreserveVideoMemoryAllocations module parameter is set. "
4386                       "System Power Management attempted without driver procfs suspend interface. "
4387                       "Please refer to the 'Configuring Power Management Support' section in the driver README.\n");
4388         status = NV_ERR_NOT_SUPPORTED;
4389         goto done;
4390     }
4391 
4392     nvidia_modeset_suspend(nv->gpu_id);
4393 
4394     status = nv_power_management(nv, pm_action);
4395 
4396     if (status != NV_OK)
4397     {
4398         nvidia_modeset_resume(nv->gpu_id);
4399         goto done;
4400     }
4401     else
4402     {
4403         nv->flags |= NV_FLAG_SUSPENDED;
4404     }
4405 
4406 pci_pm:
4407     /*
4408      * Check if PCI power state should be D0 during system suspend. The PCI PM
4409      * core will change the power state only if the driver has not saved the
4410      * state in its suspend callback.
4411      */
4412     if ((nv->d0_state_in_suspend) && (pci_dev != NULL) &&
4413         !is_procfs_suspend && (pm_action == NV_PM_ACTION_STANDBY))
4414     {
4415         pci_save_state(pci_dev);
4416     }
4417 
4418 done:
4419     up(&nvl->ldata_lock);
4420 
4421     return status;
4422 }
4423 
4424 static NV_STATUS
4425 nvidia_resume(
4426     struct device *dev,
4427     nv_pm_action_t pm_action
4428 )
4429 {
4430     NV_STATUS status = NV_OK;
4431     struct pci_dev *pci_dev;
4432     nv_linux_state_t *nvl;
4433     nv_state_t *nv;
4434 
4435     if (dev_is_pci(dev))
4436     {
4437         pci_dev = to_pci_dev(dev);
4438         nvl = pci_get_drvdata(pci_dev);
4439     }
4440     else
4441     {
4442         nvl = dev_get_drvdata(dev);
4443     }
4444     nv = NV_STATE_PTR(nvl);
4445 
4446     down(&nvl->ldata_lock);
4447 
4448     if ((nv->flags & NV_FLAG_SUSPENDED) == 0)
4449     {
4450         goto done;
4451     }
4452 
4453     if (nvl->suspend_count != 0)
4454     {
4455         nvl->suspend_count--;
4456     }
4457     else
4458     {
4459         status = nv_power_management(nv, pm_action);
4460 
4461         if (status == NV_OK)
4462         {
4463             nvidia_modeset_resume(nv->gpu_id);
4464             nv->flags &= ~NV_FLAG_SUSPENDED;
4465         }
4466     }
4467 
4468 done:
4469     up(&nvl->ldata_lock);
4470 
4471     return status;
4472 }
4473 
4474 static NV_STATUS
4475 nv_resume_devices(
4476     nv_pm_action_t pm_action,
4477     nv_pm_action_depth_t pm_action_depth
4478 )
4479 {
4480     nv_linux_state_t *nvl;
4481     NvBool resume_devices = NV_TRUE;
4482     NV_STATUS status;
4483 
4484     if (pm_action_depth == NV_PM_ACTION_DEPTH_MODESET)
4485     {
4486         goto resume_modeset;
4487     }
4488 
4489     if (pm_action_depth == NV_PM_ACTION_DEPTH_UVM)
4490     {
4491         resume_devices = NV_FALSE;
4492     }
4493 
4494     LOCK_NV_LINUX_DEVICES();
4495 
4496     for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
4497     {
4498         if (resume_devices)
4499         {
4500             status = nvidia_resume(nvl->dev, pm_action);
4501             WARN_ON(status != NV_OK);
4502         }
4503     }
4504 
4505     UNLOCK_NV_LINUX_DEVICES();
4506 
4507     status = nv_uvm_resume();
4508     WARN_ON(status != NV_OK);
4509 
4510     LOCK_NV_LINUX_DEVICES();
4511 
4512     for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
4513     {
4514         status = nv_restore_user_channels(NV_STATE_PTR(nvl));
4515         WARN_ON(status != NV_OK);
4516     }
4517 
4518     UNLOCK_NV_LINUX_DEVICES();
4519 
4520 resume_modeset:
4521     nvidia_modeset_resume(0);
4522 
4523     return NV_OK;
4524 }
4525 
4526 static NV_STATUS
4527 nv_suspend_devices(
4528     nv_pm_action_t pm_action,
4529     nv_pm_action_depth_t pm_action_depth
4530 )
4531 {
4532     nv_linux_state_t *nvl;
4533     NvBool resume_devices = NV_FALSE;
4534     NV_STATUS status = NV_OK;
4535 
4536     nvidia_modeset_suspend(0);
4537 
4538     if (pm_action_depth == NV_PM_ACTION_DEPTH_MODESET)
4539     {
4540         return NV_OK;
4541     }
4542 
4543     LOCK_NV_LINUX_DEVICES();
4544 
4545     for (nvl = nv_linux_devices; nvl != NULL && status == NV_OK; nvl = nvl->next)
4546     {
4547         status = nv_preempt_user_channels(NV_STATE_PTR(nvl));
4548         WARN_ON(status != NV_OK);
4549     }
4550 
4551     UNLOCK_NV_LINUX_DEVICES();
4552 
4553     if (status == NV_OK)
4554     {
4555         status = nv_uvm_suspend();
4556         WARN_ON(status != NV_OK);
4557     }
4558     if (status != NV_OK)
4559     {
4560         goto done;
4561     }
4562 
4563     if (pm_action_depth == NV_PM_ACTION_DEPTH_UVM)
4564     {
4565         return NV_OK;
4566     }
4567 
4568     LOCK_NV_LINUX_DEVICES();
4569 
4570     for (nvl = nv_linux_devices; nvl != NULL && status == NV_OK; nvl = nvl->next)
4571     {
4572         status = nvidia_suspend(nvl->dev, pm_action, NV_TRUE);
4573         WARN_ON(status != NV_OK);
4574     }
4575     if (status != NV_OK)
4576     {
4577         resume_devices = NV_TRUE;
4578     }
4579 
4580     UNLOCK_NV_LINUX_DEVICES();
4581 
4582 done:
4583     if (status != NV_OK)
4584     {
4585         LOCK_NV_LINUX_DEVICES();
4586 
4587         for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
4588         {
4589             if (resume_devices)
4590             {
4591                 nvidia_resume(nvl->dev, pm_action);
4592             }
4593 
4594             nv_restore_user_channels(NV_STATE_PTR(nvl));
4595         }
4596 
4597         UNLOCK_NV_LINUX_DEVICES();
4598     }
4599 
4600     return status;
4601 }
4602 
4603 NV_STATUS
4604 nv_set_system_power_state(
4605     nv_power_state_t power_state,
4606     nv_pm_action_depth_t pm_action_depth
4607 )
4608 {
4609     NV_STATUS status;
4610     nv_pm_action_t pm_action;
4611 
4612     switch (power_state)
4613     {
4614         case NV_POWER_STATE_IN_HIBERNATE:
4615             pm_action = NV_PM_ACTION_HIBERNATE;
4616             break;
4617         case NV_POWER_STATE_IN_STANDBY:
4618             pm_action = NV_PM_ACTION_STANDBY;
4619             break;
4620         case NV_POWER_STATE_RUNNING:
4621             pm_action = NV_PM_ACTION_RESUME;
4622             break;
4623         default:
4624             return NV_ERR_INVALID_ARGUMENT;
4625     }
4626 
4627     down(&nv_system_power_state_lock);
4628 
4629     if (nv_system_power_state == power_state)
4630     {
4631         status = NV_OK;
4632         goto done;
4633     }
4634 
4635     if (power_state == NV_POWER_STATE_RUNNING)
4636     {
4637         status = nv_resume_devices(pm_action, nv_system_pm_action_depth);
4638         up_write(&nv_system_pm_lock);
4639     }
4640     else
4641     {
4642         if (nv_system_power_state != NV_POWER_STATE_RUNNING)
4643         {
4644             status = NV_ERR_INVALID_ARGUMENT;
4645             goto done;
4646         }
4647 
4648         nv_system_pm_action_depth = pm_action_depth;
4649 
4650         down_write(&nv_system_pm_lock);
4651         status = nv_suspend_devices(pm_action, nv_system_pm_action_depth);
4652         if (status != NV_OK)
4653         {
4654             up_write(&nv_system_pm_lock);
4655             goto done;
4656         }
4657     }
4658 
4659     nv_system_power_state = power_state;
4660 
4661 done:
4662     up(&nv_system_power_state_lock);
4663 
4664     return status;
4665 }
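/*
 * nv_set_system_power_state() only transitions between NV_POWER_STATE_RUNNING
 * and one suspended state: entering suspend takes nv_system_pm_lock for write
 * (locking out further user-space activity) until the matching transition back
 * to RUNNING releases it, while nv_system_power_state_lock serializes the
 * transitions themselves.
 */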
4666 
4667 int nv_pmops_suspend(
4668     struct device *dev
4669 )
4670 {
4671     NV_STATUS status;
4672 
4673     status = nvidia_suspend(dev, NV_PM_ACTION_STANDBY, NV_FALSE);
4674     return (status == NV_OK) ? 0 : -EIO;
4675 }
4676 
4677 int nv_pmops_resume(
4678     struct device *dev
4679 )
4680 {
4681     NV_STATUS status;
4682 
4683     status = nvidia_resume(dev, NV_PM_ACTION_RESUME);
4684     return (status == NV_OK) ? 0 : -EIO;
4685 }
4686 
4687 int nv_pmops_freeze(
4688     struct device *dev
4689 )
4690 {
4691     NV_STATUS status;
4692 
4693     status = nvidia_suspend(dev, NV_PM_ACTION_HIBERNATE, NV_FALSE);
4694     return (status == NV_OK) ? 0 : -EIO;
4695 }
4696 
4697 int nv_pmops_thaw(
4698     struct device *dev
4699 )
4700 {
4701     return 0;
4702 }
4703 
4704 int nv_pmops_restore(
4705     struct device *dev
4706 )
4707 {
4708     NV_STATUS status;
4709 
4710     status = nvidia_resume(dev, NV_PM_ACTION_RESUME);
4711     return (status == NV_OK) ? 0 : -EIO;
4712 }
4713 
4714 int nv_pmops_poweroff(
4715     struct device *dev
4716 )
4717 {
4718     return 0;
4719 }
4720 
4721 static int
4722 nvidia_transition_dynamic_power(
4723     struct device *dev,
4724     NvBool enter
4725 )
4726 {
4727     struct pci_dev *pci_dev = to_pci_dev(dev);
4728     nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
4729     nv_state_t *nv = NV_STATE_PTR(nvl);
4730     nvidia_stack_t *sp = NULL;
4731     NvBool bTryAgain = NV_FALSE;
4732     NV_STATUS status;
4733 
4734     if ((nv->flags & (NV_FLAG_OPEN | NV_FLAG_PERSISTENT_SW_STATE)) == 0)
4735     {
4736         return 0;
4737     }
4738 
4739     if (nv_kmem_cache_alloc_stack(&sp) != 0)
4740     {
4741         return -ENOMEM;
4742     }
4743 
4744     status = rm_transition_dynamic_power(sp, nv, enter, &bTryAgain);
4745 
4746     nv_kmem_cache_free_stack(sp);
4747 
4748     if (bTryAgain)
4749     {
4750         /*
4751          * Return -EAGAIN so that the kernel PM core does not treat this as a
4752          * fatal error and will reschedule the callback again in the future.
4753          */
4754         return -EAGAIN;
4755     }
4756 
4757     return (status == NV_OK) ? 0 : -EIO;
4758 }
4759 
4760 int nv_pmops_runtime_suspend(
4761     struct device *dev
4762 )
4763 {
4764     return nvidia_transition_dynamic_power(dev, NV_TRUE);
4765 }
4766 
4767 int nv_pmops_runtime_resume(
4768     struct device *dev
4769 )
4770 {
4771     return nvidia_transition_dynamic_power(dev, NV_FALSE);
4772 }
4773 #endif /* defined(CONFIG_PM) */
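/*
 * A sketch of how the nv_pmops_* callbacks above are typically wired into the
 * kernel PM core (shown for illustration only; the actual dev_pm_ops instance
 * lives elsewhere in this driver, and its name here is assumed):
 *
 *     static const struct dev_pm_ops nv_pm_ops_sketch = {
 *         .suspend         = nv_pmops_suspend,
 *         .resume          = nv_pmops_resume,
 *         .freeze          = nv_pmops_freeze,
 *         .thaw            = nv_pmops_thaw,
 *         .poweroff        = nv_pmops_poweroff,
 *         .restore         = nv_pmops_restore,
 *         .runtime_suspend = nv_pmops_runtime_suspend,
 *         .runtime_resume  = nv_pmops_runtime_resume,
 *     };
 */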
4774 
4775 nv_state_t* NV_API_CALL nv_get_adapter_state(
4776     NvU32 domain,
4777     NvU8  bus,
4778     NvU8  slot
4779 )
4780 {
4781     nv_linux_state_t *nvl;
4782 
4783     LOCK_NV_LINUX_DEVICES();
4784     for (nvl = nv_linux_devices; nvl != NULL;  nvl = nvl->next)
4785     {
4786         nv_state_t *nv = NV_STATE_PTR(nvl);
4787         if (nv->pci_info.domain == domain && nv->pci_info.bus == bus
4788             && nv->pci_info.slot == slot)
4789         {
4790             UNLOCK_NV_LINUX_DEVICES();
4791             return nv;
4792         }
4793     }
4794     UNLOCK_NV_LINUX_DEVICES();
4795 
4796     return NULL;
4797 }
4798 
4799 nv_state_t* NV_API_CALL nv_get_ctl_state(void)
4800 {
4801     return NV_STATE_PTR(&nv_ctl_device);
4802 }
4803 
4804 NV_STATUS NV_API_CALL nv_log_error(
4805     nv_state_t *nv,
4806     NvU32       error_number,
4807     const char *format,
4808     va_list    ap
4809 )
4810 {
4811     NV_STATUS status = NV_OK;
4812     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4813 
4814     nv_report_error(nvl->pci_dev, error_number, format, ap);
4815 #if defined(CONFIG_CRAY_XT)
4816     status = nvos_forward_error_to_cray(nvl->pci_dev, error_number,
4817                 format, ap);
4818 #endif
4819 
4820     return status;
4821 }
4822 
4823 NvU64 NV_API_CALL nv_get_dma_start_address(
4824     nv_state_t *nv
4825 )
4826 {
4827 #if defined(NVCPU_PPC64LE)
4828     struct pci_dev *pci_dev;
4829     dma_addr_t dma_addr;
4830     NvU64 saved_dma_mask;
4831     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4832 
4833     /*
4834      * If TCE bypass is disabled via a module parameter, then just return
4835      * the default (which is 0).
4836      *
4837      * Otherwise, the DMA start address only needs to be set once, and it
4838      * won't change afterward. Just return the cached value if asked again,
4839      * to avoid the kernel printing redundant messages to the kernel
4840      * log when we call dma_set_mask().
4841      */
4842     if ((nv_tce_bypass_mode == NV_TCE_BYPASS_MODE_DISABLE) ||
4843         (nvl->tce_bypass_enabled))
4844     {
4845         return nvl->dma_dev.addressable_range.start;
4846     }
4847 
4848     pci_dev = nvl->pci_dev;
4849 
4850     /*
4851      * Linux on IBM POWER8 offers 2 different DMA set-ups, sometimes
4852      * referred to as "windows".
4853      *
4854      * The "default window" provides a 2GB region of PCI address space
4855      * located below the 32-bit line. The IOMMU is used to provide a
4856      * "rich" mapping--any page in system memory can be mapped at an
4857      * arbitrary address within this window. The mappings are dynamic
4858      * and pass in and out of being as pci_map*()/pci_unmap*() calls
4859      * are made.
4860      *
4861      * Dynamic DMA Windows (sometimes "Huge DDW") provides a linear
4862      * mapping of the system's entire physical address space at some
4863      * fixed offset above the 59-bit line. IOMMU is still used, and
4864      * pci_map*()/pci_unmap*() are still required, but mappings are
4865      * static. They're effectively set up in advance, and any given
4866      * system page will always map to the same PCI bus address. I.e.
4867      *   physical 0x00000000xxxxxxxx => PCI 0x08000000xxxxxxxx
4868      *
4869      * This driver does not support the 2G default window because
4870      * of its limited size, and for reasons having to do with UVM.
4871      *
4872      * Linux on POWER8 will only provide the DDW-style full linear
4873      * mapping when the driver claims support for 64-bit DMA addressing
4874      * (a pre-requisite because the PCI addresses used in this case will
4875      * be near the top of the 64-bit range). The linear mapping
4876      * is not available in all system configurations.
4877      *
4878      * Detect whether the linear mapping is present by claiming
4879      * 64-bit support and then mapping physical page 0. For historical
4880      * reasons, Linux on POWER8 will never map a page to PCI address 0x0.
4881      * In the "default window" case page 0 will be mapped to some
4882      * non-zero address below the 32-bit line.  In the
4883      * DDW/linear-mapping case, it will be mapped to address 0 plus
4884      * some high-order offset.
4885      *
4886      * If the linear mapping is present and sane then return the offset
4887      * as the starting address for all DMA mappings.
4888      */
4889     saved_dma_mask = pci_dev->dma_mask;
4890     if (dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)) != 0)
4891     {
4892         goto done;
4893     }
4894 
4895     dma_addr = dma_map_single(&pci_dev->dev, NULL, 1, DMA_BIDIRECTIONAL);
4896     if (dma_mapping_error(&pci_dev->dev, dma_addr))
4897     {
4898         dma_set_mask(&pci_dev->dev, saved_dma_mask);
4899         goto done;
4900     }
4901 
4902     dma_unmap_single(&pci_dev->dev, dma_addr, 1, DMA_BIDIRECTIONAL);
4903 
4904     /*
4905      * From IBM: "For IODA2, native DMA bypass or KVM TCE-based implementation
4906      * of full 64-bit DMA support will establish a window in address-space
4907      * with the high 14 bits being constant and the bottom up-to-50 bits
4908      * varying with the mapping."
4909      *
4910      * Unfortunately, we don't have any good interfaces or definitions from
4911      * the kernel to get information about the DMA offset assigned by OS.
4912      * However, we have been told that the offset will be defined by the top
4913      * 14 bits of the address, and bits 40-49 will not vary for any DMA
4914      * mappings until 1TB of system memory is surpassed; this limitation is
4915      * essential for us to function properly since our current GPUs only
4916      * support 40 physical address bits. We are in a fragile place where we
4917      * need to tell the OS that we're capable of 64-bit addressing, while
4918      * relying on the assumption that the top 24 bits will not vary in this
4919      * case.
4920      *
4921      * The way we try to compute the window, then, is mask the trial mapping
4922      * against the DMA capabilities of the device. That way, devices with
4923      * greater addressing capabilities will only take the bits it needs to
4924      * define the window.
4925      */
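    /*
     * Worked example with hypothetical values: if the trial mapping of page 0
     * comes back as dma_addr = 0x0800000000000000 and the device's previous
     * mask was DMA_BIT_MASK(47), both checks below pass and the DMA start
     * address becomes dma_addr & ~saved_dma_mask = 0x0800000000000000.
     */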
4926     if ((dma_addr & DMA_BIT_MASK(32)) != 0)
4927     {
4928         /*
4929          * Huge DDW not available - page 0 mapped to non-zero address below
4930          * the 32-bit line.
4931          */
4932         nv_printf(NV_DBG_WARNINGS,
4933             "NVRM: DMA window limited by platform\n");
4934         dma_set_mask(&pci_dev->dev, saved_dma_mask);
4935         goto done;
4936     }
4937     else if ((dma_addr & saved_dma_mask) != 0)
4938     {
4939         NvU64 memory_size = NV_NUM_PHYSPAGES * PAGE_SIZE;
4940         if ((dma_addr & ~saved_dma_mask) !=
4941             ((dma_addr + memory_size) & ~saved_dma_mask))
4942         {
4943             /*
4944              * The physical window straddles our addressing limit boundary,
4945              * e.g., for an adapter that can address up to 1TB, the window
4946              * crosses the 40-bit limit so that the lower end of the range
4947              * has different bits 63:40 than the higher end of the range.
4948              * We can only handle a single, static value for bits 63:40, so
4949              * we must fall back here.
4950              */
4951             nv_printf(NV_DBG_WARNINGS,
4952                 "NVRM: DMA window limited by memory size\n");
4953             dma_set_mask(&pci_dev->dev, saved_dma_mask);
4954             goto done;
4955         }
4956     }
4957 
4958     nvl->tce_bypass_enabled = NV_TRUE;
4959     nvl->dma_dev.addressable_range.start = dma_addr & ~(saved_dma_mask);
4960 
4961     /* Update the coherent mask to match */
4962     dma_set_coherent_mask(&pci_dev->dev, pci_dev->dma_mask);
4963 
4964 done:
4965     return nvl->dma_dev.addressable_range.start;
4966 #else
4967     return 0;
4968 #endif
4969 }
4970 
4971 NV_STATUS NV_API_CALL nv_set_primary_vga_status(
4972     nv_state_t *nv
4973 )
4974 {
4975     /* IORESOURCE_ROM_SHADOW wasn't added until 2.6.10 */
4976 #if defined(IORESOURCE_ROM_SHADOW)
4977     nv_linux_state_t *nvl;
4978     struct pci_dev *pci_dev;
4979 
4980     nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4981     pci_dev = nvl->pci_dev;
4982 
4983     nv->primary_vga = ((NV_PCI_RESOURCE_FLAGS(pci_dev, PCI_ROM_RESOURCE) &
4984         IORESOURCE_ROM_SHADOW) == IORESOURCE_ROM_SHADOW);
4985     return NV_OK;
4986 #else
4987     return NV_ERR_NOT_SUPPORTED;
4988 #endif
4989 }
4990 
4991 NV_STATUS NV_API_CALL nv_pci_trigger_recovery(
4992      nv_state_t *nv
4993 )
4994 {
4995     NV_STATUS status = NV_ERR_NOT_SUPPORTED;
4996 #if defined(NV_PCI_ERROR_RECOVERY)
4997     nv_linux_state_t *nvl       = NV_GET_NVL_FROM_NV_STATE(nv);
4998 
4999     /*
5000      * Calling readl() on PPC64LE will allow the kernel to check its state for
5001      * the device and update it accordingly. This needs to be done before
5002      * checking if the PCI channel is offline, so that we don't check stale
5003      * state.
5004      *
5005      * This will also kick off the recovery process for the device.
5006      */
5007     if (NV_PCI_ERROR_RECOVERY_ENABLED())
5008     {
5009         if (readl(nv->regs->map) == 0xFFFFFFFF)
5010         {
5011             if (pci_channel_offline(nvl->pci_dev))
5012             {
5013                 NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
5014                               "PCI channel for the device is offline\n");
5015                 status = NV_OK;
5016             }
5017         }
5018     }
5019 #endif
5020     return status;
5021 }
5022 
5023 NvBool NV_API_CALL nv_requires_dma_remap(
5024     nv_state_t *nv
5025 )
5026 {
5027     NvBool dma_remap = NV_FALSE;
5028     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5029     dma_remap = !nv_dma_maps_swiotlb(nvl->dev);
5030     return dma_remap;
5031 }
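/*
 * nv_alloc_pages() uses this to decide what to store in pte_array: CPU
 * physical addresses for the DMA-remapping paths when remapping is required,
 * or device-visible DMA addresses from nv_phys_to_dma() otherwise.
 */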
5032 
5033 /*
5034  * Intended for use by external kernel modules to list NVIDIA GPU ids.
5035  */
5036 NvBool nvidia_get_gpuid_list(NvU32 *gpu_ids, NvU32 *gpu_count)
5037 {
5038     nv_linux_state_t *nvl;
5039     unsigned int count;
5040     NvBool ret = NV_TRUE;
5041 
5042     LOCK_NV_LINUX_DEVICES();
5043 
5044     count = 0;
5045     for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
5046         count++;
5047 
5048     if (*gpu_count == 0)
5049     {
5050         goto done;
5051     }
5052     else if ((*gpu_count) < count)
5053     {
5054         ret = NV_FALSE;
5055         goto done;
5056     }
5057 
5058     count = 0;
5059     for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
5060     {
5061         nv_state_t *nv = NV_STATE_PTR(nvl);
5062         gpu_ids[count++] = nv->gpu_id;
5063     }
5064 
5065 
5066 done:
5067 
5068     *gpu_count = count;
5069 
5070     UNLOCK_NV_LINUX_DEVICES();
5071 
5072     return ret;
5073 }
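/*
 * Minimal two-call usage sketch (hypothetical external caller): query the
 * device count by passing *gpu_count == 0, then call again with a buffer.
 *
 *     NvU32 count = 0;
 *     NvU32 *ids;
 *     nvidia_get_gpuid_list(NULL, &count);          // count query only
 *     ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
 *     if (ids != NULL && nvidia_get_gpuid_list(ids, &count))
 *     {
 *         ... use ids[0..count-1] ...
 *     }
 *     kfree(ids);
 */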
5074 
5075 /*
5076  * Kernel-level analog to nvidia_open, intended for use by external
5077  * kernel modules. This increments the ref count of the device with
5078  * the given gpu_id and makes sure the device has been initialized.
5079  *
5080  * Clients of this interface are counted by the RM reset path, to ensure a
5081  * GPU is not reset while the GPU is active.
5082  *
5083  * Returns -ENODEV if the given gpu_id does not exist.
5084  */
5085 int nvidia_dev_get(NvU32 gpu_id, nvidia_stack_t *sp)
5086 {
5087     nv_linux_state_t *nvl;
5088     int rc;
5089 
5090     /* Takes nvl->ldata_lock */
5091     nvl = find_gpu_id(gpu_id);
5092     if (!nvl)
5093         return -ENODEV;
5094 
5095     rc = nv_open_device(NV_STATE_PTR(nvl), sp);
5096 
5097     if (rc == 0)
5098         WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_TRUE) != NV_OK);
5099 
5100     up(&nvl->ldata_lock);
5101     return rc;
5102 }
5103 
5104 /*
5105  * Kernel-level analog to nvidia_close, intended for use by external
5106  * kernel modules. This decrements the ref count of the device with
5107  * the given gpu_id, potentially tearing it down.
5108  */
5109 void nvidia_dev_put(NvU32 gpu_id, nvidia_stack_t *sp)
5110 {
5111     nv_linux_state_t *nvl;
5112 
5113     /* Takes nvl->ldata_lock */
5114     nvl = find_gpu_id(gpu_id);
5115     if (!nvl)
5116         return;
5117 
5118     nv_close_device(NV_STATE_PTR(nvl), sp);
5119 
5120     WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_FALSE) != NV_OK);
5121 
5122     up(&nvl->ldata_lock);
5123 }
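/*
 * Sketch of the expected pairing (hypothetical external caller): every
 * successful nvidia_dev_get() is balanced by nvidia_dev_put() for the same
 * gpu_id, using a stack allocated with nv_kmem_cache_alloc_stack().
 *
 *     nvidia_stack_t *sp = NULL;
 *     if (nv_kmem_cache_alloc_stack(&sp) == 0)
 *     {
 *         if (nvidia_dev_get(gpu_id, sp) == 0)
 *         {
 *             ... use the GPU ...
 *             nvidia_dev_put(gpu_id, sp);
 *         }
 *         nv_kmem_cache_free_stack(sp);
 *     }
 */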
5124 
5125 /*
5126  * Like nvidia_dev_get but uses UUID instead of gpu_id. Note that this may
5127  * trigger initialization and teardown of unrelated devices to look up their
5128  * UUIDs.
5129  *
5130  * Clients of this interface are counted by the RM reset path, to ensure a
5131  * GPU is not reset while the GPU is active.
5132  */
5133 int nvidia_dev_get_uuid(const NvU8 *uuid, nvidia_stack_t *sp)
5134 {
5135     nv_state_t *nv = NULL;
5136     nv_linux_state_t *nvl = NULL;
5137     const NvU8 *dev_uuid;
5138     int rc = 0;
5139 
5140     /* Takes nvl->ldata_lock */
5141     nvl = find_uuid_candidate(uuid);
5142     while (nvl)
5143     {
5144         nv = NV_STATE_PTR(nvl);
5145 
5146         /*
5147          * If the device is missing its UUID, this open exists solely to ensure
5148          * rm_get_gpu_uuid_raw is called so that we can inspect the UUID.
5149          */
5150         rc = nv_open_device(nv, sp);
5151         if (rc != 0)
5152             goto out;
5153 
5154         /* The UUID should always be present following nv_open_device */
5155         dev_uuid = nv_get_cached_uuid(nv);
5156         WARN_ON(!dev_uuid);
5157         if (dev_uuid && memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
5158             break;
5159 
5160         /* No match, try again. */
5161         nv_close_device(nv, sp);
5162         up(&nvl->ldata_lock);
5163         nvl = find_uuid_candidate(uuid);
5164     }
5165 
5166     if (nvl)
5167     {
5168         rc = 0;
5169         WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_TRUE) != NV_OK);
5170     }
5171     else
5172         rc = -ENODEV;
5173 
5174 out:
5175     if (nvl)
5176         up(&nvl->ldata_lock);
5177     return rc;
5178 }
5179 
5180 /*
5181  * Like nvidia_dev_put but uses UUID instead of gpu_id.
5182  */
5183 void nvidia_dev_put_uuid(const NvU8 *uuid, nvidia_stack_t *sp)
5184 {
5185     nv_linux_state_t *nvl;
5186 
5187     /* Callers must already have called nvidia_dev_get_uuid() */
5188 
5189     /* Takes nvl->ldata_lock */
5190     nvl = find_uuid(uuid);
5191     if (!nvl)
5192         return;
5193 
5194     nv_close_device(NV_STATE_PTR(nvl), sp);
5195 
5196     WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_FALSE) != NV_OK);
5197 
5198     up(&nvl->ldata_lock);
5199 }
5200 
5201 int nvidia_dev_block_gc6(const NvU8 *uuid, nvidia_stack_t *sp)
5202 
5203 {
5204     nv_linux_state_t *nvl;
5205 
5206     /* Callers must already have called nvidia_dev_get_uuid() */
5207 
5208     /* Takes nvl->ldata_lock */
5209     nvl = find_uuid(uuid);
5210     if (!nvl)
5211         return -ENODEV;
5212 
5213     if (rm_ref_dynamic_power(sp, NV_STATE_PTR(nvl), NV_DYNAMIC_PM_FINE) != NV_OK)
5214     {
5215         up(&nvl->ldata_lock);
5216         return -EINVAL;
5217     }
5218 
5219     up(&nvl->ldata_lock);
5220 
5221     return 0;
5222 }
5223 
5224 int nvidia_dev_unblock_gc6(const NvU8 *uuid, nvidia_stack_t *sp)
5225 
5226 {
5227     nv_linux_state_t *nvl;
5228 
5229     /* Callers must already have called nvidia_dev_get_uuid() */
5230 
5231     /* Takes nvl->ldata_lock */
5232     nvl = find_uuid(uuid);
5233     if (!nvl)
5234         return -ENODEV;
5235 
5236     rm_unref_dynamic_power(sp, NV_STATE_PTR(nvl), NV_DYNAMIC_PM_FINE);
5237 
5238     up(&nvl->ldata_lock);
5239 
5240     return 0;
5241 }
5242 
5243 NV_STATUS NV_API_CALL nv_get_device_memory_config(
5244     nv_state_t *nv,
5245     NvU64 *compr_addr_sys_phys,
5246     NvU64 *addr_guest_phys,
5247     NvU64 *rsvd_phys,
5248     NvU32 *addr_width,
5249     NvS32 *node_id
5250 )
5251 {
5252     NV_STATUS status = NV_ERR_NOT_SUPPORTED;
5253 
5254 #if defined(NVCPU_PPC64LE)
5255     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5256 
5257     if (!nv_platform_supports_numa(nvl))
5258     {
5259         return NV_ERR_NOT_SUPPORTED;
5260     }
5261 
5262     if (node_id != NULL)
5263     {
5264         *node_id = nvl->numa_info.node_id;
5265     }
5266 
5267     {
5268         nv_npu_numa_info_t *numa_info;
5269 
5270         numa_info = &nvl->npu->numa_info;
5271 
5272         if (compr_addr_sys_phys != NULL)
5273         {
5274             *compr_addr_sys_phys =
5275                 numa_info->compr_sys_phys_addr;
5276         }
5277 
5278         if (addr_guest_phys != NULL)
5279         {
5280             *addr_guest_phys =
5281                 numa_info->guest_phys_addr;
5282         }
5283     }
5284 
5285     if (addr_width != NULL)
5286     {
5287         *addr_width = nv_volta_dma_addr_size - nv_volta_addr_space_width;
5288     }
5289 
5290     status = NV_OK;
5291 #endif
5292 #if defined(NVCPU_AARCH64)
5293     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5294 
5295     if (node_id != NULL)
5296     {
5297         *node_id = nvl->numa_info.node_id;
5298     }
5299 
5300     if (compr_addr_sys_phys)
5301     {
5302         *compr_addr_sys_phys = nvl->coherent_link_info.gpu_mem_pa;
5303     }
5304     if (addr_guest_phys)
5305     {
5306         *addr_guest_phys = nvl->coherent_link_info.gpu_mem_pa;
5307     }
5308     if (rsvd_phys)
5309     {
5310         *rsvd_phys = nvl->coherent_link_info.rsvd_mem_pa;
5311     }
5312     if (addr_width)
5313     {
5314         // TH500 PA width (48) minus NV_PFB_PRI_MMU_ATS_ADDR_RANGE_GRANULARITY (37)
5315         *addr_width = 48 - 37;
5316     }
5317 
5318     status = NV_OK;
5319 #endif
5320 
5321     return status;
5322 }
5323 
5324 #if defined(NVCPU_PPC64LE)
5325 
5326 NV_STATUS NV_API_CALL nv_get_nvlink_line_rate(
5327     nv_state_t *nvState,
5328     NvU32      *linerate
5329 )
5330 {
5331 #if defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)
5332 
5333     nv_linux_state_t *nvl;
5334     struct pci_dev   *npuDev;
5335     NvU32            *pSpeedPtr = NULL;
5336     NvU32            speed;
5337     int              len;
5338 
5339     if (nvState != NULL)
5340         nvl = NV_GET_NVL_FROM_NV_STATE(nvState);
5341     else
5342         return NV_ERR_INVALID_ARGUMENT;
5343 
5344     if (!nvl->npu)
5345     {
5346         return NV_ERR_NOT_SUPPORTED;
5347     }
5348 
5349     npuDev = nvl->npu->devs[0];
5350     if (!npuDev->dev.of_node)
5351     {
5352         nv_printf(NV_DBG_ERRORS, "NVRM: %s: OF Node not found in IBM-NPU device node\n",
5353                   __FUNCTION__);
5354         return NV_ERR_NOT_SUPPORTED;
5355     }
5356 
5357     pSpeedPtr = (NvU32 *) of_get_property(npuDev->dev.of_node, "ibm,nvlink-speed", &len);
5358 
5359     if (pSpeedPtr)
5360     {
5361         speed = (NvU32) be32_to_cpup(pSpeedPtr);
5362     }
5363     else
5364     {
5365         return NV_ERR_NOT_SUPPORTED;
5366     }
5367 
5368     if (!speed)
5369     {
5370         return NV_ERR_NOT_SUPPORTED;
5371     }
5372     else
5373     {
5374         *linerate = speed;
5375     }
5376 
5377     return NV_OK;
5378 
5379 #endif
5380 
5381     return NV_ERR_NOT_SUPPORTED;
5382 }
5383 
5384 #endif
5385 
5386 NV_STATUS NV_API_CALL nv_indicate_idle(
5387     nv_state_t *nv
5388 )
5389 {
5390 #if NV_FILESYSTEM_ACCESS_AVAILABLE
5391 #if defined(NV_PM_RUNTIME_AVAILABLE)
5392     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5393     struct device *dev = nvl->dev;
5394     struct file *file = nvl->sysfs_config_file;
5395     loff_t f_pos = 0;
5396     char buf;
5397 
5398     pm_runtime_put_noidle(dev);
5399 
5400 #if defined(NV_SEQ_READ_ITER_PRESENT)
5401     {
5402         struct kernfs_open_file *of = ((struct seq_file *)file->private_data)->private;
5403         struct kernfs_node *kn;
5404 
5405         mutex_lock(&of->mutex);
5406         kn = of->kn;
5407         if (kn != NULL && atomic_inc_unless_negative(&kn->active))
5408         {
5409             if ((kn->attr.ops != NULL) && (kn->attr.ops->read != NULL))
5410             {
5411                 kn->attr.ops->read(of, &buf, 1, f_pos);
5412             }
5413             atomic_dec(&kn->active);
5414         }
5415         mutex_unlock(&of->mutex);
5416     }
5417 #else
5418 #if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
5419     kernel_read(file, &buf, 1, &f_pos);
5420 #else
5421     kernel_read(file, f_pos, &buf, 1);
5422 #endif
5423 #endif
5424 
5425     return NV_OK;
5426 #else
5427     return NV_ERR_NOT_SUPPORTED;
5428 #endif
5429 #else
5430     return NV_ERR_NOT_SUPPORTED;
5431 #endif
5432 }
5433 
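/*
 * Take a runtime PM reference without resuming the GPU
 * (pm_runtime_get_noresume()) and then invoke the PCI bus shutdown callback
 * for this device.  is_forced_shutdown is set first, presumably so the
 * driver's shutdown handler can tell this forced path apart from a real
 * system shutdown.
 */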
5434 NV_STATUS NV_API_CALL nv_indicate_not_idle(
5435     nv_state_t *nv
5436 )
5437 {
5438 #if defined(NV_PM_RUNTIME_AVAILABLE)
5439     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5440     struct device *dev = nvl->dev;
5441 
5442     pm_runtime_get_noresume(dev);
5443 
5444     nvl->is_forced_shutdown = NV_TRUE;
5445     pci_bus_type.shutdown(dev);
5446 
5447     return NV_OK;
5448 #else
5449     return NV_ERR_NOT_SUPPORTED;
5450 #endif
5451 }
5452 
5453 void NV_API_CALL nv_idle_holdoff(
5454     nv_state_t *nv
5455 )
5456 {
5457 #if defined(NV_PM_RUNTIME_AVAILABLE)
5458     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5459     struct device *dev = nvl->dev;
5460 
5461     pm_runtime_get_noresume(dev);
5462 #endif
5463 }
5464 
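/*
 * Dynamic power management is reported as available only when runtime PM is
 * compiled into the kernel and the sysfs config file handle used by
 * nv_indicate_idle() has been cached for this device.
 */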
5465 NvBool NV_API_CALL nv_dynamic_power_available(
5466     nv_state_t *nv
5467 )
5468 {
5469 #if defined(NV_PM_RUNTIME_AVAILABLE)
5470     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5471     return nvl->sysfs_config_file != NULL;
5472 #else
5473     return NV_FALSE;
5474 #endif
5475 }
5476 
5477 /* caller should hold nv_linux_devices_lock using LOCK_NV_LINUX_DEVICES */
5478 int nv_linux_add_device_locked(nv_linux_state_t *nvl)
5479 {
5480     int rc = -1;
5481     int i;
5482 
5483     // look for a free minor number and assign it to this device
5484     for (i = 0; i <= NV_MINOR_DEVICE_NUMBER_REGULAR_MAX; i++)
5485     {
5486         if (nv_linux_minor_num_table[i] == NULL)
5487         {
5488             nv_linux_minor_num_table[i] = nvl;
5489             nvl->minor_num = i;
5490             rc = 0;
5491             break;
5492         }
5493     }
5494 
5495     // bail if no minor number is free
5496     if (rc != 0)
5497         return rc;
5498 
5499     if (nv_linux_devices == NULL) {
5500         nv_linux_devices = nvl;
5501     }
5502     else
5503     {
5504         nv_linux_state_t *tnvl;
5505         for (tnvl = nv_linux_devices; tnvl->next != NULL;  tnvl = tnvl->next);
5506         tnvl->next = nvl;
5507     }
5508 
5509     return rc;
5510 }
5511 
5512 /* caller should hold nv_linux_devices_lock using LOCK_NV_LINUX_DEVICES */
5513 void nv_linux_remove_device_locked(nv_linux_state_t *nvl)
5514 {
5515     if (nvl == nv_linux_devices) {
5516         nv_linux_devices = nvl->next;
5517     }
5518     else
5519     {
5520         nv_linux_state_t *tnvl;
5521         for (tnvl = nv_linux_devices; tnvl->next != nvl;  tnvl = tnvl->next);
5522         tnvl->next = nvl->next;
5523     }
5524 
5525     nv_linux_minor_num_table[nvl->minor_num] = NULL;
5526 }
5527 
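/*
 * The per-device open_q kthread queue serializes deferred open work.  Init
 * creates the "nv_open_q" thread and marks the device as accepting opens;
 * stop clears that flag under open_q_lock and stops the queue exactly once.
 */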
5528 int nv_linux_init_open_q(nv_linux_state_t *nvl)
5529 {
5530     int rc;
5531     rc = nv_kthread_q_init(&nvl->open_q, "nv_open_q");
5532     if (rc != 0)
5533         return rc;
5534 
5535     down(&nvl->open_q_lock);
5536     nvl->is_accepting_opens = NV_TRUE;
5537     up(&nvl->open_q_lock);
5538     return 0;
5539 }
5540 
5541 void nv_linux_stop_open_q(nv_linux_state_t *nvl)
5542 {
5543     NvBool should_stop = NV_FALSE;
5544 
5545     down(&nvl->open_q_lock);
5546     if (nvl->is_accepting_opens)
5547     {
5548         should_stop = NV_TRUE;
5549         nvl->is_accepting_opens = NV_FALSE;
5550     }
5551     up(&nvl->open_q_lock);
5552 
5553     if (should_stop)
5554         nv_kthread_q_stop(&nvl->open_q);
5555 }
5556 
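/*
 * Enable or disable every SoC interrupt line for this GPU under
 * soc_isr_lock.  The per-IRQ ref_count keeps enable_irq() and
 * disable_irq_nosync() calls balanced, and the function bails out entirely
 * while an SoC interrupt is being serviced (current_soc_irq != -1).
 */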
5557 void NV_API_CALL nv_control_soc_irqs(nv_state_t *nv, NvBool bEnable)
5558 {
5559     int count;
5560     unsigned long flags;
5561     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5562 
5563     if (nv->current_soc_irq != -1)
5564         return;
5565 
5566     NV_SPIN_LOCK_IRQSAVE(&nvl->soc_isr_lock, flags);
5567     if (bEnable)
5568     {
5569         for (count = 0; count < nv->num_soc_irqs; count++)
5570         {
5571             if (nv->soc_irq_info[count].ref_count == 0)
5572             {
5573                 nv->soc_irq_info[count].ref_count++;
5574                 enable_irq(nv->soc_irq_info[count].irq_num);
5575             }
5576         }
5577     }
5578     else
5579     {
5580         for (count = 0; count < nv->num_soc_irqs; count++)
5581         {
5582             if (nv->soc_irq_info[count].ref_count == 1)
5583             {
5584                 nv->soc_irq_info[count].ref_count--;
5585                 disable_irq_nosync(nv->soc_irq_info[count].irq_num);
5586             }
5587         }
5588     }
5589     NV_SPIN_UNLOCK_IRQRESTORE(&nvl->soc_isr_lock, flags);
5590 }
5591 
5592 NvU32 NV_API_CALL nv_get_dev_minor(nv_state_t *nv)
5593 {
5594     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5595 
5596     return nvl->minor_num;
5597 }
5598 
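/*
 * Validate that the given nvlink capability fd grants fabric management
 * rights and hand back a duplicated fd for it; otherwise report insufficient
 * permissions.
 */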
5599 NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap(int fd, int *duped_fd)
5600 {
5601     *duped_fd = nvlink_cap_acquire(fd, NVLINK_CAP_FABRIC_MANAGEMENT);
5602     if (*duped_fd < 0)
5603     {
5604         return NV_ERR_INSUFFICIENT_PERMISSIONS;
5605     }
5606 
5607     return NV_OK;
5608 }
5609 
5610 /*
5611  * Wakes up the NVIDIA GPU HDA codec and controller by reading
5612  * the codec proc file.
5613  */
5614 void NV_API_CALL nv_audio_dynamic_power(
5615     nv_state_t *nv
5616 )
5617 {
5618 /*
5619  * Runtime power management of the NVIDIA HDA controller is only possible
5620  * after commit 07f4f97d7b4b ("vga_switcheroo: Use device link for HDA
5621  * controller"). This commit also moved the 'PCI_CLASS_MULTIMEDIA_HD_AUDIO'
5622  * macro from <sound/hdaudio.h> to <linux/pci_ids.h>.
5623  * If 'NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT' is not defined, then
5624  * this function is a stub.
5625  *
5626  * Also, check if runtime PM is enabled in the kernel (with
5627  * 'NV_PM_RUNTIME_AVAILABLE') and stub this function if it is disabled. This
5628  * function uses kernel fields only present when the kconfig has runtime PM
5629  * enabled.
5630  */
5631 #if defined(NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT) && defined(NV_PM_RUNTIME_AVAILABLE)
5632     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5633     struct device *dev = nvl->dev;
5634     struct pci_dev *audio_pci_dev, *pci_dev;
5635     struct snd_card *card;
5636 
5637     if (!dev_is_pci(dev))
5638         return;
5639 
5640     pci_dev = to_pci_dev(dev);
5641 
5642     audio_pci_dev = os_pci_init_handle(NV_PCI_DOMAIN_NUMBER(pci_dev),
5643                                        NV_PCI_BUS_NUMBER(pci_dev),
5644                                        NV_PCI_SLOT_NUMBER(pci_dev),
5645                                        1, NULL, NULL);
5646 
5647     if (audio_pci_dev == NULL)
5648         return;
5649 
5650     /*
5651      * Check if the HDA controller is in the PM-suspended state. The HDA
5652      * controller cannot be runtime resumed if this API is called during
5653      * system suspend/resume while the controller is PM suspended.
5654      */
5655     if (audio_pci_dev->dev.power.is_suspended)
5656         return;
5657 
5658     card = pci_get_drvdata(audio_pci_dev);
5659     if (card == NULL)
5660         return;
5661 
5662     /*
5663      * Commit be57bfffb7b5 ("ALSA: hda: move hda_codec.h to include/sound")
5664      * in v4.20-rc1 moved "hda_codec.h" header file from the private sound
5665      * folder to include/sound.
5666      */
5667 #if defined(NV_SOUND_HDA_CODEC_H_PRESENT)
5668     {
5669         struct list_head *p;
5670         struct hda_codec *codec = NULL;
5671         unsigned int cmd, res;
5672 
5673         /*
5674          * Traverse the list of devices maintained by the sound card and
5675          * search for the HDA codec controller.
5676          */
5677         list_for_each_prev(p, &card->devices)
5678         {
5679             struct snd_device *pdev = list_entry(p, struct snd_device, list);
5680 
5681             if (pdev->type == SNDRV_DEV_CODEC)
5682             {
5683                 codec = pdev->device_data;
5684 
5685                 /*
5686                  * The NVIDIA HDA codec controller uses the Linux kernel HDA
5687                  * codec driver. Commit 05852448690d ("ALSA: hda - Support
5688                  * indirect execution of verbs") added support for overriding
5689                  * exec_verb; codec->core.exec_verb is codec_exec_verb() for
5690                  * the NVIDIA HDA codec driver.
5691                  */
5692                 if (codec->core.exec_verb == NULL)
5693                 {
5694                     return;
5695                 }
5696 
5697                 break;
5698             }
5699         }
5700 
5701         if (codec == NULL)
5702         {
5703             return;
5704         }
5705 
5706         /* If HDA codec controller is already runtime active, then return */
5707         if (snd_hdac_is_power_on(&codec->core))
5708         {
5709             return;
5710         }
5711 
5712         /*
5713          * Encode the codec verb that reads the vendor ID from the root node.
5714          * Refer to the Intel High Definition Audio Specification for details.
5715          */
5716         cmd = (codec->addr << 28) | (AC_NODE_ROOT << 20) |
5717               (AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID;
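        /*
         * Resulting 32-bit verb layout (matching the shifts above):
         *   [31:28] codec address   [27:20] node ID (AC_NODE_ROOT)
         *   [19:8]  verb (AC_VERB_PARAMETERS)   [7:0] parameter (AC_PAR_VENDOR_ID)
         */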
5718 
5719         /*
5720          * This internally increments the runtime PM refcount, wakes up the
5721          * audio codec controller, and sends the HW command to read the
5722          * vendor ID. Once the vendor ID is returned, it decrements the
5723          * runtime PM refcount and lets the audio codec controller runtime
5724          * suspend again (if the refcount is zero) once the autosuspend
5725          * timer expires.
5726          */
5727         codec->core.exec_verb(&codec->core, cmd, 0, &res);
5728     }
5729 #else
5730     {
5731         int codec_addr;
5732 
5733         /*
5734          * The filp_open() call below depends on the current task's fs_struct
5735          * (current->fs), which may already be NULL if this is called during
5736          * process teardown.
5737          */
5738         if (current->fs == NULL)
5739             return;
5740 
5741         /* If device is runtime active, then return */
5742         if (audio_pci_dev->dev.power.runtime_status == RPM_ACTIVE)
5743             return;
5744 
5745         for (codec_addr = 0; codec_addr < NV_HDA_MAX_CODECS; codec_addr++)
5746         {
5747             char filename[48];
5748             NvU8 buf;
5749             int ret;
5750 
5751             ret = snprintf(filename, sizeof(filename),
5752                            "/proc/asound/card%d/codec#%d",
5753                            card->number, codec_addr);
5754 
5755             if (ret > 0 && ret < sizeof(filename) &&
5756                 (os_open_and_read_file(filename, &buf, 1) == NV_OK))
5757             {
5758                 break;
5759             }
5760         }
5761     }
5762 #endif
5763 #endif
5764 }
5765 
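/*
 * iterate_fd() callback: returns nonzero only when the file is an NVIDIA
 * device file whose per-device state matches the device passed in 'data'.
 */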
5766 static int nv_match_dev_state(const void *data, struct file *filp, unsigned fd)
5767 {
5768     nv_linux_state_t *nvl = NULL;
5769 
5770     if (filp == NULL ||
5771         filp->f_op != &nvidia_fops ||
5772         filp->private_data == NULL)
5773         return 0;
5774 
5775     nvl = NV_GET_NVL_FROM_FILEP(filp);
5776     if (nvl == NULL)
5777         return 0;
5778 
5779     return (data == nvl);
5780 }
5781 
5782 NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *nv, void *os_info)
5783 {
5784     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5785 
5786     return nv_match_dev_state(nvl, os_info, -1);
5787 }
5788 
5789 NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *nv)
5790 {
5791     struct files_struct *files = current->files;
5792     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5793 
5794     return !!iterate_fd(files, 0, nv_match_dev_state, nvl);
5795 }
5796 
5797 NvBool NV_API_CALL nv_platform_supports_s0ix(void)
5798 {
5799 #if defined(CONFIG_ACPI)
5800     return (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) != 0;
5801 #else
5802     return NV_FALSE;
5803 #endif
5804 }
5805 
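/*
 * Read the first bytes of /sys/power/mem_sleep and report whether s2idle is
 * the currently selected suspend mode ("[s2idle]").  On kernels where sysfs
 * reads go through seq_read_iter(), the read is issued with a hand-built
 * kiocb/iov_iter; otherwise os_open_and_read_file() is used.
 */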
5806 NvBool NV_API_CALL nv_s2idle_pm_configured(void)
5807 {
5808     NvU8 buf[8];
5809 
5810 #if defined(NV_SEQ_READ_ITER_PRESENT)
5811     struct file *file;
5812     ssize_t num_read;
5813     struct kiocb kiocb;
5814     struct iov_iter iter;
5815     struct kvec iov = {
5816         .iov_base = &buf,
5817         .iov_len = sizeof(buf),
5818     };
5819 
5820     if (os_open_readonly_file("/sys/power/mem_sleep", (void **)&file) != NV_OK)
5821     {
5822         return NV_FALSE;
5823     }
5824 
5825     /*
5826      * init_sync_kiocb() internally uses the GPL-licensed __get_task_ioprio()
5827      * starting with v5.20-rc1.
5828      */
5829 #if defined(NV_GET_TASK_IOPRIO_PRESENT)
5830     memset(&kiocb, 0, sizeof(kiocb));
5831     kiocb.ki_filp = file;
5832     kiocb.ki_flags = iocb_flags(file);
5833     kiocb.ki_ioprio = IOPRIO_DEFAULT;
5834 #else
5835     init_sync_kiocb(&kiocb, file);
5836 #endif
5837 
5838     kiocb.ki_pos = 0;
5839     iov_iter_kvec(&iter, READ, &iov, 1, sizeof(buf));
5840 
5841     num_read = seq_read_iter(&kiocb, &iter);
5842 
5843     os_close_file((void *)file);
5844 
5845     if (num_read != sizeof(buf))
5846     {
5847         return NV_FALSE;
5848     }
5849 #else
5850     if (os_open_and_read_file("/sys/power/mem_sleep", buf,
5851                               sizeof(buf)) != NV_OK)
5852     {
5853         return NV_FALSE;
5854     }
5855 #endif
5856 
5857     return (memcmp(buf, "[s2idle]", 8) == 0);
5858 }
5859 
5860 /*
5861  * Queries the system chassis info to figure out whether the platform is a
5862  * Laptop or Notebook.
5863  * This function should be used when querying GPU form factor information is
5864  * not possible via core RM or if querying both system and GPU form factor
5865  * information is necessary.
5866  */
5867 NvBool NV_API_CALL nv_is_chassis_notebook(void)
5868 {
5869     const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
5870 
5871     //
5872     // Return true only for Laptop & Notebook
5873     // As per SMBIOS spec Laptop = 9 and Notebook = 10
5874     //
5875     return (chassis_type && (!strcmp(chassis_type, "9") || !strcmp(chassis_type, "10")));
5876 }
5877 
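/*
 * These two helpers toggle the device's runtime_auto flag (the state exposed
 * by the sysfs power/control file) and the matching usage-count reference
 * directly under power.lock, roughly mirroring pm_runtime_allow() and
 * pm_runtime_forbid() without the immediate resume.
 */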
5878 void NV_API_CALL nv_allow_runtime_suspend
5879 (
5880     nv_state_t *nv
5881 )
5882 {
5883 #if defined(NV_PM_RUNTIME_AVAILABLE)
5884     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5885     struct device    *dev = nvl->dev;
5886 
5887     spin_lock_irq(&dev->power.lock);
5888 
5889     if (dev->power.runtime_auto == false)
5890     {
5891         dev->power.runtime_auto = true;
5892         atomic_add_unless(&dev->power.usage_count, -1, 0);
5893     }
5894 
5895     spin_unlock_irq(&dev->power.lock);
5896 #endif
5897 }
5898 
5899 void NV_API_CALL nv_disallow_runtime_suspend
5900 (
5901     nv_state_t *nv
5902 )
5903 {
5904 #if defined(NV_PM_RUNTIME_AVAILABLE)
5905     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
5906     struct device    *dev = nvl->dev;
5907 
5908     spin_lock_irq(&dev->power.lock);
5909 
5910     if (dev->power.runtime_auto == true)
5911     {
5912         dev->power.runtime_auto = false;
5913         atomic_inc(&dev->power.usage_count);
5914     }
5915 
5916     spin_unlock_irq(&dev->power.lock);
5917 #endif
5918 }
5919 
5920 NvU32 NV_API_CALL nv_get_os_type(void)
5921 {
5922     return OS_TYPE_LINUX;
5923 }
5924 
5925 void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size)
5926 {
5927 #if NVCPU_IS_PPC64LE
5928     return nv_ibmnpu_cache_flush_range(nv, cpu_virtual, size);
5929 #elif NVCPU_IS_AARCH64
5930     NvU64 va, cbsize;
5931     NvU64 end_cpu_virtual = cpu_virtual + size;
5932 
5933     nv_printf(NV_DBG_INFO,
5934             "Flushing CPU virtual range [0x%llx, 0x%llx)\n",
5935             cpu_virtual, end_cpu_virtual);
5936 
5937     cbsize = cache_line_size();
5938     // Align address to line size
5939     cpu_virtual = NV_ALIGN_UP(cpu_virtual, cbsize);
5940 
5941     // Force eviction of any cache lines from the NUMA-onlined region.
5942     for (va = cpu_virtual; va < end_cpu_virtual; va += cbsize)
5943     {
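        // "dc civac": clean and invalidate the data cache line holding 'va',
        // by virtual address, out to the point of coherency.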
5944         asm volatile("dc civac, %0" : : "r" (va): "memory");
5945         // Reschedule if necessary to avoid lockup warnings
5946         cond_resched();
5947     }
5948     asm volatile("dsb sy" : : : "memory");
5949 #endif
5950 }
5951 
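/*
 * Pre-order walk of the iomem resource tree: descend into a node's children
 * first, then move to its sibling, climbing back up through parents when a
 * subtree is exhausted.
 */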
5952 static struct resource *nv_next_resource(struct resource *p)
5953 {
5954     if (p->child != NULL)
5955         return p->child;
5956 
5957     while ((p->sibling == NULL) && (p->parent != NULL))
5958         p = p->parent;
5959 
5960     return p->sibling;
5961 }
5962 
5963 /*
5964  * Gets the correct PCI bus memory window that can be mapped in the
5965  * real-mode emulator (emu).
5966  * This function is called during initialization of the emu, before it is
5967  * remapped to the OS.
5968  */
5969 void NV_API_CALL nv_get_updated_emu_seg(
5970     NvU32 *start,
5971     NvU32 *end
5972 )
5973 {
5974     struct resource *p;
5975 
5976     if (*start >= *end)
5977         return;
5978 
5979     for (p = iomem_resource.child; (p != NULL); p = nv_next_resource(p))
5980     {
5981         /* If we passed the resource we are looking for, stop */
5982         if (p->start > *end)
5983         {
5984             p = NULL;
5985             break;
5986         }
5987 
5988         /* Skip until we find a range that matches what we look for */
5989         if (p->end < *start)
5990             continue;
5991 
5992         if ((p->end > *end) && (p->child))
5993             continue;
5994 
5995         if ((p->flags & IORESOURCE_MEM) != IORESOURCE_MEM)
5996             continue;
5997 
5998         /* Found a match, break */
5999         break;
6000     }
6001 
6002     if (p != NULL)
6003     {
6004         *start = max((resource_size_t)*start, p->start);
6005         *end = min((resource_size_t)*end, p->end);
6006     }
6007 }
6008 
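/*
 * Retrieve EGM (extended GPU memory) information from the "nvidia,egm-*"
 * DSD device properties: base physical address, size, and the proximity
 * domain, which is translated to a NUMA node id with pxm_to_node().
 */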
6009 NV_STATUS NV_API_CALL nv_get_egm_info(
6010     nv_state_t *nv,
6011     NvU64 *phys_addr,
6012     NvU64 *size,
6013     NvS32 *egm_node_id
6014 )
6015 {
6016 #if defined(NV_DEVICE_PROPERTY_READ_U64_PRESENT) && \
6017     defined(CONFIG_ACPI_NUMA) && \
6018     NV_IS_EXPORT_SYMBOL_PRESENT_pxm_to_node
6019     nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
6020     NvU64 pa, sz, pxm;
6021 
6022     if (device_property_read_u64(nvl->dev, "nvidia,egm-pxm", &pxm) != 0)
6023     {
6024         goto failed;
6025     }
6026 
6027     if (device_property_read_u64(nvl->dev, "nvidia,egm-base-pa", &pa) != 0)
6028     {
6029         goto failed;
6030     }
6031 
6032     if (device_property_read_u64(nvl->dev, "nvidia,egm-size", &sz) != 0)
6033     {
6034         goto failed;
6035     }
6036 
6037     NV_DEV_PRINTF(NV_DBG_INFO, nv, "DSD properties: \n");
6038     NV_DEV_PRINTF(NV_DBG_INFO, nv, "\tEGM base PA: 0x%llx \n", pa);
6039     NV_DEV_PRINTF(NV_DBG_INFO, nv, "\tEGM size: 0x%llx \n", sz);
6040     NV_DEV_PRINTF(NV_DBG_INFO, nv, "\tEGM _PXM: 0x%llx \n", pxm);
6041 
6042     if (egm_node_id != NULL)
6043     {
6044         *egm_node_id = pxm_to_node(pxm);
6045         nv_printf(NV_DBG_INFO, "EGM node id: %d\n", *egm_node_id);
6046     }
6047 
6048     if (phys_addr != NULL)
6049     {
6050         *phys_addr = pa;
6051         nv_printf(NV_DBG_INFO, "EGM base addr: 0x%llx\n", *phys_addr);
6052     }
6053 
6054     if (size != NULL)
6055     {
6056         *size = sz;
6057         nv_printf(NV_DBG_INFO, "EGM size: 0x%llx\n", *size);
6058     }
6059 
6060     return NV_OK;
6061 
6062 failed:
6063 #endif // NV_DEVICE_PROPERTY_READ_U64_PRESENT && CONFIG_ACPI_NUMA && pxm_to_node
6064 
6065     NV_DEV_PRINTF(NV_DBG_INFO, nv, "Cannot get EGM info\n");
6066     return NV_ERR_NOT_SUPPORTED;
6067 }
6068 
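/*
 * Report the physical address and geometry of the boot console framebuffer,
 * but only if that framebuffer is mapped through one of this GPU's BARs; all
 * outputs are zeroed otherwise.  The registered fbdev framebuffers are
 * checked first, then the global screen_info (where exported), and finally
 * the child resources of the GPU's FB/IMEM BARs.
 */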
6069 void NV_API_CALL nv_get_screen_info(
6070     nv_state_t  *nv,
6071     NvU64       *pPhysicalAddress,
6072     NvU32       *pFbWidth,
6073     NvU32       *pFbHeight,
6074     NvU32       *pFbDepth,
6075     NvU32       *pFbPitch,
6076     NvU64       *pFbSize
6077 )
6078 {
6079     *pPhysicalAddress = 0;
6080     *pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = *pFbSize = 0;
6081 
6082 #if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
6083     if (num_registered_fb > 0)
6084     {
6085         int i;
6086 
6087         for (i = 0; i < num_registered_fb; i++)
6088         {
6089             if (!registered_fb[i])
6090                 continue;
6091 
6092             /* Make sure base address is mapped to GPU BAR */
6093             if (NV_IS_CONSOLE_MAPPED(nv, registered_fb[i]->fix.smem_start))
6094             {
6095                 *pPhysicalAddress = registered_fb[i]->fix.smem_start;
6096                 *pFbWidth = registered_fb[i]->var.xres;
6097                 *pFbHeight = registered_fb[i]->var.yres;
6098                 *pFbDepth = registered_fb[i]->var.bits_per_pixel;
6099                 *pFbPitch = registered_fb[i]->fix.line_length;
6100                 *pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
6101                 return;
6102             }
6103         }
6104     }
6105 #endif
6106 
6107     /*
6108      * If the screen info is not found in the registered FBs, then fall back
6109      * to the screen_info structure.
6110      *
6111      * The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
6112      * generic framebuffers so the new generic system-framebuffer drivers can
6113      * be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
6114      * device created by SYSFB_SIMPLEFB.
6115      *
6116      * SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
6117      * information required by nv_get_screen_info(); therefore we need to
6118      * fall back on the screen_info structure.
6119      *
6120      * After commit b8466fe82b79 ("efi: move screen_info into efi init code")
6121      * in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
6122      */
6123 
6124 #if NV_CHECK_EXPORT_SYMBOL(screen_info)
6125     /*
6126      * If there is not a framebuffer console, return 0 size.
6127      *
6128      * orig_video_isVGA is set to 1 during early Linux kernel
6129      * initialization, and then will be set to a value, such as
6130      * VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
6131      */
6132     if (screen_info.orig_video_isVGA > 1)
6133     {
6134         NvU64 physAddr = screen_info.lfb_base;
6135 #if defined(VIDEO_CAPABILITY_64BIT_BASE)
6136         physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
6137 #endif
6138 
6139         /* Make sure base address is mapped to GPU BAR */
6140         if (NV_IS_CONSOLE_MAPPED(nv, physAddr))
6141         {
6142             *pPhysicalAddress = physAddr;
6143             *pFbWidth = screen_info.lfb_width;
6144             *pFbHeight = screen_info.lfb_height;
6145             *pFbDepth = screen_info.lfb_depth;
6146             *pFbPitch = screen_info.lfb_linelength;
6147             *pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
6148         }
6149     }
6150 #else
6151     {
6152         nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
6153         struct pci_dev *pci_dev = nvl->pci_dev;
6154         int i;
6155 
6156         if (pci_dev == NULL)
6157             return;
6158 
6159         BUILD_BUG_ON(NV_GPU_BAR_INDEX_IMEM != NV_GPU_BAR_INDEX_FB + 1);
6160         for (i = NV_GPU_BAR_INDEX_FB; i <= NV_GPU_BAR_INDEX_IMEM; i++)
6161         {
6162             int bar_index = nv_bar_index_to_os_bar_index(pci_dev, i);
6163             struct resource *gpu_bar_res = &pci_dev->resource[bar_index];
6164             struct resource *res = gpu_bar_res->child;
6165 
6166             /*
6167              * The console resource becomes a child resource of the pci-dev
6168              * resource. Check whether the child resource's start address
6169              * matches the expected console start address.
6170              */
6171             if ((res != NULL) &&
6172                 NV_IS_CONSOLE_MAPPED(nv, res->start))
6173             {
6174                 NvU32 res_name_len = strlen(res->name);
6175 
6176                 /*
6177                  * The resource name ends with 'fb' (efifb, vesafb, etc.).
6178                  * For simple-framebuffer, the resource name is 'BOOTFB'.
6179                  * Confirm that the resource name ends with either 'fb' or 'FB'.
6180                  */
6181                 if ((res_name_len > 2) &&
6182                     !strcasecmp((res->name + res_name_len - 2), "fb"))
6183                 {
6184                     *pPhysicalAddress = res->start;
6185                     *pFbSize = resource_size(res);
6186                     return;
6187                 }
6188             }
6189         }
6190     }
6191 #endif
6192 }
6193 
6194 
6195 module_init(nvidia_init_module);
6196 module_exit(nvidia_exit_module);
6197