/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h> // for MODULE_FIRMWARE

// must precede "nv.h" and "nv-firmware.h" includes
#define NV_FIRMWARE_PATH_FOR_FILENAME(filename) "nvidia/" NV_VERSION_STRING "/" filename
#define NV_FIRMWARE_DECLARE_GSP_FILENAME(filename) \
    MODULE_FIRMWARE(NV_FIRMWARE_PATH_FOR_FILENAME(filename));
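
/*
 * Expansion sketch (with hypothetical values): assuming a firmware file
 * named "gsp.bin" and NV_VERSION_STRING "555.42",
 * NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp.bin") expands to
 * MODULE_FIRMWARE("nvidia/555.42/gsp.bin"), which records the firmware path
 * in the module's .modinfo section for modinfo(8) and initramfs generators.
 */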
#include "nv-firmware.h"

#include "nvmisc.h"
#include "os-interface.h"
#include "nv-linux.h"
#include "nv-p2p.h"
#include "nv-reg.h"
#include "nv-msi.h"
#include "nv-pci-table.h"
#include "nv-chardev-numbers.h"

#if defined(NV_UVM_ENABLE)
#include "nv_uvm_interface.h"
#endif

#if defined(NV_VGPU_KVM_BUILD)
#include "nv-vgpu-vfio-interface.h"
#endif

#include "nvlink_proto.h"
#include "nvlink_caps.h"

#include "nv-hypervisor.h"
#include "nv-ibmnpu.h"
#include "nv-rsync.h"
#include "nv-kthread-q.h"
#include "nv-pat.h"
#include "nv-dmabuf.h"
#include "nv-caps-imex.h"

/*
 * Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
 * CONFIG_MITIGATION_RETPOLINE") in v6.8 renamed CONFIG_RETPOLINE.
 */
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
#include "nv-retpoline.h"
#endif

#include <linux/firmware.h>
#include <linux/cdev.h>

#include <sound/core.h> /* HDA struct snd_card */

#include <asm/cache.h>

#if defined(NV_SOUND_HDAUDIO_H_PRESENT)
#include "sound/hdaudio.h"
#endif

#if defined(NV_SOUND_HDA_CODEC_H_PRESENT)
#include <sound/core.h>
#include <sound/hda_codec.h>
#include <sound/hda_verbs.h>
#endif

#if defined(NV_SEQ_READ_ITER_PRESENT)
#include <linux/uio.h>
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#endif

#include <linux/dmi.h> /* System DMI info */

#include <linux/ioport.h>

#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif

#if defined(NV_ASM_CPUFEATURE_H_PRESENT)
#include <asm/cpufeature.h>
#endif

#include "conftest/patches.h"

#include "detect-self-hosted.h"

#define RM_THRESHOLD_TOTAL_IRQ_COUNT     100000
#define RM_THRESHOLD_UNAHNDLED_IRQ_COUNT 99900
#define RM_UNHANDLED_TIMEOUT_US          100000
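
/*
 * Presumably consumed by the ISR accounting later in this file: out of each
 * window of RM_THRESHOLD_TOTAL_IRQ_COUNT interrupts, the IRQ is treated as
 * misbehaving when at least RM_THRESHOLD_UNAHNDLED_IRQ_COUNT of them went
 * unhandled within RM_UNHANDLED_TIMEOUT_US microseconds.
 */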

MODULE_LICENSE("Dual MIT/GPL");

MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);
MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER);

/*
 * MODULE_IMPORT_NS() was added by commit 8651ec01daeda
 * ("module: add support for symbol namespaces") in v5.4.
 */
#if defined(MODULE_IMPORT_NS)
/*
 * The DMA_BUF namespace was added by commit 16b0314aa746
 * ("dma-buf: move dma-buf symbols into the DMA_BUF module namespace") in v5.16.
 */
MODULE_IMPORT_NS(DMA_BUF);
#endif // defined(MODULE_IMPORT_NS)

const NvBool nv_is_rm_firmware_supported_os = NV_TRUE;

// Deprecated, use NV_REG_ENABLE_GPU_FIRMWARE instead
char *rm_firmware_active = NULL;
NV_MODULE_STRING_PARAMETER(rm_firmware_active);

/*
 * Global NVIDIA capability state, for GPU driver
 */
nv_cap_t *nvidia_caps_root = NULL;

/*
 * Global counts for tracking if all devices were initialized properly
 */
NvU32 num_nv_devices = 0;
NvU32 num_probed_nv_devices = 0;

/*
 * Global list and table of per-device state
 * note: both nv_linux_devices and nv_linux_minor_num_table
 * are protected by nv_linux_devices_lock
 */
nv_linux_state_t *nv_linux_devices;
static nv_linux_state_t *nv_linux_minor_num_table[NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1];

// Global state for the control device
nv_linux_state_t nv_ctl_device = { { 0 } };

// cdev covering the region of regular (non-control) devices
static struct cdev nv_linux_devices_cdev;

// cdev covering the control device
static struct cdev nv_linux_control_device_cdev;

extern NvU32 nv_dma_remap_peer_mmio;

nv_kthread_q_t nv_kthread_q;
nv_kthread_q_t nv_deferred_close_kthread_q;

struct rw_semaphore nv_system_pm_lock;

#if defined(CONFIG_PM)
static nv_power_state_t nv_system_power_state;
static nv_pm_action_depth_t nv_system_pm_action_depth;
struct semaphore nv_system_power_state_lock;
#endif

void *nvidia_p2p_page_t_cache;
static void *nvidia_pte_t_cache;
void *nvidia_stack_t_cache;
static nvidia_stack_t *__nv_init_sp;

static int nv_tce_bypass_mode = NV_TCE_BYPASS_MODE_DEFAULT;

struct semaphore nv_linux_devices_lock;

// True if all the successfully probed devices support ATS
// Assigned at device probe (module init) time
NvBool nv_ats_supported = NVCPU_IS_PPC64LE
#if defined(NV_PCI_DEV_HAS_ATS_ENABLED)
                          || NV_TRUE
#endif
;
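
/*
 * With NV_PCI_DEV_HAS_ATS_ENABLED the initializer above reduces to NV_TRUE
 * on every architecture; otherwise ATS is assumed only on PPC64LE. The probe
 * path is then expected to clear nv_ats_supported for any device that turns
 * out not to support ATS.
 */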

// allow an easy way to convert all debug printfs related to events
// back and forth between 'info' and 'errors'
#if defined(NV_DBG_EVENTS)
#define NV_DBG_EVENTINFO NV_DBG_ERRORS
#else
#define NV_DBG_EVENTINFO NV_DBG_INFO
#endif

#if defined(HDA_MAX_CODECS)
#define NV_HDA_MAX_CODECS HDA_MAX_CODECS
#else
#define NV_HDA_MAX_CODECS 8
#endif

/***
 *** STATIC functions, only in this file
 ***/

/* nvos_ functions.. do not take a state device parameter */
static int nvos_count_devices(void);

static nv_alloc_t *nvos_create_alloc(struct device *, NvU64);
static int nvos_free_alloc(nv_alloc_t *);

/***
 *** EXPORTS to Linux Kernel
 ***/

static irqreturn_t nvidia_isr_common_bh (void *);
static void nvidia_isr_bh_unlocked (void *);
static int nvidia_ctl_open (struct inode *, struct file *);
static int nvidia_ctl_close (struct inode *, struct file *);

const char *nv_device_name = MODULE_NAME;
static const char *nvidia_stack_cache_name = MODULE_NAME "_stack_cache";
static const char *nvidia_pte_cache_name = MODULE_NAME "_pte_cache";
static const char *nvidia_p2p_page_cache_name = MODULE_NAME "_p2p_page_cache";

static int nvidia_open (struct inode *, struct file *);
static int nvidia_close (struct inode *, struct file *);
static unsigned int nvidia_poll (struct file *, poll_table *);
static int nvidia_ioctl (struct inode *, struct file *, unsigned int, unsigned long);
static long nvidia_unlocked_ioctl (struct file *, unsigned int, unsigned long);

/* character device entry points */
static struct file_operations nvidia_fops = {
    .owner          = THIS_MODULE,
    .poll           = nvidia_poll,
    .unlocked_ioctl = nvidia_unlocked_ioctl,
#if NVCPU_IS_X86_64 || NVCPU_IS_AARCH64
    .compat_ioctl   = nvidia_unlocked_ioctl,
#endif
    .mmap           = nvidia_mmap,
    .open           = nvidia_open,
    .release        = nvidia_close,
};
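
/*
 * .compat_ioctl reuses the native handler on 64-bit targets, presumably
 * because the ioctl argument structures are declared with fixed-size
 * NvU32/NvU64 fields and therefore have identical layouts for 32-bit and
 * 64-bit user-space callers.
 */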

#if defined(CONFIG_PM)
static int nv_pmops_suspend (struct device *dev);
static int nv_pmops_resume (struct device *dev);
static int nv_pmops_freeze (struct device *dev);
static int nv_pmops_thaw (struct device *dev);
static int nv_pmops_restore (struct device *dev);
static int nv_pmops_poweroff (struct device *dev);
static int nv_pmops_runtime_suspend (struct device *dev);
static int nv_pmops_runtime_resume (struct device *dev);

struct dev_pm_ops nv_pm_ops = {
    .suspend         = nv_pmops_suspend,
    .resume          = nv_pmops_resume,
    .freeze          = nv_pmops_freeze,
    .thaw            = nv_pmops_thaw,
    .poweroff        = nv_pmops_poweroff,
    .restore         = nv_pmops_restore,
    .runtime_suspend = nv_pmops_runtime_suspend,
    .runtime_resume  = nv_pmops_runtime_resume,
};
#endif

/***
 *** see nv.h for functions exported to other parts of resman
 ***/

/***
 *** STATIC functions
 ***/

static
void nv_detect_conf_compute_platform(
    void
)
{
#if defined(NV_CC_PLATFORM_PRESENT)
    os_cc_enabled = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);

#if defined(X86_FEATURE_TDX_GUEST)
    if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
    {
        os_cc_tdx_enabled = NV_TRUE;
    }
#endif
#else
    os_cc_enabled = NV_FALSE;
    os_cc_tdx_enabled = NV_FALSE;
#endif
}

static
nv_alloc_t *nvos_create_alloc(
    struct device *dev,
    NvU64 num_pages
)
{
    nv_alloc_t *at;
    NvU64 pt_size;
    unsigned int i;

    NV_KZALLOC(at, sizeof(nv_alloc_t));
    if (at == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate alloc info\n");
        return NULL;
    }

    at->dev = dev;
    pt_size = num_pages * sizeof(nvidia_pte_t *);
    //
    // Check for multiplication overflow and whether the num_pages value can
    // fit in at->num_pages.
    //
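    // For example, with 8-byte pointer entries any num_pages of 2^61 or more
    // would wrap pt_size around 2^64; the division check below catches that
    // case without needing a wider integer type.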
    if ((num_pages != 0) && ((pt_size / num_pages) != sizeof(nvidia_pte_t*)))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Invalid page table allocation - Number of pages exceeds max value.\n");
        NV_KFREE(at, sizeof(nv_alloc_t));
        return NULL;
    }

    at->num_pages = num_pages;
    if (at->num_pages != num_pages)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Invalid page table allocation - requested size overflows.\n");
        NV_KFREE(at, sizeof(nv_alloc_t));
        return NULL;
    }

    if (os_alloc_mem((void **)&at->page_table, pt_size) != NV_OK)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate page table\n");
        NV_KFREE(at, sizeof(nv_alloc_t));
        return NULL;
    }

    memset(at->page_table, 0, pt_size);
    NV_ATOMIC_SET(at->usage_count, 0);

    for (i = 0; i < at->num_pages; i++)
    {
        at->page_table[i] = NV_KMEM_CACHE_ALLOC(nvidia_pte_t_cache);
        if (at->page_table[i] == NULL)
        {
            nv_printf(NV_DBG_ERRORS,
                      "NVRM: failed to allocate page table entry\n");
            nvos_free_alloc(at);
            return NULL;
        }
        memset(at->page_table[i], 0, sizeof(nvidia_pte_t));
    }

    at->pid = os_get_current_process();

    return at;
}

static
int nvos_free_alloc(
    nv_alloc_t *at
)
{
    unsigned int i;

    if (at == NULL)
        return -1;

    if (NV_ATOMIC_READ(at->usage_count))
        return 1;

    for (i = 0; i < at->num_pages; i++)
    {
        if (at->page_table[i] != NULL)
            NV_KMEM_CACHE_FREE(at->page_table[i], nvidia_pte_t_cache);
    }
    os_free_mem(at->page_table);

    NV_KFREE(at, sizeof(nv_alloc_t));

    return 0;
}

static void
nv_module_resources_exit(nv_stack_t *sp)
{
    nv_kmem_cache_free_stack(sp);

    NV_KMEM_CACHE_DESTROY(nvidia_p2p_page_t_cache);
    NV_KMEM_CACHE_DESTROY(nvidia_pte_t_cache);
    NV_KMEM_CACHE_DESTROY(nvidia_stack_t_cache);
}

static int __init
nv_module_resources_init(nv_stack_t **sp)
{
    int rc = -ENOMEM;

    nvidia_stack_t_cache = NV_KMEM_CACHE_CREATE(nvidia_stack_cache_name,
                                                nvidia_stack_t);
    if (nvidia_stack_t_cache == NULL)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: nvidia_stack_t cache allocation failed.\n");
        goto exit;
    }

    nvidia_pte_t_cache = NV_KMEM_CACHE_CREATE(nvidia_pte_cache_name,
                                              nvidia_pte_t);
    if (nvidia_pte_t_cache == NULL)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: nvidia_pte_t cache allocation failed.\n");
        goto exit;
    }

    nvidia_p2p_page_t_cache = NV_KMEM_CACHE_CREATE(nvidia_p2p_page_cache_name,
                                                   nvidia_p2p_page_t);
    if (nvidia_p2p_page_t_cache == NULL)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: nvidia_p2p_page_t cache allocation failed.\n");
        goto exit;
    }

    rc = nv_kmem_cache_alloc_stack(sp);
    if (rc < 0)
    {
        goto exit;
    }

exit:
    if (rc < 0)
    {
        nv_kmem_cache_free_stack(*sp);

        NV_KMEM_CACHE_DESTROY(nvidia_p2p_page_t_cache);
        NV_KMEM_CACHE_DESTROY(nvidia_pte_t_cache);
        NV_KMEM_CACHE_DESTROY(nvidia_stack_t_cache);
    }

    return rc;
}

static void
nvlink_drivers_exit(void)
{
#if NVCPU_IS_64_BITS
    nvswitch_exit();
#endif

#if defined(NVCPU_PPC64LE)
    ibmnpu_exit();
#endif

    nvlink_core_exit();
}

static int __init
nvlink_drivers_init(void)
{
    int rc = 0;

    rc = nvlink_core_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: NVLink core init failed.\n");
        return rc;
    }

#if defined(NVCPU_PPC64LE)
    rc = ibmnpu_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: IBM NPU init failed.\n");
        nvlink_core_exit();
        return rc;
    }
#endif

#if NVCPU_IS_64_BITS
    rc = nvswitch_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: NVSwitch init failed.\n");
#if defined(NVCPU_PPC64LE)
        ibmnpu_exit();
#endif
        nvlink_core_exit();
    }
#endif

    return rc;
}

static void
nv_module_state_exit(nv_stack_t *sp)
{
    nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);

    nv_teardown_pat_support();

    nv_kthread_q_stop(&nv_deferred_close_kthread_q);
    nv_kthread_q_stop(&nv_kthread_q);

    nv_lock_destroy_locks(sp, nv);
}

static int
nv_module_state_init(nv_stack_t *sp)
{
    int rc;
    nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);

    nv->os_state = (void *)&nv_ctl_device;

    if (!nv_lock_init_locks(sp, nv))
    {
        return -ENOMEM;
    }

    rc = nv_kthread_q_init(&nv_kthread_q, "nv_queue");
    if (rc != 0)
    {
        goto exit;
    }

    rc = nv_kthread_q_init(&nv_deferred_close_kthread_q, "nv_queue");
    if (rc != 0)
    {
        nv_kthread_q_stop(&nv_kthread_q);
        goto exit;
    }

    rc = nv_init_pat_support(sp);
    if (rc < 0)
    {
        nv_kthread_q_stop(&nv_deferred_close_kthread_q);
        nv_kthread_q_stop(&nv_kthread_q);
        goto exit;
    }

    nv_linux_devices = NULL;
    memset(nv_linux_minor_num_table, 0, sizeof(nv_linux_minor_num_table));
    NV_INIT_MUTEX(&nv_linux_devices_lock);
    init_rwsem(&nv_system_pm_lock);

#if defined(CONFIG_PM)
    NV_INIT_MUTEX(&nv_system_power_state_lock);
    nv_system_power_state = NV_POWER_STATE_RUNNING;
    nv_system_pm_action_depth = NV_PM_ACTION_DEPTH_DEFAULT;
#endif

    NV_SPIN_LOCK_INIT(&nv_ctl_device.snapshot_timer_lock);

exit:
    if (rc < 0)
    {
        nv_lock_destroy_locks(sp, nv);
    }

    return rc;
}

static void __init
nv_registry_keys_init(nv_stack_t *sp)
{
    NV_STATUS status;
    nv_state_t *nv = NV_STATE_PTR(&nv_ctl_device);
    NvU32 data;

    /*
     * Determine the TCE bypass mode here so it can be used during
     * device probe. Also determine whether we should allow
     * user-mode NUMA onlining of device memory.
     */
    if (NVCPU_IS_PPC64LE)
    {
        status = rm_read_registry_dword(sp, nv,
                                        NV_REG_TCE_BYPASS_MODE,
                                        &data);
        if ((status == NV_OK) && ((int)data != NV_TCE_BYPASS_MODE_DEFAULT))
        {
            nv_tce_bypass_mode = data;
        }

        if (NVreg_EnableUserNUMAManagement)
        {
            /* Force on the core RM registry key to match. */
            status = rm_write_registry_dword(sp, nv, "RMNumaOnlining", 1);
            WARN_ON(status != NV_OK);
        }
    }

    status = rm_read_registry_dword(sp, nv, NV_DMA_REMAP_PEER_MMIO, &data);
    if (status == NV_OK)
    {
        nv_dma_remap_peer_mmio = data;
    }
}

static void __init
nv_report_applied_patches(void)
{
    unsigned i;

    for (i = 0; __nv_patches[i].short_description; i++)
    {
        if (i == 0)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: Applied patches:\n");
        }

        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Patch #%d: %s\n", i + 1, __nv_patches[i].short_description);
    }
}

static void
nv_drivers_exit(void)
{
    nv_pci_unregister_driver();
}

static int __init
nv_drivers_init(void)
{
    int rc;

    rc = nv_pci_register_driver();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA PCI devices found.\n");
        rc = -ENODEV;
        goto exit;
    }

exit:
    return rc;
}

static void
nv_module_exit(nv_stack_t *sp)
{
    nv_module_state_exit(sp);

    rm_shutdown_rm(sp);

    nv_destroy_rsync_info();
    nvlink_drivers_exit();

    nv_cap_drv_exit();

    nv_module_resources_exit(sp);
}

static int __init
nv_module_init(nv_stack_t **sp)
{
    int rc;

    rc = nv_module_resources_init(sp);
    if (rc < 0)
    {
        return rc;
    }

    rc = nv_cap_drv_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: nv-cap-drv init failed.\n");
        goto cap_drv_exit;
    }

    rc = nvlink_drivers_init();
    if (rc < 0)
    {
        goto cap_drv_exit;
    }

    nv_init_rsync_info();
    nv_detect_conf_compute_platform();

    if (!rm_init_rm(*sp))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: rm_init_rm() failed!\n");
        rc = -EIO;
        goto nvlink_exit;
    }

    rc = nv_module_state_init(*sp);
    if (rc < 0)
    {
        goto init_rm_exit;
    }

    return rc;

init_rm_exit:
    rm_shutdown_rm(*sp);

nvlink_exit:
    nv_destroy_rsync_info();
    nvlink_drivers_exit();

cap_drv_exit:
    nv_cap_drv_exit();
    nv_module_resources_exit(*sp);

    return rc;
}

/*
 * In this function we check for the cases where GPU exclusion is not
 * honored, and issue a warning.
 *
 * Only GPUs that support a mechanism to query UUID prior to
 * initializing the GPU can be excluded, so that we can detect and
 * exclude them during device probe. This function checks that an
 * initialized GPU was not specified in the exclusion list, and issues a
 * warning if so.
 */
static void
nv_assert_not_in_gpu_exclusion_list(
    nvidia_stack_t *sp,
    nv_state_t *nv
)
{
    char *uuid = rm_get_gpu_uuid(sp, nv);

    if (uuid == NULL)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "Unable to read UUID");
        return;
    }

    if (nv_is_uuid_in_gpu_exclusion_list(uuid))
    {
        NV_DEV_PRINTF(NV_DBG_WARNINGS, nv,
                      "Could not exclude GPU %s because PBI is not supported\n",
                      uuid);
        WARN_ON(1);
    }

    os_free_mem(uuid);

    return;
}

static int __init nv_caps_root_init(void)
{
    nvidia_caps_root = os_nv_cap_init("driver/" MODULE_NAME);

    return (nvidia_caps_root == NULL) ? -ENOENT : 0;
}

static void nv_caps_root_exit(void)
{
    os_nv_cap_destroy_entry(nvidia_caps_root);
    nvidia_caps_root = NULL;
}

static int nv_register_chrdev(
    unsigned int minor,
    unsigned int count,
    struct cdev *cdev,
    const char *name,
    struct file_operations *fops
)
{
    int rc;

    rc = register_chrdev_region(MKDEV(NV_MAJOR_DEVICE_NUMBER, minor),
                                count, name);
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: register_chrdev_region() failed for %s!\n", name);
        return rc;
    }

    cdev_init(cdev, fops);
    rc = cdev_add(cdev, MKDEV(NV_MAJOR_DEVICE_NUMBER, minor), count);
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: cdev_add() failed for %s!\n", name);
        unregister_chrdev_region(MKDEV(NV_MAJOR_DEVICE_NUMBER, minor), count);
        return rc;
    }

    return rc;
}

static void nv_unregister_chrdev(
    unsigned int minor,
    unsigned int count,
    struct cdev *cdev
)
{
    cdev_del(cdev);
    unregister_chrdev_region(MKDEV(NV_MAJOR_DEVICE_NUMBER, minor), count);
}

static int __init nvidia_init_module(void)
{
    int rc;
    NvU32 count;
    nvidia_stack_t *sp = NULL;
    const NvBool is_nvswitch_present = os_is_nvswitch_present();

    nv_memdbg_init();

    rc = nv_procfs_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize procfs.\n");
        return rc;
    }

    rc = nv_caps_root_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize capabilities.\n");
        goto procfs_exit;
    }

    rc = nv_caps_imex_init();
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize IMEX channels.\n");
        goto caps_root_exit;
    }

    rc = nv_module_init(&sp);
    if (rc < 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
        goto caps_imex_exit;
    }

    count = nvos_count_devices();
    if ((count == 0) && (!is_nvswitch_present))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA GPU found.\n");
        rc = -ENODEV;
        goto module_exit;
    }

    rc = nv_drivers_init();
    if (rc < 0)
    {
        goto module_exit;
    }

    if (num_probed_nv_devices != count)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: The NVIDIA probe routine was not called for %d device(s).\n",
                  count - num_probed_nv_devices);
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: This can occur when another driver was loaded and\n"
                  "NVRM: obtained ownership of the NVIDIA device(s).\n");
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Try unloading the conflicting kernel module (and/or\n"
                  "NVRM: reconfigure your kernel without the conflicting\n"
                  "NVRM: driver(s)), then try loading the NVIDIA kernel module\n"
                  "NVRM: again.\n");
    }

    if ((num_probed_nv_devices == 0) && (!is_nvswitch_present))
    {
        rc = -ENODEV;
        nv_printf(NV_DBG_ERRORS, "NVRM: No NVIDIA devices probed.\n");
        goto drivers_exit;
    }

    if (num_probed_nv_devices != num_nv_devices)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: The NVIDIA probe routine failed for %d device(s).\n",
                  num_probed_nv_devices - num_nv_devices);
    }

    if ((num_nv_devices == 0) && (!is_nvswitch_present))
    {
        rc = -ENODEV;
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: None of the NVIDIA devices were initialized.\n");
        goto drivers_exit;
    }

    /*
     * Initialize registry keys after PCI driver registration has
     * completed successfully to support per-device module
     * parameters.
     */
    nv_registry_keys_init(sp);

    nv_report_applied_patches();

    nv_printf(NV_DBG_ERRORS, "NVRM: loading %s\n", pNVRM_ID);

#if defined(NV_UVM_ENABLE)
    rc = nv_uvm_init();
    if (rc != 0)
    {
        goto drivers_exit;
    }
#endif

    /*
     * Register char devices for both the region of regular devices
     * as well as the control device.
     *
     * NOTE: THIS SHOULD BE DONE LAST.
     */
    rc = nv_register_chrdev(0, NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1,
                            &nv_linux_devices_cdev, "nvidia", &nvidia_fops);
    if (rc < 0)
    {
        goto no_chrdev_exit;
    }

    rc = nv_register_chrdev(NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE, 1,
                            &nv_linux_control_device_cdev, "nvidiactl", &nvidia_fops);
    if (rc < 0)
    {
        goto partial_chrdev_exit;
    }

    __nv_init_sp = sp;

    return 0;

partial_chrdev_exit:
    nv_unregister_chrdev(0, NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1,
                         &nv_linux_devices_cdev);

no_chrdev_exit:
#if defined(NV_UVM_ENABLE)
    nv_uvm_exit();
#endif

drivers_exit:
    nv_drivers_exit();

module_exit:
    nv_module_exit(sp);

caps_imex_exit:
    nv_caps_imex_exit();

caps_root_exit:
    nv_caps_root_exit();

procfs_exit:
    nv_procfs_exit();

    return rc;
}

static void __exit nvidia_exit_module(void)
{
    nvidia_stack_t *sp = __nv_init_sp;

    nv_unregister_chrdev(NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE, 1,
                         &nv_linux_control_device_cdev);
    nv_unregister_chrdev(0, NV_MINOR_DEVICE_NUMBER_REGULAR_MAX + 1,
                         &nv_linux_devices_cdev);

#if defined(NV_UVM_ENABLE)
    nv_uvm_exit();
#endif

    nv_drivers_exit();

    nv_module_exit(sp);

    nv_caps_imex_exit();

    nv_caps_root_exit();

    nv_procfs_exit();

    nv_memdbg_exit();
}

static void *nv_alloc_file_private(void)
{
    nv_linux_file_private_t *nvlfp;

    NV_KZALLOC(nvlfp, sizeof(nv_linux_file_private_t));
    if (!nvlfp)
        return NULL;

    init_waitqueue_head(&nvlfp->waitqueue);
    NV_SPIN_LOCK_INIT(&nvlfp->fp_lock);

    return nvlfp;
}

static void nv_free_file_private(nv_linux_file_private_t *nvlfp)
{
    nvidia_event_t *nvet;

    if (nvlfp == NULL)
        return;

    for (nvet = nvlfp->event_data_head; nvet != NULL; nvet = nvlfp->event_data_head)
    {
        nvlfp->event_data_head = nvlfp->event_data_head->next;
        NV_KFREE(nvet, sizeof(nvidia_event_t));
    }

    if (nvlfp->mmap_context.page_array != NULL)
    {
        os_free_mem(nvlfp->mmap_context.page_array);
    }

    NV_KFREE(nvlfp, sizeof(nv_linux_file_private_t));
}

/*
 * Find the nv device with the given minor device number in the minor number
 * table. Caller should hold nv_linux_devices_lock using
 * LOCK_NV_LINUX_DEVICES. This function does not automatically take
 * nvl->ldata_lock, so the caller must do that if required.
 */
static nv_linux_state_t *find_minor_locked(NvU32 minor)
{
    nv_linux_state_t *nvl;

    if (minor > NV_MINOR_DEVICE_NUMBER_REGULAR_MAX)
        return NULL;

    nvl = nv_linux_minor_num_table[minor];
    if (nvl == NULL)
    {
        // there isn't actually a GPU present for nv_linux_minor_num_table[minor]
    }
    else if (nvl->minor_num != minor)
    {
        // nv_linux_minor_num_table out of sync -- this shouldn't happen
        WARN_ON(1);
        nvl = NULL;
    }

    return nvl;
}

/*
 * Find the nv device with the given minor device number in the minor number
 * table. If found, nvl is returned with nvl->ldata_lock taken.
 */
static nv_linux_state_t *find_minor(NvU32 minor)
{
    nv_linux_state_t *nvl;

    if (minor > NV_MINOR_DEVICE_NUMBER_REGULAR_MAX)
        return NULL;

    LOCK_NV_LINUX_DEVICES();

    nvl = find_minor_locked(minor);
    if (nvl != NULL)
    {
        down(&nvl->ldata_lock);
    }

    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

/*
 * Search the global list of nv devices for the one with the given gpu_id.
 * If found, nvl is returned with nvl->ldata_lock taken.
 */
static nv_linux_state_t *find_gpu_id(NvU32 gpu_id)
{
    nv_linux_state_t *nvl;

    LOCK_NV_LINUX_DEVICES();
    nvl = nv_linux_devices;
    while (nvl != NULL)
    {
        nv_state_t *nv = NV_STATE_PTR(nvl);
        if (nv->gpu_id == gpu_id)
        {
            down(&nvl->ldata_lock);
            break;
        }
        nvl = nvl->next;
    }

    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

/*
 * Search the global list of nv devices for the one with the given UUID. Devices
 * with missing UUID information are ignored. If found, nvl is returned with
 * nvl->ldata_lock taken.
 */
nv_linux_state_t *find_uuid(const NvU8 *uuid)
{
    nv_linux_state_t *nvl = NULL;
    nv_state_t *nv;
    const NvU8 *dev_uuid;

    LOCK_NV_LINUX_DEVICES();

    for (nvl = nv_linux_devices; nvl; nvl = nvl->next)
    {
        nv = NV_STATE_PTR(nvl);
        down(&nvl->ldata_lock);
        dev_uuid = nv_get_cached_uuid(nv);
        if (dev_uuid && memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
            goto out;
        up(&nvl->ldata_lock);
    }

out:
    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

/*
 * Search the global list of nv devices. The search logic is:
 *
 * 1) If any device has the given UUID, return it
 *
 * 2) If no device has the given UUID but at least one device is missing
 *    its UUID (for example because rm_init_adapter has not run on it yet),
 *    return that device.
 *
 * 3) If no device has the given UUID and all UUIDs are present, return NULL.
 *
 * In cases 1 and 2, nvl is returned with nvl->ldata_lock taken.
 *
 * The reason for this weird logic is that UUIDs aren't always available. See
 * bug 1642200.
 */
static nv_linux_state_t *find_uuid_candidate(const NvU8 *uuid)
{
    nv_linux_state_t *nvl = NULL;
    nv_state_t *nv;
    const NvU8 *dev_uuid;
    int use_missing;
    int has_missing = 0;

    LOCK_NV_LINUX_DEVICES();

    /*
     * Take two passes through the list. The first pass just looks for the UUID.
     * The second looks for the target or missing UUIDs. It would be nice if
     * this could be done in a single pass by remembering which nvls are missing
     * UUIDs, but we have to hold the nvl lock after we check for the UUID.
     */
    for (use_missing = 0; use_missing <= 1; use_missing++)
    {
        for (nvl = nv_linux_devices; nvl; nvl = nvl->next)
        {
            nv = NV_STATE_PTR(nvl);
            down(&nvl->ldata_lock);
            dev_uuid = nv_get_cached_uuid(nv);
            if (dev_uuid)
            {
                /* Case 1: If a device has the given UUID, return it */
                if (memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
                    goto out;
            }
            else
            {
                /* Case 2: If no device has the given UUID but at least one
                 * device is missing its UUID, return that device. */
                if (use_missing)
                    goto out;
                has_missing = 1;
            }
            up(&nvl->ldata_lock);
        }

        /* Case 3: If no device has the given UUID and all UUIDs are present,
         * return NULL. */
        if (!has_missing)
            break;
    }

out:
    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

void nv_dev_free_stacks(nv_linux_state_t *nvl)
{
    NvU32 i;
    for (i = 0; i < NV_DEV_STACK_COUNT; i++)
    {
        if (nvl->sp[i])
        {
            nv_kmem_cache_free_stack(nvl->sp[i]);
            nvl->sp[i] = NULL;
        }
    }
}

static int nv_dev_alloc_stacks(nv_linux_state_t *nvl)
{
    NvU32 i;
    int rc;

    for (i = 0; i < NV_DEV_STACK_COUNT; i++)
    {
        rc = nv_kmem_cache_alloc_stack(&nvl->sp[i]);
        if (rc != 0)
        {
            nv_dev_free_stacks(nvl);
            return rc;
        }
    }

    return 0;
}

static int validate_numa_start_state(nv_linux_state_t *nvl)
{
    int rc = 0;
    int numa_status = nv_get_numa_status(nvl);

    if (numa_status != NV_IOCTL_NUMA_STATUS_DISABLED)
    {
        if (nv_ctl_device.numa_memblock_size == 0)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: numa memblock size of zero "
                      "found during device start\n");
            rc = -EINVAL;
        }
        else
        {
            /* Keep the individual devices consistent with the control device */
            nvl->numa_memblock_size = nv_ctl_device.numa_memblock_size;
        }
    }

    return rc;
}

NV_STATUS NV_API_CALL nv_get_num_dpaux_instances(nv_state_t *nv, NvU32 *num_instances)
{
    *num_instances = nv->num_dpaux_instance;
    return NV_OK;
}

void NV_API_CALL
nv_schedule_uvm_isr(nv_state_t *nv)
{
#if defined(NV_UVM_ENABLE)
    nv_uvm_event_interrupt(nv_get_cached_uuid(nv));
#endif
}

/*
 * Brings up the device on the first file open. Assumes nvl->ldata_lock is held.
 */
static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    NvU32 msi_config = 0;
#endif
    int rc = 0;
    NvBool kthread_init = NV_FALSE;
    NvBool remove_numa_memory_kthread_init = NV_FALSE;
    NvBool power_ref = NV_FALSE;

    rc = nv_get_rsync_info();
    if (rc != 0)
    {
        return rc;
    }

    rc = validate_numa_start_state(nvl);
    if (rc != 0)
    {
        goto failed;
    }

    if (dev_is_pci(nvl->dev) && (nv->pci_info.device_id == 0))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: open of non-existent GPU with minor number %d\n", nvl->minor_num);
        rc = -ENXIO;
        goto failed;
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE) != NV_OK)
        {
            rc = -EINVAL;
            goto failed;
        }
        power_ref = NV_TRUE;
    }
    else
    {
        if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE) != NV_OK)
        {
            rc = -EINVAL;
            goto failed;
        }
        power_ref = NV_TRUE;
    }

    rc = nv_init_ibmnpu_devices(nv);
    if (rc != 0)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: failed to initialize ibmnpu devices attached to GPU with minor number %d\n",
                  nvl->minor_num);
        goto failed;
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        rc = nv_dev_alloc_stacks(nvl);
        if (rc != 0)
            goto failed;
    }

#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    if (dev_is_pci(nvl->dev))
    {
        if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
        {
            rm_read_registry_dword(sp, nv, NV_REG_ENABLE_MSI, &msi_config);
            if (msi_config == 1)
            {
                if (nvl->pci_dev->msix_cap && rm_is_msix_allowed(sp, nv))
                {
                    nv_init_msix(nv);
                }
                if (nvl->pci_dev->msi_cap && !(nv->flags & NV_FLAG_USES_MSIX))
                {
                    nv_init_msi(nv);
                }
            }
        }
    }
#endif

    if (((!(nv->flags & NV_FLAG_USES_MSI)) && (!(nv->flags & NV_FLAG_USES_MSIX)))
        && (nv->interrupt_line == 0) && !(nv->flags & NV_FLAG_SOC_DISPLAY)
        && !(nv->flags & NV_FLAG_SOC_IGPU))
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                      "No interrupts of any type are available. Cannot use this GPU.\n");
        rc = -EIO;
        goto failed;
    }

    rc = 0;
    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        if (nv->flags & NV_FLAG_SOC_DISPLAY)
        {
        }
        else if (!(nv->flags & NV_FLAG_USES_MSIX))
        {
            rc = request_threaded_irq(nv->interrupt_line, nvidia_isr,
                                      nvidia_isr_kthread_bh, nv_default_irq_flags(nv),
                                      nv_device_name, (void *)nvl);
        }
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
        else
        {
            rc = nv_request_msix_irq(nvl);
        }
#endif
    }
    if (rc != 0)
    {
        if ((nv->interrupt_line != 0) && (rc == -EBUSY))
        {
            NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                          "Tried to get IRQ %d, but another driver\n",
                          (unsigned int) nv->interrupt_line);
            nv_printf(NV_DBG_ERRORS, "NVRM: has it and is not sharing it.\n");
            nv_printf(NV_DBG_ERRORS, "NVRM: You may want to verify that no audio driver");
            nv_printf(NV_DBG_ERRORS, " is using the IRQ.\n");
        }
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "request_irq() failed (%d)\n", rc);
        goto failed;
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        rc = os_alloc_mutex(&nvl->isr_bh_unlocked_mutex);
        if (rc != 0)
            goto failed;
        nv_kthread_q_item_init(&nvl->bottom_half_q_item, nvidia_isr_bh_unlocked, (void *)nv);
        rc = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name);
        if (rc != 0)
            goto failed;
        kthread_init = NV_TRUE;

        rc = nv_kthread_q_init(&nvl->queue.nvk, "nv_queue");
        if (rc)
            goto failed;
        nv->queue = &nvl->queue;

        if (nv_platform_use_auto_online(nvl))
        {
            rc = nv_kthread_q_init(&nvl->remove_numa_memory_q,
                                   "nv_remove_numa_memory");
            if (rc)
                goto failed;
            remove_numa_memory_kthread_init = NV_TRUE;
        }
    }

    if (!rm_init_adapter(sp, nv))
    {
        if (!(nv->flags & NV_FLAG_USES_MSIX) &&
            !(nv->flags & NV_FLAG_SOC_DISPLAY) &&
            !(nv->flags & NV_FLAG_SOC_IGPU))
        {
            free_irq(nv->interrupt_line, (void *) nvl);
        }
        else if (nv->flags & NV_FLAG_SOC_DISPLAY)
        {
        }
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
        else
        {
            nv_free_msix_irq(nvl);
        }
#endif
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                      "rm_init_adapter failed, device minor number %d\n",
                      nvl->minor_num);
        rc = -EIO;
        goto failed;
    }

    {
        const NvU8 *uuid = rm_get_gpu_uuid_raw(sp, nv);

        if (uuid != NULL)
        {
#if defined(NV_UVM_ENABLE)
            nv_uvm_notify_start_device(uuid);
#endif
        }
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        nv_acpi_register_notifier(nvl);
    }

    nv->flags |= NV_FLAG_OPEN;

    rm_request_dnotifier_state(sp, nv);

    /*
     * Now that RM init is done, allow dynamic power to control the GPU in FINE
     * mode, if enabled. (If the mode is COARSE, this unref will do nothing,
     * which will cause the GPU to remain powered up.)
     * This is balanced by a FINE ref increment at the beginning of
     * nv_stop_device().
     */
    rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);

    return 0;

failed:
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    if (nv->flags & NV_FLAG_USES_MSI)
    {
        nv->flags &= ~NV_FLAG_USES_MSI;
        NV_PCI_DISABLE_MSI(nvl->pci_dev);
        if (nvl->irq_count)
            NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
    }
    else if (nv->flags & NV_FLAG_USES_MSIX)
    {
        nv->flags &= ~NV_FLAG_USES_MSIX;
        pci_disable_msix(nvl->pci_dev);
        NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
        NV_KFREE(nvl->msix_entries, nvl->num_intr * sizeof(struct msix_entry));
    }

    if (nvl->msix_bh_mutex)
    {
        os_free_mutex(nvl->msix_bh_mutex);
        nvl->msix_bh_mutex = NULL;
    }
#endif

    if (nv->queue && !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        nv->queue = NULL;
        nv_kthread_q_stop(&nvl->queue.nvk);
    }

    if (kthread_init && !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
        nv_kthread_q_stop(&nvl->bottom_half_q);

    if (remove_numa_memory_kthread_init &&
        !(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        nv_kthread_q_stop(&nvl->remove_numa_memory_q);
    }

    if (nvl->isr_bh_unlocked_mutex)
    {
        os_free_mutex(nvl->isr_bh_unlocked_mutex);
        nvl->isr_bh_unlocked_mutex = NULL;
    }

    nv_dev_free_stacks(nvl);

    nv_unregister_ibmnpu_devices(nv);

    if (power_ref)
    {
        rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE);
    }

    nv_put_rsync_info();

    return rc;
}

/*
 * Makes sure the device is ready for operations and increases nvl->usage_count.
 * Assumes nvl->ldata_lock is held.
 */
static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    int rc;
    NV_STATUS status;

    if ((nv->flags & NV_FLAG_EXCLUDE) != 0)
    {
        char *uuid = rm_get_gpu_uuid(sp, nv);
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                      "open() not permitted for excluded %s\n",
                      (uuid != NULL) ? uuid : "GPU");
        if (uuid != NULL)
            os_free_mem(uuid);
        return -EPERM;
    }

    if (os_is_vgx_hyper())
    {
        /* fail open if GPU is being unbound */
        if (nv->flags & NV_FLAG_UNBIND_LOCK)
        {
            NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                          "Open failed as GPU is locked for unbind operation\n");
            return -ENODEV;
        }
    }

    NV_DEV_PRINTF(NV_DBG_INFO, nv, "Opening GPU with minor number %d\n",
                  nvl->minor_num);

    status = nv_check_gpu_state(nv);
    if (status == NV_ERR_GPU_IS_LOST)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "Device in removal process\n");
        return -ENODEV;
    }

    if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX))
        return -EMFILE;

    if (!(nv->flags & NV_FLAG_OPEN))
    {
        /* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */
        if (NV_ATOMIC_READ(nvl->usage_count) != 0)
        {
            NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                          "Minor device %u is referenced without being open!\n",
                          nvl->minor_num);
            WARN_ON(1);
            return -EBUSY;
        }

        rc = nv_start_device(nv, sp);
        if (rc != 0)
            return rc;
    }
    else if (rm_is_device_sequestered(sp, nv))
    {
        /* Do not increment the usage count of sequestered devices. */
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "Device is currently unavailable\n");
        return -EBUSY;
    }

    nv_assert_not_in_gpu_exclusion_list(sp, nv);

    NV_ATOMIC_INC(nvl->usage_count);
    return 0;
}

static void nv_init_mapping_revocation(nv_linux_state_t *nvl,
                                       struct file *file,
                                       nv_linux_file_private_t *nvlfp,
                                       struct inode *inode)
{
    down(&nvl->mmap_lock);

    /* Set up struct address_space for use with unmap_mapping_range() */
    address_space_init_once(&nvlfp->mapping);
    nvlfp->mapping.host = inode;
    nvlfp->mapping.a_ops = inode->i_mapping->a_ops;
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
    nvlfp->mapping.backing_dev_info = inode->i_mapping->backing_dev_info;
#endif
    file->f_mapping = &nvlfp->mapping;

    /* Add nvlfp to list of open files in nvl for mapping revocation */
    list_add(&nvlfp->entry, &nvl->open_files);

    up(&nvl->mmap_lock);
}
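
/*
 * Because each open file owns a private address_space, mappings created
 * against one fd can later be revoked without disturbing other clients.
 * A plausible sketch (not the driver's literal revocation path):
 *
 *     unmap_mapping_range(&nvlfp->mapping, 0, 0, 1);
 *
 * which tears down every PTE currently mapped through that fd so the next
 * access faults and can be revalidated.
 */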

/*
 * Like nv_open_device but stores rc and adapter status in the given nvlfp.
 * Assumes nvl->ldata_lock is held.
 */
static int nv_open_device_for_nvlfp(
    nv_state_t *nv,
    nvidia_stack_t *sp,
    nv_linux_file_private_t *nvlfp
)
{
    nvlfp->open_rc = nv_open_device(nv, sp);

    if (nvlfp->open_rc == 0)
    {
        nvlfp->adapter_status = NV_OK;
    }
    else
    {
        nvlfp->adapter_status = rm_get_adapter_status_external(sp, nv);
    }

    return nvlfp->open_rc;
}

static void nvidia_open_deferred(void *nvlfp_raw)
{
    nv_linux_file_private_t *nvlfp = (nv_linux_file_private_t *) nvlfp_raw;
    nv_linux_state_t *nvl = nvlfp->deferred_open_nvl;
    int rc;

    /*
     * Deferred opens and device removal are synchronized via
     * nvl->is_accepting_opens and nvl->open_q flushes so that nvl is
     * guaranteed to outlive any pending open operation.
     *
     * So, it is safe to take nvl->ldata_lock here without holding
     * any refcount or larger lock.
     *
     * Deferred opens and system suspend are synchronized by an explicit
     * nvl->open_q flush before suspending.
     *
     * So, it is safe to proceed without nv_system_pm_lock here (in fact, it
     * must not be taken to ensure nvl->open_q can make forward progress).
     */
    down(&nvl->ldata_lock);
    rc = nv_open_device_for_nvlfp(NV_STATE_PTR(nvl), nvlfp->sp, nvlfp);
    up(&nvl->ldata_lock);

    /* Set nvptr only upon success (where nvl->usage_count is incremented) */
    if (rc == 0)
        nvlfp->nvptr = nvl;

    complete_all(&nvlfp->open_complete);
}

/*
 * Tries to prepare (by taking nvl->ldata_lock) for an open in the foreground
 * for the given file and device.
 *
 * This succeeds if:
 * - O_NONBLOCK is not passed (or non-blocking opens are disabled), or
 * - O_NONBLOCK is passed, but we are able to determine (without blocking)
 *   that the device is already initialized
 *
 * Returns 0 with nvl->ldata_lock taken if open can occur in the foreground.
 * Otherwise, returns non-zero (without nvl->ldata_lock taken).
 */
static int nv_try_lock_foreground_open(
    struct file *file,
    nv_linux_state_t *nvl
)
{
    nv_state_t *nv = NV_STATE_PTR(nvl);

    if (NVreg_EnableNonblockingOpen && (file->f_flags & O_NONBLOCK))
    {
        if (down_trylock(&nvl->ldata_lock) == 0)
        {
            if (nv->flags & NV_FLAG_OPEN)
            {
                /* device already initialized */
                return 0;
            }
            else
            {
                /* device not initialized yet */
                up(&nvl->ldata_lock);
                return -EWOULDBLOCK;
            }
        }
        else
        {
            /* unable to check nv->flags safely without blocking */
            return -EWOULDBLOCK;
        }
    }

    /* O_NONBLOCK not passed or non-blocking opens are disabled */
    down(&nvl->ldata_lock);
    return 0;
}

/*
 ** nvidia_open
 **
 ** nv driver open entry point. Sessions are created here.
 */
int
nvidia_open(
    struct inode *inode,
    struct file *file
)
{
    nv_state_t *nv = NULL;
    nv_linux_state_t *nvl = NULL;
    int rc = 0;
    nv_linux_file_private_t *nvlfp = NULL;
    nvidia_stack_t *sp = NULL;

    nv_printf(NV_DBG_INFO, "NVRM: nvidia_open...\n");

    nvlfp = nv_alloc_file_private();
    if (nvlfp == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate file private!\n");
        return -ENOMEM;
    }

    rc = nv_kmem_cache_alloc_stack(&sp);
    if (rc != 0)
    {
        nv_free_file_private(nvlfp);
        return rc;
    }

    NV_SET_FILE_PRIVATE(file, nvlfp);
    nvlfp->sp = sp;

    /* for control device, just jump to its open routine
     * after setting up the private data */
    if (nv_is_control_device(inode))
    {
        rc = nvidia_ctl_open(inode, file);
        if (rc != 0)
            goto failed;
        return rc;
    }

    rc = nv_down_read_interruptible(&nv_system_pm_lock);
    if (rc < 0)
        goto failed;

    /* nvptr will get set to actual nvl upon successful open */
    nvlfp->nvptr = NULL;

    init_completion(&nvlfp->open_complete);

    LOCK_NV_LINUX_DEVICES();

    nvl = find_minor_locked(NV_DEVICE_MINOR_NUMBER(inode));
    if (nvl == NULL)
    {
        rc = -ENODEV;
        UNLOCK_NV_LINUX_DEVICES();
        up_read(&nv_system_pm_lock);
        goto failed;
    }

    nv = NV_STATE_PTR(nvl);

    if (nv_try_lock_foreground_open(file, nvl) == 0)
    {
        /* Proceed in foreground */
        /* nvl->ldata_lock is already taken at this point */

        UNLOCK_NV_LINUX_DEVICES();

        rc = nv_open_device_for_nvlfp(nv, nvlfp->sp, nvlfp);

        up(&nvl->ldata_lock);

        /* Set nvptr only upon success (where nvl->usage_count is incremented) */
        if (rc == 0)
            nvlfp->nvptr = nvl;

        complete_all(&nvlfp->open_complete);
    }
    else
    {
        /* Defer to background kthread */
        int item_scheduled = 0;

        /*
         * Take nvl->open_q_lock in order to check nvl->is_accepting_opens and
         * schedule work items on nvl->open_q.
         *
         * Continue holding nv_linux_devices_lock (LOCK_NV_LINUX_DEVICES)
         * until the work item gets onto nvl->open_q in order to ensure the
         * lifetime of nvl.
         */
        down(&nvl->open_q_lock);

        if (!nvl->is_accepting_opens)
        {
            /* Background kthread is not accepting opens, bail! */
            rc = -EBUSY;
            goto nonblock_end;
        }

        nvlfp->deferred_open_nvl = nvl;
        nv_kthread_q_item_init(&nvlfp->open_q_item,
                               nvidia_open_deferred,
                               nvlfp);

        item_scheduled = nv_kthread_q_schedule_q_item(
            &nvl->open_q, &nvlfp->open_q_item);

        if (!item_scheduled)
        {
            WARN_ON(!item_scheduled);
            rc = -EBUSY;
        }

nonblock_end:
        up(&nvl->open_q_lock);
        UNLOCK_NV_LINUX_DEVICES();
    }

    up_read(&nv_system_pm_lock);
failed:
    if (rc != 0)
    {
        if (nvlfp != NULL)
        {
            nv_free_file_private(nvlfp);
            NV_SET_FILE_PRIVATE(file, NULL);
        }
    }
    else
    {
        nv_init_mapping_revocation(nvl, file, nvlfp, inode);
    }

    return rc;
}

static void validate_numa_shutdown_state(nv_linux_state_t *nvl)
{
    int numa_status = nv_get_numa_status(nvl);
    WARN_ON((numa_status != NV_IOCTL_NUMA_STATUS_OFFLINE) &&
            (numa_status != NV_IOCTL_NUMA_STATUS_DISABLED));
}

void nv_shutdown_adapter(nvidia_stack_t *sp,
                         nv_state_t *nv,
                         nv_linux_state_t *nvl)
{
#if defined(NVCPU_PPC64LE)
    validate_numa_shutdown_state(nvl);
#endif

    rm_disable_adapter(sp, nv);

    // It's safe to call nv_kthread_q_stop even if queue is not initialized
    nv_kthread_q_stop(&nvl->bottom_half_q);

    if (nv->queue != NULL)
    {
        nv->queue = NULL;
        nv_kthread_q_stop(&nvl->queue.nvk);
    }

    if (nvl->isr_bh_unlocked_mutex)
    {
        os_free_mutex(nvl->isr_bh_unlocked_mutex);
        nvl->isr_bh_unlocked_mutex = NULL;
    }

    if (!(nv->flags & NV_FLAG_USES_MSIX) &&
        !(nv->flags & NV_FLAG_SOC_DISPLAY) &&
        !(nv->flags & NV_FLAG_SOC_IGPU))
    {
        free_irq(nv->interrupt_line, (void *)nvl);
        if (nv->flags & NV_FLAG_USES_MSI)
        {
            NV_PCI_DISABLE_MSI(nvl->pci_dev);
            if (nvl->irq_count)
                NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
        }
    }
    else if (nv->flags & NV_FLAG_SOC_DISPLAY)
    {
    }
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    else
    {
        nv_free_msix_irq(nvl);
        pci_disable_msix(nvl->pci_dev);
        nv->flags &= ~NV_FLAG_USES_MSIX;
        NV_KFREE(nvl->msix_entries, nvl->num_intr * sizeof(struct msix_entry));
        NV_KFREE(nvl->irq_count, nvl->num_intr * sizeof(nv_irq_count_info_t));
    }
#endif

    if (nvl->msix_bh_mutex)
    {
        os_free_mutex(nvl->msix_bh_mutex);
        nvl->msix_bh_mutex = NULL;
    }

    rm_shutdown_adapter(sp, nv);

    if (nv_platform_use_auto_online(nvl))
        nv_kthread_q_stop(&nvl->remove_numa_memory_q);
}

/*
 * Tears down the device on the last file close. Assumes nvl->ldata_lock is
 * held.
 */
static void nv_stop_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    static int persistence_mode_notice_logged;

    /*
     * The GPU needs to be powered on to go through the teardown sequence.
     * This balances the FINE unref at the end of nv_start_device().
     */
    rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);

#if defined(NV_UVM_ENABLE)
    {
        const NvU8 *uuid;
        // Inform UVM before disabling adapter. Use cached copy
        uuid = nv_get_cached_uuid(nv);
        if (uuid != NULL)
        {
            // this function cannot fail
            nv_uvm_notify_stop_device(uuid);
        }
    }
#endif
    /* Adapter is already shut down as part of nvidia_pci_remove */
    if (!nv->removed)
    {
        if (nv->flags & NV_FLAG_PERSISTENT_SW_STATE)
        {
            rm_disable_adapter(sp, nv);
        }
        else
        {
            nv_acpi_unregister_notifier(nvl);
            nv_shutdown_adapter(sp, nv, nvl);
        }
    }

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        nv_dev_free_stacks(nvl);
    }

    if ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) &&
        (!persistence_mode_notice_logged) && (!os_is_vgx_hyper()))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Persistence mode is deprecated and"
                  " will be removed in a future release. Please use"
                  " nvidia-persistenced instead.\n");
        persistence_mode_notice_logged = 1;
    }

    /* leave INIT flag alone so we don't reinit every time */
    nv->flags &= ~NV_FLAG_OPEN;

    nv_unregister_ibmnpu_devices(nv);

    if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
    {
        rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE);
    }
    else
    {
        /* If in legacy persistence mode, only unref FINE refcount. */
        rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
    }

    nv_put_rsync_info();
}

/*
 * Decreases nvl->usage_count, stopping the device when it reaches 0. Assumes
 * nvl->ldata_lock is held.
 */
static void nv_close_device(nv_state_t *nv, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Attempting to close unopened minor device %u!\n",
                  nvl->minor_num);
        WARN_ON(1);
        return;
    }

    if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count))
        nv_stop_device(nv, sp);
}

/*
 ** nvidia_close
 **
 ** Primary driver close entry point.
 */

static void
nvidia_close_callback(
    nv_linux_file_private_t *nvlfp
)
{
    nv_linux_state_t *nvl;
    nv_state_t *nv;
    nvidia_stack_t *sp = nvlfp->sp;
    NvBool bRemove = NV_FALSE;

    nvl = nvlfp->nvptr;
    if (nvl == NULL)
    {
        /*
         * If nvlfp has no associated nvl device (meaning the open operation
         * failed), then there is no state outside of nvlfp to clean up.
         */

        nv_free_file_private(nvlfp);
        nv_kmem_cache_free_stack(sp);
        return;
    }

    nv = NV_STATE_PTR(nvl);

    rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);

    down(&nvl->mmap_lock);
    list_del(&nvlfp->entry);
    up(&nvl->mmap_lock);

    down(&nvl->ldata_lock);
    nv_close_device(nv, sp);

    bRemove = (!NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv)) &&
              (NV_ATOMIC_READ(nvl->usage_count) == 0) &&
              rm_get_device_remove_flag(sp, nv->gpu_id);

    nv_free_file_private(nvlfp);

    /*
     * In case of surprise removal of the device, there are two cases:
     *
     * 1) nvidia_pci_remove is scheduled prior to nvidia_close.
     *    nvidia_pci_remove will not destroy the Linux-layer locks or the nv
     *    Linux state struct, but will set nv->removed for nvidia_close.
     *    Once all the clients are closed, the last nvidia_close will clean
     *    up the Linux-layer locks and the nv Linux state struct.
     *
     * 2) nvidia_close is scheduled prior to nvidia_pci_remove.
     *    This is treated as the normal working case: nvidia_close does no
     *    cleanup of the Linux-layer locks or the nv Linux state struct.
     *    nvidia_pci_remove, when scheduled, does the necessary cleanup.
     */
    if ((NV_ATOMIC_READ(nvl->usage_count) == 0) && nv->removed)
    {
        nv_lock_destroy_locks(sp, nv);
        NV_KFREE(nvl, sizeof(nv_linux_state_t));
    }
    else
    {
        up(&nvl->ldata_lock);

#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE)
        if (bRemove)
        {
            NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(nvl->pci_dev);
        }
#endif
    }

    nv_kmem_cache_free_stack(sp);
}
2126
2127 static void nvidia_close_deferred(void *data)
2128 {
2129 nv_linux_file_private_t *nvlfp = data;
2130
2131 nv_wait_open_complete(nvlfp);
2132
2133 down_read(&nv_system_pm_lock);
2134
2135 nvidia_close_callback(nvlfp);
2136
2137 up_read(&nv_system_pm_lock);
2138 }
2139
2140 int
2141 nvidia_close(
2142 struct inode *inode,
2143 struct file *file
2144 )
2145 {
2146 int rc;
2147 nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2148
2149 nv_printf(NV_DBG_INFO,
2150 "NVRM: nvidia_close on GPU with minor number %d\n",
2151 NV_DEVICE_MINOR_NUMBER(inode));
2152
2153 if (nv_is_control_device(inode))
2154 {
2155 return nvidia_ctl_close(inode, file);
2156 }
2157
2158 NV_SET_FILE_PRIVATE(file, NULL);
2159
2160 rc = nv_wait_open_complete_interruptible(nvlfp);
2161 if (rc == 0)
2162 {
2163 rc = nv_down_read_interruptible(&nv_system_pm_lock);
2164 }
2165
2166 if (rc == 0)
2167 {
2168 nvidia_close_callback(nvlfp);
2169 up_read(&nv_system_pm_lock);
2170 }
2171 else
2172 {
2173 nv_kthread_q_item_init(&nvlfp->deferred_close_q_item,
2174 nvidia_close_deferred,
2175 nvlfp);
2176 rc = nv_kthread_q_schedule_q_item(&nv_deferred_close_kthread_q,
2177 &nvlfp->deferred_close_q_item);
2178 WARN_ON(rc == 0);
2179 }
2180
2181 return 0;
2182 }
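/*
 * Note on the deferred path above: if either wait is interrupted by a
 * signal, the close cannot simply be abandoned (the file reference is
 * already being dropped), so the cleanup is queued to
 * nv_deferred_close_kthread_q instead. nv_kthread_q_schedule_q_item()
 * returns nonzero when the item was newly scheduled, so the
 * WARN_ON(rc == 0) flags an item that was unexpectedly already pending.
 */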
2183
2184 unsigned int
2185 nvidia_poll(
2186 struct file *file,
2187 poll_table *wait
2188 )
2189 {
2190 unsigned int mask = 0;
2191 nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2192 unsigned long eflags;
2193 nv_linux_state_t *nvl;
2194 nv_state_t *nv;
2195 NV_STATUS status;
2196
2197 if (!nv_is_control_device(NV_FILE_INODE(file)))
2198 {
2199 if (!nv_is_open_complete(nvlfp))
2200 {
2201 return POLLERR;
2202 }
2203 }
2204
2205 nvl = nvlfp->nvptr;
2206 if (nvl == NULL)
2207 {
2208 return POLLERR;
2209 }
2210
2211 nv = NV_STATE_PTR(nvl);
2212
2213 status = nv_check_gpu_state(nv);
2214 if (status == NV_ERR_GPU_IS_LOST)
2215 {
2216 NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping nvidia_poll\n");
2217 return POLLHUP;
2218 }
2219
2220 if ((file->f_flags & O_NONBLOCK) == 0)
2221 poll_wait(file, &nvlfp->waitqueue, wait);
2222
2223 NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);
2224
2225 if ((nvlfp->event_data_head != NULL) || nvlfp->dataless_event_pending)
2226 {
2227 mask = (POLLPRI | POLLIN);
2228 nvlfp->dataless_event_pending = NV_FALSE;
2229 }
2230
2231 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
2232
2233 return mask;
2234 }
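/*
 * A minimal user-space sketch of how a client might wait on the mask set
 * above. This is illustrative only: the device path and error handling are
 * assumptions, not part of this driver.
 *
 *   #include <fcntl.h>
 *   #include <poll.h>
 *
 *   int wait_for_nvidia_event(const char *dev_path)
 *   {
 *       struct pollfd pfd;
 *
 *       pfd.fd = open(dev_path, O_RDWR);   // e.g. "/dev/nvidia0" (assumed)
 *       if (pfd.fd < 0)
 *           return -1;
 *       pfd.events = POLLPRI | POLLIN;     // matches the mask set above
 *       return poll(&pfd, 1, -1);          // block until an event is posted
 *   }
 */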
2235
2236 #define NV_CTL_DEVICE_ONLY(nv) \
2237 { \
2238 if (((nv)->flags & NV_FLAG_CONTROL) == 0) \
2239 { \
2240 status = -EINVAL; \
2241 goto done; \
2242 } \
2243 }
2244
2245 #define NV_ACTUAL_DEVICE_ONLY(nv) \
2246 { \
2247 if (((nv)->flags & NV_FLAG_CONTROL) != 0) \
2248 { \
2249 status = -EINVAL; \
2250 goto done; \
2251 } \
2252 }
2253
2254 /*
2255 * Fills the ci array with the state of num_entries devices. Returns -EINVAL if
2256 * num_entries isn't big enough to hold all available devices.
2257 */
2258 static int nvidia_read_card_info(nv_ioctl_card_info_t *ci, size_t num_entries)
2259 {
2260 nv_state_t *nv;
2261 nv_linux_state_t *nvl;
2262 size_t i = 0;
2263 int rc = 0;
2264
2265 /* Clear each card's flags field the lazy way */
2266 memset(ci, 0, num_entries * sizeof(ci[0]));
2267
2268 LOCK_NV_LINUX_DEVICES();
2269
2270 if (num_entries < num_nv_devices)
2271 {
2272 rc = -EINVAL;
2273 goto out;
2274 }
2275
2276 for (nvl = nv_linux_devices; nvl && i < num_entries; nvl = nvl->next)
2277 {
2278 nv = NV_STATE_PTR(nvl);
2279
2280 /* We do not include excluded GPUs in the list... */
2281 if ((nv->flags & NV_FLAG_EXCLUDE) != 0)
2282 continue;
2283
2284 ci[i].valid = NV_TRUE;
2285 ci[i].pci_info.domain = nv->pci_info.domain;
2286 ci[i].pci_info.bus = nv->pci_info.bus;
2287 ci[i].pci_info.slot = nv->pci_info.slot;
2288 ci[i].pci_info.vendor_id = nv->pci_info.vendor_id;
2289 ci[i].pci_info.device_id = nv->pci_info.device_id;
2290 ci[i].gpu_id = nv->gpu_id;
2291 ci[i].interrupt_line = nv->interrupt_line;
2292 ci[i].reg_address = nv->regs->cpu_address;
2293 ci[i].reg_size = nv->regs->size;
2294 ci[i].minor_number = nvl->minor_num;
2295 if (dev_is_pci(nvl->dev))
2296 {
2297 ci[i].fb_address = nv->fb->cpu_address;
2298 ci[i].fb_size = nv->fb->size;
2299 }
2300 i++;
2301 }
2302
2303 out:
2304 UNLOCK_NV_LINUX_DEVICES();
2305 return rc;
2306 }
2307
2308 int
2309 nvidia_ioctl(
2310 struct inode *inode,
2311 struct file *file,
2312 unsigned int cmd,
2313 unsigned long i_arg)
2314 {
2315 NV_STATUS rmStatus;
2316 int status = 0;
2317 nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2318 nv_linux_state_t *nvl;
2319 nv_state_t *nv;
2320 nvidia_stack_t *sp = NULL;
2321 nv_ioctl_xfer_t ioc_xfer;
2322 void *arg_ptr = (void *) i_arg;
2323 void *arg_copy = NULL;
2324 size_t arg_size = 0;
2325 int arg_cmd;
2326
2327 nv_printf(NV_DBG_INFO, "NVRM: ioctl(0x%x, 0x%x, 0x%x)\n",
2328 _IOC_NR(cmd), (unsigned int) i_arg, _IOC_SIZE(cmd));
2329
2330 if (!nv_is_control_device(inode))
2331 {
2332 status = nv_wait_open_complete_interruptible(nvlfp);
2333 if (status != 0)
2334 goto done_early;
2335 }
2336
2337 arg_size = _IOC_SIZE(cmd);
2338 arg_cmd = _IOC_NR(cmd);
2339
2340 if (arg_cmd == NV_ESC_IOCTL_XFER_CMD)
2341 {
2342 if (arg_size != sizeof(nv_ioctl_xfer_t))
2343 {
2344 nv_printf(NV_DBG_ERRORS,
2345 "NVRM: invalid ioctl XFER structure size!\n");
2346 status = -EINVAL;
2347 goto done_early;
2348 }
2349
2350 if (NV_COPY_FROM_USER(&ioc_xfer, arg_ptr, sizeof(ioc_xfer)))
2351 {
2352 nv_printf(NV_DBG_ERRORS,
2353 "NVRM: failed to copy in ioctl XFER data!\n");
2354 status = -EFAULT;
2355 goto done_early;
2356 }
2357
2358 arg_cmd = ioc_xfer.cmd;
2359 arg_size = ioc_xfer.size;
2360 arg_ptr = NvP64_VALUE(ioc_xfer.ptr);
2361
2362 if (arg_size > NV_ABSOLUTE_MAX_IOCTL_SIZE)
2363 {
2364 nv_printf(NV_DBG_ERRORS, "NVRM: invalid ioctl XFER size!\n");
2365 status = -EINVAL;
2366 goto done_early;
2367 }
2368 }
2369
2370 NV_KMALLOC(arg_copy, arg_size);
2371 if (arg_copy == NULL)
2372 {
2373 nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate ioctl memory\n");
2374 status = -ENOMEM;
2375 goto done_early;
2376 }
2377
2378 if (NV_COPY_FROM_USER(arg_copy, arg_ptr, arg_size))
2379 {
2380 nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy in ioctl data!\n");
2381 status = -EFAULT;
2382 goto done_early;
2383 }
2384
2385 /*
2386 * Handle NV_ESC_WAIT_OPEN_COMPLETE early as it is allowed to work
2387 * with or without nvl.
2388 */
2389 if (arg_cmd == NV_ESC_WAIT_OPEN_COMPLETE)
2390 {
2391 nv_ioctl_wait_open_complete_t *params = arg_copy;
2392 params->rc = nvlfp->open_rc;
2393 params->adapterStatus = nvlfp->adapter_status;
2394 goto done_early;
2395 }
2396
2397 nvl = nvlfp->nvptr;
2398 if (nvl == NULL)
2399 {
2400 status = -EIO;
2401 goto done_early;
2402 }
2403
2404 nv = NV_STATE_PTR(nvl);
2405
2406 status = nv_down_read_interruptible(&nv_system_pm_lock);
2407 if (status < 0)
2408 {
2409 goto done_early;
2410 }
2411
2412 status = nv_kmem_cache_alloc_stack(&sp);
2413 if (status != 0)
2414 {
2415 nv_printf(NV_DBG_ERRORS, "NVRM: Unable to allocate altstack for ioctl\n");
2416 goto done_pm_unlock;
2417 }
2418
2419 rmStatus = nv_check_gpu_state(nv);
2420 if (rmStatus == NV_ERR_GPU_IS_LOST)
2421 {
2422 nv_printf(NV_DBG_INFO, "NVRM: GPU is lost, skipping nvidia_ioctl\n");
2423 status = -EINVAL;
2424 goto done;
2425 }
2426
2427 switch (arg_cmd)
2428 {
2429 case NV_ESC_QUERY_DEVICE_INTR:
2430 {
2431 nv_ioctl_query_device_intr *query_intr = arg_copy;
2432
2433 NV_ACTUAL_DEVICE_ONLY(nv);
2434
2435 if ((arg_size < sizeof(*query_intr)) ||
2436 (!nv->regs->map))
2437 {
2438 status = -EINVAL;
2439 goto done;
2440 }
2441
2442 query_intr->intrStatus =
2443 *(nv->regs->map + (NV_RM_DEVICE_INTR_ADDRESS >> 2));
2444 query_intr->status = NV_OK;
2445 break;
2446 }
2447
2448 /* pass out info about the card */
2449 case NV_ESC_CARD_INFO:
2450 {
2451 size_t num_arg_devices = arg_size / sizeof(nv_ioctl_card_info_t);
2452
2453 NV_CTL_DEVICE_ONLY(nv);
2454
2455 status = nvidia_read_card_info(arg_copy, num_arg_devices);
2456 break;
2457 }
2458
2459 case NV_ESC_ATTACH_GPUS_TO_FD:
2460 {
2461 size_t num_arg_gpus = arg_size / sizeof(NvU32);
2462 size_t i;
2463
2464 NV_CTL_DEVICE_ONLY(nv);
2465
2466 if (num_arg_gpus == 0 || nvlfp->num_attached_gpus != 0 ||
2467 arg_size % sizeof(NvU32) != 0)
2468 {
2469 status = -EINVAL;
2470 goto done;
2471 }
2472
2473 NV_KMALLOC(nvlfp->attached_gpus, arg_size);
2474 if (nvlfp->attached_gpus == NULL)
2475 {
2476 status = -ENOMEM;
2477 goto done;
2478 }
2479 memcpy(nvlfp->attached_gpus, arg_copy, arg_size);
2480 nvlfp->num_attached_gpus = num_arg_gpus;
2481
2482 for (i = 0; i < nvlfp->num_attached_gpus; i++)
2483 {
2484 if (nvlfp->attached_gpus[i] == 0)
2485 {
2486 continue;
2487 }
2488
2489 if (nvidia_dev_get(nvlfp->attached_gpus[i], sp))
2490 {
2491 while (i--)
2492 {
2493 if (nvlfp->attached_gpus[i] != 0)
2494 nvidia_dev_put(nvlfp->attached_gpus[i], sp);
2495 }
2496 NV_KFREE(nvlfp->attached_gpus, arg_size);
2497 nvlfp->num_attached_gpus = 0;
2498
2499 status = -EINVAL;
2500 break;
2501 }
2502 }
2503
2504 break;
2505 }
2506
2507 case NV_ESC_CHECK_VERSION_STR:
2508 {
2509 NV_CTL_DEVICE_ONLY(nv);
2510
2511 rmStatus = rm_perform_version_check(sp, arg_copy, arg_size);
2512 status = ((rmStatus == NV_OK) ? 0 : -EINVAL);
2513 break;
2514 }
2515
2516 case NV_ESC_SYS_PARAMS:
2517 {
2518 nv_ioctl_sys_params_t *api = arg_copy;
2519
2520 NV_CTL_DEVICE_ONLY(nv);
2521
2522 if (arg_size != sizeof(nv_ioctl_sys_params_t))
2523 {
2524 status = -EINVAL;
2525 goto done;
2526 }
2527
2528 /* numa_memblock_size should only be set once */
2529 if (nvl->numa_memblock_size == 0)
2530 {
2531 nvl->numa_memblock_size = api->memblock_size;
2532 }
2533 else
2534 {
2535 status = (nvl->numa_memblock_size == api->memblock_size) ?
2536 0 : -EBUSY;
2537 goto done;
2538 }
2539 break;
2540 }
2541
2542 case NV_ESC_NUMA_INFO:
2543 {
2544 nv_ioctl_numa_info_t *api = arg_copy;
2545 rmStatus = NV_OK;
2546
2547 NV_ACTUAL_DEVICE_ONLY(nv);
2548
2549 if (arg_size != sizeof(nv_ioctl_numa_info_t))
2550 {
2551 status = -EINVAL;
2552 goto done;
2553 }
2554
2555 rmStatus = rm_get_gpu_numa_info(sp, nv, api);
2556 if (rmStatus != NV_OK)
2557 {
2558 status = -EBUSY;
2559 goto done;
2560 }
2561
2562 api->status = nv_get_numa_status(nvl);
2563 api->use_auto_online = nv_platform_use_auto_online(nvl);
2564 api->memblock_size = nv_ctl_device.numa_memblock_size;
2565 break;
2566 }
2567
2568 case NV_ESC_SET_NUMA_STATUS:
2569 {
2570 nv_ioctl_set_numa_status_t *api = arg_copy;
2571 rmStatus = NV_OK;
2572
2573 if (!NV_IS_SUSER())
2574 {
2575 status = -EACCES;
2576 goto done;
2577 }
2578
2579 NV_ACTUAL_DEVICE_ONLY(nv);
2580
2581 if (arg_size != sizeof(nv_ioctl_set_numa_status_t))
2582 {
2583 status = -EINVAL;
2584 goto done;
2585 }
2586
2587 /*
2588 * The nv_linux_state_t for the device needs to be locked
2589 * in order to prevent additional open()/close() calls from
2590 * manipulating the usage count for the device while we
2591 * determine if NUMA state can be changed.
2592 */
2593 down(&nvl->ldata_lock);
2594
2595 if (nv_get_numa_status(nvl) != api->status)
2596 {
2597 if (api->status == NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS)
2598 {
2599 /*
2600 * Only the current client should have an open file
2601 * descriptor for the device, to allow safe offlining.
2602 */
2603 if (NV_ATOMIC_READ(nvl->usage_count) > 1)
2604 {
2605 status = -EBUSY;
2606 goto unlock;
2607 }
2608 else
2609 {
2610 /*
2611 * If this call fails, it indicates that RM
2612 * is not ready to offline memory, and we should keep
2613 * the current NUMA status of ONLINE.
2614 */
2615 rmStatus = rm_gpu_numa_offline(sp, nv);
2616 if (rmStatus != NV_OK)
2617 {
2618 status = -EBUSY;
2619 goto unlock;
2620 }
2621 }
2622 }
2623
2624 status = nv_set_numa_status(nvl, api->status);
2625 if (status < 0)
2626 {
2627 if (api->status == NV_IOCTL_NUMA_STATUS_OFFLINE_IN_PROGRESS)
2628 (void) rm_gpu_numa_online(sp, nv);
2629 goto unlock;
2630 }
2631
2632 if (api->status == NV_IOCTL_NUMA_STATUS_ONLINE)
2633 {
2634 rmStatus = rm_gpu_numa_online(sp, nv);
2635 if (rmStatus != NV_OK)
2636 {
2637 status = -EBUSY;
2638 goto unlock;
2639 }
2640 }
2641 }
2642
2643 unlock:
2644 up(&nvl->ldata_lock);
2645
2646 break;
2647 }
2648
2649 case NV_ESC_EXPORT_TO_DMABUF_FD:
2650 {
2651 nv_ioctl_export_to_dma_buf_fd_t *params = arg_copy;
2652
2653 if (arg_size != sizeof(nv_ioctl_export_to_dma_buf_fd_t))
2654 {
2655 status = -EINVAL;
2656 goto done;
2657 }
2658
2659 NV_ACTUAL_DEVICE_ONLY(nv);
2660
2661 params->status = nv_dma_buf_export(nv, params);
2662
2663 break;
2664 }
2665
2666 default:
2667 rmStatus = rm_ioctl(sp, nv, &nvlfp->nvfp, arg_cmd, arg_copy, arg_size);
2668 status = ((rmStatus == NV_OK) ? 0 : -EINVAL);
2669 break;
2670 }
2671
2672 done:
2673 nv_kmem_cache_free_stack(sp);
2674
2675 done_pm_unlock:
2676 up_read(&nv_system_pm_lock);
2677
2678 done_early:
2679 if (arg_copy != NULL)
2680 {
2681 if (status != -EFAULT)
2682 {
2683 if (NV_COPY_TO_USER(arg_ptr, arg_copy, arg_size))
2684 {
2685 nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy out ioctl data\n");
2686 status = -EFAULT;
2687 }
2688 }
2689 NV_KFREE(arg_copy, arg_size);
2690 }
2691
2692 return status;
2693 }
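/*
 * For reference, a hedged user-space sketch of the XFER indirection handled
 * above: payloads larger than the packed _IOC_SIZE limit are passed through
 * an nv_ioctl_xfer_t descriptor. The _IOWR construction and the fd/payload
 * names here are illustrative assumptions; the structure fields and the
 * NV_ABSOLUTE_MAX_IOCTL_SIZE check mirror the code above.
 *
 *   nv_ioctl_xfer_t xfer;
 *
 *   xfer.cmd  = real_cmd;         // the actual escape code (assumed)
 *   xfer.size = sizeof(payload);  // must not exceed NV_ABSOLUTE_MAX_IOCTL_SIZE
 *   xfer.ptr  = NV_PTR_TO_NvP64(&payload);
 *   ioctl(fd, _IOWR(NV_IOCTL_MAGIC, NV_ESC_IOCTL_XFER_CMD, nv_ioctl_xfer_t),
 *         &xfer);
 */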
2694
2695 long nvidia_unlocked_ioctl(
2696 struct file *file,
2697 unsigned int cmd,
2698 unsigned long i_arg
2699 )
2700 {
2701 return nvidia_ioctl(NV_FILE_INODE(file), file, cmd, i_arg);
2702 }
2703
2704 irqreturn_t
2705 nvidia_isr_msix(
2706 int irq,
2707 void *arg
2708 )
2709 {
2710 irqreturn_t ret;
2711 nv_linux_state_t *nvl = (void *) arg;
2712
2713 // nvidia_isr_msix() is called for each of the MSI-X vectors, and they can
2714 // run in parallel on different CPUs (cores), but this is not currently
2715 // supported by nvidia_isr() and its children. As a big-hammer fix, simply
2716 // take a spinlock around the nvidia_isr() call to serialize them.
2717 //
2718 // At this point interrupts are disabled on the CPU running our ISR (see
2719 // comments for nv_default_irq_flags()) so a plain spinlock is enough.
2720 NV_SPIN_LOCK(&nvl->msix_isr_lock);
2721
2722 ret = nvidia_isr(irq, arg);
2723
2724 NV_SPIN_UNLOCK(&nvl->msix_isr_lock);
2725
2726 return ret;
2727 }
2728
2729 /*
2730 * driver receives an interrupt
2731 * if someone waiting, then hand it off.
2732 */
2733 irqreturn_t
2734 nvidia_isr(
2735 int irq,
2736 void *arg
2737 )
2738 {
2739 nv_linux_state_t *nvl = (void *) arg;
2740 nv_state_t *nv = NV_STATE_PTR(nvl);
2741 NvU32 need_to_run_bottom_half_gpu_lock_held = 0;
2742 NvBool rm_handled = NV_FALSE, uvm_handled = NV_FALSE, rm_fault_handling_needed = NV_FALSE;
2743 NvU32 rm_serviceable_fault_cnt = 0;
2744 NvU32 sec, usec;
2745 NvU16 index = 0;
2746 NvU64 currentTime = 0;
2747 NvBool found_irq = NV_FALSE;
2748
2749 rm_gpu_handle_mmu_faults(nvl->sp[NV_DEV_STACK_ISR], nv, &rm_serviceable_fault_cnt);
2750 rm_fault_handling_needed = (rm_serviceable_fault_cnt != 0);
2751
2752 #if defined (NV_UVM_ENABLE)
2753 //
2754 // Returns NV_OK if the UVM driver handled the interrupt
2755 //
2756 // Returns NV_ERR_NO_INTR_PENDING if the interrupt is not for
2757 // the UVM driver.
2758 //
2759 // Returns NV_WARN_MORE_PROCESSING_REQUIRED if the UVM top-half ISR was
2760 // unable to get its lock(s), due to other (UVM) threads holding them.
2761 //
2762 // RM can normally treat NV_WARN_MORE_PROCESSING_REQUIRED the same as
2763 // NV_ERR_NO_INTR_PENDING, but in some cases the extra information may
2764 // be helpful.
2765 //
2766 if (nv_uvm_event_interrupt(nv_get_cached_uuid(nv)) == NV_OK)
2767 uvm_handled = NV_TRUE;
2768 #endif
2769
2770 rm_handled = rm_isr(nvl->sp[NV_DEV_STACK_ISR], nv,
2771 &need_to_run_bottom_half_gpu_lock_held);
2772
2773 /* Replicate the Linux kernel's logic for tracking unhandled interrupts that cross a threshold. */
2774 if ((nv->flags & NV_FLAG_USES_MSI) || (nv->flags & NV_FLAG_USES_MSIX))
2775 {
2776 if (nvl->irq_count != NULL)
2777 {
2778 for (index = 0; index < nvl->current_num_irq_tracked; index++)
2779 {
2780 if (nvl->irq_count[index].irq == irq)
2781 {
2782 found_irq = NV_TRUE;
2783 break;
2784 }
2785
2786 found_irq = NV_FALSE;
2787 }
2788
2789 if (!found_irq && nvl->current_num_irq_tracked < nvl->num_intr)
2790 {
2791 index = nvl->current_num_irq_tracked;
2792 nvl->irq_count[index].irq = irq;
2793 nvl->current_num_irq_tracked++;
2794 found_irq = NV_TRUE;
2795 }
2796
2797 if (found_irq)
2798 {
2799 nvl->irq_count[index].total++;
2800
2801 if (rm_handled == NV_FALSE)
2802 {
2803 os_get_current_time(&sec, &usec);
2804 currentTime = ((NvU64)sec) * 1000000 + (NvU64)usec;
2805
2806 /* Reset unhandled count if it's been more than 0.1 seconds since the last unhandled IRQ */
2807 if ((currentTime - nvl->irq_count[index].last_unhandled) > RM_UNHANDLED_TIMEOUT_US)
2808 nvl->irq_count[index].unhandled = 1;
2809 else
2810 nvl->irq_count[index].unhandled++;
2811
2812 nvl->irq_count[index].last_unhandled = currentTime;
2813 rm_handled = NV_TRUE;
2814 }
2815
2816 if (nvl->irq_count[index].total >= RM_THRESHOLD_TOTAL_IRQ_COUNT)
2817 {
2818 if (nvl->irq_count[index].unhandled > RM_THRESHOLD_UNAHNDLED_IRQ_COUNT)
2819 nv_printf(NV_DBG_ERRORS, "NVRM: Going over RM unhandled interrupt threshold for irq %d\n", irq);
2820
2821 nvl->irq_count[index].total = 0;
2822 nvl->irq_count[index].unhandled = 0;
2823 nvl->irq_count[index].last_unhandled = 0;
2824 }
2825 }
2826 else
2827 nv_printf(NV_DBG_ERRORS, "NVRM: IRQ number out of valid range\n");
2828 }
2829 }
2830
2831 if (need_to_run_bottom_half_gpu_lock_held)
2832 {
2833 return IRQ_WAKE_THREAD;
2834 }
2835 else
2836 {
2837 //
2838 // If rm_isr does not need to run a bottom half, but fault handling
2839 // (rm_fault_handling_needed) indicates that one is needed, enqueue a
2840 // kthread-based bottom half, since it will acquire the GPU lock.
2841 //
2842 if (rm_fault_handling_needed)
2843 nv_kthread_q_schedule_q_item(&nvl->bottom_half_q, &nvl->bottom_half_q_item);
2844 }
2845
2846 return IRQ_RETVAL(rm_handled || uvm_handled || rm_fault_handling_needed);
2847 }
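/*
 * Worked example of the accounting above: once an IRQ line accumulates
 * RM_THRESHOLD_TOTAL_IRQ_COUNT (100000) interrupts, the window is examined;
 * if more than RM_THRESHOLD_UNAHNDLED_IRQ_COUNT (99900) of them, i.e. over
 * 99.9%, were unhandled, the error above is logged. The unhandled counter
 * restarts whenever consecutive unhandled interrupts arrive more than
 * RM_UNHANDLED_TIMEOUT_US (0.1 s) apart, so only dense bursts of unhandled
 * interrupts can trip the threshold, mirroring the kernel's spurious-IRQ
 * detection.
 */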
2848
2849 irqreturn_t
2850 nvidia_isr_kthread_bh(
2851 int irq,
2852 void *data
2853 )
2854 {
2855 return nvidia_isr_common_bh(data);
2856 }
2857
2858 irqreturn_t
2859 nvidia_isr_msix_kthread_bh(
2860 int irq,
2861 void *data
2862 )
2863 {
2864 NV_STATUS status;
2865 irqreturn_t ret;
2866 nv_state_t *nv = (nv_state_t *) data;
2867 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
2868
2869 //
2870 // Synchronize kthreads servicing bottom halves for different MSI-X vectors
2871 // as they share the same pre-allocated alt-stack.
2872 //
2873 status = os_acquire_mutex(nvl->msix_bh_mutex);
2874 // os_acquire_mutex can only fail when sleeping is not allowed; sleeping is allowed here.
2875 WARN_ON(status != NV_OK);
2876
2877 ret = nvidia_isr_common_bh(data);
2878
2879 os_release_mutex(nvl->msix_bh_mutex);
2880
2881 return ret;
2882 }
2883
2884 static irqreturn_t
2885 nvidia_isr_common_bh(
2886 void *data
2887 )
2888 {
2889 nv_state_t *nv = (nv_state_t *) data;
2890 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
2891 nvidia_stack_t *sp = nvl->sp[NV_DEV_STACK_ISR_BH];
2892 NV_STATUS status;
2893
2894 status = nv_check_gpu_state(nv);
2895 if (status == NV_ERR_GPU_IS_LOST)
2896 {
2897 nv_printf(NV_DBG_INFO, "NVRM: GPU is lost, skipping ISR bottom half\n");
2898 }
2899 else
2900 {
2901 rm_isr_bh(sp, nv);
2902 }
2903
2904 return IRQ_HANDLED;
2905 }
2906
2907 static void
2908 nvidia_isr_bh_unlocked(
2909 void * args
2910 )
2911 {
2912 nv_state_t *nv = (nv_state_t *) args;
2913 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
2914 nvidia_stack_t *sp;
2915 NV_STATUS status;
2916
2917 //
2918 // Synchronize kthreads servicing the unlocked bottom half, as they
2919 // share the same pre-allocated alt-stack.
2920 //
2921 status = os_acquire_mutex(nvl->isr_bh_unlocked_mutex);
2922 if (status != NV_OK)
2923 {
2924 nv_printf(NV_DBG_ERRORS, "NVRM: %s: Unable to take bottom_half mutex!\n",
2925 __FUNCTION__);
2926 WARN_ON(1);
2927 }
2928
2929 sp = nvl->sp[NV_DEV_STACK_ISR_BH_UNLOCKED];
2930
2931 status = nv_check_gpu_state(nv);
2932 if (status == NV_ERR_GPU_IS_LOST)
2933 {
2934 nv_printf(NV_DBG_INFO,
2935 "NVRM: GPU is lost, skipping unlocked ISR bottom half\n");
2936 }
2937 else
2938 {
2939 rm_isr_bh_unlocked(sp, nv);
2940 }
2941
2942 os_release_mutex(nvl->isr_bh_unlocked_mutex);
2943 }
2944
2945 static void
2946 nvidia_rc_timer_callback(
2947 struct nv_timer *nv_timer
2948 )
2949 {
2950 nv_linux_state_t *nvl = container_of(nv_timer, nv_linux_state_t, rc_timer);
2951 nv_state_t *nv = NV_STATE_PTR(nvl);
2952 nvidia_stack_t *sp = nvl->sp[NV_DEV_STACK_TIMER];
2953 NV_STATUS status;
2954
2955 status = nv_check_gpu_state(nv);
2956 if (status == NV_ERR_GPU_IS_LOST)
2957 {
2958 nv_printf(NV_DBG_INFO,
2959 "NVRM: GPU is lost, skipping device timer callbacks\n");
2960 return;
2961 }
2962
2963 if (rm_run_rc_callback(sp, nv) == NV_OK)
2964 {
2965 // set another timeout 1 sec in the future:
2966 mod_timer(&nvl->rc_timer.kernel_timer, jiffies + HZ);
2967 }
2968 }
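/*
 * Note on the re-arm pattern above: mod_timer(..., jiffies + HZ) always
 * schedules one second ahead regardless of the kernel tick rate, since HZ
 * is the number of jiffies per second. The timer therefore stays periodic
 * only for as long as rm_run_rc_callback() keeps returning NV_OK.
 */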
2969
2970 /*
2971 ** nvidia_ctl_open
2972 **
2973 ** nv control driver open entry point. Sessions are created here.
2974 */
2975 static int
2976 nvidia_ctl_open(
2977 struct inode *inode,
2978 struct file *file
2979 )
2980 {
2981 nv_linux_state_t *nvl = &nv_ctl_device;
2982 nv_state_t *nv = NV_STATE_PTR(nvl);
2983 nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
2984
2985 nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_open\n");
2986
2987 down(&nvl->ldata_lock);
2988
2989 /* save the nv away in file->private_data */
2990 nvlfp->nvptr = nvl;
2991
2992 if (NV_ATOMIC_READ(nvl->usage_count) == 0)
2993 {
2994 nv->flags |= (NV_FLAG_OPEN | NV_FLAG_CONTROL);
2995 }
2996
2997 NV_ATOMIC_INC(nvl->usage_count);
2998 up(&nvl->ldata_lock);
2999
3000 return 0;
3001 }
3002
3003
3004 /*
3005 ** nvidia_ctl_close
3006 */
3007 static int
3008 nvidia_ctl_close(
3009 struct inode *inode,
3010 struct file *file
3011 )
3012 {
3013 nv_alloc_t *at, *next;
3014 nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file);
3015 nv_state_t *nv = NV_STATE_PTR(nvl);
3016 nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file);
3017 nvidia_stack_t *sp = nvlfp->sp;
3018
3019 nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_close\n");
3020
3021 down(&nvl->ldata_lock);
3022 if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count))
3023 {
3024 nv->flags &= ~NV_FLAG_OPEN;
3025 }
3026 up(&nvl->ldata_lock);
3027
3028 rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
3029
3030 if (nvlfp->free_list != NULL)
3031 {
3032 at = nvlfp->free_list;
3033 while (at != NULL)
3034 {
3035 next = at->next;
3036 if (at->pid == os_get_current_process())
3037 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3038 nv_free_pages(nv, at->num_pages,
3039 at->flags.contig,
3040 at->cache_type,
3041 (void *)at);
3042 at = next;
3043 }
3044 }
3045
3046 if (nvlfp->num_attached_gpus != 0)
3047 {
3048 size_t i;
3049
3050 for (i = 0; i < nvlfp->num_attached_gpus; i++)
3051 {
3052 if (nvlfp->attached_gpus[i] != 0)
3053 nvidia_dev_put(nvlfp->attached_gpus[i], sp);
3054 }
3055
3056 NV_KFREE(nvlfp->attached_gpus, sizeof(NvU32) * nvlfp->num_attached_gpus);
3057 nvlfp->num_attached_gpus = 0;
3058 }
3059
3060 nv_free_file_private(nvlfp);
3061 NV_SET_FILE_PRIVATE(file, NULL);
3062
3063 nv_kmem_cache_free_stack(sp);
3064
3065 return 0;
3066 }
3067
3068
3069 void NV_API_CALL
3070 nv_set_dma_address_size(
3071 nv_state_t *nv,
3072 NvU32 phys_addr_bits
3073 )
3074 {
3075 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3076 NvU64 start_addr = nv_get_dma_start_address(nv);
3077 NvU64 new_mask = (((NvU64)1) << phys_addr_bits) - 1;
3078
3079 nvl->dma_dev.addressable_range.limit = start_addr + new_mask;
3080
3081 /*
3082 * The only scenario in which we definitely should not update the DMA mask
3083 * is on POWER, when using TCE bypass mode (see nv_get_dma_start_address()
3084 * for details), since the meaning of the DMA mask is overloaded in that
3085 * case.
3086 */
3087 if (!nvl->tce_bypass_enabled)
3088 {
3089 dma_set_mask(&nvl->pci_dev->dev, new_mask);
3090 /* Certain kernels have a bug which causes pci_set_consistent_dma_mask
3091 * to call GPL sme_active symbol, this bug has already been fixed in a
3092 * minor release update but detect the failure scenario here to prevent
3093 * an installation regression */
3094 #if !NV_IS_EXPORT_SYMBOL_GPL_sme_active
3095 dma_set_coherent_mask(&nvl->pci_dev->dev, new_mask);
3096 #endif
3097 }
3098 }
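/*
 * Worked example of the mask computation above: for phys_addr_bits == 40,
 * new_mask == (1ULL << 40) - 1 == 0xFFFFFFFFFF, so the device may address
 * a 1 TiB DMA window starting at nv_get_dma_start_address(), and
 * addressable_range.limit becomes start_addr + 0xFFFFFFFFFF.
 */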
3099
3100 static NvUPtr
3101 nv_map_guest_pages(nv_alloc_t *at,
3102 NvU64 address,
3103 NvU32 page_count,
3104 NvU32 page_idx)
3105 {
3106 struct page **pages;
3107 NvU32 j;
3108 NvUPtr virt_addr;
3109
3110 NV_KMALLOC(pages, sizeof(struct page *) * page_count);
3111 if (pages == NULL)
3112 {
3113 nv_printf(NV_DBG_ERRORS,
3114 "NVRM: failed to allocate vmap() page descriptor table!\n");
3115 return 0;
3116 }
3117
3118 for (j = 0; j < page_count; j++)
3119 {
3120 pages[j] = NV_GET_PAGE_STRUCT(at->page_table[page_idx+j]->phys_addr);
3121 }
3122
3123 virt_addr = nv_vm_map_pages(pages, page_count,
3124 at->cache_type == NV_MEMORY_CACHED, at->flags.unencrypted);
3125 NV_KFREE(pages, sizeof(struct page *) * page_count);
3126
3127 return virt_addr;
3128 }
3129
3130 NV_STATUS NV_API_CALL
3131 nv_alias_pages(
3132 nv_state_t *nv,
3133 NvU32 page_cnt,
3134 NvU32 contiguous,
3135 NvU32 cache_type,
3136 NvU64 guest_id,
3137 NvU64 *pte_array,
3138 void **priv_data
3139 )
3140 {
3141 nv_alloc_t *at;
3142 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3143 NvU32 i=0;
3144 nvidia_pte_t *page_ptr = NULL;
3145
3146 at = nvos_create_alloc(nvl->dev, page_cnt);
3147
3148 if (at == NULL)
3149 {
3150 return NV_ERR_NO_MEMORY;
3151 }
3152
3153 at->cache_type = cache_type;
3154 if (contiguous)
3155 at->flags.contig = NV_TRUE;
3156 #if defined(NVCPU_AARCH64)
3157 if (at->cache_type != NV_MEMORY_CACHED)
3158 at->flags.aliased = NV_TRUE;
3159 #endif
3160
3161 at->flags.guest = NV_TRUE;
3162
3163 at->order = get_order(at->num_pages * PAGE_SIZE);
3164
3165 for (i=0; i < at->num_pages; ++i)
3166 {
3167 page_ptr = at->page_table[i];
3168
3169 if (contiguous && i>0)
3170 {
3171 page_ptr->dma_addr = pte_array[0] + (i << PAGE_SHIFT);
3172 }
3173 else
3174 {
3175 page_ptr->dma_addr = pte_array[i];
3176 }
3177
3178 page_ptr->phys_addr = page_ptr->dma_addr;
3179
3180 /* aliased pages will be mapped on demand. */
3181 page_ptr->virt_addr = 0x0;
3182 }
3183
3184 at->guest_id = guest_id;
3185 *priv_data = at;
3186 NV_ATOMIC_INC(at->usage_count);
3187
3188 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3189
3190 return NV_OK;
3191 }
3192
3193 /*
3194 * This creates a dummy nv_alloc_t for peer IO mem, so that it can
3195 * be mapped using NvRmMapMemory.
3196 */
3197 NV_STATUS NV_API_CALL nv_register_peer_io_mem(
3198 nv_state_t *nv,
3199 NvU64 *phys_addr,
3200 NvU64 page_count,
3201 void **priv_data
3202 )
3203 {
3204 nv_alloc_t *at;
3205 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3206 NvU64 i;
3207 NvU64 addr;
3208
3209 at = nvos_create_alloc(nvl->dev, page_count);
3210
3211 if (at == NULL)
3212 return NV_ERR_NO_MEMORY;
3213
3214 // IO regions should be uncached and contiguous
3215 at->cache_type = NV_MEMORY_UNCACHED;
3216 at->flags.contig = NV_TRUE;
3217 #if defined(NVCPU_AARCH64)
3218 at->flags.aliased = NV_TRUE;
3219 #endif
3220 at->flags.peer_io = NV_TRUE;
3221
3222 at->order = get_order(at->num_pages * PAGE_SIZE);
3223
3224 addr = phys_addr[0];
3225
3226 for (i = 0; i < page_count; i++)
3227 {
3228 at->page_table[i]->phys_addr = addr;
3229 addr += PAGE_SIZE;
3230 }
3231
3232 // No struct page array exists for this memory.
3233 at->user_pages = NULL;
3234
3235 *priv_data = at;
3236
3237 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3238
3239 return NV_OK;
3240 }
3241
3242 void NV_API_CALL nv_unregister_peer_io_mem(
3243 nv_state_t *nv,
3244 void *priv_data
3245 )
3246 {
3247 nv_alloc_t *at = priv_data;
3248
3249 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3250
3251 nvos_free_alloc(at);
3252 }
3253
3254 /*
3255 * By registering user pages, we create a dummy nv_alloc_t for it, so that the
3256 * rest of the RM can treat it like any other alloc.
3257 *
3258 * This also converts the page array to an array of physical addresses.
3259 */
3260 NV_STATUS NV_API_CALL nv_register_user_pages(
3261 nv_state_t *nv,
3262 NvU64 page_count,
3263 NvU64 *phys_addr,
3264 void *import_priv,
3265 void **priv_data
3266 )
3267 {
3268 nv_alloc_t *at;
3269 NvU64 i;
3270 struct page **user_pages;
3271 nv_linux_state_t *nvl;
3272 nvidia_pte_t *page_ptr;
3273
3274 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_register_user_pages: 0x%x\n", page_count);
3275 user_pages = *priv_data;
3276 nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3277
3278 at = nvos_create_alloc(nvl->dev, page_count);
3279
3280 if (at == NULL)
3281 {
3282 return NV_ERR_NO_MEMORY;
3283 }
3284
3285 /*
3286 * Anonymous (user) memory cannot be guaranteed contiguous, so contiguity
3287 * is not enforced; the allocation is tracked as uncached here.
3288 */
3289 at->cache_type = NV_MEMORY_UNCACHED;
3290 #if defined(NVCPU_AARCH64)
3291 at->flags.aliased = NV_TRUE;
3292 #endif
3293
3294 at->flags.user = NV_TRUE;
3295
3296 at->order = get_order(at->num_pages * PAGE_SIZE);
3297
3298 for (i = 0; i < page_count; i++)
3299 {
3300 /*
3301 * We only assign the physical address and not the DMA address, since
3302 * this allocation hasn't been DMA-mapped yet.
3303 */
3304 page_ptr = at->page_table[i];
3305 page_ptr->phys_addr = page_to_phys(user_pages[i]);
3306
3307 phys_addr[i] = page_ptr->phys_addr;
3308 }
3309
3310 /* Save off the user pages array to be restored later */
3311 at->user_pages = user_pages;
3312
3313 /* Save off the import private data to be returned later */
3314 if (import_priv != NULL)
3315 {
3316 at->import_priv = import_priv;
3317 }
3318
3319 *priv_data = at;
3320
3321 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3322
3323 return NV_OK;
3324 }
3325
3326 void NV_API_CALL nv_unregister_user_pages(
3327 nv_state_t *nv,
3328 NvU64 page_count,
3329 void **import_priv,
3330 void **priv_data
3331 )
3332 {
3333 nv_alloc_t *at = *priv_data;
3334
3335 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_user_pages: 0x%x\n", page_count);
3336
3337 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3338
3339 WARN_ON(!at->flags.user);
3340
3341 /* Restore the user pages array for the caller to handle */
3342 *priv_data = at->user_pages;
3343
3344 /* Return the import private data for the caller to handle */
3345 if (import_priv != NULL)
3346 {
3347 *import_priv = at->import_priv;
3348 }
3349
3350 nvos_free_alloc(at);
3351 }
3352
3353 /*
3354 * This creates a dummy nv_alloc_t for existing physical allocations, so
3355 * that it can be mapped using NvRmMapMemory and BAR2 code path.
3356 */
3357 NV_STATUS NV_API_CALL nv_register_phys_pages(
3358 nv_state_t *nv,
3359 NvU64 *phys_addr,
3360 NvU64 page_count,
3361 NvU32 cache_type,
3362 void **priv_data
3363 )
3364 {
3365 nv_alloc_t *at;
3366 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3367 NvU64 i;
3368 NvU64 addr;
3369
3370 at = nvos_create_alloc(nvl->dev, page_count);
3371
3372 if (at == NULL)
3373 return NV_ERR_NO_MEMORY;
3374 /*
3375 * Use the caller-requested cache type; the pages are treated as discontiguous.
3376 */
3377 at->cache_type = cache_type;
3378
3379 /*
3380 * Only the physical address is available, so we don't try to reuse
3381 * existing mappings.
3382 */
3383 at->flags.physical = NV_TRUE;
3384
3385 at->order = get_order(at->num_pages * PAGE_SIZE);
3386
3387 for (i = 0, addr = phys_addr[0]; i < page_count; addr = phys_addr[++i])
3388 {
3389 at->page_table[i]->phys_addr = addr;
3390 }
3391
3392 at->user_pages = NULL;
3393 *priv_data = at;
3394
3395 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3396
3397 return NV_OK;
3398 }
3399
3400 NV_STATUS NV_API_CALL nv_register_sgt(
3401 nv_state_t *nv,
3402 NvU64 *phys_addr,
3403 NvU64 page_count,
3404 NvU32 cache_type,
3405 void **priv_data,
3406 struct sg_table *import_sgt,
3407 void *import_priv
3408 )
3409 {
3410 nv_alloc_t *at;
3411 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3412
3413 unsigned int i, j = 0;
3414 NvU64 sg_addr, sg_off, sg_len;
3415 struct scatterlist *sg;
3416
3417 at = nvos_create_alloc(nvl->dev, page_count);
3418
3419 if (at == NULL)
3420 return NV_ERR_NO_MEMORY;
3421
3422 /* Populate phys addrs with DMA addrs from SGT */
3423 for_each_sg(import_sgt->sgl, sg, import_sgt->nents, i)
3424 {
3425 /*
3426 * It is possible for dma_map_sg() to merge scatterlist entries, so
3427 * make sure we account for that here.
3428 */
3429 for (sg_addr = sg_dma_address(sg), sg_len = sg_dma_len(sg), sg_off = 0;
3430 (sg_off < sg_len) && (j < page_count);
3431 sg_off += PAGE_SIZE, j++)
3432 {
3433 phys_addr[j] = sg_addr + sg_off;
3434 }
3435 }
3436
3437 /*
3438 * Use the caller-requested cache type; the pages are treated as discontiguous.
3439 */
3440 at->cache_type = cache_type;
3441
3442 at->import_sgt = import_sgt;
3443
3444 /* Save off the import private data to be returned later */
3445 if (import_priv != NULL)
3446 {
3447 at->import_priv = import_priv;
3448 }
3449
3450 at->order = get_order(at->num_pages * PAGE_SIZE);
3451
3452 *priv_data = at;
3453
3454 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3455
3456 return NV_OK;
3457 }
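/*
 * A short illustration of the merge handling above (assuming
 * PAGE_SIZE == 4096): if dma_map_sg() coalesced two pages into a single
 * scatterlist entry, that entry reports sg_dma_len(sg) == 8192 and the
 * inner loop emits two page-sized addresses from it:
 *
 *   phys_addr[j]     = sg_dma_address(sg);
 *   phys_addr[j + 1] = sg_dma_address(sg) + PAGE_SIZE;
 *
 * so exactly page_count entries are produced no matter how the entries
 * were merged.
 */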
3458
3459 void NV_API_CALL nv_unregister_sgt(
3460 nv_state_t *nv,
3461 struct sg_table **import_sgt,
3462 void **import_priv,
3463 void *priv_data
3464 )
3465 {
3466 nv_alloc_t *at = priv_data;
3467
3468 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_sgt\n");
3469
3470 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3471
3472 /* Restore the imported SGT for the caller to handle */
3473 *import_sgt = at->import_sgt;
3474
3475 /* Return the import private data for the caller to handle */
3476 if (import_priv != NULL)
3477 {
3478 *import_priv = at->import_priv;
3479 }
3480
3481 nvos_free_alloc(at);
3482 }
3483
3484 void NV_API_CALL nv_unregister_phys_pages(
3485 nv_state_t *nv,
3486 void *priv_data
3487 )
3488 {
3489 nv_alloc_t *at = priv_data;
3490 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3491
3492 nvos_free_alloc(at);
3493 }
3494
3495 NV_STATUS NV_API_CALL nv_get_num_phys_pages(
3496 void *pAllocPrivate,
3497 NvU32 *pNumPages
3498 )
3499 {
3500 nv_alloc_t *at = pAllocPrivate;
3501
3502 if (!pNumPages) {
3503 return NV_ERR_INVALID_ARGUMENT;
3504 }
3505
3506 *pNumPages = at->num_pages;
3507
3508 return NV_OK;
3509 }
3510
3511 NV_STATUS NV_API_CALL nv_get_phys_pages(
3512 void *pAllocPrivate,
3513 void *pPages,
3514 NvU32 *pNumPages
3515 )
3516 {
3517 nv_alloc_t *at = pAllocPrivate;
3518 struct page **pages = (struct page **)pPages;
3519 NvU32 page_count;
3520 int i;
3521
3522 if (!pNumPages || !pPages) {
3523 return NV_ERR_INVALID_ARGUMENT;
3524 }
3525
3526 page_count = NV_MIN(*pNumPages, at->num_pages);
3527
3528 for (i = 0; i < page_count; i++) {
3529 pages[i] = NV_GET_PAGE_STRUCT(at->page_table[i]->phys_addr);
3530 }
3531
3532 *pNumPages = page_count;
3533
3534 return NV_OK;
3535 }
3536
3537 void nv_get_disp_smmu_stream_ids
3538 (
3539 nv_state_t *nv,
3540 NvU32 *dispIsoStreamId,
3541 NvU32 *dispNisoStreamId)
3542 {
3543 *dispIsoStreamId = nv->iommus.dispIsoStreamId;
3544 *dispNisoStreamId = nv->iommus.dispNisoStreamId;
3545 }
3546
3547 void* NV_API_CALL nv_alloc_kernel_mapping(
3548 nv_state_t *nv,
3549 void *pAllocPrivate,
3550 NvU64 pageIndex,
3551 NvU32 pageOffset,
3552 NvU64 size,
3553 void **pPrivate
3554 )
3555 {
3556 nv_alloc_t *at = pAllocPrivate;
3557 NvU32 j, page_count;
3558 NvUPtr virt_addr;
3559 struct page **pages;
3560 NvBool isUserAllocatedMem;
3561
3562 //
3563 // For user-allocated memory (such as an ErrorNotifier's) that is neither
3564 // allocated nor owned by RM, the driver only stores the physical address
3565 // of that memory and does not map it until required.
3566 // In that case the page tables hold virt_addr == 0, so we first need to
3567 // map those pages to obtain a kernel virtual address.
3568 //
3569 isUserAllocatedMem = at->flags.user &&
3570 !at->page_table[pageIndex]->virt_addr &&
3571 at->page_table[pageIndex]->phys_addr;
3572
3573 //
3574 // User memory may not have a kernel VA, so check for that and fall
3575 // through to the else case to create one.
3576 //
3577 if (((size + pageOffset) <= PAGE_SIZE) &&
3578 !at->flags.guest && !at->flags.aliased &&
3579 !isUserAllocatedMem && !at->flags.physical)
3580 {
3581 *pPrivate = NULL;
3582 return (void *)(at->page_table[pageIndex]->virt_addr + pageOffset);
3583 }
3584 else
3585 {
3586 size += pageOffset;
3587 page_count = (size >> PAGE_SHIFT) + ((size & ~NV_PAGE_MASK) ? 1 : 0);
3588
3589 if (at->flags.guest)
3590 {
3591 virt_addr = nv_map_guest_pages(at,
3592 nv->bars[NV_GPU_BAR_INDEX_REGS].cpu_address,
3593 page_count, pageIndex);
3594 }
3595 else
3596 {
3597 NV_KMALLOC(pages, sizeof(struct page *) * page_count);
3598 if (pages == NULL)
3599 {
3600 nv_printf(NV_DBG_ERRORS,
3601 "NVRM: failed to allocate vmap() page descriptor table!\n");
3602 return NULL;
3603 }
3604
3605 for (j = 0; j < page_count; j++)
3606 pages[j] = NV_GET_PAGE_STRUCT(at->page_table[pageIndex+j]->phys_addr);
3607
3608 virt_addr = nv_vm_map_pages(pages, page_count,
3609 at->cache_type == NV_MEMORY_CACHED, at->flags.unencrypted);
3610 NV_KFREE(pages, sizeof(struct page *) * page_count);
3611 }
3612
3613 if (virt_addr == 0)
3614 {
3615 nv_printf(NV_DBG_ERRORS, "NVRM: failed to map pages!\n");
3616 return NULL;
3617 }
3618
3619 *pPrivate = (void *)(NvUPtr)page_count;
3620 return (void *)(virt_addr + pageOffset);
3621 }
3622
3623 return NULL;
3624 }
3625
3626 NV_STATUS NV_API_CALL nv_free_kernel_mapping(
3627 nv_state_t *nv,
3628 void *pAllocPrivate,
3629 void *address,
3630 void *pPrivate
3631 )
3632 {
3633 nv_alloc_t *at = pAllocPrivate;
3634 NvUPtr virt_addr;
3635 NvU32 page_count;
3636
3637 virt_addr = ((NvUPtr)address & NV_PAGE_MASK);
3638 page_count = (NvUPtr)pPrivate;
3639
3640 if (at->flags.guest)
3641 {
3642 nv_iounmap((void *)virt_addr, (page_count * PAGE_SIZE));
3643 }
3644 else if (pPrivate != NULL)
3645 {
3646 nv_vm_unmap_pages(virt_addr, page_count);
3647 }
3648
3649 return NV_OK;
3650 }
3651
3652 NV_STATUS NV_API_CALL nv_alloc_pages(
3653 nv_state_t *nv,
3654 NvU32 page_count,
3655 NvU64 page_size,
3656 NvBool contiguous,
3657 NvU32 cache_type,
3658 NvBool zeroed,
3659 NvBool unencrypted,
3660 NvS32 node_id,
3661 NvU64 *pte_array,
3662 void **priv_data
3663 )
3664 {
3665 nv_alloc_t *at;
3666 NV_STATUS status = NV_ERR_NO_MEMORY;
3667 nv_linux_state_t *nvl = NULL;
3668 NvBool will_remap = NV_FALSE;
3669 NvU32 i;
3670 struct device *dev = NULL;
3671
3672 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_alloc_pages: %d pages, nodeid %d\n", page_count, node_id);
3673 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: contig %d cache_type %d\n",
3674 contiguous, cache_type);
3675
3676 //
3677 // A system memory allocation can be associated with a client instead of
3678 // a GPU; handle the case where the per-device state is NULL.
3679 //
3680 if (nv)
3681 {
3682 nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3683 will_remap = nv_requires_dma_remap(nv);
3684 dev = nvl->dev;
3685 }
3686
3687 if (nv_encode_caching(NULL, cache_type, NV_MEMORY_TYPE_SYSTEM))
3688 return NV_ERR_NOT_SUPPORTED;
3689
3690 at = nvos_create_alloc(dev, page_count);
3691 if (at == NULL)
3692 return NV_ERR_NO_MEMORY;
3693
3694 at->cache_type = cache_type;
3695
3696 if (contiguous)
3697 at->flags.contig = NV_TRUE;
3698 if (zeroed)
3699 at->flags.zeroed = NV_TRUE;
3700 #if defined(NVCPU_AARCH64)
3701 if (at->cache_type != NV_MEMORY_CACHED)
3702 at->flags.aliased = NV_TRUE;
3703 #endif
3704 if (unencrypted)
3705 at->flags.unencrypted = NV_TRUE;
3706
3707 #if defined(NVCPU_PPC64LE)
3708 /*
3709 * Starting on Power9 systems, DMA addresses for NVLink are no longer the
3710 * same as used over PCIe. There is an address compression scheme required
3711 * for NVLink ONLY which impacts the upper address bits of the DMA address.
3712 *
3713 * This divergence between PCIe and NVLink DMA mappings breaks assumptions
3714 * in the driver where during initialization we allocate system memory
3715 * for the GPU to access over PCIe before NVLink is trained -- and some of
3716 * these mappings persist on the GPU. If these persistent mappings are not
3717 * equivalent they will cause invalid DMA accesses from the GPU once we
3718 * switch to NVLink.
3719 *
3720 * To work around this we limit all system memory allocations from the driver
3721 * during the period before NVLink is enabled to be from NUMA node 0 (CPU 0)
3722 * which has a CPU real address with the upper address bits (above bit 42)
3723 * set to 0. Effectively making the PCIe and NVLink DMA mappings equivalent
3724 * allowing persistent system memory mappings already programmed on the GPU
3725 * to remain valid after NVLink is enabled.
3726 *
3727 * See Bug 1920398 for more details.
3728 */
3729 if (nv && nvl->npu && !nvl->dma_dev.nvlink)
3730 {
3731 at->flags.node = NV_TRUE;
3732 at->node_id = 0;
3733 }
3734 #endif
3735
3736 if (node_id != NUMA_NO_NODE)
3737 {
3738 at->flags.node = NV_TRUE;
3739 at->node_id = node_id;
3740 }
3741
3742 if (at->flags.contig)
3743 {
3744 status = nv_alloc_contig_pages(nv, at);
3745 }
3746 else
3747 {
3748 if (page_size == 0)
3749 {
3750 status = NV_ERR_INVALID_ARGUMENT;
3751 goto failed;
3752 }
3753 at->order = get_order(page_size);
3754 status = nv_alloc_system_pages(nv, at);
3755 }
3756
3757 if (status != NV_OK)
3758 goto failed;
3759
3760 for (i = 0; i < ((contiguous) ? 1 : page_count); i++)
3761 {
3762 /*
3763 * The contents of the pte_array[] depend on whether or not this device
3764 * requires DMA-remapping. If it does, it should be the phys addresses
3765 * used by the DMA-remapping paths, otherwise it should be the actual
3766 * address that the device should use for DMA (which, confusingly, may
3767 * be different than the CPU physical address, due to a static DMA
3768 * offset).
3769 */
3770 if ((nv == NULL) || will_remap)
3771 {
3772 pte_array[i] = at->page_table[i]->phys_addr;
3773 }
3774 else
3775 {
3776 pte_array[i] = nv_phys_to_dma(dev,
3777 at->page_table[i]->phys_addr);
3778 }
3779 }
3780
3781 *priv_data = at;
3782 NV_ATOMIC_INC(at->usage_count);
3783
3784 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3785
3786 return NV_OK;
3787
3788 failed:
3789 nvos_free_alloc(at);
3790
3791 return status;
3792 }
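/*
 * A hedged illustration of the pte_array contract above: for a contiguous
 * allocation only pte_array[0] is filled in, and callers derive page i as
 * pte_array[0] + (i << PAGE_SHIFT) (as nv_alias_pages() does); for a
 * discontiguous allocation every entry is populated individually. Whether
 * the entries hold CPU physical or DMA addresses depends on
 * nv_requires_dma_remap().
 */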
3793
3794 NV_STATUS NV_API_CALL nv_free_pages(
3795 nv_state_t *nv,
3796 NvU32 page_count,
3797 NvBool contiguous,
3798 NvU32 cache_type,
3799 void *priv_data
3800 )
3801 {
3802 NV_STATUS rmStatus = NV_OK;
3803 nv_alloc_t *at = priv_data;
3804
3805 nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_free_pages: 0x%x\n", page_count);
3806
3807 NV_PRINT_AT(NV_DBG_MEMINFO, at);
3808
3809 /*
3810 * If the 'at' usage count doesn't drop to zero here, not all of
3811 * the user mappings have been torn down in time - we can't
3812 * safely free the memory. We report success back to the RM, but
3813 * defer the actual free operation until later.
3814 *
3815 * This is described in greater detail in the comments above the
3816 * nvidia_vma_(open|release)() callbacks in nv-mmap.c.
3817 */
3818 if (!NV_ATOMIC_DEC_AND_TEST(at->usage_count))
3819 return NV_OK;
3820
3821 if (!at->flags.guest)
3822 {
3823 if (at->flags.contig)
3824 nv_free_contig_pages(at);
3825 else
3826 nv_free_system_pages(at);
3827 }
3828
3829 nvos_free_alloc(at);
3830
3831 return rmStatus;
3832 }
3833
3834 NvBool nv_lock_init_locks
3835 (
3836 nvidia_stack_t *sp,
3837 nv_state_t *nv
3838 )
3839 {
3840 nv_linux_state_t *nvl;
3841 nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3842
3843 NV_INIT_MUTEX(&nvl->ldata_lock);
3844 NV_INIT_MUTEX(&nvl->mmap_lock);
3845 NV_INIT_MUTEX(&nvl->open_q_lock);
3846
3847 NV_ATOMIC_SET(nvl->usage_count, 0);
3848
3849 if (!rm_init_event_locks(sp, nv))
3850 return NV_FALSE;
3851
3852 return NV_TRUE;
3853 }
3854
3855 void nv_lock_destroy_locks
3856 (
3857 nvidia_stack_t *sp,
3858 nv_state_t *nv
3859 )
3860 {
3861 rm_destroy_event_locks(sp, nv);
3862 }
3863
3864 void NV_API_CALL nv_post_event(
3865 nv_event_t *event,
3866 NvHandle handle,
3867 NvU32 index,
3868 NvU32 info32,
3869 NvU16 info16,
3870 NvBool data_valid
3871 )
3872 {
3873 nv_linux_file_private_t *nvlfp = nv_get_nvlfp_from_nvfp(event->nvfp);
3874 unsigned long eflags;
3875 nvidia_event_t *nvet;
3876
3877 NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);
3878
3879 if (data_valid)
3880 {
3881 NV_KMALLOC_ATOMIC(nvet, sizeof(nvidia_event_t));
3882 if (nvet == NULL)
3883 {
3884 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
3885 return;
3886 }
3887
3888 if (nvlfp->event_data_tail != NULL)
3889 nvlfp->event_data_tail->next = nvet;
3890 if (nvlfp->event_data_head == NULL)
3891 nvlfp->event_data_head = nvet;
3892 nvlfp->event_data_tail = nvet;
3893 nvet->next = NULL;
3894
3895 nvet->event = *event;
3896 nvet->event.hObject = handle;
3897 nvet->event.index = index;
3898 nvet->event.info32 = info32;
3899 nvet->event.info16 = info16;
3900 }
3901 //
3902 // 'dataless_event_pending' is interpreted by nvidia_poll() and nv_get_event()
3903 // to mean that an event without data is pending. Therefore, only set it
3904 // to true here if the newly posted event is dataless.
3905 //
3906 else
3907 {
3908 nvlfp->dataless_event_pending = NV_TRUE;
3909 }
3910
3911 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
3912
3913 wake_up_interruptible(&nvlfp->waitqueue);
3914 }
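/*
 * The queue built here is drained by nv_get_event() below: events carrying
 * data are appended as nvidia_event_t nodes to a singly linked list
 * (head/tail under fp_lock), while dataless events only set
 * dataless_event_pending, which nvidia_poll() consumes. Either form makes
 * a poller see POLLPRI | POLLIN.
 */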
3915
3916 NvBool NV_API_CALL nv_is_rm_firmware_active(
3917 nv_state_t *nv
3918 )
3919 {
3920 if (rm_firmware_active)
3921 {
3922 // "all" here means all GPUs
3923 if (strcmp(rm_firmware_active, "all") == 0)
3924 return NV_TRUE;
3925 }
3926 return NV_FALSE;
3927 }
3928
3929 const void* NV_API_CALL nv_get_firmware(
3930 nv_state_t *nv,
3931 nv_firmware_type_t fw_type,
3932 nv_firmware_chip_family_t fw_chip_family,
3933 const void **fw_buf,
3934 NvU32 *fw_size
3935 )
3936 {
3937 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
3938 const struct firmware *fw;
3939
3940 // The path is relative to /lib/firmware.
3941 // If this fails, request_firmware() prints an error to dmesg.
3942 if (request_firmware(&fw, nv_firmware_path(fw_type, fw_chip_family), nvl->dev) != 0)
3943 return NULL;
3944
3945 *fw_size = fw->size;
3946 *fw_buf = fw->data;
3947
3948 return fw;
3949 }
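/*
 * A hedged example of the lookup above: nv_firmware_path() yields a path
 * relative to /lib/firmware, so a GSP firmware request might resolve to a
 * file such as /lib/firmware/nvidia/<version>/gsp_ga10x.bin. The exact
 * filename depends on fw_type and fw_chip_family; the name shown is only
 * an illustration.
 */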
3950
3951 void NV_API_CALL nv_put_firmware(
3952 const void *fw_handle
3953 )
3954 {
3955 release_firmware(fw_handle);
3956 }
3957
3958 nv_file_private_t* NV_API_CALL nv_get_file_private(
3959 NvS32 fd,
3960 NvBool ctl,
3961 void **os_private
3962 )
3963 {
3964 struct file *filp = NULL;
3965 nv_linux_file_private_t *nvlfp = NULL;
3966 dev_t rdev = 0;
3967
3968 filp = fget(fd);
3969
3970 if (filp == NULL || !NV_FILE_INODE(filp))
3971 {
3972 goto fail;
3973 }
3974
3975 rdev = (NV_FILE_INODE(filp))->i_rdev;
3976
3977 if (MAJOR(rdev) != NV_MAJOR_DEVICE_NUMBER)
3978 {
3979 goto fail;
3980 }
3981
3982 if (ctl)
3983 {
3984 if (MINOR(rdev) != NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE)
3985 goto fail;
3986 }
3987 else
3988 {
3989 NvBool found = NV_FALSE;
3990 int i;
3991
3992 for (i = 0; i <= NV_MINOR_DEVICE_NUMBER_REGULAR_MAX; i++)
3993 {
3994 if ((nv_linux_minor_num_table[i] != NULL) && (MINOR(rdev) == i))
3995 {
3996 found = NV_TRUE;
3997 break;
3998 }
3999 }
4000
4001 if (!found)
4002 goto fail;
4003 }
4004
4005 nvlfp = NV_GET_LINUX_FILE_PRIVATE(filp);
4006
4007 *os_private = filp;
4008
4009 return &nvlfp->nvfp;
4010
4011 fail:
4012
4013 if (filp != NULL)
4014 {
4015 fput(filp);
4016 }
4017
4018 return NULL;
4019 }
4020
4021 void NV_API_CALL nv_put_file_private(
4022 void *os_private
4023 )
4024 {
4025 struct file *filp = os_private;
4026 fput(filp);
4027 }
4028
4029 int NV_API_CALL nv_get_event(
4030 nv_file_private_t *nvfp,
4031 nv_event_t *event,
4032 NvU32 *pending
4033 )
4034 {
4035 nv_linux_file_private_t *nvlfp = nv_get_nvlfp_from_nvfp(nvfp);
4036 nvidia_event_t *nvet;
4037 unsigned long eflags;
4038
4039 NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);
4040
4041 nvet = nvlfp->event_data_head;
4042 if (nvet == NULL)
4043 {
4044 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
4045 return NV_ERR_GENERIC;
4046 }
4047
4048 *event = nvet->event;
4049
4050 if (nvlfp->event_data_tail == nvet)
4051 nvlfp->event_data_tail = NULL;
4052 nvlfp->event_data_head = nvet->next;
4053
4054 *pending = (nvlfp->event_data_head != NULL);
4055
4056 NV_SPIN_UNLOCK_IRQRESTORE(&nvlfp->fp_lock, eflags);
4057
4058 NV_KFREE(nvet, sizeof(nvidia_event_t));
4059
4060 return NV_OK;
4061 }
4062
4063 int NV_API_CALL nv_start_rc_timer(
4064 nv_state_t *nv
4065 )
4066 {
4067 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4068
4069 if (nv->rc_timer_enabled)
4070 return -1;
4071
4072 nv_printf(NV_DBG_INFO, "NVRM: initializing rc timer\n");
4073
4074 nv_timer_setup(&nvl->rc_timer, nvidia_rc_timer_callback);
4075
4076 nv->rc_timer_enabled = 1;
4077
4078 // set the timeout for 1 second in the future:
4079 mod_timer(&nvl->rc_timer.kernel_timer, jiffies + HZ);
4080
4081 nv_printf(NV_DBG_INFO, "NVRM: rc timer initialized\n");
4082
4083 return 0;
4084 }
4085
4086 int NV_API_CALL nv_stop_rc_timer(
4087 nv_state_t *nv
4088 )
4089 {
4090 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4091
4092 if (!nv->rc_timer_enabled)
4093 return -1;
4094
4095 nv_printf(NV_DBG_INFO, "NVRM: stopping rc timer\n");
4096 nv->rc_timer_enabled = 0;
4097 del_timer_sync(&nvl->rc_timer.kernel_timer);
4098 nv_printf(NV_DBG_INFO, "NVRM: rc timer stopped\n");
4099
4100 return 0;
4101 }
4102
4103 #define SNAPSHOT_TIMER_FREQ (jiffies + HZ / NV_SNAPSHOT_TIMER_HZ)
4104
4105 static void snapshot_timer_callback(struct nv_timer *timer)
4106 {
4107 nv_linux_state_t *nvl = &nv_ctl_device;
4108 nv_state_t *nv = NV_STATE_PTR(nvl);
4109 unsigned long flags;
4110
4111 NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags);
4112 if (nvl->snapshot_callback != NULL)
4113 {
4114 nvl->snapshot_callback(nv->profiler_context);
4115 mod_timer(&timer->kernel_timer, SNAPSHOT_TIMER_FREQ);
4116 }
4117 NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags);
4118 }
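/*
 * Worked example of SNAPSHOT_TIMER_FREQ: with HZ == 250 and
 * NV_SNAPSHOT_TIMER_HZ == 10 (illustrative values only), the timer re-arms
 * at jiffies + 25, i.e. the snapshot callback runs every 100 ms while one
 * is registered.
 */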
4119
4120 void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context))
4121 {
4122 nv_linux_state_t *nvl = &nv_ctl_device;
4123
4124 nvl->snapshot_callback = snapshot_callback;
4125 nv_timer_setup(&nvl->snapshot_timer, snapshot_timer_callback);
4126 mod_timer(&nvl->snapshot_timer.kernel_timer, SNAPSHOT_TIMER_FREQ);
4127 }
4128
4129 void NV_API_CALL nv_stop_snapshot_timer(void)
4130 {
4131 nv_linux_state_t *nvl = &nv_ctl_device;
4132 NvBool timer_active;
4133 unsigned long flags;
4134
4135 NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags);
4136 timer_active = nvl->snapshot_callback != NULL;
4137 nvl->snapshot_callback = NULL;
4138 NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags);
4139
4140 if (timer_active)
4141 del_timer_sync(&nvl->snapshot_timer.kernel_timer);
4142 }
4143
4144 void NV_API_CALL nv_flush_snapshot_timer(void)
4145 {
4146 nv_linux_state_t *nvl = &nv_ctl_device;
4147 nv_state_t *nv = NV_STATE_PTR(nvl);
4148 unsigned long flags;
4149
4150 NV_SPIN_LOCK_IRQSAVE(&nvl->snapshot_timer_lock, flags);
4151 if (nvl->snapshot_callback != NULL)
4152 nvl->snapshot_callback(nv->profiler_context);
4153 NV_SPIN_UNLOCK_IRQRESTORE(&nvl->snapshot_timer_lock, flags);
4154 }
4155
4156 static int __init
4157 nvos_count_devices(void)
4158 {
4159 int count;
4160
4161 count = nv_pci_count_devices();
4162
4163 return count;
4164 }
4165
4166 #if NVCPU_IS_AARCH64
4167 NvBool nvos_is_chipset_io_coherent(void)
4168 {
4169 static NvTristate nv_chipset_is_io_coherent = NV_TRISTATE_INDETERMINATE;
4170
4171 if (nv_chipset_is_io_coherent == NV_TRISTATE_INDETERMINATE)
4172 {
4173 nvidia_stack_t *sp = NULL;
4174 if (nv_kmem_cache_alloc_stack(&sp) != 0)
4175 {
4176 nv_printf(NV_DBG_ERRORS,
4177 "NVRM: cannot allocate stack for platform coherence check callback\n");
4178 WARN_ON(1);
4179 return NV_FALSE;
4180 }
4181
4182 nv_chipset_is_io_coherent = rm_is_chipset_io_coherent(sp);
4183
4184 nv_kmem_cache_free_stack(sp);
4185 }
4186
4187 return nv_chipset_is_io_coherent;
4188 }
4189 #endif // NVCPU_IS_AARCH64
4190
4191 #if defined(CONFIG_PM)
4192 static NV_STATUS
4193 nv_power_management(
4194 nv_state_t *nv,
4195 nv_pm_action_t pm_action
4196 )
4197 {
4198 nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
4199 int status = NV_OK;
4200 nvidia_stack_t *sp = NULL;
4201
4202 if (nv_kmem_cache_alloc_stack(&sp) != 0)
4203 {
4204 return NV_ERR_NO_MEMORY;
4205 }
4206
4207 status = nv_check_gpu_state(nv);
4208 if (status == NV_ERR_GPU_IS_LOST)
4209 {
4210 NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping PM event\n");
4211 goto failure;
4212 }
4213
4214 switch (pm_action)
4215 {
4216 case NV_PM_ACTION_STANDBY:
4217 /* fall through */
4218 case NV_PM_ACTION_HIBERNATE:
4219 {
4220 /*
4221 * Flush nvl->open_q before suspend/hibernate to ensure deferred
4222 * opens do not get attempted during the PM transition.
4223 *
4224 * Note: user space is either frozen by the kernel or locked out
4225 * by nv_system_pm_lock, so no further deferred opens can be
4226 * enqueued before resume (meaning we do not need to unset
4227 * nvl->is_accepting_opens).
4228 */
4229 nv_kthread_q_flush(&nvl->open_q);
4230
4231 status = rm_power_management(sp, nv, pm_action);
4232
4233 nv_kthread_q_stop(&nvl->bottom_half_q);
4234
4235 nv_disable_pat_support();
4236 break;
4237 }
4238 case NV_PM_ACTION_RESUME:
4239 {
4240 nv_enable_pat_support();
4241
4242 nv_kthread_q_item_init(&nvl->bottom_half_q_item,
4243 nvidia_isr_bh_unlocked, (void *)nv);
4244
4245 status = nv_kthread_q_init(&nvl->bottom_half_q, nv_device_name);
4246 if (status != NV_OK)
4247 break;
4248
4249 status = rm_power_management(sp, nv, pm_action);
4250 break;
4251 }
4252 default:
4253 status = NV_ERR_INVALID_ARGUMENT;
4254 break;
4255 }
4256
4257 failure:
4258 nv_kmem_cache_free_stack(sp);
4259
4260 return status;
4261 }

static NV_STATUS
nv_restore_user_channels(
    nv_state_t *nv
)
{
    NV_STATUS status = NV_OK;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    nv_stack_t *sp = NULL;

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return NV_ERR_NO_MEMORY;
    }

    down(&nvl->ldata_lock);

    if ((nv->flags & NV_FLAG_OPEN) == 0)
    {
        goto done;
    }

    status = rm_restart_user_channels(sp, nv);
    WARN_ON(status != NV_OK);

    down(&nvl->mmap_lock);

    nv_set_safe_to_mmap_locked(nv, NV_TRUE);

    up(&nvl->mmap_lock);

    rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);

done:
    up(&nvl->ldata_lock);

    nv_kmem_cache_free_stack(sp);

    return status;
}

static NV_STATUS
nv_preempt_user_channels(
    nv_state_t *nv
)
{
    NV_STATUS status = NV_OK;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    nv_stack_t *sp = NULL;

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return NV_ERR_NO_MEMORY;
    }

    down(&nvl->ldata_lock);

    if ((nv->flags & NV_FLAG_OPEN) == 0)
    {
        goto done;
    }

    status = rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
    WARN_ON(status != NV_OK);

    down(&nvl->mmap_lock);

    nv_set_safe_to_mmap_locked(nv, NV_FALSE);
    nv_revoke_gpu_mappings_locked(nv);

    up(&nvl->mmap_lock);

    status = rm_stop_user_channels(sp, nv);
    WARN_ON(status != NV_OK);

done:
    up(&nvl->ldata_lock);

    nv_kmem_cache_free_stack(sp);

    return status;
}

static NV_STATUS
nvidia_suspend(
    struct device *dev,
    nv_pm_action_t pm_action,
    NvBool is_procfs_suspend
)
{
    NV_STATUS status = NV_OK;
    struct pci_dev *pci_dev = NULL;
    nv_linux_state_t *nvl;
    nv_state_t *nv;

    if (dev_is_pci(dev))
    {
        pci_dev = to_pci_dev(dev);
        nvl = pci_get_drvdata(pci_dev);
    }
    else
    {
        nvl = dev_get_drvdata(dev);
    }
    nv = NV_STATE_PTR(nvl);

    down(&nvl->ldata_lock);

    if (((nv->flags & NV_FLAG_OPEN) == 0) &&
        ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) == 0))
    {
        goto done;
    }

    if ((nv->flags & NV_FLAG_SUSPENDED) != 0)
    {
        nvl->suspend_count++;
        goto pci_pm;
    }

    if (nv->preserve_vidmem_allocations && !is_procfs_suspend)
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
            "PreserveVideoMemoryAllocations module parameter is set. "
            "System Power Management attempted without driver procfs suspend interface. "
            "Please refer to the 'Configuring Power Management Support' section in the driver README.\n");
        status = NV_ERR_NOT_SUPPORTED;
        goto done;
    }

    nvidia_modeset_suspend(nv->gpu_id);

    status = nv_power_management(nv, pm_action);

    if (status != NV_OK)
    {
        nvidia_modeset_resume(nv->gpu_id);
        goto done;
    }
    else
    {
        nv->flags |= NV_FLAG_SUSPENDED;
    }

pci_pm:
    /*
     * Check if the PCI power state should be D0 during system suspend. The
     * PCI PM core will change the power state only if the driver has not
     * saved the state in its suspend callback.
     */
    if ((nv->d0_state_in_suspend) && (pci_dev != NULL) &&
        !is_procfs_suspend && (pm_action == NV_PM_ACTION_STANDBY))
    {
        pci_save_state(pci_dev);
    }

done:
    up(&nvl->ldata_lock);

    return status;
}

static NV_STATUS
nvidia_resume(
    struct device *dev,
    nv_pm_action_t pm_action
)
{
    NV_STATUS status = NV_OK;
    struct pci_dev *pci_dev;
    nv_linux_state_t *nvl;
    nv_state_t *nv;

    if (dev_is_pci(dev))
    {
        pci_dev = to_pci_dev(dev);
        nvl = pci_get_drvdata(pci_dev);
    }
    else
    {
        nvl = dev_get_drvdata(dev);
    }
    nv = NV_STATE_PTR(nvl);

    down(&nvl->ldata_lock);

    if ((nv->flags & NV_FLAG_SUSPENDED) == 0)
    {
        goto done;
    }

    if (nvl->suspend_count != 0)
    {
        nvl->suspend_count--;
    }
    else
    {
        status = nv_power_management(nv, pm_action);

        if (status == NV_OK)
        {
            nvidia_modeset_resume(nv->gpu_id);
            nv->flags &= ~NV_FLAG_SUSPENDED;
        }
    }

done:
    up(&nvl->ldata_lock);

    return status;
}
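
/*
 * Note on nesting (added commentary): nvidia_suspend()/nvidia_resume()
 * refcount overlapping suspend requests via nvl->suspend_count. For example,
 * if the device was already suspended through the procfs interface and the
 * kernel then issues a system-wide freeze, the second suspend only bumps
 * suspend_count; the matching resume drops it back without touching the
 * hardware, and only the final resume performs the actual
 * nv_power_management() transition.
 */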

static NV_STATUS
nv_resume_devices(
    nv_pm_action_t pm_action,
    nv_pm_action_depth_t pm_action_depth
)
{
    nv_linux_state_t *nvl;
    NvBool resume_devices = NV_TRUE;
    NV_STATUS status;

    if (pm_action_depth == NV_PM_ACTION_DEPTH_MODESET)
    {
        goto resume_modeset;
    }

    if (pm_action_depth == NV_PM_ACTION_DEPTH_UVM)
    {
        resume_devices = NV_FALSE;
    }

    LOCK_NV_LINUX_DEVICES();

    for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
    {
        if (resume_devices)
        {
            status = nvidia_resume(nvl->dev, pm_action);
            WARN_ON(status != NV_OK);
        }
    }

    UNLOCK_NV_LINUX_DEVICES();

    status = nv_uvm_resume();
    WARN_ON(status != NV_OK);

    LOCK_NV_LINUX_DEVICES();

    for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
    {
        status = nv_restore_user_channels(NV_STATE_PTR(nvl));
        WARN_ON(status != NV_OK);
    }

    UNLOCK_NV_LINUX_DEVICES();

resume_modeset:
    nvidia_modeset_resume(0);

    return NV_OK;
}

static NV_STATUS
nv_suspend_devices(
    nv_pm_action_t pm_action,
    nv_pm_action_depth_t pm_action_depth
)
{
    nv_linux_state_t *nvl;
    NvBool resume_devices = NV_FALSE;
    NV_STATUS status = NV_OK;

    nvidia_modeset_suspend(0);

    if (pm_action_depth == NV_PM_ACTION_DEPTH_MODESET)
    {
        return NV_OK;
    }

    LOCK_NV_LINUX_DEVICES();

    for (nvl = nv_linux_devices; nvl != NULL && status == NV_OK; nvl = nvl->next)
    {
        status = nv_preempt_user_channels(NV_STATE_PTR(nvl));
        WARN_ON(status != NV_OK);
    }

    UNLOCK_NV_LINUX_DEVICES();

    if (status == NV_OK)
    {
        status = nv_uvm_suspend();
        WARN_ON(status != NV_OK);
    }
    if (status != NV_OK)
    {
        goto done;
    }

    if (pm_action_depth == NV_PM_ACTION_DEPTH_UVM)
    {
        return NV_OK;
    }

    LOCK_NV_LINUX_DEVICES();

    for (nvl = nv_linux_devices; nvl != NULL && status == NV_OK; nvl = nvl->next)
    {
        status = nvidia_suspend(nvl->dev, pm_action, NV_TRUE);
        WARN_ON(status != NV_OK);
    }
    if (status != NV_OK)
    {
        resume_devices = NV_TRUE;
    }

    UNLOCK_NV_LINUX_DEVICES();

done:
    if (status != NV_OK)
    {
        LOCK_NV_LINUX_DEVICES();

        for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
        {
            if (resume_devices)
            {
                nvidia_resume(nvl->dev, pm_action);
            }

            nv_restore_user_channels(NV_STATE_PTR(nvl));
        }

        UNLOCK_NV_LINUX_DEVICES();
    }

    return status;
}

NV_STATUS
nv_set_system_power_state(
    nv_power_state_t power_state,
    nv_pm_action_depth_t pm_action_depth
)
{
    NV_STATUS status;
    nv_pm_action_t pm_action;

    switch (power_state)
    {
        case NV_POWER_STATE_IN_HIBERNATE:
            pm_action = NV_PM_ACTION_HIBERNATE;
            break;
        case NV_POWER_STATE_IN_STANDBY:
            pm_action = NV_PM_ACTION_STANDBY;
            break;
        case NV_POWER_STATE_RUNNING:
            pm_action = NV_PM_ACTION_RESUME;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    down(&nv_system_power_state_lock);

    if (nv_system_power_state == power_state)
    {
        status = NV_OK;
        goto done;
    }

    if (power_state == NV_POWER_STATE_RUNNING)
    {
        status = nv_resume_devices(pm_action, nv_system_pm_action_depth);
        up_write(&nv_system_pm_lock);
    }
    else
    {
        if (nv_system_power_state != NV_POWER_STATE_RUNNING)
        {
            status = NV_ERR_INVALID_ARGUMENT;
            goto done;
        }

        nv_system_pm_action_depth = pm_action_depth;

        down_write(&nv_system_pm_lock);
        status = nv_suspend_devices(pm_action, nv_system_pm_action_depth);
        if (status != NV_OK)
        {
            up_write(&nv_system_pm_lock);
            goto done;
        }
    }

    nv_system_power_state = power_state;

done:
    up(&nv_system_power_state_lock);

    return status;
}
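
/*
 * Illustrative sketch (not part of the driver): the state transitions the
 * checks above permit. Suspend is only legal from RUNNING, a redundant
 * request is a no-op, and nv_system_pm_lock is held for writing across the
 * whole suspended interval. NV_PM_ACTION_DEPTH_MODESET (which suspends only
 * the modeset layer) is used here purely for illustration.
 */
#if 0
static void example_transitions(void)
{
    /* RUNNING -> IN_STANDBY: takes nv_system_pm_lock for writing */
    nv_set_system_power_state(NV_POWER_STATE_IN_STANDBY,
                              NV_PM_ACTION_DEPTH_MODESET);

    /* IN_STANDBY -> IN_HIBERNATE would fail: current state is not RUNNING */

    /* IN_STANDBY -> RUNNING: resumes devices, releases the lock */
    nv_set_system_power_state(NV_POWER_STATE_RUNNING,
                              NV_PM_ACTION_DEPTH_MODESET);
}
#endif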

int nv_pmops_suspend(
    struct device *dev
)
{
    NV_STATUS status;

    status = nvidia_suspend(dev, NV_PM_ACTION_STANDBY, NV_FALSE);
    return (status == NV_OK) ? 0 : -EIO;
}

int nv_pmops_resume(
    struct device *dev
)
{
    NV_STATUS status;

    status = nvidia_resume(dev, NV_PM_ACTION_RESUME);
    return (status == NV_OK) ? 0 : -EIO;
}

int nv_pmops_freeze(
    struct device *dev
)
{
    NV_STATUS status;

    status = nvidia_suspend(dev, NV_PM_ACTION_HIBERNATE, NV_FALSE);
    return (status == NV_OK) ? 0 : -EIO;
}

int nv_pmops_thaw(
    struct device *dev
)
{
    return 0;
}

int nv_pmops_restore(
    struct device *dev
)
{
    NV_STATUS status;

    status = nvidia_resume(dev, NV_PM_ACTION_RESUME);
    return (status == NV_OK) ? 0 : -EIO;
}

int nv_pmops_poweroff(
    struct device *dev
)
{
    return 0;
}

static int
nvidia_transition_dynamic_power(
    struct device *dev,
    NvBool enter
)
{
    struct pci_dev *pci_dev = to_pci_dev(dev);
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    nvidia_stack_t *sp = NULL;
    NvBool bTryAgain = NV_FALSE;
    NV_STATUS status;

    if ((nv->flags & (NV_FLAG_OPEN | NV_FLAG_PERSISTENT_SW_STATE)) == 0)
    {
        return 0;
    }

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return -ENOMEM;
    }

    status = rm_transition_dynamic_power(sp, nv, enter, &bTryAgain);

    nv_kmem_cache_free_stack(sp);

    if (bTryAgain)
    {
        /*
         * Return -EAGAIN so that the kernel PM core does not treat this as
         * a fatal error and will reschedule the callback in the future.
         */
        return -EAGAIN;
    }

    return (status == NV_OK) ? 0 : -EIO;
}

int nv_pmops_runtime_suspend(
    struct device *dev
)
{
    return nvidia_transition_dynamic_power(dev, NV_TRUE);
}

int nv_pmops_runtime_resume(
    struct device *dev
)
{
    return nvidia_transition_dynamic_power(dev, NV_FALSE);
}
#endif /* defined(CONFIG_PM) */

nv_state_t* NV_API_CALL nv_get_adapter_state(
    NvU32 domain,
    NvU8 bus,
    NvU8 slot
)
{
    nv_linux_state_t *nvl;

    LOCK_NV_LINUX_DEVICES();
    for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
    {
        nv_state_t *nv = NV_STATE_PTR(nvl);
        if (nv->pci_info.domain == domain && nv->pci_info.bus == bus
            && nv->pci_info.slot == slot)
        {
            UNLOCK_NV_LINUX_DEVICES();
            return nv;
        }
    }
    UNLOCK_NV_LINUX_DEVICES();

    return NULL;
}

nv_state_t* NV_API_CALL nv_get_ctl_state(void)
{
    return NV_STATE_PTR(&nv_ctl_device);
}

NV_STATUS NV_API_CALL nv_log_error(
    nv_state_t *nv,
    NvU32 error_number,
    const char *format,
    va_list ap
)
{
    NV_STATUS status = NV_OK;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    nv_report_error(nvl->pci_dev, error_number, format, ap);
#if defined(CONFIG_CRAY_XT)
    status = nvos_forward_error_to_cray(nvl->pci_dev, error_number,
                                        format, ap);
#endif

    return status;
}

NvU64 NV_API_CALL nv_get_dma_start_address(
    nv_state_t *nv
)
{
#if defined(NVCPU_PPC64LE)
    struct pci_dev *pci_dev;
    dma_addr_t dma_addr;
    NvU64 saved_dma_mask;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    /*
     * If TCE bypass is disabled via a module parameter, then just return
     * the default (which is 0).
     *
     * Otherwise, the DMA start address only needs to be set once, and it
     * won't change afterward. Just return the cached value if asked again,
     * to avoid the kernel printing redundant messages to the kernel
     * log when we call pci_set_dma_mask().
     */
    if ((nv_tce_bypass_mode == NV_TCE_BYPASS_MODE_DISABLE) ||
        (nvl->tce_bypass_enabled))
    {
        return nvl->dma_dev.addressable_range.start;
    }

    pci_dev = nvl->pci_dev;

    /*
     * Linux on IBM POWER8 offers two different DMA set-ups, sometimes
     * referred to as "windows".
     *
     * The "default window" provides a 2GB region of PCI address space
     * located below the 32-bit line. The IOMMU is used to provide a
     * "rich" mapping--any page in system memory can be mapped at an
     * arbitrary address within this window. The mappings are dynamic
     * and come and go as pci_map*()/pci_unmap*() calls are made.
     *
     * Dynamic DMA Windows (sometimes "Huge DDW") provide a linear
     * mapping of the system's entire physical address space at some
     * fixed offset above the 59-bit line. The IOMMU is still used, and
     * pci_map*()/pci_unmap*() are still required, but mappings are
     * static. They're effectively set up in advance, and any given
     * system page will always map to the same PCI bus address. I.e.
     * physical 0x00000000xxxxxxxx => PCI 0x08000000xxxxxxxx
     *
     * This driver does not support the 2GB default window because
     * of its limited size, and for reasons having to do with UVM.
     *
     * Linux on POWER8 will only provide the DDW-style full linear
     * mapping when the driver claims support for 64-bit DMA addressing
     * (a prerequisite because the PCI addresses used in this case will
     * be near the top of the 64-bit range). The linear mapping
     * is not available in all system configurations.
     *
     * Detect whether the linear mapping is present by claiming
     * 64-bit support and then mapping physical page 0. For historical
     * reasons, Linux on POWER8 will never map a page to PCI address 0x0.
     * In the "default window" case page 0 will be mapped to some
     * non-zero address below the 32-bit line. In the
     * DDW/linear-mapping case, it will be mapped to address 0 plus
     * some high-order offset.
     *
     * If the linear mapping is present and sane then return the offset
     * as the starting address for all DMA mappings.
     */
    saved_dma_mask = pci_dev->dma_mask;
    if (dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)) != 0)
    {
        goto done;
    }

    dma_addr = dma_map_single(&pci_dev->dev, NULL, 1, DMA_BIDIRECTIONAL);
    if (dma_mapping_error(&pci_dev->dev, dma_addr))
    {
        dma_set_mask(&pci_dev->dev, saved_dma_mask);
        goto done;
    }

    dma_unmap_single(&pci_dev->dev, dma_addr, 1, DMA_BIDIRECTIONAL);

    /*
     * From IBM: "For IODA2, native DMA bypass or KVM TCE-based implementation
     * of full 64-bit DMA support will establish a window in address-space
     * with the high 14 bits being constant and the bottom up-to-50 bits
     * varying with the mapping."
     *
     * Unfortunately, we don't have any good interfaces or definitions from
     * the kernel to get information about the DMA offset assigned by the OS.
     * However, we have been told that the offset will be defined by the top
     * 14 bits of the address, and bits 40-49 will not vary for any DMA
     * mappings until 1TB of system memory is surpassed; this limitation is
     * essential for us to function properly since our current GPUs only
     * support 40 physical address bits. We are in a fragile place where we
     * need to tell the OS that we're capable of 64-bit addressing, while
     * relying on the assumption that the top 24 bits will not vary in this
     * case.
     *
     * The way we try to compute the window, then, is to mask the trial
     * mapping against the DMA capabilities of the device. That way, devices
     * with greater addressing capabilities will only take the bits they need
     * to define the window.
     */
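    /*
     * Worked example (added commentary, hypothetical numbers): for a GPU
     * whose DMA mask is DMA_BIT_MASK(40) and a trial mapping of
     * dma_addr = 0x0800000000000000, the low 32 bits are zero (so the DDW
     * path below is taken) and the window start computed at the bottom of
     * this function is dma_addr & ~DMA_BIT_MASK(40) = 0x0800000000000000,
     * i.e. bits 63:40 carry the fixed DDW offset while bits 39:0 remain
     * available for per-mapping variation.
     */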
    if ((dma_addr & DMA_BIT_MASK(32)) != 0)
    {
        /*
         * Huge DDW not available - page 0 mapped to non-zero address below
         * the 32-bit line.
         */
        nv_printf(NV_DBG_WARNINGS,
            "NVRM: DMA window limited by platform\n");
        dma_set_mask(&pci_dev->dev, saved_dma_mask);
        goto done;
    }
    else if ((dma_addr & saved_dma_mask) != 0)
    {
        NvU64 memory_size = NV_NUM_PHYSPAGES * PAGE_SIZE;
        if ((dma_addr & ~saved_dma_mask) !=
            ((dma_addr + memory_size) & ~saved_dma_mask))
        {
            /*
             * The physical window straddles our addressing limit boundary,
             * e.g., for an adapter that can address up to 1TB, the window
             * crosses the 40-bit limit so that the lower end of the range
             * has different bits 63:40 than the higher end of the range.
             * We can only handle a single, static value for bits 63:40, so
             * we must fall back here.
             */
            nv_printf(NV_DBG_WARNINGS,
                "NVRM: DMA window limited by memory size\n");
            dma_set_mask(&pci_dev->dev, saved_dma_mask);
            goto done;
        }
    }

    nvl->tce_bypass_enabled = NV_TRUE;
    nvl->dma_dev.addressable_range.start = dma_addr & ~(saved_dma_mask);

    /* Update the coherent mask to match */
    dma_set_coherent_mask(&pci_dev->dev, pci_dev->dma_mask);

done:
    return nvl->dma_dev.addressable_range.start;
#else
    return 0;
#endif
}

NV_STATUS NV_API_CALL nv_set_primary_vga_status(
    nv_state_t *nv
)
{
    /* IORESOURCE_ROM_SHADOW wasn't added until 2.6.10 */
#if defined(IORESOURCE_ROM_SHADOW)
    nv_linux_state_t *nvl;
    struct pci_dev *pci_dev;

    nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    pci_dev = nvl->pci_dev;

    nv->primary_vga = ((NV_PCI_RESOURCE_FLAGS(pci_dev, PCI_ROM_RESOURCE) &
                        IORESOURCE_ROM_SHADOW) == IORESOURCE_ROM_SHADOW);
    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL nv_pci_trigger_recovery(
    nv_state_t *nv
)
{
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;
#if defined(NV_PCI_ERROR_RECOVERY)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    /*
     * Calling readl() on PPC64LE will allow the kernel to check its state for
     * the device and update it accordingly. This needs to be done before
     * checking if the PCI channel is offline, so that we don't check stale
     * state.
     *
     * This will also kick off the recovery process for the device.
     */
    if (NV_PCI_ERROR_RECOVERY_ENABLED())
    {
        if (readl(nv->regs->map) == 0xFFFFFFFF)
        {
            if (pci_channel_offline(nvl->pci_dev))
            {
                NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
                    "PCI channel for the device is offline\n");
                status = NV_OK;
            }
        }
    }
#endif
    return status;
}

NvBool NV_API_CALL nv_requires_dma_remap(
    nv_state_t *nv
)
{
    NvBool dma_remap = NV_FALSE;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    dma_remap = !nv_dma_maps_swiotlb(nvl->dev);
    return dma_remap;
}

/*
 * Intended for use by external kernel modules to list nvidia gpu ids.
 */
NvBool nvidia_get_gpuid_list(NvU32 *gpu_ids, NvU32 *gpu_count)
{
    nv_linux_state_t *nvl;
    unsigned int count;
    NvBool ret = NV_TRUE;

    LOCK_NV_LINUX_DEVICES();

    count = 0;
    for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
        count++;

    if (*gpu_count == 0)
    {
        goto done;
    }
    else if ((*gpu_count) < count)
    {
        ret = NV_FALSE;
        goto done;
    }

    count = 0;
    for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
    {
        nv_state_t *nv = NV_STATE_PTR(nvl);
        gpu_ids[count++] = nv->gpu_id;
    }

done:
    *gpu_count = count;

    UNLOCK_NV_LINUX_DEVICES();

    return ret;
}
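
/*
 * Illustrative sketch (not part of the driver): the two-call pattern the
 * interface above supports. A first call with *gpu_count == 0 only reports
 * the number of devices; the caller then sizes a buffer and calls again.
 * The caller below is hypothetical.
 */
#if 0
static int example_enumerate_gpus(void)
{
    NvU32 count = 0;
    NvU32 *ids;

    nvidia_get_gpuid_list(NULL, &count);    /* query: count is 0 on entry */
    if (count == 0)
        return 0;

    ids = kcalloc(count, sizeof(*ids), GFP_KERNEL);
    if (ids == NULL)
        return -ENOMEM;

    /* returns NV_FALSE if devices were hot-added in between */
    if (!nvidia_get_gpuid_list(ids, &count))
    {
        kfree(ids);
        return -EAGAIN;
    }

    /* ... use ids[0..count-1] ... */
    kfree(ids);
    return 0;
}
#endif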

/*
 * Kernel-level analog to nvidia_open, intended for use by external
 * kernel modules. This increments the ref count of the device with
 * the given gpu_id and makes sure the device has been initialized.
 *
 * Clients of this interface are counted by the RM reset path, to ensure a
 * GPU is not reset while the GPU is active.
 *
 * Returns -ENODEV if the given gpu_id does not exist.
 */
int nvidia_dev_get(NvU32 gpu_id, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;
    int rc;

    /* Takes nvl->ldata_lock */
    nvl = find_gpu_id(gpu_id);
    if (!nvl)
        return -ENODEV;

    rc = nv_open_device(NV_STATE_PTR(nvl), sp);

    if (rc == 0)
        WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_TRUE) != NV_OK);

    up(&nvl->ldata_lock);
    return rc;
}

/*
 * Kernel-level analog to nvidia_close, intended for use by external
 * kernel modules. This decrements the ref count of the device with
 * the given gpu_id, potentially tearing it down.
 */
void nvidia_dev_put(NvU32 gpu_id, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;

    /* Takes nvl->ldata_lock */
    nvl = find_gpu_id(gpu_id);
    if (!nvl)
        return;

    nv_close_device(NV_STATE_PTR(nvl), sp);

    WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_FALSE) != NV_OK);

    up(&nvl->ldata_lock);
}
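
/*
 * Illustrative sketch (not part of the driver): how an external module is
 * expected to bracket GPU use with the get/put pair above. The caller is
 * hypothetical; the stack helpers are the same ones used throughout this
 * file.
 */
#if 0
static int example_use_gpu(NvU32 gpu_id)
{
    nvidia_stack_t *sp = NULL;
    int rc;

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
        return -ENOMEM;

    rc = nvidia_dev_get(gpu_id, sp);    /* refcount up, device initialized */
    if (rc == 0)
    {
        /* ... access the GPU; it cannot be reset while held ... */
        nvidia_dev_put(gpu_id, sp);     /* refcount down, may tear down */
    }

    nv_kmem_cache_free_stack(sp);
    return rc;
}
#endif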

/*
 * Like nvidia_dev_get but uses UUID instead of gpu_id. Note that this may
 * trigger initialization and teardown of unrelated devices to look up their
 * UUIDs.
 *
 * Clients of this interface are counted by the RM reset path, to ensure a
 * GPU is not reset while the GPU is active.
 */
int nvidia_dev_get_uuid(const NvU8 *uuid, nvidia_stack_t *sp)
{
    nv_state_t *nv = NULL;
    nv_linux_state_t *nvl = NULL;
    const NvU8 *dev_uuid;
    int rc = 0;

    /* Takes nvl->ldata_lock */
    nvl = find_uuid_candidate(uuid);
    while (nvl)
    {
        nv = NV_STATE_PTR(nvl);

        /*
         * If the device is missing its UUID, this call exists solely so
         * rm_get_gpu_uuid_raw will be called and we can inspect the UUID.
         */
        rc = nv_open_device(nv, sp);
        if (rc != 0)
            goto out;

        /* The UUID should always be present following nv_open_device */
        dev_uuid = nv_get_cached_uuid(nv);
        WARN_ON(!dev_uuid);
        if (dev_uuid && memcmp(dev_uuid, uuid, GPU_UUID_LEN) == 0)
            break;

        /* No match, try again. */
        nv_close_device(nv, sp);
        up(&nvl->ldata_lock);
        nvl = find_uuid_candidate(uuid);
    }

    if (nvl)
    {
        rc = 0;
        WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_TRUE) != NV_OK);
    }
    else
        rc = -ENODEV;

out:
    if (nvl)
        up(&nvl->ldata_lock);
    return rc;
}

/*
 * Like nvidia_dev_put but uses UUID instead of gpu_id.
 */
void nvidia_dev_put_uuid(const NvU8 *uuid, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;

    /* Callers must already have called nvidia_dev_get_uuid() */

    /* Takes nvl->ldata_lock */
    nvl = find_uuid(uuid);
    if (!nvl)
        return;

    nv_close_device(NV_STATE_PTR(nvl), sp);

    WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_FALSE) != NV_OK);

    up(&nvl->ldata_lock);
}

int nvidia_dev_block_gc6(const NvU8 *uuid, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;

    /* Callers must already have called nvidia_dev_get_uuid() */

    /* Takes nvl->ldata_lock */
    nvl = find_uuid(uuid);
    if (!nvl)
        return -ENODEV;

    if (rm_ref_dynamic_power(sp, NV_STATE_PTR(nvl), NV_DYNAMIC_PM_FINE) != NV_OK)
    {
        up(&nvl->ldata_lock);
        return -EINVAL;
    }

    up(&nvl->ldata_lock);

    return 0;
}

int nvidia_dev_unblock_gc6(const NvU8 *uuid, nvidia_stack_t *sp)
{
    nv_linux_state_t *nvl;

    /* Callers must already have called nvidia_dev_get_uuid() */

    /* Takes nvl->ldata_lock */
    nvl = find_uuid(uuid);
    if (!nvl)
        return -ENODEV;

    rm_unref_dynamic_power(sp, NV_STATE_PTR(nvl), NV_DYNAMIC_PM_FINE);

    up(&nvl->ldata_lock);

    return 0;
}

NV_STATUS NV_API_CALL nv_get_device_memory_config(
    nv_state_t *nv,
    NvU64 *compr_addr_sys_phys,
    NvU64 *addr_guest_phys,
    NvU64 *rsvd_phys,
    NvU32 *addr_width,
    NvS32 *node_id
)
{
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;

#if defined(NVCPU_PPC64LE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    if (!nv_platform_supports_numa(nvl))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    if (node_id != NULL)
    {
        *node_id = nvl->numa_info.node_id;
    }

    {
        nv_npu_numa_info_t *numa_info;

        numa_info = &nvl->npu->numa_info;

        if (compr_addr_sys_phys != NULL)
        {
            *compr_addr_sys_phys =
                numa_info->compr_sys_phys_addr;
        }

        if (addr_guest_phys != NULL)
        {
            *addr_guest_phys =
                numa_info->guest_phys_addr;
        }
    }

    if (addr_width != NULL)
    {
        *addr_width = nv_volta_dma_addr_size - nv_volta_addr_space_width;
    }

    status = NV_OK;
#endif
#if defined(NVCPU_AARCH64)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    if (node_id != NULL)
    {
        *node_id = nvl->numa_info.node_id;
    }

    if (compr_addr_sys_phys)
    {
        *compr_addr_sys_phys = nvl->coherent_link_info.gpu_mem_pa;
    }
    if (addr_guest_phys)
    {
        *addr_guest_phys = nvl->coherent_link_info.gpu_mem_pa;
    }
    if (rsvd_phys)
    {
        *rsvd_phys = nvl->coherent_link_info.rsvd_mem_pa;
    }
    if (addr_width)
    {
        // TH500 PA width - NV_PFB_PRI_MMU_ATS_ADDR_RANGE_GRANULARITY
        *addr_width = 48 - 37;
    }

    status = NV_OK;
#endif

    return status;
}

#if defined(NVCPU_PPC64LE)

NV_STATUS NV_API_CALL nv_get_nvlink_line_rate(
    nv_state_t *nvState,
    NvU32 *linerate
)
{
#if defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)

    nv_linux_state_t *nvl;
    struct pci_dev *npuDev;
    NvU32 *pSpeedPtr = NULL;
    NvU32 speed;
    int len;

    if (nvState != NULL)
        nvl = NV_GET_NVL_FROM_NV_STATE(nvState);
    else
        return NV_ERR_INVALID_ARGUMENT;

    if (!nvl->npu)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    npuDev = nvl->npu->devs[0];
    if (!npuDev->dev.of_node)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: %s: OF Node not found in IBM-NPU device node\n",
                  __FUNCTION__);
        return NV_ERR_NOT_SUPPORTED;
    }

    pSpeedPtr = (NvU32 *) of_get_property(npuDev->dev.of_node, "ibm,nvlink-speed", &len);

    if (pSpeedPtr)
    {
        speed = (NvU32) be32_to_cpup(pSpeedPtr);
    }
    else
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    if (!speed)
    {
        return NV_ERR_NOT_SUPPORTED;
    }
    else
    {
        *linerate = speed;
    }

    return NV_OK;

#endif

    return NV_ERR_NOT_SUPPORTED;
}

#endif

NV_STATUS NV_API_CALL nv_indicate_idle(
    nv_state_t *nv
)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;
    struct file *file = nvl->sysfs_config_file;
    loff_t f_pos = 0;
    char buf;

    pm_runtime_put_noidle(dev);

#if defined(NV_SEQ_READ_ITER_PRESENT)
    {
        struct kernfs_open_file *of = ((struct seq_file *)file->private_data)->private;
        struct kernfs_node *kn;

        mutex_lock(&of->mutex);
        kn = of->kn;
        if (kn != NULL && atomic_inc_unless_negative(&kn->active))
        {
            if ((kn->attr.ops != NULL) && (kn->attr.ops->read != NULL))
            {
                kn->attr.ops->read(of, &buf, 1, f_pos);
            }
            atomic_dec(&kn->active);
        }
        mutex_unlock(&of->mutex);
    }
#else
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
    kernel_read(file, &buf, 1, &f_pos);
#else
    kernel_read(file, f_pos, &buf, 1);
#endif
#endif

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

NV_STATUS NV_API_CALL nv_indicate_not_idle(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    pm_runtime_get_noresume(dev);

    nvl->is_forced_shutdown = NV_TRUE;
    pci_bus_type.shutdown(dev);

    return NV_OK;
#else
    return NV_ERR_NOT_SUPPORTED;
#endif
}

void NV_API_CALL nv_idle_holdoff(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    pm_runtime_get_noresume(dev);
#endif
}

NvBool NV_API_CALL nv_dynamic_power_available(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    return nvl->sysfs_config_file != NULL;
#else
    return NV_FALSE;
#endif
}

/* caller should hold nv_linux_devices_lock using LOCK_NV_LINUX_DEVICES */
int nv_linux_add_device_locked(nv_linux_state_t *nvl)
{
    int rc = -1;
    int i;

    // look for a free minor number and assign a unique minor number to this device
    for (i = 0; i <= NV_MINOR_DEVICE_NUMBER_REGULAR_MAX; i++)
    {
        if (nv_linux_minor_num_table[i] == NULL)
        {
            nv_linux_minor_num_table[i] = nvl;
            nvl->minor_num = i;
            rc = 0;
            break;
        }
    }

    // bail if no minor number is free
    if (rc != 0)
        return rc;

    if (nv_linux_devices == NULL)
    {
        nv_linux_devices = nvl;
    }
    else
    {
        nv_linux_state_t *tnvl;
        for (tnvl = nv_linux_devices; tnvl->next != NULL; tnvl = tnvl->next);
        tnvl->next = nvl;
    }

    return rc;
}

/* caller should hold nv_linux_devices_lock using LOCK_NV_LINUX_DEVICES */
void nv_linux_remove_device_locked(nv_linux_state_t *nvl)
{
    if (nvl == nv_linux_devices)
    {
        nv_linux_devices = nvl->next;
    }
    else
    {
        nv_linux_state_t *tnvl;
        for (tnvl = nv_linux_devices; tnvl->next != nvl; tnvl = tnvl->next);
        tnvl->next = nvl->next;
    }

    nv_linux_minor_num_table[nvl->minor_num] = NULL;
}

int nv_linux_init_open_q(nv_linux_state_t *nvl)
{
    int rc;
    rc = nv_kthread_q_init(&nvl->open_q, "nv_open_q");
    if (rc != 0)
        return rc;

    down(&nvl->open_q_lock);
    nvl->is_accepting_opens = NV_TRUE;
    up(&nvl->open_q_lock);
    return 0;
}

void nv_linux_stop_open_q(nv_linux_state_t *nvl)
{
    NvBool should_stop = NV_FALSE;

    down(&nvl->open_q_lock);
    if (nvl->is_accepting_opens)
    {
        should_stop = NV_TRUE;
        nvl->is_accepting_opens = NV_FALSE;
    }
    up(&nvl->open_q_lock);

    if (should_stop)
        nv_kthread_q_stop(&nvl->open_q);
}

void NV_API_CALL nv_control_soc_irqs(nv_state_t *nv, NvBool bEnable)
{
    int count;
    unsigned long flags;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    if (nv->current_soc_irq != -1)
        return;

    NV_SPIN_LOCK_IRQSAVE(&nvl->soc_isr_lock, flags);
    if (bEnable)
    {
        for (count = 0; count < nv->num_soc_irqs; count++)
        {
            if (nv->soc_irq_info[count].ref_count == 0)
            {
                nv->soc_irq_info[count].ref_count++;
                enable_irq(nv->soc_irq_info[count].irq_num);
            }
        }
    }
    else
    {
        for (count = 0; count < nv->num_soc_irqs; count++)
        {
            if (nv->soc_irq_info[count].ref_count == 1)
            {
                nv->soc_irq_info[count].ref_count--;
                disable_irq_nosync(nv->soc_irq_info[count].irq_num);
            }
        }
    }
    NV_SPIN_UNLOCK_IRQRESTORE(&nvl->soc_isr_lock, flags);
}

NvU32 NV_API_CALL nv_get_dev_minor(nv_state_t *nv)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    return nvl->minor_num;
}

NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap(int fd, int *duped_fd)
{
    *duped_fd = nvlink_cap_acquire(fd, NVLINK_CAP_FABRIC_MANAGEMENT);
    if (*duped_fd < 0)
    {
        return NV_ERR_INSUFFICIENT_PERMISSIONS;
    }

    return NV_OK;
}

/*
 * Wakes up the NVIDIA GPU HDA codec and controller by reading the
 * codec proc file.
 */
void NV_API_CALL nv_audio_dynamic_power(
    nv_state_t *nv
)
{
    /*
     * Runtime power management of the NVIDIA HDA controller became possible
     * with commit 07f4f97d7b4b ("vga_switcheroo: Use device link for HDA
     * controller"). That commit also moved the 'PCI_CLASS_MULTIMEDIA_HD_AUDIO'
     * macro from <sound/hdaudio.h> to <linux/pci_ids.h>.
     * If 'NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT' is not defined, then
     * this function will be a stub.
     *
     * Also, check if runtime PM is enabled in the kernel (with
     * 'NV_PM_RUNTIME_AVAILABLE') and stub this function if it is disabled.
     * This function uses kernel fields only present when the kconfig has
     * runtime PM enabled.
     */
#if defined(NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT) && defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;
    struct pci_dev *audio_pci_dev, *pci_dev;
    struct snd_card *card;

    if (!dev_is_pci(dev))
        return;

    pci_dev = to_pci_dev(dev);

    audio_pci_dev = os_pci_init_handle(NV_PCI_DOMAIN_NUMBER(pci_dev),
                                       NV_PCI_BUS_NUMBER(pci_dev),
                                       NV_PCI_SLOT_NUMBER(pci_dev),
                                       1, NULL, NULL);

    if (audio_pci_dev == NULL)
        return;

    /*
     * Check if the HDA controller is in a PM-suspended state. The HDA
     * controller cannot be runtime resumed if this API is called during
     * system suspend/resume while the HDA controller is PM-suspended.
     */
    if (audio_pci_dev->dev.power.is_suspended)
        return;

    card = pci_get_drvdata(audio_pci_dev);
    if (card == NULL)
        return;

    /*
     * Commit be57bfffb7b5 ("ALSA: hda: move hda_codec.h to include/sound")
     * in v4.20-rc1 moved the "hda_codec.h" header file from the private
     * sound folder to include/sound.
     */
#if defined(NV_SOUND_HDA_CODEC_H_PRESENT)
    {
        struct list_head *p;
        struct hda_codec *codec = NULL;
        unsigned int cmd, res;

        /*
         * Traverse the list of devices which the sound card maintains and
         * search for the HDA codec controller.
         */
        list_for_each_prev(p, &card->devices)
        {
            struct snd_device *pdev = list_entry(p, struct snd_device, list);

            if (pdev->type == SNDRV_DEV_CODEC)
            {
                codec = pdev->device_data;

                /*
                 * The NVIDIA HDA codec controller uses the Linux kernel HDA
                 * codec driver. Commit 05852448690d ("ALSA: hda - Support
                 * indirect execution of verbs") added support for overriding
                 * exec_verb. This codec->core.exec_verb will be
                 * codec_exec_verb() for the NVIDIA HDA codec driver.
                 */
                if (codec->core.exec_verb == NULL)
                {
                    return;
                }

                break;
            }
        }

        if (codec == NULL)
        {
            return;
        }

        /* If the HDA codec controller is already runtime active, then return */
        if (snd_hdac_is_power_on(&codec->core))
        {
            return;
        }

        /*
         * Encode the codec verb for getting the vendor ID from the root node.
         * Refer to the Intel High Definition Audio Specification for details.
         */
        cmd = (codec->addr << 28) | (AC_NODE_ROOT << 20) |
              (AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID;
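
        /*
         * Worked example (added commentary), assuming the standard
         * <sound/hda_verbs.h> values AC_VERB_PARAMETERS == 0xf00 and
         * AC_NODE_ROOT == AC_PAR_VENDOR_ID == 0x00: for codec address 0 the
         * encoding above yields cmd == 0x000f0000, i.e. a "get parameter:
         * vendor id" request addressed to the root node.
         */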

        /*
         * This will internally increment the runtime PM refcount,
         * wake up the audio codec controller, and send the HW
         * command for getting the vendor ID. Once the vendor ID is
         * returned, it will decrement the runtime PM refcount and
         * runtime suspend the audio codec controller again (if the
         * refcount is zero) once the autosuspend counter expires.
         */
        codec->core.exec_verb(&codec->core, cmd, 0, &res);
    }
#else
    {
        int codec_addr;

        /*
         * The filp_open() call below depends on the current task's fs_struct
         * (current->fs), which may already be NULL if this is called during
         * process teardown.
         */
        if (current->fs == NULL)
            return;

        /* If the device is runtime active, then return */
        if (audio_pci_dev->dev.power.runtime_status == RPM_ACTIVE)
            return;

        for (codec_addr = 0; codec_addr < NV_HDA_MAX_CODECS; codec_addr++)
        {
            char filename[48];
            NvU8 buf;
            int ret;

            ret = snprintf(filename, sizeof(filename),
                           "/proc/asound/card%d/codec#%d",
                           card->number, codec_addr);

            if (ret > 0 && ret < sizeof(filename) &&
                (os_open_and_read_file(filename, &buf, 1) == NV_OK))
            {
                break;
            }
        }
    }
#endif
#endif
}

static int nv_match_dev_state(const void *data, struct file *filp, unsigned fd)
{
    nv_linux_state_t *nvl = NULL;

    if (filp == NULL ||
        filp->f_op != &nvidia_fops ||
        filp->private_data == NULL)
        return 0;

    nvl = NV_GET_NVL_FROM_FILEP(filp);
    if (nvl == NULL)
        return 0;

    return (data == nvl);
}

NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *nv, void *os_info)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    return nv_match_dev_state(nvl, os_info, -1);
}

NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *nv)
{
    struct files_struct *files = current->files;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    return !!iterate_fd(files, 0, nv_match_dev_state, nvl);
}

NvBool NV_API_CALL nv_platform_supports_s0ix(void)
{
#if defined(CONFIG_ACPI)
    return (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) != 0;
#else
    return NV_FALSE;
#endif
}

NvBool NV_API_CALL nv_s2idle_pm_configured(void)
{
    NvU8 buf[8];

#if defined(NV_SEQ_READ_ITER_PRESENT)
    struct file *file;
    ssize_t num_read;
    struct kiocb kiocb;
    struct iov_iter iter;
    struct kvec iov = {
        .iov_base = &buf,
        .iov_len = sizeof(buf),
    };

    if (os_open_readonly_file("/sys/power/mem_sleep", (void **)&file) != NV_OK)
    {
        return NV_FALSE;
    }

    /*
     * init_sync_kiocb() internally uses the GPL licensed
     * __get_task_ioprio() starting with v5.20-rc1.
     */
#if defined(NV_GET_TASK_IOPRIO_PRESENT)
    memset(&kiocb, 0, sizeof(kiocb));
    kiocb.ki_filp = file;
    kiocb.ki_flags = iocb_flags(file);
    kiocb.ki_ioprio = IOPRIO_DEFAULT;
#else
    init_sync_kiocb(&kiocb, file);
#endif

    kiocb.ki_pos = 0;
    iov_iter_kvec(&iter, READ, &iov, 1, sizeof(buf));

    num_read = seq_read_iter(&kiocb, &iter);

    os_close_file((void *)file);

    if (num_read != sizeof(buf))
    {
        return NV_FALSE;
    }
#else
    if (os_open_and_read_file("/sys/power/mem_sleep", buf,
                              sizeof(buf)) != NV_OK)
    {
        return NV_FALSE;
    }
#endif

    return (memcmp(buf, "[s2idle]", 8) == 0);
}
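
/*
 * For reference (added commentary): /sys/power/mem_sleep lists the supported
 * suspend modes with the active one in brackets, e.g. "[s2idle] deep" when
 * suspend-to-idle is selected versus "s2idle [deep]" when S3 is selected.
 * The memcmp() above therefore only matches when s2idle is the configured
 * default, since the bracketed token must appear in the first eight bytes.
 */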

/*
 * Queries the system chassis info to determine whether the platform is a
 * laptop or notebook.
 * This function should be used when querying GPU form factor information is
 * not possible via core RM, or when both system and GPU form factor
 * information are needed.
 */
NvBool NV_API_CALL nv_is_chassis_notebook(void)
{
    const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);

    //
    // Return true only for Laptop & Notebook.
    // As per the SMBIOS spec, Laptop = 9 and Notebook = 10.
    //
    return (chassis_type && (!strcmp(chassis_type, "9") || !strcmp(chassis_type, "10")));
}

void NV_API_CALL nv_allow_runtime_suspend
(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    spin_lock_irq(&dev->power.lock);

    if (dev->power.runtime_auto == false)
    {
        dev->power.runtime_auto = true;
        atomic_add_unless(&dev->power.usage_count, -1, 0);
    }

    spin_unlock_irq(&dev->power.lock);
#endif
}

void NV_API_CALL nv_disallow_runtime_suspend
(
    nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    struct device *dev = nvl->dev;

    spin_lock_irq(&dev->power.lock);

    if (dev->power.runtime_auto == true)
    {
        dev->power.runtime_auto = false;
        atomic_inc(&dev->power.usage_count);
    }

    spin_unlock_irq(&dev->power.lock);
#endif
}

NvU32 NV_API_CALL nv_get_os_type(void)
{
    return OS_TYPE_LINUX;
}

void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size)
{
#if NVCPU_IS_PPC64LE
    return nv_ibmnpu_cache_flush_range(nv, cpu_virtual, size);
#elif NVCPU_IS_AARCH64
    NvU64 va, cbsize;
    NvU64 end_cpu_virtual = cpu_virtual + size;

    nv_printf(NV_DBG_INFO,
        "Flushing CPU virtual range [0x%llx, 0x%llx)\n",
        cpu_virtual, end_cpu_virtual);

    cbsize = cache_line_size();
    // Align the address to the cache line size
    cpu_virtual = NV_ALIGN_UP(cpu_virtual, cbsize);

    // Force eviction of any cache lines from the NUMA-onlined region.
    for (va = cpu_virtual; va < end_cpu_virtual; va += cbsize)
    {
        asm volatile("dc civac, %0" : : "r" (va) : "memory");
        // Reschedule if necessary to avoid lockup warnings
        cond_resched();
    }
    asm volatile("dsb sy" : : : "memory");
#endif
}

static struct resource *nv_next_resource(struct resource *p)
{
    if (p->child != NULL)
        return p->child;

    while ((p->sibling == NULL) && (p->parent != NULL))
        p = p->parent;

    return p->sibling;
}
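
/*
 * Descriptive note (added commentary): nv_next_resource() walks the iomem
 * resource tree in pre-order -- children first, then siblings, climbing back
 * up when both run out. For a tree
 *
 *   A
 *   +-- B
 *   |   +-- C
 *   +-- D
 *
 * repeated calls starting from A visit B, C, D in that order, which is why
 * the loop in nv_get_updated_emu_seg() below sees every memory window,
 * including nested ones.
 */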

/*
 * Gets the correct PCI bus memory window which can be mapped in the real
 * mode emulator (emu). This is called during initialization of the emu,
 * before the window is remapped for the OS.
 */
void NV_API_CALL nv_get_updated_emu_seg(
    NvU32 *start,
    NvU32 *end
)
{
    struct resource *p;

    if (*start >= *end)
        return;

    for (p = iomem_resource.child; (p != NULL); p = nv_next_resource(p))
    {
        /* If we passed the resource we are looking for, stop */
        if (p->start > *end)
        {
            p = NULL;
            break;
        }

        /* Skip until we find a range that matches what we look for */
        if (p->end < *start)
            continue;

        if ((p->end > *end) && (p->child))
            continue;

        if ((p->flags & IORESOURCE_MEM) != IORESOURCE_MEM)
            continue;

        /* Found a match, break */
        break;
    }

    if (p != NULL)
    {
        *start = max((resource_size_t)*start, p->start);
        *end = min((resource_size_t)*end, p->end);
    }
}
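
/*
 * Illustrative example (added commentary, hypothetical numbers): if the
 * emulator asks for the segment [0x000A0000, 0x000BFFFF] and the matching
 * IORESOURCE_MEM entry in the iomem tree spans [0x000A0000, 0x000B7FFF],
 * the clamping above shrinks *end to 0x000B7FFF so the emulator only maps
 * memory the platform actually decodes.
 */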

NV_STATUS NV_API_CALL nv_get_egm_info(
    nv_state_t *nv,
    NvU64 *phys_addr,
    NvU64 *size,
    NvS32 *egm_node_id
)
{
#if defined(NV_DEVICE_PROPERTY_READ_U64_PRESENT) && \
    defined(CONFIG_ACPI_NUMA) && \
    NV_IS_EXPORT_SYMBOL_PRESENT_pxm_to_node
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    NvU64 pa, sz, pxm;

    if (device_property_read_u64(nvl->dev, "nvidia,egm-pxm", &pxm) != 0)
    {
        goto failed;
    }

    if (device_property_read_u64(nvl->dev, "nvidia,egm-base-pa", &pa) != 0)
    {
        goto failed;
    }

    if (device_property_read_u64(nvl->dev, "nvidia,egm-size", &sz) != 0)
    {
        goto failed;
    }

    NV_DEV_PRINTF(NV_DBG_INFO, nv, "DSD properties:\n");
    NV_DEV_PRINTF(NV_DBG_INFO, nv, "\tEGM base PA: 0x%llx\n", pa);
    NV_DEV_PRINTF(NV_DBG_INFO, nv, "\tEGM size: 0x%llx\n", sz);
    NV_DEV_PRINTF(NV_DBG_INFO, nv, "\tEGM _PXM: 0x%llx\n", pxm);

    if (egm_node_id != NULL)
    {
        *egm_node_id = pxm_to_node(pxm);
        nv_printf(NV_DBG_INFO, "EGM node id: %d\n", *egm_node_id);
    }

    if (phys_addr != NULL)
    {
        *phys_addr = pa;
        nv_printf(NV_DBG_INFO, "EGM base addr: 0x%llx\n", *phys_addr);
    }

    if (size != NULL)
    {
        *size = sz;
        nv_printf(NV_DBG_INFO, "EGM size: 0x%llx\n", *size);
    }

    return NV_OK;

failed:
#endif // NV_DEVICE_PROPERTY_READ_U64_PRESENT

    NV_DEV_PRINTF(NV_DBG_INFO, nv, "Cannot get EGM info\n");
    return NV_ERR_NOT_SUPPORTED;
}

void NV_API_CALL nv_get_screen_info(
    nv_state_t *nv,
    NvU64 *pPhysicalAddress,
    NvU32 *pFbWidth,
    NvU32 *pFbHeight,
    NvU32 *pFbDepth,
    NvU32 *pFbPitch,
    NvU64 *pFbSize
)
{
    *pPhysicalAddress = 0;
    *pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = *pFbSize = 0;

#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
    if (num_registered_fb > 0)
    {
        int i;

        for (i = 0; i < num_registered_fb; i++)
        {
            if (!registered_fb[i])
                continue;

            /* Make sure the base address is mapped to a GPU BAR */
            if (NV_IS_CONSOLE_MAPPED(nv, registered_fb[i]->fix.smem_start))
            {
                *pPhysicalAddress = registered_fb[i]->fix.smem_start;
                *pFbWidth = registered_fb[i]->var.xres;
                *pFbHeight = registered_fb[i]->var.yres;
                *pFbDepth = registered_fb[i]->var.bits_per_pixel;
                *pFbPitch = registered_fb[i]->fix.line_length;
                *pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
                return;
            }
        }
    }
#endif

    /*
     * If the screen info is not found in the registered FBs, then fall back
     * to the screen_info structure.
     *
     * The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
     * generic framebuffers so the new generic system-framebuffer drivers can
     * be used instead. DRM_SIMPLEDRM drives the generic system-framebuffer
     * device created by SYSFB_SIMPLEFB.
     *
     * SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
     * information required by nv_get_screen_info(), therefore we need to
     * fall back to the screen_info structure.
     *
     * After commit b8466fe82b79 ("efi: move screen_info into efi init code")
     * in v6.7, 'screen_info' is exported as a GPL licensed symbol for ARM64.
     */

#if NV_CHECK_EXPORT_SYMBOL(screen_info)
    /*
     * If there is not a framebuffer console, return 0 size.
     *
     * orig_video_isVGA is set to 1 during early Linux kernel
     * initialization, and then will be set to a value, such as
     * VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI, if an fbdev console is used.
     */
    if (screen_info.orig_video_isVGA > 1)
    {
        NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
        physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif

        /* Make sure the base address is mapped to a GPU BAR */
        if (NV_IS_CONSOLE_MAPPED(nv, physAddr))
        {
            *pPhysicalAddress = physAddr;
            *pFbWidth = screen_info.lfb_width;
            *pFbHeight = screen_info.lfb_height;
            *pFbDepth = screen_info.lfb_depth;
            *pFbPitch = screen_info.lfb_linelength;
            *pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
        }
    }
#else
    {
        nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
        struct pci_dev *pci_dev = nvl->pci_dev;
        int i;

        if (pci_dev == NULL)
            return;

        BUILD_BUG_ON(NV_GPU_BAR_INDEX_IMEM != NV_GPU_BAR_INDEX_FB + 1);
        for (i = NV_GPU_BAR_INDEX_FB; i <= NV_GPU_BAR_INDEX_IMEM; i++)
        {
            int bar_index = nv_bar_index_to_os_bar_index(pci_dev, i);
            struct resource *gpu_bar_res = &pci_dev->resource[bar_index];
            struct resource *res = gpu_bar_res->child;

            /*
             * The console resource will become a child resource of the
             * pci-dev resource. Check if the child resource start address
             * matches the expected console start address.
             */
            if ((res != NULL) &&
                NV_IS_CONSOLE_MAPPED(nv, res->start))
            {
                NvU32 res_name_len = strlen(res->name);

                /*
                 * The resource name ends with 'fb' (efifb, vesafb, etc.).
                 * For simple-framebuffer, the resource name is 'BOOTFB'.
                 * Confirm that the resource name ends with either 'fb' or 'FB'.
                 */
                if ((res_name_len > 2) &&
                    !strcasecmp((res->name + res_name_len - 2), "fb"))
                {
                    *pPhysicalAddress = res->start;
                    *pFbSize = resource_size(res);
                    return;
                }
            }
        }
    }
#endif
}

module_init(nvidia_init_module);
module_exit(nvidia_exit_module);