1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nv-pci-table.h"
25 #include "nv-pci-types.h"
26 #include "nv-pci.h"
27 #include "nv-ibmnpu.h"
28 #include "nv-frontend.h"
29 #include "nv-msi.h"
30 #include "nv-hypervisor.h"
31 
32 #if defined(NV_VGPU_KVM_BUILD)
33 #include "nv-vgpu-vfio-interface.h"
34 #endif
35 
36 #if defined(NV_SEQ_READ_ITER_PRESENT)
37 #include <linux/seq_file.h>
38 #include <linux/kernfs.h>
39 #endif
40 
41 static void
42 nv_check_and_exclude_gpu(
43     nvidia_stack_t *sp,
44     nv_state_t *nv
45 )
46 {
47     char *uuid_str;
48 
49     uuid_str = rm_get_gpu_uuid(sp, nv);
50     if (uuid_str == NULL)
51     {
52         NV_DEV_PRINTF(NV_DBG_INFO, nv, "Unable to read UUID");
53         return;
54     }
55 
56     if (nv_is_uuid_in_gpu_exclusion_list(uuid_str))
57     {
58         NV_STATUS rm_status = rm_exclude_adapter(sp, nv);
59         if (rm_status != NV_OK)
60         {
61             NV_DEV_PRINTF_STATUS(NV_DBG_ERRORS, nv, rm_status,
62                           "Failed to exclude GPU %s", uuid_str);
63             goto done;
64         }
65         nv->flags |= NV_FLAG_EXCLUDE;
66         NV_DEV_PRINTF(NV_DBG_INFO, nv, "Excluded GPU %s successfully\n",
67                       uuid_str);
68     }
69 
70 done:
71     os_free_mem(uuid_str);
72 }
73 
74 static NvBool nv_treat_missing_irq_as_error(void)
75 {
76 #if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
77     return (nv_get_hypervisor_type() != OS_HYPERVISOR_HYPERV);
78 #else
79     return NV_TRUE;
80 #endif
81 }
82 
/*
 * Prepare dynamic (runtime) power management for the GPU and hand off to
 * the RM.  Two pieces of state are gathered here:
 *
 *  1. An open handle to the GPU's sysfs PCI config-space file is cached
 *     in nvl->sysfs_config_file (presumably so config space stays
 *     reachable across power transitions -- consumer is outside this
 *     file; confirm against users of sysfs_config_file).
 *  2. Whether an ACPI _PR3 power-resource method exists for the device
 *     (or, on bare metal, for its upstream bridge), which gates RTD3.
 */
static void nv_init_dynamic_power_management
(
    nvidia_stack_t *sp,
    struct pci_dev *pci_dev
)
{
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    char filename[50];
    int ret;
    NvBool pr3_acpi_method_present = NV_FALSE;

    nvl->sysfs_config_file = NULL;

    /* Build the sysfs path to this GPU's config-space file (function 0). */
    ret = snprintf(filename, sizeof(filename),
                   "/sys/bus/pci/devices/%04x:%02x:%02x.0/config",
                   NV_PCI_DOMAIN_NUMBER(pci_dev),
                   NV_PCI_BUS_NUMBER(pci_dev),
                   NV_PCI_SLOT_NUMBER(pci_dev));
    if (ret > 0 && ret < sizeof(filename))
    {
        struct file *file = filp_open(filename, O_RDONLY, 0);
        if (!IS_ERR(file))
        {
#if defined(NV_SEQ_READ_ITER_PRESENT)
            /*
             * Sanity check for confirming if file path is mounted over
             * sysfs file system.
             */
            if ((file->f_inode != NULL) && (file->f_inode->i_sb != NULL) &&
                (strcmp(file->f_inode->i_sb->s_id, "sysfs") == 0))
            {
                struct seq_file *sf = file->private_data;

                /*
                 * Sanity check for confirming if 'file->private_data'
                 * actually points to 'struct seq_file'.
                 */
                if ((sf != NULL) && (sf->file == file) && (sf->op == NULL))
                {
                    struct kernfs_open_file *of = sf->private;

                    /*
                     * Sanity check for confirming if 'sf->private'
                     * actually points to 'struct kernfs_open_file'.
                     */
                    if ((of != NULL) && (of->file == file) &&
                        (of->seq_file == sf))
                    {
                        nvl->sysfs_config_file = file;
                    }
                }
            }

            /* Any failed sanity check above: not a usable sysfs file. */
            if (nvl->sysfs_config_file == NULL)
            {
                filp_close(file, NULL);
            }
#else
            /* Older kernels: no seq_read_iter, cache the handle as-is. */
            nvl->sysfs_config_file = file;
#endif
        }
    }

    /*
     * In a virtualized environment the _PR3 method is expected on the GPU
     * itself; on bare metal it lives on the upstream bridge.
     */
    if (nv_get_hypervisor_type() != OS_HYPERVISOR_UNKNOWN)
    {
        pr3_acpi_method_present = nv_acpi_power_resource_method_present(pci_dev);
    }
    else if (pci_dev->bus && pci_dev->bus->self)
    {
        pr3_acpi_method_present = nv_acpi_power_resource_method_present(pci_dev->bus->self);
    }

    rm_init_dynamic_power_management(sp, nv, pr3_acpi_method_present);
}
158 
/*
 * Attempt to resize the GPU's BAR1 to the largest size the device
 * advertises via the PCIe Resizable BAR capability.
 *
 * On -ENOSPC the request is retried at progressively smaller sizes (down
 * to the original size) by looping back to the 'resize' label.  If the
 * kernel fails to re-assign BAR1/BAR3 afterwards, -ENODEV is returned;
 * in every other outcome (including "ReBAR unavailable" and "resize
 * refused") the function returns 0 and the driver continues with
 * whatever BAR size is in place.
 */
static int nv_resize_pcie_bars(struct pci_dev *pci_dev) {
#if defined(NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT)
    u16 cmd;
    int r, old_size, requested_size;
    unsigned long sizes;   /* bitmask of supported ReBAR sizes (kernel encoding) */
    int ret = 0;
#if NV_IS_EXPORT_SYMBOL_PRESENT_pci_find_host_bridge
    struct pci_host_bridge *host;
#endif

    if (NVreg_EnableResizableBar == 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: resizable BAR disabled by regkey, skipping\n");
        return 0;
    }

    // Check if BAR1 has PCIe rebar capabilities
    sizes = pci_rebar_get_possible_sizes(pci_dev, NV_GPU_BAR1);
    if (sizes == 0) {
        /* ReBAR not available. Nothing to do. */
        return 0;
    }

    /* Try to resize the BAR to the largest supported size */
    requested_size = fls(sizes) - 1;

    /* Save the current size, just in case things go wrong */
    old_size = pci_rebar_bytes_to_size(pci_resource_len(pci_dev, NV_GPU_BAR1));

    if (old_size == requested_size) {
        nv_printf(NV_DBG_INFO, "NVRM: %04x:%02x:%02x.%x: BAR1 already at requested size.\n",
            NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
            NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
        return 0;
    }
#if NV_IS_EXPORT_SYMBOL_PRESENT_pci_find_host_bridge
    /* If the kernel will refuse us, don't even try to resize,
       but give an informative error */
    host = pci_find_host_bridge(pci_dev->bus);
    if (host->preserve_config) {
        nv_printf(NV_DBG_INFO, "NVRM: Not resizing BAR because the firmware forbids moving windows.\n");
        return 0;
    }
#endif
    nv_printf(NV_DBG_INFO, "NVRM: %04x:%02x:%02x.%x: Attempting to resize BAR1.\n",
        NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
        NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

    /* Disable memory decoding - required by the kernel APIs */
    pci_read_config_word(pci_dev, PCI_COMMAND, &cmd);
    pci_write_config_word(pci_dev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);

    /* Release BAR1 */
    pci_release_resource(pci_dev, NV_GPU_BAR1);

    /* Release BAR3 - we don't want to resize it, it's in the same bridge, so we'll want to move it */
    pci_release_resource(pci_dev, NV_GPU_BAR3);

resize:
    /* Attempt to resize BAR1 to the largest supported size */
    r = pci_resize_resource(pci_dev, NV_GPU_BAR1, requested_size);

    if (r) {
        if (r == -ENOSPC)
        {
            /* step through smaller sizes down to original size */
            if (requested_size > old_size)
            {
                /* Drop the largest remaining size from the mask and retry. */
                clear_bit(fls(sizes) - 1, &sizes);
                requested_size = fls(sizes) - 1;
                goto resize;
            }
            else
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: No address space to allocate resized BAR1.\n");
            }
        }
        else if (r == -EOPNOTSUPP)
        {
            nv_printf(NV_DBG_WARNINGS, "NVRM: BAR resize resource not supported.\n");
        }
        else
        {
            nv_printf(NV_DBG_WARNINGS, "NVRM: BAR resizing failed with error `%d`.\n", r);
        }
    }

    /* Re-attempt assignment of PCIe resources */
    pci_assign_unassigned_bus_resources(pci_dev->bus);

    /* Verify both released BARs were actually re-assigned. */
    if ((pci_resource_flags(pci_dev, NV_GPU_BAR1) & IORESOURCE_UNSET) ||
        (pci_resource_flags(pci_dev, NV_GPU_BAR3) & IORESOURCE_UNSET)) {
        if (requested_size != old_size) {
            /* Try to get the BAR back with the original size */
            requested_size = old_size;
            goto resize;
        }
        /* Something went horribly wrong and the kernel didn't manage to re-allocate BAR1.
           This is unlikely (because we had space before), but can happen. */
        nv_printf(NV_DBG_ERRORS, "NVRM: FATAL: Failed to re-allocate BAR1.\n");
        ret = -ENODEV;
    }

    /* Re-enable memory decoding */
    pci_write_config_word(pci_dev, PCI_COMMAND, cmd);

    return ret;
#else
    nv_printf(NV_DBG_INFO, "NVRM: Resizable BAR is not supported on this kernel version.\n");
    return 0;
#endif /* NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT */
}
271 
/*
 * PCI probe callback: find nvidia devices and set initial state.
 *
 * Validates the device (supported GPU, usable IRQ, sane BAR layout),
 * claims the register BAR, optionally resizes BAR1, allocates and fills
 * the per-device nv_linux_state_t, registers the device with the RM and
 * the character-device frontend, and enables dynamic power management as
 * the very last hardware-touching step.  Returns 0 on success, -1 on
 * failure; on failure every resource acquired up to that point is
 * unwound through the error-label ladder at the bottom.
 */
static int
nv_pci_probe
(
    struct pci_dev *pci_dev,
    const struct pci_device_id *id_table
)
{
    nv_state_t *nv = NULL;
    nv_linux_state_t *nvl = NULL;
    unsigned int i, j;
    int flags = 0;
    nvidia_stack_t *sp = NULL;
    /* Saved so the global ATS flag can be restored if this probe fails. */
    NvBool prev_nv_ats_supported = nv_ats_supported;
    NV_STATUS status;
    NvBool last_bar_64bit = NV_FALSE;
    NvU8 regs_bar_index = nv_bar_index_to_os_bar_index(pci_dev,
                                                       NV_GPU_BAR_INDEX_REGS);

    nv_printf(NV_DBG_SETUP, "NVRM: probing 0x%x 0x%x, class 0x%x\n",
        pci_dev->vendor, pci_dev->device, pci_dev->class);

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return -1;
    }

#ifdef NV_PCI_SRIOV_SUPPORT
    /*
     * SR-IOV virtual functions are not managed as full GPUs: they are
     * handed to the vGPU VFIO module (when built) and probing ends here.
     */
    if (pci_dev->is_virtfn)
    {
#if defined(NV_VGPU_KVM_BUILD)
        nvl = pci_get_drvdata(pci_dev->physfn);
        if (!nvl)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
                      "since PF is not bound to nvidia driver.\n",
                       NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                       NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
            goto failed;
        }

        if (pci_dev->dev.bus->iommu_ops == NULL)
        {
            nv = NV_STATE_PTR(nvl);
            if (rm_is_iommu_needed_for_sriov(sp, nv))
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
                          "since IOMMU is not present on the system.\n",
                           NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                           NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
                goto failed;
            }
        }

        if (nvidia_vgpu_vfio_probe(pci_dev) != NV_OK)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: Failed to register device to vGPU VFIO module");
            goto failed;
        }

        nv_kmem_cache_free_stack(sp);
        return 0;
#else
        nv_printf(NV_DBG_ERRORS, "NVRM: Ignoring probe for VF %04x:%02x:%02x.%x ",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

        goto failed;
#endif /* NV_VGPU_KVM_BUILD */
    }
#endif /* NV_PCI_SRIOV_SUPPORT */

    /* Reject GPUs the RM no longer supports (legacy hardware). */
    if (!rm_is_supported_pci_device(
                (pci_dev->class >> 16) & 0xFF,
                (pci_dev->class >> 8) & 0xFF,
                pci_dev->vendor,
                pci_dev->device,
                pci_dev->subsystem_vendor,
                pci_dev->subsystem_device,
                NV_FALSE /* print_legacy_warning */))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: ignoring the legacy GPU %04x:%02x:%02x.%x\n",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
        goto failed;
    }

    num_probed_nv_devices++;

    if (pci_enable_device(pci_dev) != 0)
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: pci_enable_device failed, aborting\n");
        goto failed;
    }

    /* No legacy IRQ and no MSI-X capability: unusable unless tolerated. */
    if ((pci_dev->irq == 0 && !pci_find_capability(pci_dev, PCI_CAP_ID_MSIX))
        && nv_treat_missing_irq_as_error())
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Can't find an IRQ for your NVIDIA card!\n");
        nv_printf(NV_DBG_ERRORS, "NVRM: Please check your BIOS settings.\n");
        nv_printf(NV_DBG_ERRORS, "NVRM: [Plug & Play OS] should be set to NO\n");
        nv_printf(NV_DBG_ERRORS, "NVRM: [Assign IRQ to VGA] should be set to YES \n");
        goto failed;
    }

    /*
     * Walk the config-space BARs and validate each one; i indexes the
     * config-space BAR slots, j counts the GPU BARs accepted so far.
     */
    for (i = 0, j = 0; i < NVRM_PCICFG_NUM_BARS && j < NV_GPU_NUM_BARS; i++)
    {
        if (NV_PCI_RESOURCE_VALID(pci_dev, i))
        {
#if defined(NV_PCI_MAX_MMIO_BITS_SUPPORTED)
            /* 64-bit BAR placed above the platform's supported MMIO range. */
            if ((NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_MEM_TYPE_64) &&
                ((NV_PCI_RESOURCE_START(pci_dev, i) >> NV_PCI_MAX_MMIO_BITS_SUPPORTED)))
            {
                nv_printf(NV_DBG_ERRORS,
                    "NVRM: This is a 64-bit BAR mapped above %dGB by the system\n"
                    "NVRM: BIOS or the %s kernel. This PCI I/O region assigned\n"
                    "NVRM: to your NVIDIA device is not supported by the kernel.\n"
                    "NVRM: BAR%d is %dM @ 0x%llx (PCI:%04x:%02x:%02x.%x)\n",
                    (1 << (NV_PCI_MAX_MMIO_BITS_SUPPORTED - 30)),
                    NV_KERNEL_NAME, i,
                    (NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
                    (NvU64)NV_PCI_RESOURCE_START(pci_dev, i),
                    NV_PCI_DOMAIN_NUMBER(pci_dev),
                    NV_PCI_BUS_NUMBER(pci_dev), NV_PCI_SLOT_NUMBER(pci_dev),
                    PCI_FUNC(pci_dev->devfn));
                goto failed;
            }
#endif
            /*
             * For a 64-bit prefetchable BAR mapped above 4GB, verify the
             * upstream bridge exposes a matching prefetchable window.
             */
            if ((NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_MEM_TYPE_64) &&
                (NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_MEM_PREFETCH))
            {
                struct pci_dev *bridge = pci_dev->bus->self;
                NvU32 base_upper, limit_upper;

                last_bar_64bit = NV_TRUE;

                if (bridge == NULL)
                    goto next_bar;

                /* Below 4GB (upper dword zero): no bridge window needed. */
                pci_read_config_dword(pci_dev, NVRM_PCICFG_BAR_OFFSET(i) + 4,
                                      &base_upper);
                if (base_upper == 0)
                    goto next_bar;

                pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32,
                        &base_upper);
                pci_read_config_dword(bridge, PCI_PREF_LIMIT_UPPER32,
                        &limit_upper);

                if ((base_upper != 0) && (limit_upper != 0))
                    goto next_bar;

                nv_printf(NV_DBG_ERRORS,
                    "NVRM: This is a 64-bit BAR mapped above 4GB by the system\n"
                    "NVRM: BIOS or the %s kernel, but the PCI bridge\n"
                    "NVRM: immediately upstream of this GPU does not define\n"
                    "NVRM: a matching prefetchable memory window.\n",
                    NV_KERNEL_NAME);
                nv_printf(NV_DBG_ERRORS,
                    "NVRM: This may be due to a known Linux kernel bug.  Please\n"
                    "NVRM: see the README section on 64-bit BARs for additional\n"
                    "NVRM: information.\n");
                goto failed;
            }

next_bar:
            //
            // If we are here, then we have found a valid BAR -- 32 or 64-bit.
            //
            j++;
            continue;
        }

        //
        // If last_bar_64bit is "true" then, we are looking at the 2nd (upper)
        // half of the 64-bit BAR. This is typically all 0s which looks invalid
        // but it's normal and not a problem and we can ignore it and continue.
        //
        if (last_bar_64bit)
        {
            last_bar_64bit = NV_FALSE;
            continue;
        }

        // Invalid 32 or 64-bit BAR.
        nv_printf(NV_DBG_ERRORS,
            "NVRM: This PCI I/O region assigned to your NVIDIA device is invalid:\n"
            "NVRM: BAR%d is %dM @ 0x%llx (PCI:%04x:%02x:%02x.%x)\n", i,
            (NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
            (NvU64)NV_PCI_RESOURCE_START(pci_dev, i),
            NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
            NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

        goto failed;
    }

    /* Claim the register BAR so no other driver can map it. */
    if (!request_mem_region(NV_PCI_RESOURCE_START(pci_dev, regs_bar_index),
                            NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index),
                            nv_device_name))
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: request_mem_region failed for %dM @ 0x%llx. This can\n"
            "NVRM: occur when a driver such as rivatv is loaded and claims\n"
            "NVRM: ownership of the device's registers.\n",
            (NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index) >> 20),
            (NvU64)NV_PCI_RESOURCE_START(pci_dev, regs_bar_index));
        goto failed;
    }

    if (nv_resize_pcie_bars(pci_dev)) {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: Fatal Error while attempting to resize PCIe BARs.\n");
        goto failed;
    }

    NV_KZALLOC(nvl, sizeof(nv_linux_state_t));
    if (nvl == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate memory\n");
        goto err_not_supported;
    }

    nv  = NV_STATE_PTR(nvl);

    pci_set_drvdata(pci_dev, (void *)nvl);

    /* default to 32-bit PCI bus address space */
    pci_dev->dma_mask = 0xffffffffULL;

    nvl->dev               = &pci_dev->dev;
    nvl->pci_dev           = pci_dev;
    nvl->dma_dev.dev       = nvl->dev;

    nv->pci_info.vendor_id = pci_dev->vendor;
    nv->pci_info.device_id = pci_dev->device;
    nv->subsystem_id       = pci_dev->subsystem_device;
    nv->subsystem_vendor   = pci_dev->subsystem_vendor;
    nv->os_state           = (void *) nvl;
    nv->dma_dev            = &nvl->dma_dev;
    nv->pci_info.domain    = NV_PCI_DOMAIN_NUMBER(pci_dev);
    nv->pci_info.bus       = NV_PCI_BUS_NUMBER(pci_dev);
    nv->pci_info.slot      = NV_PCI_SLOT_NUMBER(pci_dev);
    nv->handle             = pci_dev;
    nv->flags             |= flags;

    if (!nv_lock_init_locks(sp, nv))
    {
        goto err_not_supported;
    }

    nvl->all_mappings_revoked = NV_TRUE;
    nvl->safe_to_mmap = NV_TRUE;
    nvl->gpu_wakeup_callback_needed = NV_TRUE;
    INIT_LIST_HEAD(&nvl->open_files);

    /* Record offset/address/size of each valid memory BAR. */
    for (i = 0, j = 0; i < NVRM_PCICFG_NUM_BARS && j < NV_GPU_NUM_BARS; i++)
    {
        if ((NV_PCI_RESOURCE_VALID(pci_dev, i)) &&
            (NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_SPACE)
                == PCI_BASE_ADDRESS_SPACE_MEMORY)
        {
            nv->bars[j].offset = NVRM_PCICFG_BAR_OFFSET(i);
            nv->bars[j].cpu_address = NV_PCI_RESOURCE_START(pci_dev, i);
            nv->bars[j].size = NV_PCI_RESOURCE_SIZE(pci_dev, i);
            j++;
        }
    }
    nv->regs = &nv->bars[NV_GPU_BAR_INDEX_REGS];
    nv->fb   = &nv->bars[NV_GPU_BAR_INDEX_FB];

    nv->interrupt_line = pci_dev->irq;

    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_DISABLED);
    nvl->numa_info.node_id = NUMA_NO_NODE;

    nv_init_ibmnpu_info(nv);

#if defined(NVCPU_PPC64LE)
    // Use HW NUMA support as a proxy for ATS support. This is true in the only
    // PPC64LE platform where ATS is currently supported (IBM P9).
    nv_ats_supported &= nv_platform_supports_numa(nvl);
#else
#endif
    if (nv_ats_supported)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "ATS supported by this GPU!\n");
    }
    else
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "ATS not supported by this GPU. "
                      "Disabling ATS support for all the GPUs in the system!\n");
    }

    pci_set_master(pci_dev);

#if defined(CONFIG_VGA_ARB) && !defined(NVCPU_PPC64LE)
#if defined(VGA_DEFAULT_DEVICE)
#if defined(NV_VGA_TRYGET_PRESENT)
    vga_tryget(VGA_DEFAULT_DEVICE, VGA_RSRC_LEGACY_MASK);
#endif
#endif
    /* Opt out of legacy VGA decoding for this device. */
    vga_set_legacy_decoding(pci_dev, VGA_RSRC_NONE);
#endif

    status = nv_check_gpu_state(nv);
    if (status == NV_ERR_GPU_IS_LOST)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping nv_pci_probe\n");
        goto err_not_supported;
    }

    if ((rm_is_supported_device(sp, nv)) != NV_OK)
        goto err_not_supported;

    if (!rm_init_private_state(sp, nv))
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "rm_init_private_state() failed!\n");
        goto err_zero_dev;
    }

    nv_printf(NV_DBG_INFO,
              "NVRM: PCI:%04x:%02x:%02x.%x (%04x:%04x): BAR0 @ 0x%llx (%lluMB)\n",
              nv->pci_info.domain, nv->pci_info.bus, nv->pci_info.slot,
              PCI_FUNC(pci_dev->devfn), nv->pci_info.vendor_id, nv->pci_info.device_id,
              nv->regs->cpu_address, (nv->regs->size >> 20));
    nv_printf(NV_DBG_INFO,
              "NVRM: PCI:%04x:%02x:%02x.%x (%04x:%04x): BAR1 @ 0x%llx (%lluMB)\n",
              nv->pci_info.domain, nv->pci_info.bus, nv->pci_info.slot,
              PCI_FUNC(pci_dev->devfn), nv->pci_info.vendor_id, nv->pci_info.device_id,
              nv->fb->cpu_address, (nv->fb->size >> 20));

    num_nv_devices++;

    /*
     * The newly created nvl object is added to the nv_linux_devices global list
     * only after all the initialization operations for that nvl object are
     * completed, so as to protect against simultaneous lookup operations which
     * may discover a partially initialized nvl object in the list
     */
    LOCK_NV_LINUX_DEVICES();

    nv_linux_add_device_locked(nvl);

    UNLOCK_NV_LINUX_DEVICES();

    if (nvidia_frontend_add_device((void *)&nv_fops, nvl) != 0)
        goto err_remove_device;

    pm_vt_switch_required(nvl->dev, NV_TRUE);

    nv_init_dynamic_power_management(sp, pci_dev);

    nv_procfs_add_gpu(nvl);

    /* Parse and set any per-GPU registry keys specified. */
    nv_parse_per_device_option_string(sp);

    rm_set_rm_firmware_requested(sp, nv);

#if defined(NV_VGPU_KVM_BUILD)
    if (nvidia_vgpu_vfio_probe(nvl->pci_dev) != NV_OK)
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "Failed to register device to vGPU VFIO module");
        nvidia_frontend_remove_device((void *)&nv_fops, nvl);
        goto err_vgpu_kvm;
    }
#endif

    nv_check_and_exclude_gpu(sp, nv);

#if defined(DPM_FLAG_NO_DIRECT_COMPLETE)
    dev_pm_set_driver_flags(nvl->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
#elif defined(DPM_FLAG_NEVER_SKIP)
    dev_pm_set_driver_flags(nvl->dev, DPM_FLAG_NEVER_SKIP);
#endif

    /*
     * Dynamic power management should be enabled as the last step.
     * Kernel runtime power management framework can put the device
     * into the suspended state. Hardware register access should not be done
     * after enabling dynamic power management.
     */
    rm_enable_dynamic_power_management(sp, nv);
    nv_kmem_cache_free_stack(sp);

    return 0;

    /*
     * Error unwind: each label releases what was acquired after the
     * previous one, falling through to the labels below it.
     */
#if defined(NV_VGPU_KVM_BUILD)
err_vgpu_kvm:
#endif
    nv_procfs_remove_gpu(nvl);
    rm_cleanup_dynamic_power_management(sp, nv);
    pm_vt_switch_unregister(nvl->dev);
err_remove_device:
    LOCK_NV_LINUX_DEVICES();
    nv_linux_remove_device_locked(nvl);
    UNLOCK_NV_LINUX_DEVICES();
err_zero_dev:
    rm_free_private_state(sp, nv);
err_not_supported:
    nv_ats_supported = prev_nv_ats_supported;
    nv_destroy_ibmnpu_info(nv);
    nv_lock_destroy_locks(sp, nv);
    if (nvl != NULL)
    {
        NV_KFREE(nvl, sizeof(nv_linux_state_t));
    }
    release_mem_region(NV_PCI_RESOURCE_START(pci_dev, regs_bar_index),
                       NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index));
    NV_PCI_DISABLE_DEVICE(pci_dev);
    pci_set_drvdata(pci_dev, NULL);
failed:
    nv_kmem_cache_free_stack(sp);
    return -1;
}
688 
/*
 * PCI remove callback: tear down the per-device state created by
 * nv_pci_probe().
 *
 * If the device still has active users (non-zero usage_count) and is not
 * an eGPU, this function blocks -- repeatedly dropping and re-acquiring
 * the device locks -- until the count reaches zero.  Resources that are
 * shared with still-open file descriptors (locks, the nvl allocation,
 * the PCI enable) are only released when usage_count is zero.
 */
static void
nv_pci_remove(struct pci_dev *pci_dev)
{
    nv_linux_state_t *nvl = NULL;
    nv_state_t *nv;
    nvidia_stack_t *sp = NULL;
    NvU8 regs_bar_index = nv_bar_index_to_os_bar_index(pci_dev,
                                                       NV_GPU_BAR_INDEX_REGS);

    nv_printf(NV_DBG_SETUP, "NVRM: removing GPU %04x:%02x:%02x.%x\n",
              NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
              NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

#ifdef NV_PCI_SRIOV_SUPPORT
    /* SR-IOV VFs were handed to the vGPU VFIO module; undo only that. */
    if (pci_dev->is_virtfn)
    {
#if defined(NV_VGPU_KVM_BUILD)
        /* Arg 2 == NV_TRUE means that the PCI device should be removed */
        nvidia_vgpu_vfio_remove(pci_dev, NV_TRUE);
#endif /* NV_VGPU_KVM_BUILD */
        return;
    }
#endif /* NV_PCI_SRIOV_SUPPORT */

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return;
    }

    LOCK_NV_LINUX_DEVICES();
    nvl = pci_get_drvdata(pci_dev);
    if (!nvl || (nvl->pci_dev != pci_dev))
    {
        goto done;
    }

    nv = NV_STATE_PTR(nvl);
    down(&nvl->ldata_lock);

    /*
     * Sanity check: A removed device shouldn't have a non-zero usage_count.
     * For eGPU, fall off the bus along with clients active is a valid scenario.
     * Hence skipping the sanity check for eGPU.
     */
    if ((NV_ATOMIC_READ(nvl->usage_count) != 0) && !(nv->is_external_gpu))
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count!\n",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

        /*
         * We can't return from this function without corrupting state, so we wait for
         * the usage count to go to zero.
         */
        while (NV_ATOMIC_READ(nvl->usage_count) != 0)
        {

            /*
             * While waiting, release the locks so that other threads can make
             * forward progress.
             */
            up(&nvl->ldata_lock);
            UNLOCK_NV_LINUX_DEVICES();

            os_delay(500);

            /* Re-acquire the locks before checking again */
            LOCK_NV_LINUX_DEVICES();
            nvl = pci_get_drvdata(pci_dev);
            if (!nvl)
            {
                /* The device was not found, which should not happen */
                nv_printf(NV_DBG_ERRORS,
                          "NVRM: Failed removal of device %04x:%02x:%02x.%x!\n",
                          NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                          NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
                WARN_ON(1);
                goto done;
            }
            nv = NV_STATE_PTR(nvl);
            down(&nvl->ldata_lock);
        }

        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Continuing with GPU removal for device %04x:%02x:%02x.%x\n",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
    }

    rm_check_for_gpu_surprise_removal(sp, nv);

    /* Unlink from the global device list before dropping the list lock. */
    nv_linux_remove_device_locked(nvl);

    /* Remove proc entry for this GPU */
    nv_procfs_remove_gpu(nvl);

    rm_cleanup_dynamic_power_management(sp, nv);

    nv->removed = NV_TRUE;

    UNLOCK_NV_LINUX_DEVICES();

    pm_vt_switch_unregister(&pci_dev->dev);

#if defined(NV_VGPU_KVM_BUILD)
    /* Arg 2 == NV_TRUE means that the PCI device should be removed */
    nvidia_vgpu_vfio_remove(pci_dev, NV_TRUE);
#endif

    /* Update the frontend data structures */
    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        nvidia_frontend_remove_device((void *)&nv_fops, nvl);
    }

    /* Persistent-SW-state or still-open devices need a full HW shutdown. */
    if ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) || (nv->flags & NV_FLAG_OPEN))
    {
        nv_acpi_unregister_notifier(nvl);
        if (nv->flags & NV_FLAG_PERSISTENT_SW_STATE)
        {
            rm_disable_gpu_state_persistence(sp, nv);
        }
        nv_shutdown_adapter(sp, nv, nvl);
        nv_dev_free_stacks(nvl);
    }

    /* Drop the cached sysfs config-space handle taken at probe time. */
    if (nvl->sysfs_config_file != NULL)
    {
        filp_close(nvl->sysfs_config_file, NULL);
        nvl->sysfs_config_file = NULL;
    }

    nv_unregister_ibmnpu_devices(nv);
    nv_destroy_ibmnpu_info(nv);

    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        nv_lock_destroy_locks(sp, nv);
    }

    num_probed_nv_devices--;

    pci_set_drvdata(pci_dev, NULL);

    rm_i2c_remove_adapters(sp, nv);
    rm_free_private_state(sp, nv);
    release_mem_region(NV_PCI_RESOURCE_START(pci_dev, regs_bar_index),
                       NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index));

    num_nv_devices--;

    /*
     * With no users left, the device can be fully disabled and freed;
     * otherwise keep nvl alive for the remaining users and just drop
     * the device lock.
     */
    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        NV_PCI_DISABLE_DEVICE(pci_dev);
        NV_KFREE(nvl, sizeof(nv_linux_state_t));
    }
    else
    {
        up(&nvl->ldata_lock);
    }

    nv_kmem_cache_free_stack(sp);
    return;

done:
    UNLOCK_NV_LINUX_DEVICES();
    nv_kmem_cache_free_stack(sp);
}
858 
/*
 * PCI shutdown callback (nv_pci_driver.shutdown), invoked on system
 * reboot/halt.  Marks the adapter as shut down and disables PCI bus
 * mastering so the GPU stops issuing DMA before the system goes down.
 */
static void
nv_pci_shutdown(struct pci_dev *pci_dev)
{
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);

    /* If a forced shutdown was flagged elsewhere, just clear the flag and
     * skip the normal path.  NOTE(review): assumes the device was already
     * quiesced by whichever path set is_forced_shutdown -- confirm. */
    if ((nvl != NULL) && nvl->is_forced_shutdown)
    {
        nvl->is_forced_shutdown = NV_FALSE;
        return;
    }

    if (nvl != NULL)
    {
        nvl->nv_state.is_shutdown = NV_TRUE;
    }

    /* pci_clear_master is not defined for !CONFIG_PCI */
#ifdef CONFIG_PCI
    pci_clear_master(pci_dev);
#endif

    /* SHH HW mandates a 1us delay to realize the effects of disabling
     * Bus Master Enable (BME).  Add the 1us delay for all chips, since
     * the delay is not in the data path and is small.  Creating a HAL
     * for this would be overkill.
     */
    udelay(1);
}
887 
888 /*!
889  * @brief This function accepts pci information corresponding to a GPU
890  * and returns a reference to the nv_linux_state_t corresponding to that GPU.
891  *
892  * @param[in] domain            Pci domain number for the GPU to be found.
893  * @param[in] bus               Pci bus number for the GPU to be found.
894  * @param[in] slot              Pci slot number for the GPU to be found.
895  * @param[in] function          Pci function number for the GPU to be found.
896  *
897  * @return Pointer to nv_linux_state_t for the GPU if it is found, or NULL otherwise.
898  */
899 nv_linux_state_t * find_pci(NvU32 domain, NvU8 bus, NvU8 slot, NvU8 function)
900 {
901     nv_linux_state_t *nvl = NULL;
902 
903     LOCK_NV_LINUX_DEVICES();
904 
905     for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
906     {
907         nv_state_t *nv = NV_STATE_PTR(nvl);
908 
909         if (nv->pci_info.domain == domain &&
910             nv->pci_info.bus == bus &&
911             nv->pci_info.slot == slot &&
912             nv->pci_info.function == function)
913         {
914             break;
915         }
916     }
917 
918     UNLOCK_NV_LINUX_DEVICES();
919     return nvl;
920 }
921 
922 int nvidia_dev_get_pci_info(const NvU8 *uuid, struct pci_dev **pci_dev_out,
923     NvU64 *dma_start, NvU64 *dma_limit)
924 {
925     nv_linux_state_t *nvl;
926 
927     /* Takes nvl->ldata_lock */
928     nvl = find_uuid(uuid);
929     if (!nvl)
930         return -ENODEV;
931 
932     *pci_dev_out = nvl->pci_dev;
933     *dma_start = nvl->dma_dev.addressable_range.start;
934     *dma_limit = nvl->dma_dev.addressable_range.limit;
935 
936     up(&nvl->ldata_lock);
937 
938     return 0;
939 }
940 
941 NvU8 nv_find_pci_capability(struct pci_dev *pci_dev, NvU8 capability)
942 {
943     u16 status = 0;
944     u8  cap_ptr = 0, cap_id = 0xff;
945 
946     pci_read_config_word(pci_dev, PCI_STATUS, &status);
947     status &= PCI_STATUS_CAP_LIST;
948     if (!status)
949         return 0;
950 
951     switch (pci_dev->hdr_type) {
952         case PCI_HEADER_TYPE_NORMAL:
953         case PCI_HEADER_TYPE_BRIDGE:
954             pci_read_config_byte(pci_dev, PCI_CAPABILITY_LIST, &cap_ptr);
955             break;
956         default:
957             return 0;
958     }
959 
960     do {
961         cap_ptr &= 0xfc;
962         pci_read_config_byte(pci_dev, cap_ptr + PCI_CAP_LIST_ID, &cap_id);
963         if (cap_id == capability)
964             return cap_ptr;
965         pci_read_config_byte(pci_dev, cap_ptr + PCI_CAP_LIST_NEXT, &cap_ptr);
966     } while (cap_ptr && cap_id != 0xff);
967 
968     return 0;
969 }
970 
971 /* make sure the pci_driver called probe for all of our devices.
972  * we've seen cases where rivafb claims the device first and our driver
973  * doesn't get called.
974  */
975 int
976 nv_pci_count_devices(void)
977 {
978     struct pci_dev *pci_dev;
979     int count = 0;
980 
981     if (NVreg_RegisterPCIDriver == 0)
982     {
983         return 0;
984     }
985 
986     pci_dev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, NULL);
987     while (pci_dev)
988     {
989         if (rm_is_supported_pci_device(
990                 PCI_BASE_CLASS_DISPLAY,
991                 PCI_CLASS_DISPLAY_VGA & 0xFF,
992                 pci_dev->vendor,
993                 pci_dev->device,
994                 pci_dev->subsystem_vendor,
995                 pci_dev->subsystem_device,
996                 NV_TRUE /* print_legacy_warning */))
997         {
998             count++;
999         }
1000         pci_dev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pci_dev);
1001     }
1002 
1003     pci_dev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, NULL);
1004     while (pci_dev)
1005     {
1006         if (rm_is_supported_pci_device(
1007                 (pci_dev->class >> 16) & 0xFF,
1008                 (pci_dev->class >> 8) & 0xFF,
1009                 pci_dev->vendor,
1010                 pci_dev->device,
1011                 pci_dev->subsystem_vendor,
1012                 pci_dev->subsystem_device,
1013                 NV_TRUE /* print_legacy_warning */))
1014         {
1015             count++;
1016         }
1017         pci_dev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, pci_dev);
1018     }
1019 
1020     return count;
1021 }
1022 
1023 #if defined(NV_PCI_ERROR_RECOVERY)
1024 static pci_ers_result_t
1025 nv_pci_error_detected(
1026     struct pci_dev *pci_dev,
1027     nv_pci_channel_state_t error
1028 )
1029 {
1030     nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
1031 
1032     if ((nvl == NULL) || (nvl->pci_dev != pci_dev))
1033     {
1034         nv_printf(NV_DBG_ERRORS, "NVRM: %s: invalid device!\n", __FUNCTION__);
1035         return PCI_ERS_RESULT_NONE;
1036     }
1037 
1038     /*
1039      * Tell Linux to continue recovery of the device. The kernel will enable
1040      * MMIO for the GPU and call the mmio_enabled callback.
1041      */
1042     return PCI_ERS_RESULT_CAN_RECOVER;
1043 }
1044 
/*
 * AER mmio_enabled callback: invoked after nv_pci_error_detected() returned
 * PCI_ERS_RESULT_CAN_RECOVER and the kernel re-enabled MMIO for the device.
 *
 * Probes the GPU to see whether it is still reachable, logs crash data via
 * RM, and then always tells the kernel to disconnect the device, since RM
 * and its clients do not yet support completing recovery.
 */
static pci_ers_result_t
nv_pci_mmio_enabled(
    struct pci_dev *pci_dev
)
{
    NV_STATUS         status = NV_OK;
    nv_stack_t       *sp = NULL;
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
    nv_state_t       *nv = NULL;

    /* Bail out if drvdata is missing or doesn't belong to this pci_dev */
    if ((nvl == NULL) || (nvl->pci_dev != pci_dev))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: %s: invalid device!\n", __FUNCTION__);
        goto done;
    }

    nv = NV_STATE_PTR(nvl);

    /* Allocate the RM stack required by the rm_* calls below */
    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: %s: failed to allocate stack!\n",
            __FUNCTION__);
        goto done;
    }

    NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "A fatal error was detected.\n");

    /*
     * MMIO should be re-enabled now. If we still get bad reads, there's
     * likely something wrong with the adapter itself that will require a
     * reset. This should let us know whether the GPU has completely fallen
     * off the bus or just did something the host didn't like.
     */
    status = rm_is_supported_device(sp, nv);
    if (status != NV_OK)
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
            "The kernel has enabled MMIO for the device,\n"
            "NVRM: but it still appears unreachable. The device\n"
            "NVRM: will not function properly until it is reset.\n");
    }

    /* Capture crash data while the device may still be partially reachable */
    status = rm_log_gpu_crash(sp, nv);
    if (status != NV_OK)
    {
        NV_DEV_PRINTF_STATUS(NV_DBG_ERRORS, nv, status,
                      "Failed to log crash data\n");
        goto done;
    }

done:
    if (sp != NULL)
    {
        nv_kmem_cache_free_stack(sp);
    }

    /*
     * Tell Linux to abandon recovery of the device. The kernel might be able
     * to recover the device, but RM and clients don't yet support that.
     */
    return PCI_ERS_RESULT_DISCONNECT;
}
1107 
/*
 * PCI error recovery (AER) callbacks.  Only error_detected and mmio_enabled
 * are implemented; recovery always ends with PCI_ERS_RESULT_DISCONNECT from
 * nv_pci_mmio_enabled() because full recovery is not yet supported.
 */
struct pci_error_handlers nv_pci_error_handlers = {
    .error_detected = nv_pci_error_detected,
    .mmio_enabled   = nv_pci_mmio_enabled,
};
1112 #endif
1113 
1114 #if defined(CONFIG_PM)
1115 extern struct dev_pm_ops nv_pm_ops;
1116 #endif
1117 
/*
 * PCI driver registration table for the NVIDIA GPU driver.  Registered via
 * nv_pci_register_driver() unless disabled by NVreg_RegisterPCIDriver.
 */
struct pci_driver nv_pci_driver = {
    .name      = MODULE_NAME,
    .id_table  = nv_pci_table,
    .probe     = nv_pci_probe,
    .remove    = nv_pci_remove,
    .shutdown  = nv_pci_shutdown,
#if defined(NV_USE_VFIO_PCI_CORE) && \
  defined(NV_PCI_DRIVER_HAS_DRIVER_MANAGED_DMA)
    /* Driver manages its own DMA setup (struct pci_driver::driver_managed_dma) */
    .driver_managed_dma = NV_TRUE,
#endif
#if defined(CONFIG_PM)
    /* Power-management callbacks (suspend/resume/runtime PM) */
    .driver.pm = &nv_pm_ops,
#endif
#if defined(NV_PCI_ERROR_RECOVERY)
    /* AER callbacks defined above */
    .err_handler = &nv_pci_error_handlers,
#endif
};
1135 
1136 void nv_pci_unregister_driver(void)
1137 {
1138     if (NVreg_RegisterPCIDriver == 0)
1139     {
1140         return;
1141     }
1142     return pci_unregister_driver(&nv_pci_driver);
1143 }
1144 
1145 int nv_pci_register_driver(void)
1146 {
1147     if (NVreg_RegisterPCIDriver == 0)
1148     {
1149         return 0;
1150     }
1151     return pci_register_driver(&nv_pci_driver);
1152 }
1153