1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2016-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "linux_nvswitch.h"
25 
26 #include <linux/version.h>
27 
28 #include "conftest.h"
29 #include "nvlink_errors.h"
30 #include "nvlink_linux.h"
31 #include "nvCpuUuid.h"
32 #include "nv-time.h"
33 #include "nvlink_caps.h"
34 
35 #include <linux/module.h>
36 #include <linux/interrupt.h>
37 #include <linux/cdev.h>
38 #include <linux/fs.h>
39 #include <linux/slab.h>
40 #include <linux/uaccess.h>
41 #include <linux/poll.h>
42 #include <linux/sched.h>
43 #include <linux/time.h>
44 #include <linux/string.h>
45 #include <linux/moduleparam.h>
46 #include <linux/ctype.h>
47 #include <linux/wait.h>
48 #include <linux/jiffies.h>
49 
50 #include "ioctl_nvswitch.h"
51 
52 const static struct
53 {
54     NvlStatus status;
55     int err;
56 } nvswitch_status_map[] = {
57     { NVL_ERR_GENERIC,                  -EIO        },
58     { NVL_NO_MEM,                       -ENOMEM     },
59     { NVL_BAD_ARGS,                     -EINVAL     },
60     { NVL_ERR_INVALID_STATE,            -EIO        },
61     { NVL_ERR_NOT_SUPPORTED,            -EOPNOTSUPP },
62     { NVL_NOT_FOUND,                    -EINVAL     },
63     { NVL_ERR_STATE_IN_USE,             -EBUSY      },
64     { NVL_ERR_NOT_IMPLEMENTED,          -ENOSYS     },
65     { NVL_ERR_INSUFFICIENT_PERMISSIONS, -EPERM      },
66     { NVL_ERR_OPERATING_SYSTEM,         -EIO        },
67     { NVL_MORE_PROCESSING_REQUIRED,     -EAGAIN     },
68     { NVL_SUCCESS,                       0          },
69 };
70 
71 int
72 nvswitch_map_status
73 (
74     NvlStatus status
75 )
76 {
77     int err = -EIO;
78     NvU32 i;
79     NvU32 limit = sizeof(nvswitch_status_map) / sizeof(nvswitch_status_map[0]);
80 
81     for (i = 0; i < limit; i++)
82     {
83         if (nvswitch_status_map[i].status == status ||
84             nvswitch_status_map[i].status == -status)
85         {
86             err = nvswitch_status_map[i].err;
87             break;
88         }
89     }
90 
91     return err;
92 }
93 
// Fall back to the legacy SA_SHIRQ name on kernels that predate IRQF_SHARED.
#if !defined(IRQF_SHARED)
#define IRQF_SHARED SA_SHIRQ
#endif

// Resolve the inode backing an open file via struct file's f_inode member.
#define NV_FILE_INODE(file) (file)->f_inode
99 
static int nvswitch_probe(struct pci_dev *, const struct pci_device_id *);
static void nvswitch_remove(struct pci_dev *);

//
// Match any NVIDIA device whose PCI class is "other bridge"; probe() is
// expected to do the finer-grained filtering.
//
static struct pci_device_id nvswitch_pci_table[] =
{
    {
        .vendor      = PCI_VENDOR_ID_NVIDIA,
        .device      = PCI_ANY_ID,
        .subvendor   = PCI_ANY_ID,
        .subdevice   = PCI_ANY_ID,
        .class       = (PCI_CLASS_BRIDGE_OTHER << 8),
        .class_mask  = ~0
    },
    {}
};

// .shutdown reuses the remove path so devices are quiesced on reboot/kexec.
static struct pci_driver nvswitch_pci_driver =
{
    .name           = NVSWITCH_DRIVER_NAME,
    .id_table       = nvswitch_pci_table,
    .probe          = nvswitch_probe,
    .remove         = nvswitch_remove,
    .shutdown       = nvswitch_remove
};
124 
125 //
126 // nvidia_nvswitch_mknod uses minor number 255 to create nvidia-nvswitchctl
127 // node. Hence, if NVSWITCH_CTL_MINOR is changed, then NV_NVSWITCH_CTL_MINOR
128 // should be updated. See nvdia-modprobe-utils.h
129 //
130 #define NVSWITCH_CTL_MINOR 255
131 #define NVSWITCH_MINOR_COUNT (NVSWITCH_CTL_MINOR + 1)
132 
133 // 32 bit hex value - including 0x prefix. (10 chars)
134 #define NVSWITCH_REGKEY_VALUE_LEN 10
135 
136 static char *NvSwitchRegDwords;
137 module_param(NvSwitchRegDwords, charp, 0);
138 MODULE_PARM_DESC(NvSwitchRegDwords, "NvSwitch regkey");
139 
140 static char *NvSwitchBlacklist;
141 module_param(NvSwitchBlacklist, charp, 0);
142 MODULE_PARM_DESC(NvSwitchBlacklist, "NvSwitchBlacklist=uuid[,uuid...]");
143 
144 //
145 // Locking:
146 //   We handle nvswitch driver locking in the OS layer. The nvswitch lib
147 //   layer does not have its own locking. It relies on the OS layer for
148 //   atomicity.
149 //
150 //   All locking is done with sleep locks. We use threaded MSI interrupts to
151 //   facilitate this.
152 //
153 //   When handling a request from a user context we use the interruptible
154 //   version to enable a quick ^C return if there is lock contention.
155 //
156 //   nvswitch.driver_mutex is used to protect driver's global state, "struct
157 //   NVSWITCH". The driver_mutex is taken during .probe, .remove, .open,
158 //   .close, and nvswitch-ctl .ioctl operations.
159 //
160 //   nvswitch_dev.device_mutex is used to protect per-device state, "struct
161 //   NVSWITCH_DEV", once a device is opened. The device_mutex is taken during
162 //   .ioctl, .poll and other background tasks.
163 //
164 //   The kernel guarantees that .close won't happen while .ioctl and .poll
165 //   are going on and without successful .open one can't execute any file ops.
166 //   This behavior guarantees correctness of the locking model.
167 //
168 //   If .close is invoked and holding the lock which is also used by threaded
169 //   tasks such as interrupt, driver will deadlock while trying to stop such
170 //   tasks. For example, when threaded interrupts are enabled, free_irq() calls
171 //   kthread_stop() to flush pending interrupt tasks. The locking model
172 //   makes sure that such deadlock cases don't happen.
173 //
174 // Lock ordering:
175 //   nvswitch.driver_mutex
176 //   nvswitch_dev.device_mutex
177 //
178 // Note:
179 //   Due to bug 2856314, nvswitch_dev.device_mutex is taken when calling
180 //   nvswitch_post_init_device() in nvswitch_probe().
181 //
182 
// Per-chip driver state is defined in linux_nvswitch.h

// Global driver state, protected by driver_mutex (see locking notes above).
typedef struct
{
    NvBool initialized;             // Set once module init has completed
    struct cdev cdev;               // Char device for per-switch nodes
    struct cdev cdev_ctl;           // Char device for the nvswitchctl node
    dev_t devno;                    // Base device number for the region
    atomic_t count;                 // Number of registered switch devices
    struct mutex driver_mutex;      // Guards this struct and the device list
    struct list_head devices;       // List of NVSWITCH_DEV, linked by list_node
} NVSWITCH;

static NVSWITCH nvswitch = {0};
198 
// NvSwitch event: per-open-file wait queue plus a pending flag that poll()
// consumes (see nvswitch_device_poll).
typedef struct nvswitch_event_t
{
    wait_queue_head_t wait_q_event;   // Waiters blocked in poll()
    NvBool            event_pending;  // Set by event delivery, cleared by poll()
} nvswitch_event_t;

// State attached to each open file descriptor of a switch device node.
typedef struct nvswitch_file_private
{
    NVSWITCH_DEV     *nvswitch_dev;   // Device this fd was opened against
    nvswitch_event_t file_event;      // Event state for poll() support
    struct
    {
        /* A duped file descriptor for fabric_mgmt capability */
        int fabric_mgmt;              // -1 when no capability is held
    } capability_fds;
} nvswitch_file_private_t;

#define NVSWITCH_SET_FILE_PRIVATE(filp, data) ((filp)->private_data = (data))
#define NVSWITCH_GET_FILE_PRIVATE(filp) ((nvswitch_file_private_t *)(filp)->private_data)
219 
static int nvswitch_device_open(struct inode *inode, struct file *file);
static int nvswitch_device_release(struct inode *inode, struct file *file);
static unsigned int nvswitch_device_poll(struct file *file, poll_table *wait);
static int nvswitch_device_ioctl(struct inode *inode,
                                 struct file *file,
                                 unsigned int cmd,
                                 unsigned long arg);
static long nvswitch_device_unlocked_ioctl(struct file *file,
                                           unsigned int cmd,
                                           unsigned long arg);

static int nvswitch_ctl_ioctl(struct inode *inode,
                              struct file *file,
                              unsigned int cmd,
                              unsigned long arg);
static long nvswitch_ctl_unlocked_ioctl(struct file *file,
                                        unsigned int cmd,
                                        unsigned long arg);

// File operations for per-switch device nodes (/dev/nvidia-nvswitchN).
// NOTE(review): not static — presumably referenced from another translation
// unit or registered elsewhere in this file; confirm before narrowing linkage.
struct file_operations device_fops =
{
    .owner = THIS_MODULE,
    .unlocked_ioctl = nvswitch_device_unlocked_ioctl,
    .open    = nvswitch_device_open,
    .release = nvswitch_device_release,
    .poll    = nvswitch_device_poll
};

// File operations for the control node (/dev/nvidia-nvswitchctl); ioctl-only.
struct file_operations ctl_fops =
{
    .owner = THIS_MODULE,
    .unlocked_ioctl = nvswitch_ctl_unlocked_ioctl,
};

static int nvswitch_initialize_device_interrupt(NVSWITCH_DEV *nvswitch_dev);
static void nvswitch_shutdown_device_interrupt(NVSWITCH_DEV *nvswitch_dev);
static void nvswitch_load_bar_info(NVSWITCH_DEV *nvswitch_dev);
static void nvswitch_task_dispatch(NVSWITCH_DEV *nvswitch_dev);
258 
259 static NvBool
260 nvswitch_is_device_blacklisted
261 (
262     NVSWITCH_DEV *nvswitch_dev
263 )
264 {
265     NVSWITCH_DEVICE_FABRIC_STATE device_fabric_state = 0;
266     NvlStatus status;
267 
268     status = nvswitch_lib_read_fabric_state(nvswitch_dev->lib_device,
269                                             &device_fabric_state, NULL, NULL);
270 
271     if (status != NVL_SUCCESS)
272     {
273         printk(KERN_INFO "%s: Failed to read fabric state, %x\n", nvswitch_dev->name, status);
274         return NV_FALSE;
275     }
276 
277     return device_fabric_state == NVSWITCH_DEVICE_FABRIC_STATE_BLACKLISTED;
278 }
279 
//
// Stop the per-device background task loop. Order matters: clear the ready
// flag first so the task loop exits, wake any waiter blocked on shutdown,
// then flush/stop the kthread queue.
//
static void
nvswitch_deinit_background_tasks
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    NV_ATOMIC_SET(nvswitch_dev->task_q_ready, 0);

    wake_up(&nvswitch_dev->wait_q_shutdown);

    nv_kthread_q_stop(&nvswitch_dev->task_q);
}
292 
293 static int
294 nvswitch_init_background_tasks
295 (
296     NVSWITCH_DEV *nvswitch_dev
297 )
298 {
299     int rc;
300 
301     rc = nv_kthread_q_init(&nvswitch_dev->task_q, nvswitch_dev->sname);
302     if (rc)
303     {
304         printk(KERN_ERR "%s: Failed to create task queue\n", nvswitch_dev->name);
305         return rc;
306     }
307 
308     NV_ATOMIC_SET(nvswitch_dev->task_q_ready, 1);
309 
310     nv_kthread_q_item_init(&nvswitch_dev->task_item,
311                            (nv_q_func_t) &nvswitch_task_dispatch,
312                            nvswitch_dev);
313 
314     if (!nv_kthread_q_schedule_q_item(&nvswitch_dev->task_q,
315                                       &nvswitch_dev->task_item))
316     {
317         printk(KERN_ERR "%s: Failed to schedule an item\n",nvswitch_dev->name);
318         rc = -ENODEV;
319         goto init_background_task_failed;
320     }
321 
322     return 0;
323 
324 init_background_task_failed:
325     nvswitch_deinit_background_tasks(nvswitch_dev);
326 
327     return rc;
328 }
329 
330 static NVSWITCH_DEV*
331 nvswitch_find_device(int minor)
332 {
333     struct list_head *cur;
334     NVSWITCH_DEV *nvswitch_dev = NULL;
335 
336     list_for_each(cur, &nvswitch.devices)
337     {
338         nvswitch_dev = list_entry(cur, NVSWITCH_DEV, list_node);
339         if (nvswitch_dev->minor == minor)
340         {
341             return nvswitch_dev;
342         }
343     }
344 
345     return NULL;
346 }
347 
348 static int
349 nvswitch_find_minor(void)
350 {
351     struct list_head *cur;
352     NVSWITCH_DEV *nvswitch_dev;
353     int minor;
354     int minor_in_use;
355 
356     for (minor = 0; minor < NVSWITCH_DEVICE_INSTANCE_MAX; minor++)
357     {
358         minor_in_use = 0;
359 
360         list_for_each(cur, &nvswitch.devices)
361         {
362             nvswitch_dev = list_entry(cur, NVSWITCH_DEV, list_node);
363             if (nvswitch_dev->minor == minor)
364             {
365                 minor_in_use = 1;
366                 break;
367             }
368         }
369 
370         if (!minor_in_use)
371         {
372             return minor;
373         }
374     }
375 
376     return NVSWITCH_DEVICE_INSTANCE_MAX;
377 }
378 
//
// Register a Linux I2C adapter for every valid I2C port on the device and
// track each one on nvswitch_dev->i2c_adapter_list for later teardown.
//
// Per-port failures are logged and skipped (best effort); only an inability
// to read the port mask fails the whole call. Returns 0 or -ENODEV.
//
static int
nvswitch_init_i2c_adapters
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    NvlStatus retval;
    NvU32 i, valid_ports_mask;
    struct i2c_adapter *adapter;
    nvswitch_i2c_adapter_entry *adapter_entry;

    // Nothing to do on devices without I2C support.
    if (!nvswitch_lib_is_i2c_supported(nvswitch_dev->lib_device))
    {
        return 0;
    }

    retval = nvswitch_lib_get_valid_ports_mask(nvswitch_dev->lib_device,
                                               &valid_ports_mask);
    if (retval != NVL_SUCCESS)
    {
        printk(KERN_ERR "Failed to get valid I2C ports mask.\n");
        return -ENODEV;
    }

    FOR_EACH_INDEX_IN_MASK(32, i, valid_ports_mask)
    {
        adapter = nvswitch_i2c_add_adapter(nvswitch_dev, i);
        if (adapter == NULL)
        {
            // Skip ports whose adapter could not be created.
            continue;
        }

        adapter_entry = nvswitch_os_malloc(sizeof(*adapter_entry));
        if (adapter_entry == NULL)
        {
            // Can't track the adapter; unregister it rather than leak it.
            printk(KERN_ERR "Failed to create I2C adapter entry.\n");
            nvswitch_i2c_del_adapter(adapter);
            continue;
        }

        adapter_entry->adapter = adapter;

        list_add_tail(&adapter_entry->entry, &nvswitch_dev->i2c_adapter_list);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return 0;
}
427 
//
// Unregister and free every I2C adapter tracked on i2c_adapter_list.
// Uses the _safe iterator because entries are deleted while walking.
//
static void
nvswitch_deinit_i2c_adapters
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    nvswitch_i2c_adapter_entry *curr;
    nvswitch_i2c_adapter_entry *next;

    list_for_each_entry_safe(curr,
                             next,
                             &nvswitch_dev->i2c_adapter_list,
                             entry)
    {
        nvswitch_i2c_del_adapter(curr->adapter);
        list_del(&curr->entry);
        nvswitch_os_free(curr);
    }
}
447 
//
// Register and initialize one switch device with the nvswitch library:
// register with the lib, map BARs, initialize, read identity (UUID, BIOS
// version, physical id), then hook up and enable interrupts.
//
// Returns 0 on success or a negative errno. On failure the goto ladder
// unwinds in reverse order of initialization. A blacklisted device returns
// 0 with interrupts left disabled so the HAL and fabric-state paths still
// work against it.
//
static int
nvswitch_init_device
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    struct pci_dev *pci_dev = nvswitch_dev->pci_dev;
    NvlStatus retval;
    int rc;

    INIT_LIST_HEAD(&nvswitch_dev->i2c_adapter_list);

    retval = nvswitch_lib_register_device(NV_PCI_DOMAIN_NUMBER(pci_dev),
                                          NV_PCI_BUS_NUMBER(pci_dev),
                                          NV_PCI_SLOT_NUMBER(pci_dev),
                                          PCI_FUNC(pci_dev->devfn),
                                          pci_dev->device,
                                          pci_dev,
                                          nvswitch_dev->minor,
                                          &nvswitch_dev->lib_device);
    if (NVL_SUCCESS != retval)
    {
        printk(KERN_ERR "%s: Failed to register device : %d\n",
               nvswitch_dev->name,
               retval);
        return -ENODEV;
    }

    nvswitch_load_bar_info(nvswitch_dev);

    retval = nvswitch_lib_initialize_device(nvswitch_dev->lib_device);
    if (NVL_SUCCESS != retval)
    {
        printk(KERN_ERR "%s: Failed to initialize device : %d\n",
               nvswitch_dev->name,
               retval);
        rc = -ENODEV;
        goto init_device_failed;
    }

    nvswitch_lib_get_uuid(nvswitch_dev->lib_device, &nvswitch_dev->uuid);

    // BIOS version and physical id are informational; fall back to
    // sentinel values rather than failing initialization.
    if (nvswitch_lib_get_bios_version(nvswitch_dev->lib_device,
                                      &nvswitch_dev->bios_ver) != NVL_SUCCESS)
    {
        nvswitch_dev->bios_ver = 0;
    }

    if (nvswitch_lib_get_physid(nvswitch_dev->lib_device,
                                &nvswitch_dev->phys_id) != NVL_SUCCESS)
    {
        nvswitch_dev->phys_id = NVSWITCH_INVALID_PHYS_ID;
    }

    rc = nvswitch_initialize_device_interrupt(nvswitch_dev);
    if (rc)
    {
        printk(KERN_ERR "%s: Failed to initialize interrupt : %d\n",
               nvswitch_dev->name,
               rc);
        goto init_intr_failed;
    }

    if (nvswitch_is_device_blacklisted(nvswitch_dev))
    {
        printk(KERN_ERR "%s: Blacklisted nvswitch device\n", nvswitch_dev->name);
        // Keep device registered for HAL access and Fabric State updates
        return 0;
    }

    nvswitch_lib_enable_interrupts(nvswitch_dev->lib_device);

    return 0;

init_intr_failed:
    nvswitch_lib_shutdown_device(nvswitch_dev->lib_device);

init_device_failed:
    nvswitch_lib_unregister_device(nvswitch_dev->lib_device);
    nvswitch_dev->lib_device = NULL;

    return rc;
}
531 
532 static int
533 nvswitch_post_init_device
534 (
535     NVSWITCH_DEV *nvswitch_dev
536 )
537 {
538     int rc;
539     NvlStatus retval;
540 
541     rc = nvswitch_init_i2c_adapters(nvswitch_dev);
542     if (rc < 0)
543     {
544        return rc;
545     }
546 
547     retval = nvswitch_lib_post_init_device(nvswitch_dev->lib_device);
548     if (retval != NVL_SUCCESS)
549     {
550         return -ENODEV;
551     }
552 
553     return 0;
554 }
555 
//
// Post-initialization path for a blacklisted device: delegate to the
// library's reduced blacklist init instead of the full post-init.
//
static void
nvswitch_post_init_blacklisted
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    nvswitch_lib_post_init_blacklist_device(nvswitch_dev->lib_device);
}
564 
//
// Tear down a device in reverse order of nvswitch_init_device():
// I2C adapters, interrupt disable, interrupt teardown, library shutdown,
// then unregister. The order is load-bearing; do not reorder.
//
static void
nvswitch_deinit_device
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    nvswitch_deinit_i2c_adapters(nvswitch_dev);

    nvswitch_lib_disable_interrupts(nvswitch_dev->lib_device);

    nvswitch_shutdown_device_interrupt(nvswitch_dev);

    nvswitch_lib_shutdown_device(nvswitch_dev->lib_device);

    nvswitch_lib_unregister_device(nvswitch_dev->lib_device);
    nvswitch_dev->lib_device = NULL;
}
582 
//
// Initialize the per-fd event state used by poll(): an empty wait queue
// and no pending event.
//
static void
nvswitch_init_file_event
(
    nvswitch_file_private_t *private
)
{
    init_waitqueue_head(&private->file_event.wait_q_event);
    private->file_event.event_pending = NV_FALSE;
}
592 
593 //
594 // Basic device open to support IOCTL interface
595 //
//
// Basic device open to support IOCTL interface
//
// Looks up the device by minor number under driver_mutex, rejects
// blacklisted devices, allocates per-fd private state, and takes a device
// reference that nvswitch_device_release() drops.
//
static int
nvswitch_device_open
(
    struct inode *inode,
    struct file *file
)
{
    NVSWITCH_DEV *nvswitch_dev;
    int rc = 0;
    nvswitch_file_private_t *private = NULL;

    //
    // Get the major/minor device
    // We might want this for routing requests to multiple nvswitches
    //
    printk(KERN_INFO "nvidia-nvswitch%d: open (major=%d)\n",
           MINOR(inode->i_rdev),
           MAJOR(inode->i_rdev));

    // Interruptible lock so a ^C during contention returns promptly.
    rc = mutex_lock_interruptible(&nvswitch.driver_mutex);
    if (rc)
    {
        return rc;
    }

    nvswitch_dev = nvswitch_find_device(MINOR(inode->i_rdev));
    if (!nvswitch_dev)
    {
        rc = -ENODEV;
        goto done;
    }

    if (nvswitch_is_device_blacklisted(nvswitch_dev))
    {
        rc = -ENODEV;
        goto done;
    }

    private = nvswitch_os_malloc(sizeof(*private));
    if (private == NULL)
    {
        rc = -ENOMEM;
        goto done;
    }

    private->nvswitch_dev = nvswitch_dev;

    nvswitch_init_file_event(private);

    // -1 marks "no fabric_mgmt capability held" (fd 0 is a valid fd).
    private->capability_fds.fabric_mgmt = -1;
    NVSWITCH_SET_FILE_PRIVATE(file, private);

    NV_ATOMIC_INC(nvswitch_dev->ref_count);

done:
    mutex_unlock(&nvswitch.driver_mutex);

    return rc;
}
655 
656 //
657 // Basic device release to support IOCTL interface
658 //
659 static int
660 nvswitch_device_release
661 (
662     struct inode *inode,
663     struct file *file
664 )
665 {
666     nvswitch_file_private_t *private = NVSWITCH_GET_FILE_PRIVATE(file);
667     NVSWITCH_DEV *nvswitch_dev = private->nvswitch_dev;
668 
669     printk(KERN_INFO "nvidia-nvswitch%d: release (major=%d)\n",
670            MINOR(inode->i_rdev),
671            MAJOR(inode->i_rdev));
672 
673     mutex_lock(&nvswitch.driver_mutex);
674 
675     nvswitch_lib_remove_client_events(nvswitch_dev->lib_device, (void *)private);
676 
677     //
678     // If there are no outstanding references and the device is marked
679     // unusable, free it.
680     //
681     if (NV_ATOMIC_DEC_AND_TEST(nvswitch_dev->ref_count) &&
682         nvswitch_dev->unusable)
683     {
684         kfree(nvswitch_dev);
685     }
686 
687     if (private->capability_fds.fabric_mgmt > 0)
688     {
689         nvlink_cap_release(private->capability_fds.fabric_mgmt);
690         private->capability_fds.fabric_mgmt = -1;
691     }
692 
693     nvswitch_os_free(file->private_data);
694     NVSWITCH_SET_FILE_PRIVATE(file, NULL);
695 
696     mutex_unlock(&nvswitch.driver_mutex);
697 
698     return 0;
699 }
700 
//
// poll() handler: reports POLLPRI|POLLIN when a registered client event is
// pending, POLLHUP for a stale fd, POLLERR when no event is registered.
//
// NOTE(review): on mutex_lock_interruptible failure this returns a negative
// errno from a handler whose contract is an unsigned event mask — likely
// works in practice but worth confirming against the VFS poll contract.
//
static unsigned int
nvswitch_device_poll
(
    struct file *file,
    poll_table *wait
)
{
    nvswitch_file_private_t *private = NVSWITCH_GET_FILE_PRIVATE(file);
    NVSWITCH_DEV *nvswitch_dev = private->nvswitch_dev;
    int rc = 0;
    NvlStatus status;
    struct NVSWITCH_CLIENT_EVENT *client_event;

    rc = mutex_lock_interruptible(&nvswitch_dev->device_mutex);
    if (rc)
    {
        return rc;
    }

    if (nvswitch_dev->unusable)
    {
        printk(KERN_INFO "%s: a stale fd detected\n", nvswitch_dev->name);
        rc = POLLHUP;
        goto done;
    }

    status = nvswitch_lib_get_client_event(nvswitch_dev->lib_device,
                                           (void *) private, &client_event);
    if (status != NVL_SUCCESS)
    {
        printk(KERN_INFO "%s: no events registered for fd\n", nvswitch_dev->name);
        rc = POLLERR;
        goto done;
    }

    // Register on the wait queue, then report (and consume) a pending event.
    poll_wait(file, &private->file_event.wait_q_event, wait);

    if (private->file_event.event_pending)
    {
        rc = POLLPRI | POLLIN;
        private->file_event.event_pending = NV_FALSE;
    }

done:
    mutex_unlock(&nvswitch_dev->device_mutex);

    return rc;
}
749 
// Kernel-side staging buffer for one ioctl call; see
// nvswitch_ioctl_state_start/_sync/_cleanup.
typedef struct {
    void *kernel_params;                // Kernel copy of ioctl parameters
    unsigned long kernel_params_size;   // Size of ioctl params according to user
} IOCTL_STATE;

//
// Clean up any dynamically allocated memory for ioctl state
// (kfree(NULL) is a no-op, so this is safe to call on a zeroed state).
//
static void
nvswitch_ioctl_state_cleanup
(
    IOCTL_STATE *state
)
{
    kfree(state->kernel_params);
    state->kernel_params = NULL;
}
767 
768 //
769 // Initialize buffer state for ioctl.
770 //
771 // This handles allocating memory and copying user data into kernel space.  The
772 // ioctl params structure only is supported. Nested data pointers are not handled.
773 //
774 // State is maintained in the IOCTL_STATE struct for use by the ioctl, _sync and
775 // _cleanup calls.
776 //
777 static int
778 nvswitch_ioctl_state_start(IOCTL_STATE *state, int cmd, unsigned long user_arg)
779 {
780     int rc;
781 
782     state->kernel_params = NULL;
783     state->kernel_params_size = _IOC_SIZE(cmd);
784 
785     if (0 == state->kernel_params_size)
786     {
787         return 0;
788     }
789 
790     state->kernel_params = kzalloc(state->kernel_params_size, GFP_KERNEL);
791     if (NULL == state->kernel_params)
792     {
793         rc = -ENOMEM;
794         goto nvswitch_ioctl_state_start_fail;
795     }
796 
797     // Copy params to kernel buffers.  Simple _IOR() ioctls can skip this step.
798     if (_IOC_DIR(cmd) & _IOC_WRITE)
799     {
800         rc = copy_from_user(state->kernel_params,
801                             (const void *)user_arg,
802                             state->kernel_params_size);
803         if (rc)
804         {
805             rc = -EFAULT;
806             goto nvswitch_ioctl_state_start_fail;
807         }
808     }
809 
810     return 0;
811 
812 nvswitch_ioctl_state_start_fail:
813     nvswitch_ioctl_state_cleanup(state);
814     return rc;
815 }
816 
817 //
818 // Synchronize any ioctl output in the kernel buffers to the user mode buffers.
819 //
820 static int
821 nvswitch_ioctl_state_sync
822 (
823     IOCTL_STATE *state,
824     int cmd,
825     unsigned long user_arg
826 )
827 {
828     int rc;
829 
830     // Nothing to do if no buffer or write-only ioctl
831     if ((0 == state->kernel_params_size) || (0 == (_IOC_DIR(cmd) & _IOC_READ)))
832     {
833         return 0;
834     }
835 
836     // Copy params structure back to user mode
837     rc = copy_to_user((void *)user_arg,
838                       state->kernel_params,
839                       state->kernel_params_size);
840     if (rc)
841     {
842         rc = -EFAULT;
843     }
844 
845     return rc;
846 }
847 
//
// Per-device ioctl handler. Validates the command type, serializes on
// device_mutex, rejects stale/blacklisted devices, then stages user params
// into kernel memory, dispatches to the nvswitch library, and copies
// results back on success.
//
static int
nvswitch_device_ioctl
(
    struct inode *inode,
    struct file *file,
    unsigned int cmd,
    unsigned long arg
)
{
    nvswitch_file_private_t *private = NVSWITCH_GET_FILE_PRIVATE(file);
    NVSWITCH_DEV *nvswitch_dev = private->nvswitch_dev;
    IOCTL_STATE state = {0};
    NvlStatus retval;
    int rc = 0;

    // Only commands in the device ioctl namespace are accepted here.
    if (_IOC_TYPE(cmd) != NVSWITCH_DEV_IO_TYPE)
    {
        return -EINVAL;
    }

    rc = mutex_lock_interruptible(&nvswitch_dev->device_mutex);
    if (rc)
    {
        return rc;
    }

    if (nvswitch_dev->unusable)
    {
        printk(KERN_INFO "%s: a stale fd detected\n", nvswitch_dev->name);
        rc = -ENODEV;
        goto nvswitch_device_ioctl_exit;
    }

    if (nvswitch_is_device_blacklisted(nvswitch_dev))
    {
        printk(KERN_INFO "%s: ioctl attempted on blacklisted device\n", nvswitch_dev->name);
        rc = -ENODEV;
        goto nvswitch_device_ioctl_exit;
    }

    rc = nvswitch_ioctl_state_start(&state, cmd, arg);
    if (rc)
    {
        goto nvswitch_device_ioctl_exit;
    }

    retval = nvswitch_lib_ctrl(nvswitch_dev->lib_device,
                               _IOC_NR(cmd),
                               state.kernel_params,
                               state.kernel_params_size,
                               file->private_data);
    rc = nvswitch_map_status(retval);
    // Copy results back to user space only if the control call succeeded.
    if (!rc)
    {
        rc = nvswitch_ioctl_state_sync(&state, cmd, arg);
    }

    nvswitch_ioctl_state_cleanup(&state);

nvswitch_device_ioctl_exit:
    mutex_unlock(&nvswitch_dev->device_mutex);

    return rc;
}
912 
//
// unlocked_ioctl entry point: recover the inode from the file and forward
// to the legacy-style handler (locking is done inside nvswitch_device_ioctl).
//
static long
nvswitch_device_unlocked_ioctl
(
    struct file *file,
    unsigned int cmd,
    unsigned long arg
)
{
    return nvswitch_device_ioctl(NV_FILE_INODE(file), file, cmd, arg);
}
923 
//
// CTRL_NVSWITCH_CHECK_VERSION: compare the caller's version string with the
// kernel driver's. Sets p->is_compatible; a plain mismatch is reported via
// the flag (and a log line), not as an error. Returns 0 or a mapped errno
// for unexpected library failures.
//
static int
nvswitch_ctl_check_version(NVSWITCH_CHECK_VERSION_PARAMS *p)
{
    NvlStatus retval;

    p->is_compatible = 0;
    // Defensively terminate the user-supplied string before comparing.
    p->user.version[NVSWITCH_VERSION_STRING_LENGTH - 1] = '\0';

    retval = nvswitch_lib_check_api_version(p->user.version, p->kernel.version,
                                            NVSWITCH_VERSION_STRING_LENGTH);
    if (retval == NVL_SUCCESS)
    {
        p->is_compatible = 1;
    }
    else if (retval == -NVL_ERR_NOT_SUPPORTED)
    {
        // Incompatible versions: not an ioctl error, just log it.
        printk(KERN_ERR "nvidia-nvswitch: Version mismatch, "
               "kernel version %s user version %s\n",
               p->kernel.version, p->user.version);
    }
    else
    {
        // An unexpected failure
        return nvswitch_map_status(retval);
    }

    return 0;
}
952 
953 static void
954 nvswitch_ctl_get_devices(NVSWITCH_GET_DEVICES_PARAMS *p)
955 {
956     int index = 0;
957     NVSWITCH_DEV *nvswitch_dev;
958     struct list_head *cur;
959 
960     BUILD_BUG_ON(NVSWITCH_DEVICE_INSTANCE_MAX != NVSWITCH_MAX_DEVICES);
961 
962     list_for_each(cur, &nvswitch.devices)
963     {
964         nvswitch_dev = list_entry(cur, NVSWITCH_DEV, list_node);
965         p->info[index].deviceInstance = nvswitch_dev->minor;
966         p->info[index].pciDomain = NV_PCI_DOMAIN_NUMBER(nvswitch_dev->pci_dev);
967         p->info[index].pciBus = NV_PCI_BUS_NUMBER(nvswitch_dev->pci_dev);
968         p->info[index].pciDevice = NV_PCI_SLOT_NUMBER(nvswitch_dev->pci_dev);
969         p->info[index].pciFunction = PCI_FUNC(nvswitch_dev->pci_dev->devfn);
970         index++;
971     }
972 
973     p->deviceCount = index;
974 }
975 
//
// CTRL_NVSWITCH_GET_DEVICES_V2: like nvswitch_ctl_get_devices() but also
// reports UUID, physical id, and fabric/driver state per device. Caller
// holds driver_mutex; device_mutex is taken per device for the fabric-state
// read (see the lock-ordering comment at the top of the file).
//
static void
nvswitch_ctl_get_devices_v2(NVSWITCH_GET_DEVICES_V2_PARAMS *p)
{
    int index = 0;
    NVSWITCH_DEV *nvswitch_dev;
    struct list_head *cur;

    // p->info[] must be able to hold every possible device instance.
    BUILD_BUG_ON(NVSWITCH_DEVICE_INSTANCE_MAX != NVSWITCH_MAX_DEVICES);

    list_for_each(cur, &nvswitch.devices)
    {
        nvswitch_dev = list_entry(cur, NVSWITCH_DEV, list_node);
        p->info[index].deviceInstance = nvswitch_dev->minor;
        memcpy(&p->info[index].uuid, &nvswitch_dev->uuid, sizeof(nvswitch_dev->uuid));
        p->info[index].pciDomain = NV_PCI_DOMAIN_NUMBER(nvswitch_dev->pci_dev);
        p->info[index].pciBus = NV_PCI_BUS_NUMBER(nvswitch_dev->pci_dev);
        p->info[index].pciDevice = NV_PCI_SLOT_NUMBER(nvswitch_dev->pci_dev);
        p->info[index].pciFunction = PCI_FUNC(nvswitch_dev->pci_dev->devfn);
        p->info[index].physId = nvswitch_dev->phys_id;

        // lib_device is NULL for devices that failed/never completed init;
        // skip the fabric-state read in that case (fields stay zeroed).
        if (nvswitch_dev->lib_device != NULL)
        {
            mutex_lock(&nvswitch_dev->device_mutex);
            (void)nvswitch_lib_read_fabric_state(nvswitch_dev->lib_device,
                                                 &p->info[index].deviceState,
                                                 &p->info[index].deviceReason,
                                                 &p->info[index].driverState);
            mutex_unlock(&nvswitch_dev->device_mutex);
        }
        index++;
    }

    p->deviceCount = index;
}
1010 
// Reject a control ioctl whose user-declared size does not match the
// expected params struct exactly.
#define NVSWITCH_CTL_CHECK_PARAMS(type, size) (sizeof(type) == size ? 0 : -EINVAL)

//
// Dispatch one control-node command to its handler after validating the
// params size. Caller holds driver_mutex and has already staged `params`
// in kernel memory. Returns 0 or a negative errno.
//
static int
nvswitch_ctl_cmd_dispatch
(
    unsigned int cmd,
    void *params,
    unsigned int param_size
)
{
    int rc;

    switch(cmd)
    {
        case CTRL_NVSWITCH_CHECK_VERSION:
            rc = NVSWITCH_CTL_CHECK_PARAMS(NVSWITCH_CHECK_VERSION_PARAMS,
                                           param_size);
            if (!rc)
            {
                rc = nvswitch_ctl_check_version(params);
            }
            break;
        case CTRL_NVSWITCH_GET_DEVICES:
            rc = NVSWITCH_CTL_CHECK_PARAMS(NVSWITCH_GET_DEVICES_PARAMS,
                                           param_size);
            if (!rc)
            {
                nvswitch_ctl_get_devices(params);
            }
            break;
        case CTRL_NVSWITCH_GET_DEVICES_V2:
            rc = NVSWITCH_CTL_CHECK_PARAMS(NVSWITCH_GET_DEVICES_V2_PARAMS,
                                           param_size);
            if (!rc)
            {
                nvswitch_ctl_get_devices_v2(params);
            }
            break;

        default:
            rc = -EINVAL;
            break;
    }

    return rc;
}
1057 
//
// ioctl entry point for the control node (/dev/nvidia-nvswitchctl).
//
// Validates the ioctl type, serializes against the driver-wide mutex,
// copies the user payload in, dispatches the command, and copies the
// result back out on success.
//
static int
nvswitch_ctl_ioctl
(
    struct inode *inode,
    struct file *file,
    unsigned int cmd,
    unsigned long arg
)
{
    int rc = 0;
    IOCTL_STATE state = {0};

    // Reject ioctls that do not belong to the nvswitch control namespace.
    if (_IOC_TYPE(cmd) != NVSWITCH_CTL_IO_TYPE)
    {
        return -EINVAL;
    }

    // Interruptible so a signal can abort a blocked ioctl cleanly.
    rc = mutex_lock_interruptible(&nvswitch.driver_mutex);
    if (rc)
    {
        return rc;
    }

    // Allocates a kernel copy of the user params (if any).
    rc = nvswitch_ioctl_state_start(&state, cmd, arg);
    if (rc)
    {
        goto nvswitch_ctl_ioctl_exit;
    }

    rc = nvswitch_ctl_cmd_dispatch(_IOC_NR(cmd),
                                   state.kernel_params,
                                   state.kernel_params_size);
    // Only copy results back to user space if the command succeeded.
    if (!rc)
    {
        rc = nvswitch_ioctl_state_sync(&state, cmd, arg);
    }

    nvswitch_ioctl_state_cleanup(&state);

nvswitch_ctl_ioctl_exit:
    mutex_unlock(&nvswitch.driver_mutex);

    return rc;
}
1102 
//
// unlocked_ioctl wrapper for the control node: resolves the inode from the
// file pointer and forwards to the common ioctl implementation.
//
static long
nvswitch_ctl_unlocked_ioctl
(
    struct file *file,
    unsigned int cmd,
    unsigned long arg
)
{
    struct inode *inode = NV_FILE_INODE(file);

    return nvswitch_ctl_ioctl(inode, file, cmd, arg);
}
1113 
//
// Top-half interrupt handler.
//
// For MSI, the interrupt line is exclusive to this device, so the threaded
// handler is woken unconditionally. For legacy pin interrupts (which may be
// shared), the hardware is queried first to decide whether this device
// actually raised the interrupt.
//
static irqreturn_t
nvswitch_isr_pending
(
    int   irq,
    void *arg
)
{

    NVSWITCH_DEV *nvswitch_dev = (NVSWITCH_DEV *)arg;
    NvlStatus retval;

    //
    // On silicon MSI must be enabled.  Since interrupts will not be shared
    // with MSI, we can simply signal the thread.
    //
    if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_MSI)
    {
        return IRQ_WAKE_THREAD;
    }

    if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_PIN)
    {
        //
        // We do not take mutex in the interrupt context. The interrupt
        // check is safe to driver state.
        //
        retval = nvswitch_lib_check_interrupts(nvswitch_dev->lib_device);

        // Wake interrupt thread if there is an interrupt pending
        if (-NVL_MORE_PROCESSING_REQUIRED == retval)
        {
            // Mask interrupts until the thread re-enables them, so the
            // (level-triggered) pin does not fire again immediately.
            nvswitch_lib_disable_interrupts(nvswitch_dev->lib_device);
            return IRQ_WAKE_THREAD;
        }

        // PCI errors are handled else where.
        if (-NVL_PCI_ERROR == retval)
        {
            return IRQ_NONE;
        }

        if (NVL_SUCCESS != retval)
        {
            pr_err("nvidia-nvswitch: unrecoverable error in ISR\n");
            NVSWITCH_OS_ASSERT(0);
        }
        // Not our interrupt; let other handlers on the shared line run.
        return IRQ_NONE;
    }

    pr_err("nvidia-nvswitch: unsupported IRQ mechanism in ISR\n");
    NVSWITCH_OS_ASSERT(0);

    return IRQ_NONE;
}
1168 
//
// Bottom-half (threaded) interrupt handler: services pending interrupts
// under the device mutex, wakes any poll()ers waiting for errors, and for
// pin-based interrupts re-enables the lines masked by the top half.
//
static irqreturn_t
nvswitch_isr_thread
(
    int   irq,
    void *arg
)
{
    NVSWITCH_DEV *nvswitch_dev = (NVSWITCH_DEV *)arg;
    NvlStatus retval;

    mutex_lock(&nvswitch_dev->device_mutex);

    retval = nvswitch_lib_service_interrupts(nvswitch_dev->lib_device);

    // Wake readers polling for new error reports.
    wake_up(&nvswitch_dev->wait_q_errors);

    // Top half masked interrupts for the PIN mechanism; unmask them now.
    if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_PIN)
    {
        nvswitch_lib_enable_interrupts(nvswitch_dev->lib_device);
    }

    mutex_unlock(&nvswitch_dev->device_mutex);

    // On service failure interrupts stay disabled (see message below).
    if (WARN_ON(retval != NVL_SUCCESS))
    {
        printk(KERN_ERR "%s: Interrupts disabled to avoid a storm\n",
               nvswitch_dev->name);
    }

    return IRQ_HANDLED;
}
1200 
//
// Background task loop body: runs the library's deferred-task dispatcher,
// sleeps until the next requested deadline (or shutdown), then re-queues
// itself. Exits permanently once task_q_ready is cleared at shutdown.
//
static void
nvswitch_task_dispatch
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    NvU64 nsec;
    NvU64 timeout;
    NvS64 rc;

    // Device is shutting down; do not reschedule.
    if (NV_ATOMIC_READ(nvswitch_dev->task_q_ready) == 0)
    {
        return;
    }

    mutex_lock(&nvswitch_dev->device_mutex);

    // Returns the delay (ns) before the dispatcher wants to run again.
    nsec = nvswitch_lib_deferred_task_dispatcher(nvswitch_dev->lib_device);

    mutex_unlock(&nvswitch_dev->device_mutex);

    timeout = usecs_to_jiffies(nsec / NSEC_PER_USEC);

    rc = wait_event_interruptible_timeout(nvswitch_dev->wait_q_shutdown,
                              (NV_ATOMIC_READ(nvswitch_dev->task_q_ready) == 0),
                              timeout);

    //
    // These background tasks should rarely, if ever, get interrupted. We use
    // the "interruptible" variant of wait_event in order to avoid contributing
    // to the system load average (/proc/loadavg), and to avoid softlockup
    // warnings that can occur if a kernel thread lingers too long in an
    // uninterruptible state. If this does get interrupted, we'd like to debug
    // and find out why, so WARN in that case.
    //
    WARN_ON(rc < 0);

    //
    // Schedule a work item only if the above actually timed out or got
    // interrupted, without the condition becoming true.
    //
    if (rc <= 0)
    {
        if (!nv_kthread_q_schedule_q_item(&nvswitch_dev->task_q,
                                          &nvswitch_dev->task_item))
        {
            printk(KERN_ERR "%s: Failed to re-schedule background task\n",
                   nvswitch_dev->name);
        }
    }
}
1252 
//
// PCI probe callback: brings up one NVSwitch device.
//
// Allocates per-device state, maps BAR0, initializes the device library,
// runs post-init (skipped for blacklisted devices), starts background
// tasks, and registers the device in the driver-wide list. On any failure
// the goto chain unwinds exactly the steps completed so far, in reverse.
//
// Returns 0 on success or a negative errno.
//
static int
nvswitch_probe
(
    struct pci_dev *pci_dev,
    const struct pci_device_id *id_table
)
{
    NVSWITCH_DEV *nvswitch_dev = NULL;
    int rc = 0;
    int minor;

    if (!nvswitch_lib_validate_device_id(pci_dev->device))
    {
        return -EINVAL;
    }

    printk(KERN_INFO "nvidia-nvswitch: Probing device %04x:%02x:%02x.%x, "
           "Vendor Id = 0x%x, Device Id = 0x%x, Class = 0x%x \n",
           NV_PCI_DOMAIN_NUMBER(pci_dev),
           NV_PCI_BUS_NUMBER(pci_dev),
           NV_PCI_SLOT_NUMBER(pci_dev),
           PCI_FUNC(pci_dev->devfn),
           pci_dev->vendor,
           pci_dev->device,
           pci_dev->class);

    mutex_lock(&nvswitch.driver_mutex);

    // Claim the first free minor number; doubles as the instance limit check.
    minor = nvswitch_find_minor();
    if (minor >= NVSWITCH_DEVICE_INSTANCE_MAX)
    {
        rc = -ERANGE;
        goto find_minor_failed;
    }

    nvswitch_dev = kzalloc(sizeof(*nvswitch_dev), GFP_KERNEL);
    if (NULL == nvswitch_dev)
    {
        rc = -ENOMEM;
        goto kzalloc_failed;
    }

    mutex_init(&nvswitch_dev->device_mutex);
    init_waitqueue_head(&nvswitch_dev->wait_q_errors);
    init_waitqueue_head(&nvswitch_dev->wait_q_shutdown);

    snprintf(nvswitch_dev->name, sizeof(nvswitch_dev->name),
        NVSWITCH_DRIVER_NAME "%d", minor);

    snprintf(nvswitch_dev->sname, sizeof(nvswitch_dev->sname),
        NVSWITCH_SHORT_NAME "%d", minor);

    rc = pci_enable_device(pci_dev);
    if (rc)
    {
        printk(KERN_ERR "%s: Failed to enable PCI device : %d\n",
               nvswitch_dev->name,
               rc);
        goto pci_enable_device_failed;
    }

    // Enable bus mastering for DMA.
    pci_set_master(pci_dev);

    rc = pci_request_regions(pci_dev, nvswitch_dev->name);
    if (rc)
    {
        printk(KERN_ERR "%s: Failed to request memory regions : %d\n",
               nvswitch_dev->name,
               rc);
        goto pci_request_regions_failed;
    }

    // Map all of BAR0 (length 0 == whole BAR).
    nvswitch_dev->bar0 = pci_iomap(pci_dev, 0, 0);
    if (!nvswitch_dev->bar0)
    {
        rc = -ENOMEM;
        printk(KERN_ERR "%s: Failed to map BAR0 region : %d\n",
               nvswitch_dev->name,
               rc);
        goto pci_iomap_failed;
    }

    nvswitch_dev->pci_dev = pci_dev;
    nvswitch_dev->minor = minor;

    rc = nvswitch_init_device(nvswitch_dev);
    if (rc)
    {
        printk(KERN_ERR "%s: Failed to initialize device : %d\n",
               nvswitch_dev->name,
               rc);
        goto init_device_failed;
    }

    // Blacklisted devices skip post-init but still get background tasks.
    if (nvswitch_is_device_blacklisted(nvswitch_dev))
    {
        nvswitch_post_init_blacklisted(nvswitch_dev);
        goto blacklisted;
    }

    //
    // device_mutex held here because post_init entries may call soeService_HAL()
    // with IRQs on. see bug 2856314 for more info
    //
    mutex_lock(&nvswitch_dev->device_mutex);
    rc = nvswitch_post_init_device(nvswitch_dev);
    mutex_unlock(&nvswitch_dev->device_mutex);
    if (rc)
    {
        printk(KERN_ERR "%s:Failed during device post init : %d\n",
               nvswitch_dev->name, rc);
        goto post_init_device_failed;
    }

blacklisted:
    rc = nvswitch_init_background_tasks(nvswitch_dev);
    if (rc)
    {
        printk(KERN_ERR "%s: Failed to initialize background tasks : %d\n",
               nvswitch_dev->name,
               rc);
        goto init_background_task_failed;
    }

    pci_set_drvdata(pci_dev, nvswitch_dev);

    nvswitch_procfs_device_add(nvswitch_dev);

    // Publish the device; visible to the rest of the driver from here on.
    list_add_tail(&nvswitch_dev->list_node, &nvswitch.devices);

    NV_ATOMIC_INC(nvswitch.count);

    mutex_unlock(&nvswitch.driver_mutex);

    return 0;

    // Error unwind: each label releases everything acquired before it.
init_background_task_failed:
post_init_device_failed:
    nvswitch_deinit_device(nvswitch_dev);

init_device_failed:
    pci_iounmap(pci_dev, nvswitch_dev->bar0);

pci_iomap_failed:
    pci_release_regions(pci_dev);

pci_request_regions_failed:
#ifdef CONFIG_PCI
    pci_clear_master(pci_dev);
#endif
    pci_disable_device(pci_dev);

pci_enable_device_failed:
    kfree(nvswitch_dev);

kzalloc_failed:
find_minor_failed:
    mutex_unlock(&nvswitch.driver_mutex);

    return rc;
}
1414 
//
// PCI remove callback: tears down one NVSwitch device in the reverse order
// of probe. The device is first marked unusable under its mutex so
// concurrent ioctl/poll paths bail out before resources disappear.
//
void
nvswitch_remove
(
    struct pci_dev *pci_dev
)
{
    NVSWITCH_DEV *nvswitch_dev;

    mutex_lock(&nvswitch.driver_mutex);

    nvswitch_dev = pci_get_drvdata(pci_dev);

    if (nvswitch_dev == NULL)
    {
        goto done;
    }

    printk(KERN_INFO "%s: removing device %04x:%02x:%02x.%x\n",
           nvswitch_dev->name,
           NV_PCI_DOMAIN_NUMBER(pci_dev),
           NV_PCI_BUS_NUMBER(pci_dev),
           NV_PCI_SLOT_NUMBER(pci_dev),
           PCI_FUNC(pci_dev->devfn));

    //
    // Synchronize with device operations such as .ioctls/.poll, and then mark
    // the device unusable.
    //
    mutex_lock(&nvswitch_dev->device_mutex);
    nvswitch_dev->unusable = NV_TRUE;
    mutex_unlock(&nvswitch_dev->device_mutex);

    NV_ATOMIC_DEC(nvswitch.count);

    list_del(&nvswitch_dev->list_node);

    nvswitch_deinit_background_tasks(nvswitch_dev);

    nvswitch_deinit_device(nvswitch_dev);

    pci_set_drvdata(pci_dev, NULL);

    pci_iounmap(pci_dev, nvswitch_dev->bar0);

    pci_release_regions(pci_dev);

#ifdef CONFIG_PCI
    pci_clear_master(pci_dev);
#endif

    pci_disable_device(pci_dev);

    nvswitch_procfs_device_remove(nvswitch_dev);

    // Free nvswitch_dev only if it is not in use.
    // (Open file descriptors hold a reference; the last release frees it.)
    if (NV_ATOMIC_READ(nvswitch_dev->ref_count) == 0)
    {
        kfree(nvswitch_dev);
    }

done:
    mutex_unlock(&nvswitch.driver_mutex);

    return;
}
1480 
//
// Populate the library's nvlink_pci_info with the BAR0 config-space offset,
// bus address, CPU physical address, size, and kernel mapping.
//
static void
nvswitch_load_bar_info
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    struct pci_dev *pci_dev = nvswitch_dev->pci_dev;
    nvlink_pci_info *info;
    NvU32 bar = 0;

    nvswitch_lib_get_device_info(nvswitch_dev->lib_device, &info);

    info->bars[0].offset = NVRM_PCICFG_BAR_OFFSET(0);
    pci_read_config_dword(pci_dev, info->bars[0].offset, &bar);

    // Strip the low flag bits to get the bus address.
    info->bars[0].busAddress = (bar & PCI_BASE_ADDRESS_MEM_MASK);
    if (NV_PCI_RESOURCE_FLAGS(pci_dev, 0) & PCI_BASE_ADDRESS_MEM_TYPE_64)
    {
        // 64-bit BAR: the next config dword holds the upper 32 bits.
        pci_read_config_dword(pci_dev, info->bars[0].offset + 4, &bar);
        info->bars[0].busAddress |= (((NvU64)bar) << 32);
    }

    info->bars[0].baseAddr = NV_PCI_RESOURCE_START(pci_dev, 0);

    info->bars[0].barSize = NV_PCI_RESOURCE_SIZE(pci_dev, 0);

    // Kernel virtual mapping created by pci_iomap() in probe.
    info->bars[0].pBar = nvswitch_dev->bar0;
}
1509 
//
// MSI-X setup stub: always fails so the caller falls back to MSI or pin
// interrupts.
//
static int
_nvswitch_initialize_msix_interrupt
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    // Not supported (bug 3018806)
    return -EINVAL;
}
1519 
1520 static int
1521 _nvswitch_initialize_msi_interrupt
1522 (
1523     NVSWITCH_DEV *nvswitch_dev
1524 )
1525 {
1526 #ifdef CONFIG_PCI_MSI
1527     struct pci_dev *pci_dev = nvswitch_dev->pci_dev;
1528     int rc;
1529 
1530     rc = pci_enable_msi(pci_dev);
1531     if (rc)
1532     {
1533         return rc;
1534     }
1535 
1536     return 0;
1537 #else
1538     return -EINVAL;
1539 #endif
1540 }
1541 
1542 static int
1543 _nvswitch_get_irq_caps(NVSWITCH_DEV *nvswitch_dev, unsigned long *irq_caps)
1544 {
1545     struct pci_dev *pci_dev;
1546 
1547     if (!nvswitch_dev || !irq_caps)
1548         return -EINVAL;
1549 
1550     pci_dev = nvswitch_dev->pci_dev;
1551 
1552     if (pci_find_capability(pci_dev, PCI_CAP_ID_MSIX))
1553         set_bit(NVSWITCH_IRQ_MSIX, irq_caps);
1554 
1555     if (pci_find_capability(pci_dev, PCI_CAP_ID_MSI))
1556         set_bit(NVSWITCH_IRQ_MSI, irq_caps);
1557 
1558     if (nvswitch_lib_use_pin_irq(nvswitch_dev->lib_device))
1559         set_bit(NVSWITCH_IRQ_PIN, irq_caps);
1560 
1561     return 0;
1562 }
1563 
//
// Select and install the device's interrupt mechanism.
//
// Tries MSI-X first, then MSI, then legacy pin interrupts, taking the
// first supported mechanism that initializes successfully, and finally
// requests a threaded IRQ (nvswitch_isr_pending top half,
// nvswitch_isr_thread bottom half).
//
// Returns 0 on success or a negative errno.
//
static int
nvswitch_initialize_device_interrupt
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    struct pci_dev *pci_dev = nvswitch_dev->pci_dev;
    int flags = 0;
    unsigned long irq_caps = 0;
    int rc;

    if (_nvswitch_get_irq_caps(nvswitch_dev, &irq_caps))
    {
        pr_err("%s: failed to retrieve device interrupt capabilities\n",
               nvswitch_dev->name);
        return -EINVAL;
    }

    nvswitch_dev->irq_mechanism = NVSWITCH_IRQ_NONE;

    if (test_bit(NVSWITCH_IRQ_MSIX, &irq_caps))
    {
        rc = _nvswitch_initialize_msix_interrupt(nvswitch_dev);
        if (!rc)
        {
            nvswitch_dev->irq_mechanism = NVSWITCH_IRQ_MSIX;
            pr_info("%s: using MSI-X\n", nvswitch_dev->name);
        }
    }

    // Fall back to MSI if MSI-X was unavailable or failed to initialize.
    if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_NONE
        && test_bit(NVSWITCH_IRQ_MSI, &irq_caps))
    {
        rc = _nvswitch_initialize_msi_interrupt(nvswitch_dev);
        if (!rc)
        {
            nvswitch_dev->irq_mechanism = NVSWITCH_IRQ_MSI;
            pr_info("%s: using MSI\n", nvswitch_dev->name);
        }
    }

    // Last resort: legacy pin interrupts, which may be shared.
    if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_NONE
        && test_bit(NVSWITCH_IRQ_PIN, &irq_caps))
    {
        flags |= IRQF_SHARED;
        nvswitch_dev->irq_mechanism = NVSWITCH_IRQ_PIN;
        pr_info("%s: using PCI pin\n", nvswitch_dev->name);
    }

    if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_NONE)
    {
        pr_err("%s: No supported interrupt mechanism was found. This device supports:\n",
               nvswitch_dev->name);

        if (test_bit(NVSWITCH_IRQ_MSIX, &irq_caps))
            pr_err("%s: MSI-X\n", nvswitch_dev->name);
        if (test_bit(NVSWITCH_IRQ_MSI, &irq_caps))
            pr_err("%s: MSI\n", nvswitch_dev->name);
        if (test_bit(NVSWITCH_IRQ_PIN, &irq_caps))
             pr_err("%s: PCI Pin\n", nvswitch_dev->name);

        return -EINVAL;
    }

    rc = request_threaded_irq(pci_dev->irq,
                              nvswitch_isr_pending,
                              nvswitch_isr_thread,
                              flags, nvswitch_dev->sname,
                              nvswitch_dev);
    if (rc)
    {
#ifdef CONFIG_PCI_MSI
        // Undo MSI enablement if the IRQ could not be requested.
        if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_MSI)
        {
            pci_disable_msi(pci_dev);
        }
#endif
        printk(KERN_ERR "%s: failed to get IRQ\n",
               nvswitch_dev->name);

        return rc;
    }

    return 0;
}
1649 
//
// Release the device IRQ and, when MSI was in use, return the device to
// legacy interrupt mode. Inverse of nvswitch_initialize_device_interrupt().
//
void
nvswitch_shutdown_device_interrupt
(
    NVSWITCH_DEV *nvswitch_dev
)
{
    struct pci_dev *pci_dev = nvswitch_dev->pci_dev;

    // Must precede pci_disable_msi(); the handler may still be running.
    free_irq(pci_dev->irq, nvswitch_dev);
#ifdef CONFIG_PCI_MSI
    if (nvswitch_dev->irq_mechanism == NVSWITCH_IRQ_MSI)
    {
        pci_disable_msi(pci_dev);
    }
#endif
}
1666 
//
// Unregister the control-node character device. Inverse of
// nvswitch_ctl_init().
//
static void
nvswitch_ctl_exit
(
    void
)
{
    cdev_del(&nvswitch.cdev_ctl);
}
1675 
1676 static int
1677 nvswitch_ctl_init
1678 (
1679     int major
1680 )
1681 {
1682     int rc = 0;
1683     dev_t nvswitch_ctl = MKDEV(major, NVSWITCH_CTL_MINOR);
1684 
1685     cdev_init(&nvswitch.cdev_ctl, &ctl_fops);
1686 
1687     nvswitch.cdev_ctl.owner = THIS_MODULE;
1688 
1689     rc = cdev_add(&nvswitch.cdev_ctl, nvswitch_ctl, 1);
1690     if (rc < 0)
1691     {
1692         printk(KERN_ERR "nvidia-nvswitch: Unable to create cdev ctl\n");
1693         return rc;
1694     }
1695 
1696     return 0;
1697 }
1698 
1699 //
1700 // Initialize nvswitch driver SW state.  This is currently called
1701 // from the RM as a backdoor interface, and not by the Linux device
1702 // manager
1703 //
1704 int
1705 nvswitch_init
1706 (
1707     void
1708 )
1709 {
1710     int rc;
1711 
1712     if (nvswitch.initialized)
1713     {
1714         printk(KERN_ERR "nvidia-nvswitch: Interface already initialized\n");
1715         return -EBUSY;
1716     }
1717 
1718     BUILD_BUG_ON(NVSWITCH_DEVICE_INSTANCE_MAX >= NVSWITCH_MINOR_COUNT);
1719 
1720     mutex_init(&nvswitch.driver_mutex);
1721 
1722     INIT_LIST_HEAD(&nvswitch.devices);
1723 
1724     rc = alloc_chrdev_region(&nvswitch.devno,
1725                              0,
1726                              NVSWITCH_MINOR_COUNT,
1727                              NVSWITCH_DRIVER_NAME);
1728     if (rc < 0)
1729     {
1730         printk(KERN_ERR "nvidia-nvswitch: Unable to create cdev region\n");
1731         goto alloc_chrdev_region_fail;
1732     }
1733 
1734     printk(KERN_ERR, "nvidia-nvswitch: Major: %d Minor: %d\n",
1735            MAJOR(nvswitch.devno),
1736            MINOR(nvswitch.devno));
1737 
1738     cdev_init(&nvswitch.cdev, &device_fops);
1739     nvswitch.cdev.owner = THIS_MODULE;
1740     rc = cdev_add(&nvswitch.cdev, nvswitch.devno, NVSWITCH_DEVICE_INSTANCE_MAX);
1741     if (rc < 0)
1742     {
1743         printk(KERN_ERR "nvidia-nvswitch: Unable to create cdev\n");
1744         goto cdev_add_fail;
1745     }
1746 
1747     rc = nvswitch_procfs_init();
1748     if (rc < 0)
1749     {
1750         goto nvswitch_procfs_init_fail;
1751     }
1752 
1753     rc = pci_register_driver(&nvswitch_pci_driver);
1754     if (rc < 0)
1755     {
1756         printk(KERN_ERR "nvidia-nvswitch: Failed to register driver : %d\n", rc);
1757         goto pci_register_driver_fail;
1758     }
1759 
1760     rc = nvswitch_ctl_init(MAJOR(nvswitch.devno));
1761     if (rc < 0)
1762     {
1763         goto nvswitch_ctl_init_fail;
1764     }
1765 
1766     nvswitch.initialized = NV_TRUE;
1767 
1768     return 0;
1769 
1770 nvswitch_ctl_init_fail:
1771     pci_unregister_driver(&nvswitch_pci_driver);
1772 
1773 pci_register_driver_fail:
1774 nvswitch_procfs_init_fail:
1775     cdev_del(&nvswitch.cdev);
1776 
1777 cdev_add_fail:
1778     unregister_chrdev_region(nvswitch.devno, NVSWITCH_MINOR_COUNT);
1779 
1780 alloc_chrdev_region_fail:
1781 
1782     return rc;
1783 }
1784 
1785 //
1786 // Clean up driver state on exit.  Currently called from RM backdoor call,
1787 // and not by the Linux device manager.
1788 //
//
// Tear down driver SW state in reverse order of nvswitch_init().
// No-op if the driver was never initialized.
//
void
nvswitch_exit
(
    void
)
{
    if (NV_FALSE == nvswitch.initialized)
    {
        return;
    }

    nvswitch_ctl_exit();

    // Triggers nvswitch_remove() for every bound device.
    pci_unregister_driver(&nvswitch_pci_driver);

    nvswitch_procfs_exit();

    cdev_del(&nvswitch.cdev);

    unregister_chrdev_region(nvswitch.devno, NVSWITCH_MINOR_COUNT);

    // All devices should have been removed by pci_unregister_driver().
    WARN_ON(!list_empty(&nvswitch.devices));

    nvswitch.initialized = NV_FALSE;
}
1814 
1815 //
1816 // Get current time in seconds.nanoseconds
1817 // In this implementation, the time is monotonic time
1818 //
1819 NvU64
1820 nvswitch_os_get_platform_time
1821 (
1822     void
1823 )
1824 {
1825     struct timespec64 ts;
1826 
1827     ktime_get_raw_ts64(&ts);
1828     return (NvU64) timespec64_to_ns(&ts);
1829 }
1830 
1831 //
1832 // Get current time in seconds.nanoseconds
1833 // In this implementation, the time is from epoch time
1834 // (midnight UTC of January 1, 1970).
1835 // This implementation cannot be used for polling loops
1836 // due to clock skew during system startup (bug 3302382,
1837 // 3297170, 3273847, 3277478, 200693329).
1838 // Instead, nvswitch_os_get_platform_time() is used
1839 // for polling loops
1840 //
1841 NvU64
1842 nvswitch_os_get_platform_time_epoch
1843 (
1844     void
1845 )
1846 {
1847     struct timespec64 ts;
1848 
1849     ktime_get_real_ts64(&ts);
1850     return (NvU64) timespec64_to_ns(&ts);
1851 }
1852 
1853 void
1854 nvswitch_os_print
1855 (
1856     const int  log_level,
1857     const char *fmt,
1858     ...
1859 )
1860 {
1861     va_list arglist;
1862     char   *kern_level;
1863     char    fmt_printk[NVSWITCH_LOG_BUFFER_SIZE];
1864 
1865     switch (log_level)
1866     {
1867         case NVSWITCH_DBG_LEVEL_MMIO:
1868             kern_level = KERN_DEBUG;
1869             break;
1870         case NVSWITCH_DBG_LEVEL_INFO:
1871             kern_level = KERN_INFO;
1872             break;
1873         case NVSWITCH_DBG_LEVEL_SETUP:
1874             kern_level = KERN_INFO;
1875             break;
1876         case NVSWITCH_DBG_LEVEL_WARN:
1877             kern_level = KERN_WARNING;
1878             break;
1879         case NVSWITCH_DBG_LEVEL_ERROR:
1880             kern_level = KERN_ERR;
1881             break;
1882         default:
1883             kern_level = KERN_DEFAULT;
1884             break;
1885     }
1886 
1887     va_start(arglist, fmt);
1888     snprintf(fmt_printk, sizeof(fmt_printk), "%s%s", kern_level, fmt);
1889     vprintk(fmt_printk, arglist);
1890     va_end(arglist);
1891 }
1892 
//
// Platform override hook for the device library. The Linux driver never
// runs under RTL simulation, so the flag is always cleared.
//
void
nvswitch_os_override_platform
(
    void *os_handle,
    NvBool *rtlsim
)
{
    // Never run on RTL
    *rtlsim = NV_FALSE;
}
1903 
//
// Read a binary registry blob. Not implemented on Linux; always reports
// the key as unsupported.
// (Note: "registery" is a misspelling, but the name is part of the OS
// abstraction interface and cannot be changed here.)
//
NvlStatus
nvswitch_os_read_registery_binary
(
    void *os_handle,
    const char *name,
    NvU8 *data,
    NvU32 length
)
{
    return -NVL_ERR_NOT_SUPPORTED;
}
1915 
//
// Return the number of NVSwitch devices currently bound to the driver
// (incremented in probe, decremented in remove).
//
NvU32
nvswitch_os_get_device_count
(
    void
)
{
    return NV_ATOMIC_READ(nvswitch.count);
}
1924 
1925 //
1926 // A helper to convert a string to an unsigned int.
1927 //
1928 // The string should be NULL terminated.
1929 // Only works with base16 values.
1930 //
//
// Convert a NUL-terminated base-16 string (optionally prefixed "0x"/"0X")
// to an unsigned int.
//
// Returns 0 on success with the value in *data, or -EINVAL on NULL
// arguments, invalid characters, or a value that does not fit in 32 bits.
//
// Bug fix: the original accumulated into `val` without bounding it, so a
// sufficiently long input (e.g. "0x10000000000000000") silently wrapped the
// 64-bit accumulator and could pass the final <= 0xFFFFFFFF check with a
// bogus value. The accumulator is now checked before each digit, so overlong
// inputs fail deterministically regardless of length.
//
static int
nvswitch_os_strtouint
(
    char *str,
    unsigned int *data
)
{
    char *p;
    unsigned long long val;

    if (!str || !data)
    {
        return -EINVAL;
    }

    *data = 0;
    val = 0;
    p = str;

    while (*p != '\0')
    {
        // Reject before accumulating another digit would exceed 32 bits;
        // this also prevents the 64-bit accumulator from ever wrapping.
        if (val > 0xFFFFFFFF)
        {
            return -EINVAL;
        }

        // Accept "x"/"X" only as the second character of a "0x" prefix.
        if ((tolower(*p) == 'x') && (*str == '0') && (p == str + 1))
        {
            p++;
        }
        else if (*p >= '0' && *p <= '9')
        {
            val = val * 16 + (*p - '0');
            p++;
        }
        else if (tolower(*p) >= 'a' && tolower(*p) <= 'f')
        {
            val = val * 16 + (tolower(*p) - 'a' + 10);
            p++;
        }
        else
        {
            return -EINVAL;
        }
    }

    if (val > 0xFFFFFFFF)
    {
        return -EINVAL;
    }

    *data = (unsigned int)val;

    return 0;
}
1981 
//
// Look up a dword regkey in the NvSwitchRegDwords module parameter string,
// which holds "Name=HexValue;Name=HexValue;..." entries, and parse its
// base-16 value into *data.
//
// Returns NVL_SUCCESS on success, -NVL_ERR_GENERIC on any lookup or parse
// failure (with *data left as 0).
//
// NOTE(review): strstr() matches substrings, so a key name that is a suffix
// of another key (e.g. "Key" vs "MyKey") could match the wrong entry —
// confirm the regkey namespace avoids such collisions.
//
NvlStatus
nvswitch_os_read_registry_dword
(
    void *os_handle,
    const char *name,
    NvU32 *data
)
{
    char *regkey, *regkey_val_start, *regkey_val_end;
    char regkey_val[NVSWITCH_REGKEY_VALUE_LEN + 1];
    NvU32 regkey_val_len = 0;

    *data = 0;

    if (!NvSwitchRegDwords)
    {
        return -NVL_ERR_GENERIC;
    }

    regkey = strstr(NvSwitchRegDwords, name);
    if (!regkey)
    {
        return -NVL_ERR_GENERIC;
    }

    regkey = strchr(regkey, '=');
    if (!regkey)
    {
        return -NVL_ERR_GENERIC;
    }

    regkey_val_start = regkey + 1;

    // The value runs to the next ';' separator, or to end of string for
    // the last entry.
    regkey_val_end = strchr(regkey, ';');
    if (!regkey_val_end)
    {
        regkey_val_end = strchr(regkey, '\0');
    }

    regkey_val_len = regkey_val_end - regkey_val_start;
    if (regkey_val_len > NVSWITCH_REGKEY_VALUE_LEN || regkey_val_len == 0)
    {
        return -NVL_ERR_GENERIC;
    }

    // Bounded copy; termination is added explicitly on the next line.
    strncpy(regkey_val, regkey_val_start, regkey_val_len);
    regkey_val[regkey_val_len] = '\0';

    if (nvswitch_os_strtouint(regkey_val, data) != 0)
    {
        return -NVL_ERR_GENERIC;
    }

    return NVL_SUCCESS;
}
2037 
2038 static NvBool
2039 _nvswitch_is_space(const char ch)
2040 {
2041     return ((ch == ' ') || ((ch >= '\t') && (ch <= '\r')));
2042 }
2043 
2044 static char *
2045 _nvswitch_remove_spaces(const char *in)
2046 {
2047     unsigned int len = nvswitch_os_strlen(in) + 1;
2048     const char *in_ptr;
2049     char *out, *out_ptr;
2050 
2051     out = nvswitch_os_malloc(len);
2052     if (out == NULL)
2053         return NULL;
2054 
2055     in_ptr = in;
2056     out_ptr = out;
2057 
2058     while (*in_ptr != '\0')
2059     {
2060         if (!_nvswitch_is_space(*in_ptr))
2061             *out_ptr++ = *in_ptr;
2062         in_ptr++;
2063     }
2064     *out_ptr = '\0';
2065 
2066     return out;
2067 }
2068 
2069 /*
2070  * Compare given string UUID with the NvSwitchBlacklist registry parameter string and
2071  * return whether the UUID is in the NvSwitch blacklist
2072  */
2073 NvBool
2074 nvswitch_os_is_uuid_in_blacklist
2075 (
2076     NvUuid *uuid
2077 )
2078 {
2079     char *list;
2080     char *ptr;
2081     char *token;
2082     NvU8 uuid_string[NVSWITCH_UUID_STRING_LENGTH];
2083 
2084     if (NvSwitchBlacklist == NULL)
2085         return NV_FALSE;
2086 
2087     if (nvswitch_uuid_to_string(uuid, uuid_string, NVSWITCH_UUID_STRING_LENGTH) == 0)
2088         return NV_FALSE;
2089 
2090     if ((list = _nvswitch_remove_spaces(NvSwitchBlacklist)) == NULL)
2091         return NV_FALSE;
2092 
2093     ptr = list;
2094 
2095     while ((token = strsep(&ptr, ",")) != NULL)
2096     {
2097         if (strcmp(token, uuid_string) == 0)
2098         {
2099             nvswitch_os_free(list);
2100             return NV_TRUE;
2101         }
2102     }
2103     nvswitch_os_free(list);
2104     return NV_FALSE;
2105 }
2106 
2107 
2108 NvlStatus
2109 nvswitch_os_alloc_contig_memory
2110 (
2111     void *os_handle,
2112     void **virt_addr,
2113     NvU32 size,
2114     NvBool force_dma32
2115 )
2116 {
2117     NvU32 gfp_flags;
2118     unsigned long nv_gfp_addr = 0;
2119 
2120     if (!virt_addr)
2121         return -NVL_BAD_ARGS;
2122 
2123     gfp_flags = GFP_KERNEL | (force_dma32 ? GFP_DMA32 : 0);
2124     NV_GET_FREE_PAGES(nv_gfp_addr, get_order(size), gfp_flags);
2125 
2126     if(!nv_gfp_addr)
2127     {
2128         pr_err("nvidia-nvswitch: unable to allocate kernel memory\n");
2129         return -NVL_NO_MEM;
2130     }
2131 
2132     *virt_addr = (void *)nv_gfp_addr;
2133 
2134     return NVL_SUCCESS;
2135 }
2136 
//
// Release pages obtained from nvswitch_os_alloc_contig_memory(); `size`
// must match the size passed at allocation so the page order agrees.
//
void
nvswitch_os_free_contig_memory
(
    void *os_handle,
    void *virt_addr,
    NvU32 size
)
{
    NV_FREE_PAGES((unsigned long)virt_addr, get_order(size));
}
2147 
2148 static inline int
2149 _nvswitch_to_pci_dma_direction
2150 (
2151     NvU32 direction
2152 )
2153 {
2154     if (direction == NVSWITCH_DMA_DIR_TO_SYSMEM)
2155         return DMA_FROM_DEVICE;
2156     else if (direction == NVSWITCH_DMA_DIR_FROM_SYSMEM)
2157         return DMA_TO_DEVICE;
2158     else
2159         return DMA_BIDIRECTIONAL;
2160 }
2161 
//
// Create a streaming DMA mapping for `cpu_addr`, returning the bus address
// in *dma_handle. Unmap with nvswitch_os_unmap_dma_region() using the same
// size and direction.
//
NvlStatus
nvswitch_os_map_dma_region
(
    void *os_handle,
    void *cpu_addr,
    NvU64 *dma_handle,
    NvU32 size,
    NvU32 direction
)
{
    int dma_dir;
    struct pci_dev *pdev = (struct pci_dev *)os_handle;

    if (!pdev || !cpu_addr || !dma_handle)
        return -NVL_BAD_ARGS;

    dma_dir = _nvswitch_to_pci_dma_direction(direction);

    *dma_handle = (NvU64)dma_map_single(&pdev->dev, cpu_addr, size, dma_dir);

    // dma_map_single() reports failure via a sentinel handle, not NULL.
    if (dma_mapping_error(&pdev->dev, *dma_handle))
    {
        pr_err("nvidia-nvswitch: unable to create PCI DMA mapping\n");
        return -NVL_ERR_GENERIC;
    }

    return NVL_SUCCESS;
}
2190 
2191 NvlStatus
2192 nvswitch_os_unmap_dma_region
2193 (
2194     void *os_handle,
2195     void *cpu_addr,
2196     NvU64 dma_handle,
2197     NvU32 size,
2198     NvU32 direction
2199 )
2200 {
2201     int dma_dir;
2202     struct pci_dev *pdev = (struct pci_dev *)os_handle;
2203 
2204     if (!pdev || !cpu_addr)
2205         return -NVL_BAD_ARGS;
2206 
2207     dma_dir = _nvswitch_to_pci_dma_direction(direction);
2208 
2209     dma_unmap_single(&pdev->dev, dma_handle, size, dma_dir);
2210 
2211     return NVL_SUCCESS;
2212 }
2213 
2214 NvlStatus
2215 nvswitch_os_set_dma_mask
2216 (
2217     void *os_handle,
2218     NvU32 dma_addr_width
2219 )
2220 {
2221     struct pci_dev *pdev = (struct pci_dev *)os_handle;
2222 
2223     if (!pdev)
2224         return -NVL_BAD_ARGS;
2225 
2226     if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(dma_addr_width)))
2227         return -NVL_ERR_GENERIC;
2228 
2229     return NVL_SUCCESS;
2230 }
2231 
2232 NvlStatus
2233 nvswitch_os_sync_dma_region_for_cpu
2234 (
2235     void *os_handle,
2236     NvU64 dma_handle,
2237     NvU32 size,
2238     NvU32 direction
2239 )
2240 {
2241     int dma_dir;
2242     struct pci_dev *pdev = (struct pci_dev *)os_handle;
2243 
2244     if (!pdev)
2245         return -NVL_BAD_ARGS;
2246 
2247     dma_dir = _nvswitch_to_pci_dma_direction(direction);
2248 
2249     dma_sync_single_for_cpu(&pdev->dev, dma_handle, size, dma_dir);
2250 
2251     return NVL_SUCCESS;
2252 }
2253 
2254 NvlStatus
2255 nvswitch_os_sync_dma_region_for_device
2256 (
2257     void *os_handle,
2258     NvU64 dma_handle,
2259     NvU32 size,
2260     NvU32 direction
2261 )
2262 {
2263     int dma_dir;
2264     struct pci_dev *pdev = (struct pci_dev *)os_handle;
2265 
2266     if (!pdev)
2267         return -NVL_BAD_ARGS;
2268 
2269     dma_dir = _nvswitch_to_pci_dma_direction(direction);
2270 
2271     dma_sync_single_for_device(&pdev->dev, dma_handle, size, dma_dir);
2272 
2273     return NVL_SUCCESS;
2274 }
2275 
2276 static inline void *
2277 _nvswitch_os_malloc
2278 (
2279     NvLength size
2280 )
2281 {
2282     void *ptr = NULL;
2283 
2284     if (!NV_MAY_SLEEP())
2285     {
2286         if (size <= NVSWITCH_KMALLOC_LIMIT)
2287         {
2288             ptr = kmalloc(size, NV_GFP_ATOMIC);
2289         }
2290     }
2291     else
2292     {
2293         if (size <= NVSWITCH_KMALLOC_LIMIT)
2294         {
2295             ptr = kmalloc(size, NV_GFP_NO_OOM);
2296         }
2297 
2298         if (ptr == NULL)
2299         {
2300             ptr = vmalloc(size);
2301         }
2302     }
2303 
2304     return ptr;
2305 }
2306 
/*
 * Allocation entry point with optional allocation tracking.
 *
 * When built with NV_MEM_LOGGER, each successful allocation is recorded
 * via nv_memdbg_add() along with the requesting file/line so it can be
 * matched against the corresponding nvswitch_os_free().  Otherwise this
 * is a plain pass-through to _nvswitch_os_malloc().
 */
void *
nvswitch_os_malloc_trace
(
    NvLength size,
    const char *file,
    NvU32 line
)
{
#if defined(NV_MEM_LOGGER)
    void *ptr = _nvswitch_os_malloc(size);
    if (ptr)
    {
        nv_memdbg_add(ptr, size, file, line);
    }

    return ptr;
#else
    return _nvswitch_os_malloc(size);
#endif
}
2327 
2328 static inline void
2329 _nvswitch_os_free
2330 (
2331     void *ptr
2332 )
2333 {
2334     if (!ptr)
2335         return;
2336 
2337     if (is_vmalloc_addr(ptr))
2338     {
2339         vfree(ptr);
2340     }
2341     else
2342     {
2343         kfree(ptr);
2344     }
2345 }
2346 
/*
 * Public free entry point, paired with nvswitch_os_malloc_trace().
 *
 * When built with NV_MEM_LOGGER the allocation record is removed before
 * the memory is released.
 */
void
nvswitch_os_free
(
    void *ptr
)
{
#if defined (NV_MEM_LOGGER)
    if (ptr == NULL)
        return;

    nv_memdbg_remove(ptr, 0, NULL, 0);

    /*
     * Plain call rather than "return _nvswitch_os_free(ptr);" — returning
     * an expression from a void function violates ISO C (C11 6.8.6.4).
     */
    _nvswitch_os_free(ptr);
#else
    _nvswitch_os_free(ptr);
#endif
}
2364 
2365 NvLength
2366 nvswitch_os_strlen
2367 (
2368     const char *str
2369 )
2370 {
2371     return strlen(str);
2372 }
2373 
/*
 * Bounded string copy with strncpy() semantics.
 *
 * NOTE(review): like strncpy(), this does NOT NUL-terminate dest when
 * strlen(src) >= length, and it zero-pads the remainder of dest
 * otherwise.  Callers are responsible for terminating dest when needed.
 */
char*
nvswitch_os_strncpy
(
    char *dest,
    const char *src,
    NvLength length
)
{
    return strncpy(dest, src, length);
}
2384 
2385 int
2386 nvswitch_os_strncmp
2387 (
2388     const char *s1,
2389     const char *s2,
2390     NvLength length
2391 )
2392 {
2393     return strncmp(s1, s2, length);
2394 }
2395 
2396 char*
2397 nvswitch_os_strncat
2398 (
2399     char *s1,
2400     const char *s2,
2401     NvLength length
2402 )
2403 {
2404     return strncat(s1, s2, length);
2405 }
2406 
2407 void *
2408 nvswitch_os_memset
2409 (
2410     void *dest,
2411     int value,
2412     NvLength size
2413 )
2414 {
2415      return memset(dest, value, size);
2416 }
2417 
2418 void *
2419 nvswitch_os_memcpy
2420 (
2421     void *dest,
2422     const void *src,
2423     NvLength size
2424 )
2425 {
2426     return memcpy(dest, src, size);
2427 }
2428 
2429 int
2430 nvswitch_os_memcmp
2431 (
2432     const void *s1,
2433     const void *s2,
2434     NvLength size
2435 )
2436 {
2437     return memcmp(s1, s2, size);
2438 }
2439 
/*
 * Read a 32-bit value from the given address.  The volatile-qualified
 * access keeps the compiler from caching or eliding the load, which is
 * required for memory-mapped register reads.
 */
NvU32
nvswitch_os_mem_read32
(
    const volatile void * address
)
{
    return (*(const volatile NvU32*)(address));
}
2448 
/*
 * Write a 32-bit value to the given address.  The volatile-qualified
 * access guarantees the store is actually emitted and not reordered
 * away by the compiler.
 */
void
nvswitch_os_mem_write32
(
    volatile void *address,
    NvU32 data
)
{
    (*(volatile NvU32 *)(address)) = data;
}
2458 
/*
 * Read a 64-bit value from the given address (volatile access; see
 * nvswitch_os_mem_read32).
 */
NvU64
nvswitch_os_mem_read64
(
    const volatile void * address
)
{
    return (*(const volatile NvU64 *)(address));
}
2467 
/*
 * Write a 64-bit value to the given address (volatile access; see
 * nvswitch_os_mem_write32).
 */
void
nvswitch_os_mem_write64
(
    volatile void *address,
    NvU64 data
)
{
    (*(volatile NvU64 *)(address)) = data;
}
2477 
2478 int
2479 nvswitch_os_snprintf
2480 (
2481     char *dest,
2482     NvLength size,
2483     const char *fmt,
2484     ...
2485 )
2486 {
2487     va_list arglist;
2488     int chars_written;
2489 
2490     va_start(arglist, fmt);
2491     chars_written = vsnprintf(dest, size, fmt, arglist);
2492     va_end(arglist);
2493 
2494     return chars_written;
2495 }
2496 
2497 int
2498 nvswitch_os_vsnprintf
2499 (
2500     char *buf,
2501     NvLength size,
2502     const char *fmt,
2503     va_list arglist
2504 )
2505 {
2506     return vsnprintf(buf, size, fmt, arglist);
2507 }
2508 
2509 void
2510 nvswitch_os_assert_log
2511 (
2512     const char *fmt,
2513     ...
2514 )
2515 {
2516     if (printk_ratelimit())
2517     {
2518         va_list arglist;
2519         char fmt_printk[NVSWITCH_LOG_BUFFER_SIZE];
2520 
2521         va_start(arglist, fmt);
2522         vsnprintf(fmt_printk, sizeof(fmt_printk), fmt, arglist);
2523         va_end(arglist);
2524         nvswitch_os_print(NVSWITCH_DBG_LEVEL_ERROR, fmt_printk);
2525         WARN_ON(1);
2526      }
2527      dbg_breakpoint();
2528 }
2529 
2530 /*
2531  * Sleep for specified milliseconds. Yields the CPU to scheduler.
2532  */
2533 void
2534 nvswitch_os_sleep
2535 (
2536     unsigned int ms
2537 )
2538 {
2539     NV_STATUS status;
2540     status = nv_sleep_ms(ms);
2541 
2542     if (status != NV_OK)
2543     {
2544         if (printk_ratelimit())
2545         {
2546             nvswitch_os_print(NVSWITCH_DBG_LEVEL_ERROR, "NVSwitch: requested"
2547                               " sleep duration %d msec exceeded %d msec\n",
2548                               ms, NV_MAX_ISR_DELAY_MS);
2549             WARN_ON(1);
2550         }
2551     }
2552 }
2553 
2554 NvlStatus
2555 nvswitch_os_acquire_fabric_mgmt_cap
2556 (
2557     void *osPrivate,
2558     NvU64 capDescriptor
2559 )
2560 {
2561     int dup_fd = -1;
2562     nvswitch_file_private_t *private_data = (nvswitch_file_private_t *)osPrivate;
2563 
2564     if (private_data == NULL)
2565     {
2566         return -NVL_BAD_ARGS;
2567     }
2568 
2569     dup_fd = nvlink_cap_acquire((int)capDescriptor,
2570                                 NVLINK_CAP_FABRIC_MANAGEMENT);
2571     if (dup_fd < 0)
2572     {
2573         return -NVL_ERR_OPERATING_SYSTEM;
2574     }
2575 
2576     private_data->capability_fds.fabric_mgmt = dup_fd;
2577     return NVL_SUCCESS;
2578 }
2579 
2580 int
2581 nvswitch_os_is_fabric_manager
2582 (
2583     void *osPrivate
2584 )
2585 {
2586     nvswitch_file_private_t *private_data = (nvswitch_file_private_t *)osPrivate;
2587 
2588     /* Make sure that fabric mgmt capbaility fd is valid */
2589     if ((private_data == NULL) ||
2590         (private_data->capability_fds.fabric_mgmt < 0))
2591     {
2592         return 0;
2593     }
2594 
2595     return 1;
2596 }
2597 
/*
 * Returns nonzero when the calling process passes the NV_IS_SUSER()
 * privilege check (superuser/admin), zero otherwise.
 */
int
nvswitch_os_is_admin
(
    void
)
{
    return NV_IS_SUSER();
}
2606 
/*
 * Decode LINUX_VERSION_CODE, which packs the kernel version as
 * (VERSION << 16) | (PATCHLEVEL << 8) | SUBLEVEL.
 */
#define NV_KERNEL_RELEASE    ((LINUX_VERSION_CODE >> 16) & 0x0ff)
#define NV_KERNEL_VERSION    ((LINUX_VERSION_CODE >> 8)  & 0x0ff)
#define NV_KERNEL_SUBVERSION ((LINUX_VERSION_CODE)       & 0x0ff)

/*
 * Report the running kernel's version triple.  Any output pointer may
 * be NULL if the caller does not need that component.  Always succeeds.
 */
NvlStatus
nvswitch_os_get_os_version
(
    NvU32 *pMajorVer,
    NvU32 *pMinorVer,
    NvU32 *pBuildNum
)
{
    if (pMajorVer)
        *pMajorVer = NV_KERNEL_RELEASE;
    if (pMinorVer)
        *pMinorVer = NV_KERNEL_VERSION;
    if (pBuildNum)
        *pBuildNum = NV_KERNEL_SUBVERSION;

    return NVL_SUCCESS;
}
2628 
2629 /*!
2630  * @brief: OS specific handling to add an event.
2631  */
2632 NvlStatus
2633 nvswitch_os_add_client_event
2634 (
2635     void            *osHandle,
2636     void            *osPrivate,
2637     NvU32           eventId
2638 )
2639 {
2640     return NVL_SUCCESS;
2641 }
2642 
2643 /*!
2644  * @brief: OS specific handling to remove all events corresponding to osPrivate.
2645  */
2646 NvlStatus
2647 nvswitch_os_remove_client_event
2648 (
2649     void            *osHandle,
2650     void            *osPrivate
2651 )
2652 {
2653     return NVL_SUCCESS;
2654 }
2655 
2656 /*!
2657  * @brief: OS specific handling to notify an event.
2658  */
2659 NvlStatus
2660 nvswitch_os_notify_client_event
2661 (
2662     void *osHandle,
2663     void *osPrivate,
2664     NvU32 eventId
2665 )
2666 {
2667     nvswitch_file_private_t *private_data = (nvswitch_file_private_t *)osPrivate;
2668 
2669     if (private_data == NULL)
2670     {
2671         return -NVL_BAD_ARGS;
2672     }
2673 
2674     private_data->file_event.event_pending = NV_TRUE;
2675     wake_up_interruptible(&private_data->file_event.wait_q_event);
2676 
2677     return NVL_SUCCESS;
2678 }
2679 
2680 /*!
2681  * @brief: Gets OS specific support for the REGISTER_EVENTS ioctl
2682  */
2683 NvlStatus
2684 nvswitch_os_get_supported_register_events_params
2685 (
2686     NvBool *many_events,
2687     NvBool *os_descriptor
2688 )
2689 {
2690     *many_events   = NV_FALSE;
2691     *os_descriptor = NV_FALSE;
2692     return NVL_SUCCESS;
2693 }
2694