/*-
 * Copyright (c) 2018 VMware, Inc.
 *
 * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
 */

/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/systm.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <machine/bus.h>

#include "vmci.h"
#include "vmci_doorbell.h"
#include "vmci_driver.h"
#include "vmci_kernel_defs.h"
#include "vmci_queue_pair.h"

static int	vmci_probe(device_t);
static int	vmci_attach(device_t);
static int	vmci_detach(device_t);
static int	vmci_shutdown(device_t);

static int	vmci_map_bars(struct vmci_softc *);
static void	vmci_unmap_bars(struct vmci_softc *);

static int	vmci_config_capabilities(struct vmci_softc *);

static int	vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
		    bus_size_t, struct vmci_dma_alloc *);
static void	vmci_dma_free_int(struct vmci_softc *,
		    struct vmci_dma_alloc *);

static int	vmci_config_interrupts(struct vmci_softc *);
static int	vmci_config_interrupt(struct vmci_softc *);
static int	vmci_check_intr_cnt(struct vmci_softc *);
static int	vmci_allocate_interrupt_resources(struct vmci_softc *);
static int	vmci_setup_interrupts(struct vmci_softc *);
static void	vmci_dismantle_interrupts(struct vmci_softc *);
static void	vmci_interrupt(void *);
static void	vmci_interrupt_bm(void *);
static void	dispatch_datagrams(void *, int);
static void	process_bitmap(void *, int);

static void	vmci_delayed_work_fn_cb(void *context, int data);

static device_method_t vmci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,		vmci_probe),
	DEVMETHOD(device_attach,	vmci_attach),
	DEVMETHOD(device_detach,	vmci_detach),
	DEVMETHOD(device_shutdown,	vmci_shutdown),

	DEVMETHOD_END
};

static driver_t vmci_driver = {
	"vmci", vmci_methods, sizeof(struct vmci_softc)
};

static devclass_t vmci_devclass;
DRIVER_MODULE(vmci, pci, vmci_driver, vmci_devclass, 0, 0);
MODULE_VERSION(vmci, VMCI_VERSION);
const struct {
	uint16_t vendor;
	uint16_t device;
	const char *desc;
} vmci_ids[] = {
	{ VMCI_VMWARE_VENDOR_ID, VMCI_VMWARE_DEVICE_ID,
	    "VMware Virtual Machine Communication Interface" },
};
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, vmci, vmci_ids,
    nitems(vmci_ids));

MODULE_DEPEND(vmci, pci, 1, 1, 1);

static struct vmci_softc *vmci_sc;

#define LGPFX	"vmci: "
/*
 * Allocate a buffer for incoming datagrams globally to avoid repeated
 * allocation in the interrupt handler's atomic context.
 */
static uint8_t *data_buffer = NULL;
static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;

struct vmci_delayed_work_info {
	vmci_work_fn	*work_fn;
	void		*data;
	vmci_list_item(vmci_delayed_work_info) entry;
};

/*
 *------------------------------------------------------------------------------
 *
 * vmci_probe --
 *
 *     Probe to see if the VMCI device is present.
 *
 * Results:
 *     BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_probe(device_t dev)
{

	if (pci_get_vendor(dev) == vmci_ids[0].vendor &&
	    pci_get_device(dev) == vmci_ids[0].device) {
		device_set_desc(dev, vmci_ids[0].desc);

		return (BUS_PROBE_DEFAULT);
	}

	return (ENXIO);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_attach --
 *
 *     Attach VMCI device to the system after vmci_probe() has been called and
 *     the device has been detected.
 *
 * Results:
 *     0 if success, ENXIO otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_attach(device_t dev)
{
	struct vmci_softc *sc;
	int error, i;

	sc = device_get_softc(dev);
	sc->vmci_dev = dev;
	vmci_sc = sc;

	data_buffer = NULL;
	sc->vmci_num_intr = 0;
	for (i = 0; i < VMCI_MAX_INTRS; i++) {
		sc->vmci_intrs[i].vmci_irq = NULL;
		sc->vmci_intrs[i].vmci_handler = NULL;
	}

	TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
	TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);

	TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);

	pci_enable_busmaster(dev);

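	/*
	 * vmci_spinlock serializes access to the device's datagram out port
	 * in vmci_send_datagram(); vmci_delayed_work_lock protects the list
	 * of pending delayed work items shared with the taskqueue callback.
	 */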
	mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
	mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
	    NULL, MTX_DEF);

	error = vmci_map_bars(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
		goto fail;
	}

	error = vmci_config_capabilities(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
		goto fail;
	}

	vmci_list_init(&sc->vmci_delayed_work_infos);

	vmci_components_init();
	vmci_util_init();
	error = vmci_qp_guest_endpoints_init();
	if (error) {
		VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
		goto fail;
	}

	error = vmci_config_interrupts(sc);
	if (error)
		VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");

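	/*
	 * vmci_detach() serves as the common error path: it checks each
	 * resource (mutexes, BARs, DMA memory, interrupts) before tearing it
	 * down, so it is safe to call on a partially completed attach.
	 */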
fail:
	if (error) {
		vmci_detach(dev);
		return (ENXIO);
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_detach --
 *
 *     Detach the VMCI device.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_detach(device_t dev)
{
	struct vmci_softc *sc;

	sc = device_get_softc(dev);

	vmci_qp_guest_endpoints_exit();
	vmci_util_exit();

	vmci_dismantle_interrupts(sc);

	vmci_components_cleanup();

	if (mtx_initialized(&sc->vmci_spinlock)) {
		taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
		mtx_destroy(&sc->vmci_delayed_work_lock);
	}

	if (sc->vmci_res0 != NULL)
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);

	if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
		vmci_dma_free(&sc->vmci_notifications_bitmap);

	vmci_unmap_bars(sc);

	if (mtx_initialized(&sc->vmci_spinlock))
		mtx_destroy(&sc->vmci_spinlock);

	pci_disable_busmaster(dev);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_shutdown --
 *
 *     This function is called during system shutdown. We don't do anything.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_shutdown(device_t dev)
{

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_map_bars --
 *
 *     Maps the PCI I/O and MMIO BARs.
 *
 * Results:
 *     0 on success, ENXIO otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_map_bars(struct vmci_softc *sc)
{
	int rid;

	/* Map the PCI I/O BAR: BAR0 */
	rid = PCIR_BAR(0);
	sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
	    &rid, RF_ACTIVE);
	if (sc->vmci_res0 == NULL) {
		VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
		return (ENXIO);
	}

	sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
	sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
	sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);

	/* Map the PCI MMIO BAR: BAR1 */
	rid = PCIR_BAR(1);
	sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->vmci_res1 == NULL) {
		VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
		return (ENXIO);
	}

	sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
	sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_unmap_bars --
 *
 *     Unmaps the VMCI PCI I/O and MMIO BARs.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_unmap_bars(struct vmci_softc *sc)
{
	int rid;

	if (sc->vmci_res0 != NULL) {
		rid = PCIR_BAR(0);
		bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
		    sc->vmci_res0);
		sc->vmci_res0 = NULL;
	}

	if (sc->vmci_res1 != NULL) {
		rid = PCIR_BAR(1);
		bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
		    sc->vmci_res1);
		sc->vmci_res1 = NULL;
	}
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_capabilities --
 *
 *     Check the VMCI device capabilities and configure the device accordingly.
 *
 * Results:
 *     0 if success, ENODEV otherwise.
 *
 * Side effects:
 *     Device capabilities are enabled.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_capabilities(struct vmci_softc *sc)
{
	unsigned long bitmap_PPN;
	int error;

	/*
	 * Verify that the VMCI device supports the capabilities that we
	 * need. Datagrams are necessary and notifications will be used
	 * if the device supports it.
	 */
	sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CAPS_ADDR);

	if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
		VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
		    "datagrams.\n");
		return (ENODEV);
	}

	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
		sc->capabilities = VMCI_CAPS_DATAGRAM;
		error = vmci_dma_malloc(PAGE_SIZE, 1,
		    &sc->vmci_notifications_bitmap);
		if (error)
			VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
			    "notification bitmap.\n");
		else {
			memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
			    PAGE_SIZE);
			sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
		}
	} else
		sc->capabilities = VMCI_CAPS_DATAGRAM;

	/* Let the host know which capabilities we intend to use. */
	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CAPS_ADDR, sc->capabilities);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
		bitmap_PPN =
		    sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
		vmci_register_notification_bitmap(bitmap_PPN);
	}

	/* Check host capabilities. */
	if (!vmci_check_host_capabilities())
		return (ENODEV);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dmamap_cb --
 *
 *     Callback to receive mapping information resulting from the load of a
 *     bus_dmamap_t via bus_dmamap_load()
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *baddr = arg;

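	/*
	 * The DMA tag in vmci_dma_malloc_int() is created with
	 * nsegments == 1, so a successful load returns a single segment
	 * covering the whole allocation.
	 */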
	if (error == 0)
		*baddr = segs->ds_addr;
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_malloc_int --
 *
 *     Internal function that allocates DMA memory.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     EINPROGRESS if mapping is deferred.
 *     EINVAL if the request was invalid.
 *
 * Side effects:
 *     DMA memory is allocated.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
    struct vmci_dma_alloc *dma)
{
	int error;

	bzero(dma, sizeof(struct vmci_dma_alloc));

	error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
	    align, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    size,		/* maxsize */
	    1,			/* nsegments */
	    size,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &dma->dma_tag);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
		goto fail;
	}

	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
		goto fail;
	}

	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
		goto fail;
	}

	dma->dma_size = size;

fail:
	if (error)
		vmci_dma_free(dma);

	return (error);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_malloc --
 *
 *     This function is a wrapper around vmci_dma_malloc_int for callers
 *     outside of this module. Since we only support a single VMCI device, this
 *     wrapper provides access to the device softc structure.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     EINPROGRESS if mapping is deferred.
 *     EINVAL if the request was invalid.
 *
 * Side effects:
 *     DMA memory is allocated.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
{

	return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_free_int --
 *
 *     Internal function that frees DMA memory.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Frees DMA memory.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
{

	if (dma->dma_tag != NULL) {
		if (dma->dma_paddr != 0) {
			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		}

		if (dma->dma_vaddr != NULL)
			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
			    dma->dma_map);

		bus_dma_tag_destroy(dma->dma_tag);
	}
	bzero(dma, sizeof(struct vmci_dma_alloc));
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_free --
 *
 *     This function is a wrapper around vmci_dma_free_int for callers outside
 *     of this module. Since we only support a single VMCI device, this wrapper
 *     provides access to the device softc structure.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Frees DMA memory.
 *
 *------------------------------------------------------------------------------
 */

void
vmci_dma_free(struct vmci_dma_alloc *dma)
{

	vmci_dma_free_int(vmci_sc, dma);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_interrupts --
 *
 *     Configures and enables interrupts. Try to configure MSI-X. If this
 *     fails, try to configure MSI. If even this fails, try legacy interrupts.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     ENODEV if the device doesn't support interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Interrupts get enabled if successful.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_interrupts(struct vmci_softc *sc)
{
	int error;

	data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
	if (data_buffer == NULL)
		return (ENOMEM);

	sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
	error = vmci_config_interrupt(sc);
	if (error) {
		sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
		error = vmci_config_interrupt(sc);
	}
	if (error) {
		sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
		error = vmci_config_interrupt(sc);
	}
	if (error)
		return (error);

	/* Enable specific interrupt bits. */
	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
	else
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);

	/* Enable interrupts. */
	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_interrupt --
 *
 *     Check the number of interrupts supported, allocate resources and setup
 *     interrupts.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     ENODEV if the device doesn't support interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Resources get allocated and interrupts get setup (but not enabled) if
 *     successful.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_interrupt(struct vmci_softc *sc)
{
	int error;

	error = vmci_check_intr_cnt(sc);
	if (error)
		return (error);

	error = vmci_allocate_interrupt_resources(sc);
	if (error)
		return (error);

	error = vmci_setup_interrupts(sc);
	if (error)
		return (error);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_check_intr_cnt --
 *
 *     Check the number of interrupts supported by the device and ask PCI bus
 *     to allocate appropriate number of interrupts.
 *
 * Results:
 *     0 if success.
 *     ENODEV if the device doesn't support any interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Resources get allocated on success.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_check_intr_cnt(struct vmci_softc *sc)
{

	if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
		sc->vmci_num_intr = 1;
		return (0);
	}

	/*
	 * Make sure that the device supports the required number of MSI/MSI-X
	 * messages. We try for 2 MSI-X messages but 1 is good too. We need at
	 * least 1 MSI message.
	 */
	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
	    pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);

	if (!sc->vmci_num_intr) {
		VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
		    " messages");
		return (ENODEV);
	}

	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
	    VMCI_MAX_INTRS : 1;
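	/*
	 * pci_alloc_msix()/pci_alloc_msi() may grant fewer messages than
	 * requested; on success they update vmci_num_intr with the number
	 * actually allocated.
	 */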
	if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
		if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
			return (ENXIO);
	} else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
		if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
			return (ENXIO);
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_allocate_interrupt_resources --
 *
 *     Allocate resources necessary for interrupts.
 *
 * Results:
 *     0 if success, ENXIO otherwise.
 *
 * Side effects:
 *     Resources get allocated on success.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_allocate_interrupt_resources(struct vmci_softc *sc)
{
	struct resource *irq;
	int flags, i, rid;

	flags = RF_ACTIVE;
	flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
	rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;

	for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
		irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
		    flags);
		if (irq == NULL)
			return (ENXIO);
		sc->vmci_intrs[i].vmci_irq = irq;
		sc->vmci_intrs[i].vmci_rid = rid;
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_setup_interrupts --
 *
 *     Sets up the interrupts.
 *
 * Results:
 *     0 if success, appropriate error code from bus_setup_intr otherwise.
 *
 * Side effects:
 *     Interrupt handler gets attached.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_setup_interrupts(struct vmci_softc *sc)
{
	struct vmci_interrupt *intr;
	int error, flags;

	flags = INTR_TYPE_NET | INTR_MPSAFE;
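	/*
	 * With multiple (MSI-X) vectors, each vector is dedicated to one
	 * handler, so request exclusive (non-shared) interrupts.
	 */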
	if (sc->vmci_num_intr > 1)
		flags |= INTR_EXCL;

	intr = &sc->vmci_intrs[0];
	error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
	    vmci_interrupt, NULL, &intr->vmci_handler);
	if (error)
		return (error);

	if (sc->vmci_num_intr == 2) {
		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
		    intr->vmci_handler, "dg");
		intr = &sc->vmci_intrs[1];
		error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
		    NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
		if (error)
			return (error);
		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
		    intr->vmci_handler, "bm");
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_interrupt --
 *
 *     Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 *     interrupt (vector VMCI_INTR_DATAGRAM).
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_interrupt(void *arg)
{

	if (vmci_sc->vmci_num_intr == 2)
		taskqueue_enqueue(taskqueue_swi,
		    &vmci_sc->vmci_interrupt_dq_task);
	else {
		unsigned int icr;

		icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
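		/*
		 * An ICR of zero means the interrupt was not ours; all-ones
		 * most likely means the device is no longer present.
		 */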
		if (icr == 0 || icr == 0xffffffff)
			return;
		if (icr & VMCI_ICR_DATAGRAM) {
			taskqueue_enqueue(taskqueue_swi,
			    &vmci_sc->vmci_interrupt_dq_task);
			icr &= ~VMCI_ICR_DATAGRAM;
		}
		if (icr & VMCI_ICR_NOTIFICATION) {
			taskqueue_enqueue(taskqueue_swi,
			    &vmci_sc->vmci_interrupt_bm_task);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}
		if (icr != 0)
			VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
			    "cause");
	}
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_interrupt_bm --
 *
 *     Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 *     which is for the notification bitmap. Will only get called if we are
 *     using MSI-X with exclusive vectors.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_interrupt_bm(void *arg)
{

	ASSERT(vmci_sc->vmci_num_intr == 2);
	taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
}

/*
 *------------------------------------------------------------------------------
 *
 * dispatch_datagrams --
 *
 *     Reads and dispatches incoming datagrams.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Reads data from the device.
 *
 *------------------------------------------------------------------------------
 */

static void
dispatch_datagrams(void *context, int data)
{

	if (data_buffer == NULL)
		VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
		    "present");

	vmci_read_datagrams_from_port((vmci_io_handle) 0,
	    vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
	    data_buffer, data_buffer_size);
}

/*
 *------------------------------------------------------------------------------
 *
 * process_bitmap --
 *
 *     Scans the notification bitmap for raised flags, clears them and handles
 *     the notifications.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
process_bitmap(void *context, int data)
{

	if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL)
		VMCI_LOG_INFO(LGPFX"process_bitmap(): no bitmap present");

	vmci_scan_notification_bitmap(
	    vmci_sc->vmci_notifications_bitmap.dma_vaddr);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dismantle_interrupts --
 *
 *     Releases resources, detaches the interrupt handler and drains the task
 *     queue.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     No more interrupts.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_dismantle_interrupts(struct vmci_softc *sc)
{
	struct vmci_interrupt *intr;
	int i;

	for (i = 0; i < sc->vmci_num_intr; i++) {
		intr = &sc->vmci_intrs[i];
		if (intr->vmci_handler != NULL) {
			bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
			    intr->vmci_handler);
			intr->vmci_handler = NULL;
		}
		if (intr->vmci_irq != NULL) {
			bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
			    intr->vmci_rid, intr->vmci_irq);
			intr->vmci_irq = NULL;
			intr->vmci_rid = -1;
		}
	}

	if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
	    (sc->vmci_num_intr))
		pci_release_msi(sc->vmci_dev);

	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);

	if (data_buffer != NULL)
		free(data_buffer, M_DEVBUF);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_delayed_work_fn_cb --
 *
 *     Callback function that executes the queued up delayed work functions.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_delayed_work_fn_cb(void *context, int data)
{
	vmci_list(vmci_delayed_work_info) temp_list;

	vmci_list_init(&temp_list);

	/*
	 * Swap vmci_delayed_work_infos list with the empty temp_list while
	 * holding a lock. vmci_delayed_work_infos would then be an empty list
	 * and temp_list would contain the elements from the original
	 * vmci_delayed_work_infos. Finally, iterate through temp_list
	 * executing the delayed callbacks.
	 */

	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
	vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
	    vmci_delayed_work_info, entry);
	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);

	while (!vmci_list_empty(&temp_list)) {
		struct vmci_delayed_work_info *delayed_work_info =
		    vmci_list_first(&temp_list);

		delayed_work_info->work_fn(delayed_work_info->data);

		vmci_list_remove(delayed_work_info, entry);
		vmci_free_kernel_mem(delayed_work_info,
		    sizeof(*delayed_work_info));
	}
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_schedule_delayed_work_fn --
 *
 *     Schedule the specified callback.
 *
 * Results:
 *     0 if success, error code otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
{
	struct vmci_delayed_work_info *delayed_work_info;

	delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
	    VMCI_MEMORY_ATOMIC);

	if (!delayed_work_info)
		return (VMCI_ERROR_NO_MEM);

	delayed_work_info->work_fn = work_fn;
	delayed_work_info->data = data;
	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
	vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
	    delayed_work_info, entry);
	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);

	taskqueue_enqueue(taskqueue_thread,
	    &vmci_sc->vmci_delayed_work_task);

	return (VMCI_SUCCESS);
}
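
/*
 * Usage sketch (hypothetical caller, not part of this file): a component
 * that must defer work out of an atomic context could do
 *
 *	if (vmci_schedule_delayed_work_fn(my_deferred_fn, ctx) !=
 *	    VMCI_SUCCESS)
 *		(handle VMCI_ERROR_NO_MEM)
 *
 * where my_deferred_fn is a vmci_work_fn; it later runs from
 * taskqueue_thread via vmci_delayed_work_fn_cb().
 */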

/*
 *------------------------------------------------------------------------------
 *
 * vmci_send_datagram --
 *
 *     VM to hypervisor call mechanism.
 *
 * Results:
 *     The result of the hypercall.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_send_datagram(struct vmci_datagram *dg)
{
	int result;

	if (dg == NULL)
		return (VMCI_ERROR_INVALID_ARGS);

	/*
	 * Need to acquire spinlock on the device because the datagram data
	 * may be spread over multiple pages and the monitor may interleave
	 * device user rpc calls from multiple VCPUs. Acquiring the spinlock
	 * precludes that possibility. Disabling interrupts to avoid incoming
	 * datagrams during a "rep out" and possibly landing up in this
	 * function.
	 */
	mtx_lock_spin(&vmci_sc->vmci_spinlock);

	/*
	 * Send the datagram and retrieve the return value from the result
	 * register.
	 */
	__asm__ __volatile__(
	    "cld\n\t"
	    "rep outsb\n\t"
	    : /* No output. */
	    : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
	    "c"(VMCI_DG_SIZE(dg)), "S"(dg)
	    );

	/*
	 * XXX: Should read result high port as well when updating handlers to
	 * return 64bit.
	 */

	result = bus_space_read_4(vmci_sc->vmci_iot0,
	    vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
	mtx_unlock_spin(&vmci_sc->vmci_spinlock);

	return (result);
}
1190