1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/dev/virtio/pci/virtio_pci.c,v 1.3 2012/04/14 05:48:04 grehan Exp $
27  */
28 
29 /* Driver for the VirtIO PCI interface. */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/bus.h>
34 #include <sys/kernel.h>
35 #include <sys/module.h>
36 #include <sys/malloc.h>
37 #include <sys/serialize.h>
38 
39 #include <bus/pci/pcivar.h>
40 #include <bus/pci/pcireg.h>
41 
42 #include <sys/rman.h>
43 
44 #include <dev/virtual/virtio/virtio/virtio.h>
45 #include <dev/virtual/virtio/virtio/virtqueue.h>
46 #include "virtio_pci.h"
47 #include "virtio_bus_if.h"
48 
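/*
 * One handler registration on an interrupt resource: 'what' is the index
 * of the virtqueue the handler services, or -1 for configuration change
 * notifications.
 */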
49 struct vqentry {
50 	int what;
51 	struct virtqueue *vq;
52 	driver_intr_t *handler;
53 	void *arg;
54 	TAILQ_ENTRY(vqentry) entries;
55 };
56 
57 TAILQ_HEAD(vqirq_list, vqentry);
58 
59 struct vtpci_softc {
60 	device_t			 vtpci_dev;
61 	struct resource			*vtpci_res;
62 	struct resource			*vtpci_msix_res;
63 	uint64_t			 vtpci_features;
64 	uint32_t			 vtpci_flags;
65 #define VIRTIO_PCI_FLAG_MSI		 0x0001
66 #define VIRTIO_PCI_FLAG_MSIX		 0x0010
67 
68 	device_t			 vtpci_child_dev;
69 	struct virtio_feature_desc	*vtpci_child_feat_desc;
70 
71 	/*
72 	 * Ideally, each virtqueue that the driver provides a callback for
73 	 * will receive its own MSIX vector. If there are not sufficient
74 	 * vectors available, we will then attempt to have all the VQs
75 	 * share one vector. Note that when using MSIX, the configuration
76 	 * change notifications must be on their own vector.
77 	 *
78 	 * If MSIX is not available, we will attempt to have the whole
79 	 * device share one MSI vector, and then, finally, one legacy
80 	 * interrupt.
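	 *
	 * With MSIX and, say, two virtqueues, this typically works out to
	 * vector 0 carrying the configuration change notifications and
	 * vectors 1 and 2 servicing one virtqueue each; if only two
	 * vectors can be allocated, vector 0 still carries the
	 * configuration changes and vector 1 is shared by both VQs.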
81 	 */
82 	int				 vtpci_nvqs;
83 	struct vtpci_virtqueue {
84 		struct virtqueue *vq;
85 
86 		/* Index into vtpci_intr_res[] below. -1 if no IRQ assigned. */
87 		int		  ires_idx;
88 	} vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];
89 
90 	/*
91 	 * When using MSIX interrupts, the first element of vtpci_intr_res[]
92 	 * is always used for configuration change notifications. The remaining
93 	 * element(s) are used for the virtqueues.
94 	 *
95 	 * With MSI and legacy interrupts, only the first element of
96 	 * vtpci_intr_res[] is used.
97 	 */
98 	int				 vtpci_nintr_res;
99 	int				 vtpci_irq_flags;
100 	struct vtpci_intr_resource {
101 		struct vtpci_softc *ires_sc;
102 		struct resource	*irq;
103 		int		 rid;
104 		void		*intrhand;
105 		struct vqirq_list ls;
106 	} vtpci_intr_res[1 + VIRTIO_MAX_VIRTQUEUES];
107 
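	/* Index into vtpci_intr_res[] of the config change interrupt, or -1. */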
108 	int				 vtpci_config_irq;
109 };
110 
111 static int	vtpci_probe(device_t);
112 static int	vtpci_attach(device_t);
113 static int	vtpci_detach(device_t);
114 static int	vtpci_suspend(device_t);
115 static int	vtpci_resume(device_t);
116 static int	vtpci_shutdown(device_t);
117 static void	vtpci_driver_added(device_t, driver_t *);
118 static void	vtpci_child_detached(device_t, device_t);
119 static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
120 static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);
121 
122 static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
123 static int	vtpci_with_feature(device_t, uint64_t);
124 static int	vtpci_intr_count(device_t dev);
125 static int	vtpci_intr_alloc(device_t dev, int *cnt, int use_config,
126 		    int *cpus);
127 static int	vtpci_intr_release(device_t dev);
128 static int	vtpci_alloc_virtqueues(device_t, int, struct vq_alloc_info *);
129 static int	vtpci_setup_intr(device_t, uint irq, lwkt_serialize_t);
130 static int	vtpci_teardown_intr(device_t, uint irq);
131 static int	vtpci_bind_intr(device_t, uint, int, driver_intr_t, void *);
132 static int	vtpci_unbind_intr(device_t, int);
133 static void	vtpci_stop(device_t);
134 static int	vtpci_reinit(device_t, uint64_t);
135 static void	vtpci_reinit_complete(device_t);
136 static void	vtpci_notify_virtqueue(device_t, uint16_t);
137 static uint8_t	vtpci_get_status(device_t);
138 static void	vtpci_set_status(device_t, uint8_t);
139 static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
140 static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);
141 
142 static void	vtpci_describe_features(struct vtpci_softc *, const char *,
143 		    uint64_t);
144 static void	vtpci_probe_and_attach_child(struct vtpci_softc *);
145 
146 static int	vtpci_register_msix_vector(struct vtpci_softc *, int, int);
147 
148 static void	vtpci_free_interrupts(struct vtpci_softc *);
149 static void	vtpci_free_virtqueues(struct vtpci_softc *);
150 static void	vtpci_release_child_resources(struct vtpci_softc *);
151 static void	vtpci_reset(struct vtpci_softc *);
152 
153 static void	vtpci_legacy_intr(void *);
154 static void	vtpci_msix_intr(void *);
155 
156 /*
157  * I/O port read/write wrappers.
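 * These operate on the legacy VirtIO configuration header that
 * vtpci_attach() maps from BAR(0) into sc->vtpci_res.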
158  */
159 #define vtpci_read_config_1(sc, o)	bus_read_1((sc)->vtpci_res, (o))
160 #define vtpci_read_config_2(sc, o)	bus_read_2((sc)->vtpci_res, (o))
161 #define vtpci_read_config_4(sc, o)	bus_read_4((sc)->vtpci_res, (o))
162 #define vtpci_write_config_1(sc, o, v)	bus_write_1((sc)->vtpci_res, (o), (v))
163 #define vtpci_write_config_2(sc, o, v)	bus_write_2((sc)->vtpci_res, (o), (v))
164 #define vtpci_write_config_4(sc, o, v)	bus_write_4((sc)->vtpci_res, (o), (v))
165 
166 /* Tunables. */
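/*
 * Setting hw.virtio.pci.disable_msix to a non-zero value (e.g. in
 * /boot/loader.conf) disables MSI-X and falls back to MSI or a legacy
 * interrupt.
 */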
167 static int vtpci_disable_msix = 0;
168 TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
169 
170 static device_method_t vtpci_methods[] = {
171 	/* Device interface. */
172 	DEVMETHOD(device_probe,			  vtpci_probe),
173 	DEVMETHOD(device_attach,		  vtpci_attach),
174 	DEVMETHOD(device_detach,		  vtpci_detach),
175 	DEVMETHOD(device_suspend,		  vtpci_suspend),
176 	DEVMETHOD(device_resume,		  vtpci_resume),
177 	DEVMETHOD(device_shutdown,		  vtpci_shutdown),
178 
179 	/* Bus interface. */
180 	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
181 	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
182 	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
183 	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),
184 
185 	/* VirtIO bus interface. */
186 	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
187 	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
188 	DEVMETHOD(virtio_bus_intr_count,	  vtpci_intr_count),
189 	DEVMETHOD(virtio_bus_intr_alloc,	  vtpci_intr_alloc),
190 	DEVMETHOD(virtio_bus_intr_release,	  vtpci_intr_release),
191 	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
192 	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
193 	DEVMETHOD(virtio_bus_teardown_intr,	  vtpci_teardown_intr),
194 	DEVMETHOD(virtio_bus_bind_intr,		  vtpci_bind_intr),
195 	DEVMETHOD(virtio_bus_unbind_intr,	  vtpci_unbind_intr),
196 	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
197 	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
198 	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
199 	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
200 	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
201 	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
202 
203 	DEVMETHOD_END
204 };
205 
206 static driver_t vtpci_driver = {
207 	"virtio_pci",
208 	vtpci_methods,
209 	sizeof(struct vtpci_softc)
210 };
211 
212 devclass_t vtpci_devclass;
213 
214 DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, NULL, NULL);
215 MODULE_VERSION(virtio_pci, 1);
216 MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
217 MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
218 
219 static int
220 vtpci_probe(device_t dev)
221 {
222 	char desc[36];
223 	const char *name;
224 
225 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
226 		return (ENXIO);
227 
228 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
229 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
230 		return (ENXIO);
231 
232 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
233 		return (ENXIO);
234 
235 	name = virtio_device_name(pci_get_subdevice(dev));
236 	if (name == NULL)
237 		name = "Unknown";
238 
239 	ksnprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
240 	device_set_desc_copy(dev, desc);
241 
242 	return (BUS_PROBE_DEFAULT);
243 }
244 
245 static int
246 vtpci_attach(device_t dev)
247 {
248 	struct vtpci_softc *sc;
249 	device_t child;
250 	int msix_cap, rid;
251 
252 	sc = device_get_softc(dev);
253 	sc->vtpci_dev = dev;
254 	sc->vtpci_config_irq = -1;
255 
256 	pci_enable_busmaster(dev);
257 
258 	rid = PCIR_BAR(0);
259 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
260 	    RF_ACTIVE);
261 	if (sc->vtpci_res == NULL) {
262 		device_printf(dev, "cannot map I/O space\n");
263 		return (ENXIO);
264 	}
265 
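	/*
	 * If the device has an MSI-X capability, map the BAR that holds
	 * the MSI-X table so vectors can be allocated later on.
	 */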
266 	if (pci_find_extcap(dev, PCIY_MSIX, &msix_cap) == 0) {
267 		uint32_t val;
268 		val = pci_read_config(dev, msix_cap + PCIR_MSIX_TABLE, 4);
269 		rid = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
270 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
271 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
272 	}
273 
274 	vtpci_reset(sc);
275 
276 	/* Tell the host we've noticed this device. */
277 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
278 
279 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
280 		device_printf(dev, "cannot create child device\n");
281 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
282 		vtpci_detach(dev);
283 		return (ENOMEM);
284 	}
285 
286 	sc->vtpci_child_dev = child;
287 	vtpci_probe_and_attach_child(sc);
288 
289 	return (0);
290 }
291 
292 static int
293 vtpci_detach(device_t dev)
294 {
295 	struct vtpci_softc *sc;
296 	device_t child;
297 	int error;
298 
299 	sc = device_get_softc(dev);
300 
301 	if ((child = sc->vtpci_child_dev) != NULL) {
302 		error = device_delete_child(dev, child);
303 		if (error)
304 			return (error);
305 		sc->vtpci_child_dev = NULL;
306 	}
307 
308 	vtpci_reset(sc);
309 
310 	if (sc->vtpci_msix_res != NULL) {
311 		bus_release_resource(dev, SYS_RES_MEMORY,
312 		    rman_get_rid(sc->vtpci_msix_res), sc->vtpci_msix_res);
313 		sc->vtpci_msix_res = NULL;
314 	}
315 
316 	if (sc->vtpci_res != NULL) {
317 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
318 		    sc->vtpci_res);
319 		sc->vtpci_res = NULL;
320 	}
321 
322 	return (0);
323 }
324 
325 static int
326 vtpci_suspend(device_t dev)
327 {
328 
329 	return (bus_generic_suspend(dev));
330 }
331 
332 static int
333 vtpci_resume(device_t dev)
334 {
335 
336 	return (bus_generic_resume(dev));
337 }
338 
339 static int
340 vtpci_shutdown(device_t dev)
341 {
342 
343 	(void) bus_generic_shutdown(dev);
344 	/* Forcibly stop the host device. */
345 	vtpci_stop(dev);
346 
347 	return (0);
348 }
349 
350 static void
351 vtpci_driver_added(device_t dev, driver_t *driver)
352 {
353 	struct vtpci_softc *sc;
354 
355 	sc = device_get_softc(dev);
356 
357 	vtpci_probe_and_attach_child(sc);
358 }
359 
360 static void
361 vtpci_child_detached(device_t dev, device_t child)
362 {
363 	struct vtpci_softc *sc;
364 
365 	sc = device_get_softc(dev);
366 
367 	vtpci_reset(sc);
368 	vtpci_release_child_resources(sc);
369 }
370 
371 static int
372 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
373 {
374 	struct vtpci_softc *sc;
375 
376 	sc = device_get_softc(dev);
377 
378 	if (sc->vtpci_child_dev != child)
379 		return (ENOENT);
380 
381 	switch (index) {
382 	case VIRTIO_IVAR_DEVTYPE:
383 		*result = pci_get_subdevice(dev);
384 		break;
385 	default:
386 		return (ENOENT);
387 	}
388 
389 	return (0);
390 }
391 
392 static int
393 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
394 {
395 	struct vtpci_softc *sc;
396 
397 	sc = device_get_softc(dev);
398 
399 	if (sc->vtpci_child_dev != child)
400 		return (ENOENT);
401 
402 	switch (index) {
403 	case VIRTIO_IVAR_FEATURE_DESC:
404 		sc->vtpci_child_feat_desc = (void *) value;
405 		break;
406 	default:
407 		return (ENOENT);
408 	}
409 
410 	return (0);
411 }
412 
413 static uint64_t
414 vtpci_negotiate_features(device_t dev, uint64_t child_features)
415 {
416 	struct vtpci_softc *sc;
417 	uint64_t host_features, features;
418 
419 	sc = device_get_softc(dev);
420 
421 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
422 	vtpci_describe_features(sc, "host", host_features);
423 
424 	/*
425 	 * Limit negotiated features to what the driver, virtqueue, and
426 	 * host all support.
427 	 */
428 	features = host_features & child_features;
429 	features = virtqueue_filter_features(features);
430 	sc->vtpci_features = features;
431 
432 	vtpci_describe_features(sc, "negotiated", features);
433 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
434 
435 	return (features);
436 }
437 
438 static int
439 vtpci_with_feature(device_t dev, uint64_t feature)
440 {
441 	struct vtpci_softc *sc;
442 
443 	sc = device_get_softc(dev);
444 
445 	return ((sc->vtpci_features & feature) != 0);
446 }
447 
448 static int
449 vtpci_intr_count(device_t dev)
450 {
451 	struct vtpci_softc *sc = device_get_softc(dev);
452 
453 	if (vtpci_disable_msix != 0 || sc->vtpci_msix_res == NULL)
454 		return 1;
455 	else
456 		return pci_msix_count(dev);
457 }
458 
459 /* Never returns success (0) while leaving *cnt <= 0. */
460 static int
461 vtpci_intr_alloc(device_t dev, int *cnt, int use_config, int *cpus)
462 {
463 	struct vtpci_softc *sc = device_get_softc(dev);
464 	int i;
465 
466 	if (sc->vtpci_nintr_res > 0)
467 		return (EINVAL);
468 
469 	if (*cnt <= 0)
470 		return (EINVAL);
471 
472 	if (vtpci_disable_msix == 0 && sc->vtpci_msix_res != NULL) {
473 		int nmsix = pci_msix_count(dev);
474 		if (nmsix < *cnt)
475 			*cnt = nmsix;
476 	}
477 
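	/*
	 * MSI-X is only worth setting up if we can use more than one
	 * vector, or if the caller does not need a separate vector for
	 * configuration changes.
	 */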
478 	if ((*cnt > 1 || use_config == 0) &&
479 	    vtpci_disable_msix == 0 && sc->vtpci_msix_res != NULL) {
480 		if (pci_setup_msix(dev) != 0) {
481 			device_printf(dev, "pci_setup_msix failed\n");
482 			/* Just fall through to the legacy IRQ code instead. */
483 		} else {
484 			for (i = 0; i < *cnt; i++) {
485 				int cpu, rid;
486 
487 				if (cpus != NULL && cpus[i] >= 0 &&
488 				    cpus[i] < ncpus) {
489 					cpu = cpus[i];
490 				} else {
491 					cpu = device_get_unit(dev) + i;
492 					cpu %= ncpus;
493 				}
494 				if (pci_alloc_msix_vector(dev, i, &rid, cpu)
495 				    != 0) {
496 					if (i > 1 || (i == 1 && !use_config)) {
497 						*cnt = i;
498 						/* Got some MSI-X vectors. */
499 						sc->vtpci_irq_flags = RF_ACTIVE;
500 						sc->vtpci_flags |=
501 						    VIRTIO_PCI_FLAG_MSIX;
502 						goto finish;
503 					}
504 					/*
505 					 * Allocate the legacy IRQ instead.
506 					 */
507 					if (i == 1) {
508 						pci_release_msix_vector(dev, 0);
509 					}
510 					pci_teardown_msix(dev);
511 					break;
512 				}
513 				sc->vtpci_intr_res[i].rid = rid;
514 			}
515 			/* Successfully allocated all the MSI-X vectors we wanted. */
516 			sc->vtpci_irq_flags = RF_ACTIVE;
517 			sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX;
519 			goto finish;
520 		}
521 	}
522 
523 	/* Legacy IRQ code: */
524 	*cnt = 1;
525 	/*
526 	 * Use MSI interrupts if available. Otherwise, we fall back
527 	 * to legacy interrupts.
528 	 */
529 	sc->vtpci_intr_res[0].rid = 0;
530 	if (pci_alloc_1intr(sc->vtpci_dev, 1,
531 	    &sc->vtpci_intr_res[0].rid,
532 	    &sc->vtpci_irq_flags) == PCI_INTR_TYPE_MSI) {
533 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI;
534 	}
535 
536 finish:
537 	KKASSERT(!((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSI) != 0 &&
538 		   (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) != 0));
539 
540 	sc->vtpci_nintr_res = *cnt;
541 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
542 		struct resource *irq;
543 
544 		TAILQ_INIT(&sc->vtpci_intr_res[i].ls);
545 		sc->vtpci_intr_res[i].ires_sc = sc;
546 		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ,
547 		    &sc->vtpci_intr_res[i].rid, sc->vtpci_irq_flags);
548 		if (irq == NULL)
549 			return (ENXIO);
550 		if (cpus != NULL)
551 			cpus[i] = rman_get_cpuid(irq);
552 
553 		sc->vtpci_intr_res[i].irq = irq;
554 	}
555 
556 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
557 		device_printf(dev, "using %d MSI-X vectors\n", *cnt);
558 		pci_enable_msix(dev);
559 	}
560 
561 	return (0);
562 }
563 
564 static int
565 vtpci_intr_release(device_t dev)
566 {
567 	struct vtpci_softc *sc = device_get_softc(dev);
568 	struct vtpci_intr_resource *ires;
569 	int i;
570 
571 	if (sc->vtpci_nintr_res == 0)
572 		return (EINVAL);
573 
574 	/* XXX Should verify that none of the interrupts are currently in use. */
575 
576 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
577 		ires = &sc->vtpci_intr_res[i];
578 
579 		KKASSERT(TAILQ_EMPTY(&ires->ls));
580 		if (ires->irq != NULL) {
581 			bus_release_resource(dev, SYS_RES_IRQ, ires->rid,
582 			    ires->irq);
583 			ires->irq = NULL;
584 		}
585 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX)
586 			pci_release_msix_vector(dev, ires->rid);
587 		ires->rid = 0;
588 	}
589 	sc->vtpci_nintr_res = 0;
590 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSI) {
591 		pci_release_msi(dev);
592 		sc->vtpci_flags &= ~VIRTIO_PCI_FLAG_MSI;
593 	}
594 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
595 		pci_teardown_msix(dev);
596 		sc->vtpci_flags &= ~VIRTIO_PCI_FLAG_MSIX;
597 	}
598 	return (0);
599 }
600 
601 static int
602 vtpci_alloc_virtqueues(device_t dev, int nvqs, struct vq_alloc_info *vq_info)
603 {
604 	struct vtpci_softc *sc;
605 	struct vtpci_virtqueue *vqx;
606 	struct vq_alloc_info *info;
607 	int queue, error;
608 	uint16_t vq_size;
609 
610 	sc = device_get_softc(dev);
611 
612 	if (sc->vtpci_nvqs != 0 || nvqs <= 0 ||
613 	    nvqs > VIRTIO_MAX_VIRTQUEUES)
614 		return (EINVAL);
615 
616 	for (queue = 0; queue < nvqs; queue++) {
617 		vqx = &sc->vtpci_vqx[queue];
618 		info = &vq_info[queue];
619 
620 		vqx->ires_idx = -1;
621 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
622 
623 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
624 		error = virtqueue_alloc(dev, queue, vq_size,
625 		    VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq);
626 		if (error)
627 			return (error);
628 
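		/* Hand the host the page frame number of the ring. */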
629 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
630 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
631 
632 		*info->vqai_vq = vqx->vq;
633 		sc->vtpci_nvqs++;
634 	}
635 
636 	return (0);
637 }
638 
639 /* XXX Add argument to specify the callback function here. */
640 static int
641 vtpci_setup_intr(device_t dev, uint irq, lwkt_serialize_t slz)
642 {
643 	struct vtpci_softc *sc;
644 	struct vtpci_intr_resource *ires;
645 	int flags, error;
646 
647 	sc = device_get_softc(dev);
648 	flags = INTR_MPSAFE;
649 
650 	if ((int)irq >= sc->vtpci_nintr_res)
651 		return (EINVAL);
652 	ires = &sc->vtpci_intr_res[irq];
653 
654 	if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) {
655 		error = bus_setup_intr(dev, ires->irq, flags,
656 				       vtpci_legacy_intr,
657 				       ires, &ires->intrhand, slz);
658 	} else {
659 		error = bus_setup_intr(dev, ires->irq, flags,
660 				       vtpci_msix_intr,
661 				       ires, &ires->intrhand, slz);
662 	}
663 	return (error);
664 }
665 
666 static int
667 vtpci_teardown_intr(device_t dev, uint irq)
668 {
669 	struct vtpci_softc *sc = device_get_softc(dev);
670 	struct vtpci_intr_resource *ires;
671 
672 	if ((int)irq >= sc->vtpci_nintr_res)
673 		return (EINVAL);
674 
675 	ires = &sc->vtpci_intr_res[irq];
676 
677 	if (ires->intrhand == NULL)
678 		return (ENXIO);
679 
680 	bus_teardown_intr(dev, ires->irq, ires->intrhand);
681 	ires->intrhand = NULL;
682 	return (0);
683 }
684 
685 static void
686 vtpci_add_irqentry(struct vtpci_intr_resource *intr_res, int what,
687     driver_intr_t handler, void *arg)
688 {
689 	struct vqentry *e;
690 
691 	TAILQ_FOREACH(e, &intr_res->ls, entries) {
692 		if (e->what == what)
693 			return;
694 	}
695 	e = kmalloc(sizeof(*e), M_DEVBUF, M_WAITOK | M_ZERO);
696 	e->what = what;
697 	if (e->what == -1) {
698 		e->vq = NULL;
699 	} else {
700 		e->vq = intr_res->ires_sc->vtpci_vqx[e->what].vq;
701 	}
702 	e->handler = handler;
703 	e->arg = arg;
704 	TAILQ_INSERT_TAIL(&intr_res->ls, e, entries);
705 }
706 
707 static void
708 vtpci_del_irqentry(struct vtpci_intr_resource *intr_res, int what)
709 {
710 	struct vqentry *e;
711 
712 	TAILQ_FOREACH(e, &intr_res->ls, entries) {
713 		if (e->what == what)
714 			break;
715 	}
716 	if (e != NULL) {
717 		TAILQ_REMOVE(&intr_res->ls, e, entries);
718 		kfree(e, M_DEVBUF);
719 	}
720 }
721 
722 /*
723  * The config interrupt can be bound any time after intr_alloc; virtqueue
724  * interrupts can only be bound after both intr_alloc and alloc_virtqueues.
725  */
726 static int
727 vtpci_bind_intr(device_t dev, uint irq, int what,
728     driver_intr_t handler, void *arg)
729 {
730 	struct vtpci_softc *sc = device_get_softc(dev);
731 	struct vtpci_virtqueue *vqx;
732 	int error;
733 
734 	if (irq >= sc->vtpci_nintr_res)
735 		return (EINVAL);
736 
737 	if (what == -1) {
738 		if (sc->vtpci_config_irq != -1)
739 			return (EINVAL);
740 
741 		sc->vtpci_config_irq = irq;
742 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
743 			error = vtpci_register_msix_vector(sc,
744 			    VIRTIO_MSI_CONFIG_VECTOR, irq);
745 			if (error)
746 				return (error);
747 		}
748 		goto done;
749 	}
750 
751 	if (sc->vtpci_nvqs <= what || what < 0)
752 		return (EINVAL);
753 
754 	vqx = &sc->vtpci_vqx[what];
755 	if (vqx->ires_idx != -1)
756 		return (EINVAL);
757 
758 	vqx->ires_idx = irq;
759 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
760 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, what);
761 		error = vtpci_register_msix_vector(sc, VIRTIO_MSI_QUEUE_VECTOR,
762 		    irq);
763 		if (error)
764 			return (error);
765 	}
766 done:
767 	vtpci_add_irqentry(&sc->vtpci_intr_res[irq], what, handler, arg);
768 	return (0);
769 }
770 
771 static int
772 vtpci_unbind_intr(device_t dev, int what)
773 {
774 	struct vtpci_softc *sc = device_get_softc(dev);
775 	struct vtpci_virtqueue *vqx;
776 	uint irq;
777 
778 	if (what == -1) {
779 		if (sc->vtpci_config_irq == -1)
780 			return (EINVAL);
781 
782 		irq = sc->vtpci_config_irq;
783 		sc->vtpci_config_irq = -1;
784 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
785 			vtpci_register_msix_vector(sc,
786 			    VIRTIO_MSI_CONFIG_VECTOR, -1);
787 		}
788 		goto done;
789 	}
790 
791 	if (sc->vtpci_nvqs <= what || what < 0)
792 		return (EINVAL);
793 
794 	vqx = &sc->vtpci_vqx[what];
795 	if (vqx->ires_idx == -1)
796 		return (EINVAL);
797 
798 	irq = vqx->ires_idx;
799 	vqx->ires_idx = -1;
800 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
801 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, what);
802 		vtpci_register_msix_vector(sc, VIRTIO_MSI_QUEUE_VECTOR, -1);
803 	}
804 done:
805 	KKASSERT(irq < (uint)sc->vtpci_nintr_res);
806 	vtpci_del_irqentry(&sc->vtpci_intr_res[irq], what);
807 	return (0);
808 }
809 
810 static void
811 vtpci_stop(device_t dev)
812 {
813 	vtpci_reset(device_get_softc(dev));
814 }
815 
816 static int
817 vtpci_reinit(device_t dev, uint64_t features)
818 {
819 	struct vtpci_softc *sc;
820 	struct vtpci_virtqueue *vqx;
821 	struct virtqueue *vq;
822 	int queue, error;
823 	uint16_t vq_size;
824 
825 	sc = device_get_softc(dev);
826 
827 	/*
828 	 * Redrive the device initialization. This is a bit of an abuse
829 	 * of the specification, but both VirtualBox and QEMU/KVM seem
830 	 * to play nice. We expect the host device not to change anything
831 	 * beyond what the guest driver itself changed: the MSIX state
832 	 * should not change, and the number of virtqueues and their
833 	 * sizes must remain the same.
834 	 */
835 
836 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
837 		vtpci_stop(dev);
838 
839 	/*
840 	 * Quickly drive the status through ACK and DRIVER. The device
841 	 * does not become usable again until vtpci_reinit_complete().
842 	 */
843 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
844 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
845 
846 	vtpci_negotiate_features(dev, features);
847 
848 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
849 		pci_enable_msix(dev);
850 		if (sc->vtpci_config_irq != -1) {
851 			error = vtpci_register_msix_vector(sc,
852 			    VIRTIO_MSI_CONFIG_VECTOR, sc->vtpci_config_irq);
853 			if (error)
854 				return (error);
855 		}
856 	}
857 
858 	for (queue = 0; queue < sc->vtpci_nvqs; queue++) {
859 		vqx = &sc->vtpci_vqx[queue];
860 		vq = vqx->vq;
861 
862 		KASSERT(vq != NULL, ("vq %d not allocated", queue));
863 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
864 
865 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
866 		error = virtqueue_reinit(vq, vq_size);
867 		if (error)
868 			return (error);
869 
870 		if (vqx->ires_idx != -1 &&
871 		    (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX)) {
872 			error = vtpci_register_msix_vector(sc,
873 			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
874 			if (error)
875 				return (error);
876 		}
877 
878 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
879 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
880 	}
881 
882 	return (0);
883 }
884 
885 static void
886 vtpci_reinit_complete(device_t dev)
887 {
888 
889 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
890 }
891 
892 static void
893 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
894 {
895 	struct vtpci_softc *sc;
896 
897 	sc = device_get_softc(dev);
898 
899 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
900 }
901 
902 static uint8_t
903 vtpci_get_status(device_t dev)
904 {
905 	struct vtpci_softc *sc;
906 
907 	sc = device_get_softc(dev);
908 
909 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
910 }
911 
912 static void
913 vtpci_set_status(device_t dev, uint8_t status)
914 {
915 	struct vtpci_softc *sc;
916 
917 	sc = device_get_softc(dev);
918 
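	/* Status bits accumulate; only a write of zero (RESET) clears them. */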
919 	if (status != VIRTIO_CONFIG_STATUS_RESET)
920 		status |= vtpci_get_status(dev);
921 
922 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
923 }
924 
925 static void
926 vtpci_read_dev_config(device_t dev, bus_size_t offset,
927     void *dst, int length)
928 {
929 	struct vtpci_softc *sc;
930 	bus_size_t off;
931 	uint8_t *d;
932 	int size;
933 
934 	sc = device_get_softc(dev);
935 	off = VIRTIO_PCI_CONFIG(sc) + offset;
936 
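	/* Copy the device-specific config space using the widest accesses possible. */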
937 	for (d = dst; length > 0; d += size, off += size, length -= size) {
938 		if (length >= 4) {
939 			size = 4;
940 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
941 		} else if (length >= 2) {
942 			size = 2;
943 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
944 		} else {
945 			size = 1;
946 			*d = vtpci_read_config_1(sc, off);
947 		}
948 	}
949 }
950 
951 static void
952 vtpci_write_dev_config(device_t dev, bus_size_t offset,
953     void *src, int length)
954 {
955 	struct vtpci_softc *sc;
956 	bus_size_t off;
957 	uint8_t *s;
958 	int size;
959 
960 	sc = device_get_softc(dev);
961 	off = VIRTIO_PCI_CONFIG(sc) + offset;
962 
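	/* Mirror the read path: write using the widest accesses possible. */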
963 	for (s = src; length > 0; s += size, off += size, length -= size) {
964 		if (length >= 4) {
965 			size = 4;
966 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
967 		} else if (length >= 2) {
968 			size = 2;
969 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
970 		} else {
971 			size = 1;
972 			vtpci_write_config_1(sc, off, *s);
973 		}
974 	}
975 }
976 
977 static void
978 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
979     uint64_t features)
980 {
981 	device_t dev, child;
982 
983 	dev = sc->vtpci_dev;
984 	child = sc->vtpci_child_dev;
985 
986 	if (device_is_attached(child) && bootverbose == 0)
987 		return;
988 
989 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
990 }
991 
992 static void
993 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
994 {
995 	device_t dev, child;
996 	int error;
997 
998 	dev = sc->vtpci_dev;
999 	child = sc->vtpci_child_dev;
1000 
1001 	if (child == NULL)
1002 		return;
1003 
1004 	if (device_get_state(child) != DS_NOTPRESENT)
1005 		return;
1006 
1007 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
1008 	error = device_probe_and_attach(child);
1009 	if (error != 0 || device_get_state(child) == DS_NOTPRESENT) {
1010 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
1011 		vtpci_reset(sc);
1012 		vtpci_release_child_resources(sc);
1013 
1014 		/* Reset status for future attempt. */
1015 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
1016 	} else
1017 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
1018 }
1019 
1020 static int
1021 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
1022 {
1023 	device_t dev;
1024 	uint16_t vector;
1025 
1026 	dev = sc->vtpci_dev;
1027 
1028 	if (offset != VIRTIO_MSI_CONFIG_VECTOR &&
1029 	    offset != VIRTIO_MSI_QUEUE_VECTOR)
1030 		return (EINVAL);
1031 
1032 	if (res_idx != -1) {
1033 		/* Map the interrupt resource index directly to the host vector. */
1034 		vector = res_idx;
1035 	} else {
1036 		vector = VIRTIO_MSI_NO_VECTOR;
1037 	}
1038 
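	/*
	 * Write the vector and read it back; the host signals failure by
	 * reporting a different value (typically VIRTIO_MSI_NO_VECTOR).
	 */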
1039 	vtpci_write_config_2(sc, offset, vector);
1040 
1041 	if (vtpci_read_config_2(sc, offset) != vector) {
1042 		device_printf(dev, "insufficient host resources for "
1043 		    "MSIX interrupts\n");
1044 		return (ENODEV);
1045 	}
1046 
1047 	return (0);
1048 }
1049 
1050 static void
1051 vtpci_free_interrupts(struct vtpci_softc *sc)
1052 {
1053 	device_t dev = sc->vtpci_dev;
1054 	struct vtpci_intr_resource *ires;
1055 	int i;
1056 
1057 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
1058 		ires = &sc->vtpci_intr_res[i];
1059 
1060 		if (ires->intrhand != NULL) {
1061 			bus_teardown_intr(dev, ires->irq, ires->intrhand);
1062 			ires->intrhand = NULL;
1063 		}
1064 		if (ires->irq != NULL) {
1065 			bus_release_resource(dev, SYS_RES_IRQ, ires->rid,
1066 			    ires->irq);
1067 			ires->irq = NULL;
1068 		}
1069 	}
1070 
1071 	vtpci_unbind_intr(sc->vtpci_dev, -1);
1072 	for (i = 0; i < sc->vtpci_nvqs; i++)
1073 		vtpci_unbind_intr(sc->vtpci_dev, i);
1074 
1075 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
1076 		ires = &sc->vtpci_intr_res[i];
1077 
1078 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX)
1079 			pci_release_msix_vector(dev, ires->rid);
1080 		ires->rid = 0;
1081 	}
1082 	sc->vtpci_nintr_res = 0;
1083 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSI) {
1084 		pci_release_msi(dev);
1085 		sc->vtpci_flags &= ~VIRTIO_PCI_FLAG_MSI;
1086 	}
1087 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
1088 		pci_disable_msix(dev);
1089 		pci_teardown_msix(dev);
1090 		sc->vtpci_flags &= ~VIRTIO_PCI_FLAG_MSIX;
1091 	}
1093 }
1094 
1095 static void
1096 vtpci_free_virtqueues(struct vtpci_softc *sc)
1097 {
1098 	struct vtpci_virtqueue *vqx;
1099 	int i;
1100 
1101 	sc->vtpci_nvqs = 0;
1102 
1103 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
1104 		vqx = &sc->vtpci_vqx[i];
1105 
1106 		if (vqx->vq != NULL) {
1107 			virtqueue_free(vqx->vq);
1108 			vqx->vq = NULL;
1109 		}
1110 	}
1111 }
1112 
1113 static void
1114 vtpci_release_child_resources(struct vtpci_softc *sc)
1115 {
1116 	vtpci_free_interrupts(sc);
1117 	vtpci_free_virtqueues(sc);
1118 }
1119 
1120 static void
1121 vtpci_reset(struct vtpci_softc *sc)
1122 {
1123 
1124 	/*
1125 	 * Writing a zero status (RESET) returns the host device to
1126 	 * its original, uninitialized state.
1127 	 */
1128 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
1129 }
1130 
1131 static void
1132 vtpci_legacy_intr(void *arg)
1133 {
1134 	struct vtpci_intr_resource *ires;
1135 	struct vtpci_softc *sc;
1136 	struct vqentry *e;
1137 	uint8_t isr;
1138 
1139 	ires = arg;
1140 	sc = ires->ires_sc;
1141 
1142 	/* Reading the ISR also clears it. */
1143 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
1144 
1145 	TAILQ_FOREACH(e, &ires->ls, entries) {
1146 		/*
1147 		 * The lwkt_serialize_handler_call API doesn't seem to fit
1148 		 * properly here. Instead move the virtqueue pending check
1149 		 * into the driver, who can then properly implement masking
1150 		 * of the handler itself.
1151 		 */
1152 		if (e->what == -1) {
1153 			if (isr & VIRTIO_PCI_ISR_CONFIG)
1154 				e->handler(e->arg);
1155 		} else if (isr & VIRTIO_PCI_ISR_INTR) {
1156 			e->handler(e->arg);
1157 		}
1158 	}
1159 }
1160 
1161 static void
1162 vtpci_msix_intr(void *arg)
1163 {
1164 	struct vtpci_intr_resource *ires;
1165 	struct vtpci_softc *sc;
1166 	struct vqentry *e;
1167 
1168 	ires = arg;
1169 	sc = ires->ires_sc;
1170 	TAILQ_FOREACH(e, &ires->ls, entries) {
1171 		/*
1172 		 * The lwkt_serialize_handler_call API doesn't seem to fit
1173 		 * properly here. Instead move the virtqueue pending check
1174 		 * into the driver, who can then properly implement masking
1175 		 * of the handler itself.
1176 		 */
1177 		e->handler(e->arg);
1178 	}
1179 }
1180