1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/dev/virtio/pci/virtio_pci.c,v 1.3 2012/04/14 05:48:04 grehan Exp $
27  */
28 
29 /* Driver for the VirtIO PCI interface. */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/bus.h>
34 #include <sys/kernel.h>
35 #include <sys/module.h>
36 #include <sys/malloc.h>
37 #include <sys/serialize.h>
38 
39 #include <bus/pci/pcivar.h>
40 #include <bus/pci/pcireg.h>
41 
42 #include <sys/rman.h>
43 
44 #include <dev/virtual/virtio/virtio/virtio.h>
45 #include <dev/virtual/virtio/virtio/virtqueue.h>
46 #include "virtio_pci.h"
47 #include "virtio_if.h"
48 #include "virtio_bus_if.h"
49 
50 struct vtpci_softc {
51 	device_t			 vtpci_dev;
52 	struct resource			*vtpci_res;
53 	struct resource			*vtpci_msix_res;
54 	uint64_t			 vtpci_features;
55 	uint32_t			 vtpci_flags;
56 	int				 vtpci_irq_type;
57 	int				 vtpci_irq_rid;
58 #define VIRTIO_PCI_FLAG_NO_MSI		 0x0001
59 #define VIRTIO_PCI_FLAG_MSI		 0x0002
60 #define VIRTIO_PCI_FLAG_NO_MSIX		 0x0010
61 #define VIRTIO_PCI_FLAG_MSIX		 0x0020
62 #define VIRTIO_PCI_FLAG_SHARED_MSIX	 0x0040
63 
64 	device_t			 vtpci_child_dev;
65 	struct virtio_feature_desc	*vtpci_child_feat_desc;
66 
67 	/*
68 	 * Ideally, each virtqueue that the driver provides a callback for
69 	 * will receive its own MSIX vector. If there are not sufficient
70 	 * vectors available, we will then attempt to have all the VQs
71 	 * share one vector. Note that when using MSIX, the configuration
72 	 * changed notifications must be on their own vector.
73 	 *
74 	 * If MSIX is not available, we will attempt to have the whole
75 	 * device share one MSI vector, and then, finally, one legacy
76 	 * interrupt.
77 	 */
78 	int				 vtpci_nvqs;
79 	struct vtpci_virtqueue {
80 		struct virtqueue *vq;
81 
82 		/* Index into vtpci_intr_res[] below. Unused, then -1. */
83 		int		  ires_idx;
84 	} vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];
85 
86 	/*
87 	 * When using MSIX interrupts, the first element of vtpci_intr_res[]
88 	 * is always the configuration changed notifications. The remaining
89 	 * element(s) are used for the virtqueues.
90 	 *
91 	 * With MSI and legacy interrupts, only the first element of
92 	 * vtpci_intr_res[] is used.
93 	 */
94 	int				 vtpci_nintr_res;
95 	struct vtpci_intr_resource {
96 		struct resource	*irq;
97 		int		 rid;
98 		void		*intrhand;
99 	} vtpci_intr_res[1 + VIRTIO_MAX_VIRTQUEUES];
100 };
101 
102 static int	vtpci_probe(device_t);
103 static int	vtpci_attach(device_t);
104 static int	vtpci_detach(device_t);
105 static int	vtpci_suspend(device_t);
106 static int	vtpci_resume(device_t);
107 static int	vtpci_shutdown(device_t);
108 static void	vtpci_driver_added(device_t, driver_t *);
109 static void	vtpci_child_detached(device_t, device_t);
110 static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
111 static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);
112 
113 static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
114 static int	vtpci_with_feature(device_t, uint64_t);
115 static int	vtpci_alloc_virtqueues(device_t, int, int,
116 		    struct vq_alloc_info *);
117 static int	vtpci_setup_intr(device_t, lwkt_serialize_t);
118 static void	vtpci_stop(device_t);
119 static int	vtpci_reinit(device_t, uint64_t);
120 static void	vtpci_reinit_complete(device_t);
121 static void	vtpci_notify_virtqueue(device_t, uint16_t);
122 static uint8_t	vtpci_get_status(device_t);
123 static void	vtpci_set_status(device_t, uint8_t);
124 static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
125 static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);
126 
127 static void	vtpci_describe_features(struct vtpci_softc *, const char *,
128 		    uint64_t);
129 static void	vtpci_probe_and_attach_child(struct vtpci_softc *);
130 
131 static int	vtpci_alloc_interrupts(struct vtpci_softc *, int, int,
132 		    struct vq_alloc_info *);
133 static int	vtpci_alloc_intr_resources(struct vtpci_softc *, int,
134 		    struct vq_alloc_info *);
135 static int	vtpci_alloc_msi(struct vtpci_softc *);
136 static int	vtpci_alloc_msix(struct vtpci_softc *, int);
137 static int	vtpci_register_msix_vector(struct vtpci_softc *, int, int);
138 
139 static void	vtpci_free_interrupts(struct vtpci_softc *);
140 static void	vtpci_free_virtqueues(struct vtpci_softc *);
141 static void	vtpci_release_child_resources(struct vtpci_softc *);
142 static void	vtpci_reset(struct vtpci_softc *);
143 
144 static int	vtpci_legacy_intr(void *);
145 static int	vtpci_vq_shared_intr(void *);
146 static int	vtpci_vq_intr(void *);
147 static int	vtpci_config_intr(void *);
148 
/*
 * Sized read/write accessors for the register window mapped through
 * vtpci_res; `off' is the offset within that window.
 */
#define vtpci_read_config_1(softc, off)					\
	bus_read_1((softc)->vtpci_res, (off))
#define vtpci_read_config_2(softc, off)					\
	bus_read_2((softc)->vtpci_res, (off))
#define vtpci_read_config_4(softc, off)					\
	bus_read_4((softc)->vtpci_res, (off))
#define vtpci_write_config_1(softc, off, val)				\
	bus_write_1((softc)->vtpci_res, (off), (val))
#define vtpci_write_config_2(softc, off, val)				\
	bus_write_2((softc)->vtpci_res, (off), (val))
#define vtpci_write_config_4(softc, off, val)				\
	bus_write_4((softc)->vtpci_res, (off), (val))
158 
159 /* Tunables. */
160 static int vtpci_disable_msix = 0;
161 TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
162 
163 static device_method_t vtpci_methods[] = {
164 	/* Device interface. */
165 	DEVMETHOD(device_probe,			  vtpci_probe),
166 	DEVMETHOD(device_attach,		  vtpci_attach),
167 	DEVMETHOD(device_detach,		  vtpci_detach),
168 	DEVMETHOD(device_suspend,		  vtpci_suspend),
169 	DEVMETHOD(device_resume,		  vtpci_resume),
170 	DEVMETHOD(device_shutdown,		  vtpci_shutdown),
171 
172 	/* Bus interface. */
173 	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
174 	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
175 	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
176 	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),
177 
178 	/* VirtIO bus interface. */
179 	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
180 	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
181 	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
182 	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
183 	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
184 	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
185 	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
186 	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
187 	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
188 	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
189 
190 	DEVMETHOD_END
191 };
192 
193 static driver_t vtpci_driver = {
194 	"virtio_pci",
195 	vtpci_methods,
196 	sizeof(struct vtpci_softc)
197 };
198 
199 devclass_t vtpci_devclass;
200 
201 DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, NULL, NULL);
202 MODULE_VERSION(virtio_pci, 1);
203 MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
204 MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
205 
206 static int
207 vtpci_probe(device_t dev)
208 {
209 	char desc[36];
210 	const char *name;
211 
212 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
213 		return (ENXIO);
214 
215 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
216 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
217 		return (ENXIO);
218 
219 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
220 		return (ENXIO);
221 
222 	name = virtio_device_name(pci_get_subdevice(dev));
223 	if (name == NULL)
224 		name = "Unknown";
225 
226 	ksnprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
227 	device_set_desc_copy(dev, desc);
228 
229 	return (BUS_PROBE_DEFAULT);
230 }
231 
232 static int
233 vtpci_attach(device_t dev)
234 {
235 	struct vtpci_softc *sc;
236 	device_t child;
237 	int rid;
238 
239 	sc = device_get_softc(dev);
240 	sc->vtpci_dev = dev;
241 
242 	pci_enable_busmaster(dev);
243 
244 	rid = PCIR_BAR(0);
245 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
246 	    RF_ACTIVE);
247 	if (sc->vtpci_res == NULL) {
248 		device_printf(dev, "cannot map I/O space\n");
249 		return (ENXIO);
250 	}
251 
252 	if (pci_find_extcap(dev, PCIY_MSI, NULL) != 0)
253 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI;
254 	/* XXX(vsrinivas): Check out how to get MSI-X */
255 #ifdef OLD_MSI
256 	if (pci_find_extcap(dev, PCIY_MSIX, NULL) == 0) {
257 		rid = PCIR_BAR(1);
258 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
259 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
260 	}
261 #endif
262 	if (sc->vtpci_msix_res == NULL)
263 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX;
264 
265 	vtpci_reset(sc);
266 
267 	/* Tell the host we've noticed this device. */
268 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
269 
270 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
271 		device_printf(dev, "cannot create child device\n");
272 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
273 		vtpci_detach(dev);
274 		return (ENOMEM);
275 	}
276 
277 	sc->vtpci_child_dev = child;
278 	vtpci_probe_and_attach_child(sc);
279 
280 	return (0);
281 }
282 
283 static int
284 vtpci_detach(device_t dev)
285 {
286 	struct vtpci_softc *sc;
287 	device_t child;
288 	int error;
289 
290 	sc = device_get_softc(dev);
291 
292 	if ((child = sc->vtpci_child_dev) != NULL) {
293 		error = device_delete_child(dev, child);
294 		if (error)
295 			return (error);
296 		sc->vtpci_child_dev = NULL;
297 	}
298 
299 	vtpci_reset(sc);
300 
301 	if (sc->vtpci_msix_res != NULL) {
302 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1),
303 		    sc->vtpci_msix_res);
304 		sc->vtpci_msix_res = NULL;
305 	}
306 
307 	if (sc->vtpci_res != NULL) {
308 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
309 		    sc->vtpci_res);
310 		sc->vtpci_res = NULL;
311 	}
312 
313 	return (0);
314 }
315 
316 static int
317 vtpci_suspend(device_t dev)
318 {
319 
320 	return (bus_generic_suspend(dev));
321 }
322 
323 static int
324 vtpci_resume(device_t dev)
325 {
326 
327 	return (bus_generic_resume(dev));
328 }
329 
330 static int
331 vtpci_shutdown(device_t dev)
332 {
333 
334 	(void) bus_generic_shutdown(dev);
335 	/* Forcibly stop the host device. */
336 	vtpci_stop(dev);
337 
338 	return (0);
339 }
340 
341 static void
342 vtpci_driver_added(device_t dev, driver_t *driver)
343 {
344 	struct vtpci_softc *sc;
345 
346 	sc = device_get_softc(dev);
347 
348 	vtpci_probe_and_attach_child(sc);
349 }
350 
351 static void
352 vtpci_child_detached(device_t dev, device_t child)
353 {
354 	struct vtpci_softc *sc;
355 
356 	sc = device_get_softc(dev);
357 
358 	vtpci_reset(sc);
359 	vtpci_release_child_resources(sc);
360 }
361 
362 static int
363 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
364 {
365 	struct vtpci_softc *sc;
366 
367 	sc = device_get_softc(dev);
368 
369 	if (sc->vtpci_child_dev != child)
370 		return (ENOENT);
371 
372 	switch (index) {
373 	case VIRTIO_IVAR_DEVTYPE:
374 		*result = pci_get_subdevice(dev);
375 		break;
376 	default:
377 		return (ENOENT);
378 	}
379 
380 	return (0);
381 }
382 
383 static int
384 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
385 {
386 	struct vtpci_softc *sc;
387 
388 	sc = device_get_softc(dev);
389 
390 	if (sc->vtpci_child_dev != child)
391 		return (ENOENT);
392 
393 	switch (index) {
394 	case VIRTIO_IVAR_FEATURE_DESC:
395 		sc->vtpci_child_feat_desc = (void *) value;
396 		break;
397 	default:
398 		return (ENOENT);
399 	}
400 
401 	return (0);
402 }
403 
404 static uint64_t
405 vtpci_negotiate_features(device_t dev, uint64_t child_features)
406 {
407 	struct vtpci_softc *sc;
408 	uint64_t host_features, features;
409 
410 	sc = device_get_softc(dev);
411 
412 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
413 	vtpci_describe_features(sc, "host", host_features);
414 
415 	/*
416 	 * Limit negotiated features to what the driver, virtqueue, and
417 	 * host all support.
418 	 */
419 	features = host_features & child_features;
420 	features = virtqueue_filter_features(features);
421 	sc->vtpci_features = features;
422 
423 	vtpci_describe_features(sc, "negotiated", features);
424 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
425 
426 	return (features);
427 }
428 
429 static int
430 vtpci_with_feature(device_t dev, uint64_t feature)
431 {
432 	struct vtpci_softc *sc;
433 
434 	sc = device_get_softc(dev);
435 
436 	return ((sc->vtpci_features & feature) != 0);
437 }
438 
439 static int
440 vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
441     struct vq_alloc_info *vq_info)
442 {
443 	struct vtpci_softc *sc;
444 	struct vtpci_virtqueue *vqx;
445 	struct vq_alloc_info *info;
446 	int queue, error;
447 	uint16_t vq_size;
448 
449 	sc = device_get_softc(dev);
450 
451 	if (sc->vtpci_nvqs != 0 || nvqs <= 0 ||
452 	    nvqs > VIRTIO_MAX_VIRTQUEUES)
453 		return (EINVAL);
454 
455 	error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info);
456 	if (error) {
457 		device_printf(dev, "cannot allocate interrupts\n");
458 		return (error);
459 	}
460 
461 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
462 		error = vtpci_register_msix_vector(sc,
463 		    VIRTIO_MSI_CONFIG_VECTOR, 0);
464 		if (error)
465 			return (error);
466 	}
467 
468 	for (queue = 0; queue < nvqs; queue++) {
469 		vqx = &sc->vtpci_vqx[queue];
470 		info = &vq_info[queue];
471 
472 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
473 
474 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
475 		error = virtqueue_alloc(dev, queue, vq_size,
476 		    VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq);
477 		if (error)
478 			return (error);
479 
480 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
481 			error = vtpci_register_msix_vector(sc,
482 			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
483 			if (error)
484 				return (error);
485 		}
486 
487 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
488 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
489 
490 		*info->vqai_vq = vqx->vq;
491 		sc->vtpci_nvqs++;
492 	}
493 
494 	return (0);
495 }
496 
497 static int
498 vtpci_setup_intr(device_t dev, lwkt_serialize_t slz)
499 {
500 	struct vtpci_softc *sc;
501 	struct vtpci_intr_resource *ires;
502 	struct vtpci_virtqueue *vqx;
503 	int i, flags, error;
504 
505 	sc = device_get_softc(dev);
506 	flags = INTR_MPSAFE;
507 	ires = &sc->vtpci_intr_res[0];
508 
509 	if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) {
510 		error = bus_setup_intr(dev, ires->irq, flags,
511 				       (driver_intr_t *) vtpci_legacy_intr,
512 				       sc, &ires->intrhand, slz);
513 		return (error);
514 	}
515 
516 	error = bus_setup_intr(dev, ires->irq, flags,
517 			       (driver_intr_t *) vtpci_config_intr,
518 			       sc, &ires->intrhand, slz);
519 	if (error)
520 		return (error);
521 
522 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) {
523 		ires = &sc->vtpci_intr_res[1];
524 		error = bus_setup_intr(dev, ires->irq, flags,
525 				       (driver_intr_t *) vtpci_vq_shared_intr,
526 				       sc, &ires->intrhand, slz);
527 
528 		return (error);
529 	}
530 
531 	/* Setup an interrupt handler for each virtqueue. */
532 	for (i = 0; i < sc->vtpci_nvqs; i++) {
533 		vqx = &sc->vtpci_vqx[i];
534 		if (vqx->ires_idx < 1)
535 			continue;
536 
537 		ires = &sc->vtpci_intr_res[vqx->ires_idx];
538 		error = bus_setup_intr(dev, ires->irq, flags,
539 				       (driver_intr_t *) vtpci_vq_intr,
540 				       vqx->vq, &ires->intrhand, slz);
541 		if (error)
542 			return (error);
543 	}
544 
545 	return (0);
546 }
547 
548 static void
549 vtpci_stop(device_t dev)
550 {
551 	vtpci_reset(device_get_softc(dev));
552 }
553 
554 static int
555 vtpci_reinit(device_t dev, uint64_t features)
556 {
557 	struct vtpci_softc *sc;
558 	struct vtpci_virtqueue *vqx;
559 	struct virtqueue *vq;
560 	int queue, error;
561 	uint16_t vq_size;
562 
563 	sc = device_get_softc(dev);
564 
565 	/*
566 	 * Redrive the device initialization. This is a bit of an abuse
567 	 * of the specification, but both VirtualBox and QEMU/KVM seem
568 	 * to play nice. We do not allow the host device to change from
569 	 * what was originally negotiated beyond what the guest driver
570 	 * changed (MSIX state should not change, number of virtqueues
571 	 * and their size remain the same, etc).
572 	 */
573 
574 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
575 		vtpci_stop(dev);
576 
577 	/*
578 	 * Quickly drive the status through ACK and DRIVER. The device
579 	 * does not become usable again until vtpci_reinit_complete().
580 	 */
581 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
582 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
583 
584 	vtpci_negotiate_features(dev, features);
585 
586 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
587 		error = vtpci_register_msix_vector(sc,
588 		    VIRTIO_MSI_CONFIG_VECTOR, 0);
589 		if (error)
590 			return (error);
591 	}
592 
593 	for (queue = 0; queue < sc->vtpci_nvqs; queue++) {
594 		vqx = &sc->vtpci_vqx[queue];
595 		vq = vqx->vq;
596 
597 		KASSERT(vq != NULL, ("vq %d not allocated", queue));
598 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
599 
600 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
601 		error = virtqueue_reinit(vq, vq_size);
602 		if (error)
603 			return (error);
604 
605 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
606 			error = vtpci_register_msix_vector(sc,
607 			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
608 			if (error)
609 				return (error);
610 		}
611 
612 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
613 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
614 	}
615 
616 	return (0);
617 }
618 
619 static void
620 vtpci_reinit_complete(device_t dev)
621 {
622 
623 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
624 }
625 
626 static void
627 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
628 {
629 	struct vtpci_softc *sc;
630 
631 	sc = device_get_softc(dev);
632 
633 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
634 }
635 
636 static uint8_t
637 vtpci_get_status(device_t dev)
638 {
639 	struct vtpci_softc *sc;
640 
641 	sc = device_get_softc(dev);
642 
643 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
644 }
645 
646 static void
647 vtpci_set_status(device_t dev, uint8_t status)
648 {
649 	struct vtpci_softc *sc;
650 
651 	sc = device_get_softc(dev);
652 
653 	if (status != VIRTIO_CONFIG_STATUS_RESET)
654 		status |= vtpci_get_status(dev);
655 
656 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
657 }
658 
659 static void
660 vtpci_read_dev_config(device_t dev, bus_size_t offset,
661     void *dst, int length)
662 {
663 	struct vtpci_softc *sc;
664 	bus_size_t off;
665 	uint8_t *d;
666 	int size;
667 
668 	sc = device_get_softc(dev);
669 	off = VIRTIO_PCI_CONFIG(sc) + offset;
670 
671 	for (d = dst; length > 0; d += size, off += size, length -= size) {
672 		if (length >= 4) {
673 			size = 4;
674 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
675 		} else if (length >= 2) {
676 			size = 2;
677 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
678 		} else {
679 			size = 1;
680 			*d = vtpci_read_config_1(sc, off);
681 		}
682 	}
683 }
684 
685 static void
686 vtpci_write_dev_config(device_t dev, bus_size_t offset,
687     void *src, int length)
688 {
689 	struct vtpci_softc *sc;
690 	bus_size_t off;
691 	uint8_t *s;
692 	int size;
693 
694 	sc = device_get_softc(dev);
695 	off = VIRTIO_PCI_CONFIG(sc) + offset;
696 
697 	for (s = src; length > 0; s += size, off += size, length -= size) {
698 		if (length >= 4) {
699 			size = 4;
700 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
701 		} else if (length >= 2) {
702 			size = 2;
703 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
704 		} else {
705 			size = 1;
706 			vtpci_write_config_1(sc, off, *s);
707 		}
708 	}
709 }
710 
711 static void
712 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
713     uint64_t features)
714 {
715 	device_t dev, child;
716 
717 	dev = sc->vtpci_dev;
718 	child = sc->vtpci_child_dev;
719 
720 	if (device_is_attached(child) && bootverbose == 0)
721 		return;
722 
723 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
724 }
725 
726 static void
727 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
728 {
729 	device_t dev, child;
730 	int error;
731 
732 	dev = sc->vtpci_dev;
733 	child = sc->vtpci_child_dev;
734 
735 	if (child == NULL)
736 		return;
737 
738 	if (device_get_state(child) != DS_NOTPRESENT)
739 		return;
740 
741 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
742 	error = device_probe_and_attach(child);
743 	if (error != 0 || device_get_state(child) == DS_NOTPRESENT) {
744 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
745 		vtpci_reset(sc);
746 		vtpci_release_child_resources(sc);
747 
748 		/* Reset status for future attempt. */
749 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
750 	} else
751 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
752 }
753 
754 static int
755 vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs,
756     struct vq_alloc_info *vq_info)
757 {
758 	int i, nvectors, error;
759 
760 	/*
761 	 * Only allocate a vector for virtqueues that are actually
762 	 * expecting an interrupt.
763 	 */
764 	for (nvectors = 0, i = 0; i < nvqs; i++)
765 		if (vq_info[i].vqai_intr != NULL)
766 			nvectors++;
767 
768 	if (vtpci_disable_msix != 0 ||
769 	    sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX ||
770 	    flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX ||
771 	    vtpci_alloc_msix(sc, nvectors) != 0) {
772 		/*
773 		 * Use MSI interrupts if available. Otherwise, we fallback
774 		 * to legacy interrupts.
775 		 */
776 		if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 &&
777 		    vtpci_alloc_msi(sc) == 0)
778 			sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI;
779 
780 		sc->vtpci_nintr_res = 1;
781 	}
782 
783 	error = vtpci_alloc_intr_resources(sc, nvqs, vq_info);
784 
785 	return (error);
786 }
787 
788 static int
789 vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
790     struct vq_alloc_info *vq_info)
791 {
792 	device_t dev;
793 	struct resource *irq;
794 	struct vtpci_virtqueue *vqx;
795 	int i, rid, flags, res_idx;
796 
797 	dev = sc->vtpci_dev;
798 	flags = RF_ACTIVE;
799 
800 	if ((sc->vtpci_flags &
801 	    (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) {
802 		rid = 0;
803 		flags |= RF_SHAREABLE;
804 	} else
805 		rid = 1;
806 
807 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
808 		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags);
809 		if (irq == NULL)
810 			return (ENXIO);
811 
812 		sc->vtpci_intr_res[i].irq = irq;
813 		sc->vtpci_intr_res[i].rid = rid++;
814 	}
815 
816 	/*
817 	 * Map the virtqueue into the correct index in vq_intr_res[]. Note the
818 	 * first index is reserved for configuration changes notifications.
819 	 */
820 	for (i = 0, res_idx = 1; i < nvqs; i++) {
821 		vqx = &sc->vtpci_vqx[i];
822 
823 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
824 			if (vq_info[i].vqai_intr == NULL)
825 				vqx->ires_idx = -1;
826 			else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
827 				vqx->ires_idx = res_idx;
828 			else
829 				vqx->ires_idx = res_idx++;
830 		} else
831 			vqx->ires_idx = -1;
832 	}
833 
834 	return (0);
835 }
836 
837 static int
838 vtpci_alloc_msi(struct vtpci_softc *sc)
839 {
840 	device_t dev;
841 	int nmsi;
842 	u_int irq_flags;
843 
844 	dev = sc->vtpci_dev;
845 	nmsi = pci_msi_count(dev);
846 
847 	if (nmsi < 1)
848 		return (1);
849 
850 	sc->vtpci_irq_rid = 0;
851         sc->vtpci_irq_type = pci_alloc_1intr(dev, 1,
852             &sc->vtpci_irq_rid, &irq_flags);
853 
854 
855 	return (1);
856 }
857 
/*
 * Try to allocate MSIX vectors: one per interrupt-driven virtqueue plus
 * one for configuration changes, or failing that, two vectors with all
 * virtqueues sharing the second one.
 *
 * On success vtpci_nintr_res and the MSIX flag(s) are set and 0 is
 * returned; otherwise a non-zero value is returned and the caller falls
 * back to MSI or the legacy interrupt.
 *
 * When OLD_MSI is not defined the MSIX code is compiled out, so failure
 * is reported. Reporting success there would leave the caller with no
 * interrupt resources configured.
 */
static int
vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
{
	/* XXX(vsrinivas): Huh? Is this how MSI-X works?*/
	/* XXX(vsrinivas): All of this was disabled... */
#ifdef OLD_MSI
	device_t dev;
	int nmsix, cnt, required;

	dev = sc->vtpci_dev;

	nmsix = pci_msix_count(dev);
	if (nmsix < 1)
		return (1);

	/* An additional vector is needed for the config changes. */
	required = nvectors + 1;
	if (nmsix >= required) {
		cnt = required;
		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required)
			goto out;

		pci_release_msi(dev);
	}

	/* Attempt shared MSIX configuration. */
	required = 2;
	if (nmsix >= required) {
		cnt = required;
		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
			sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX;
			goto out;
		}

		pci_release_msi(dev);
	}

	return (1);

out:
	sc->vtpci_nintr_res = required;
	sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX;

	if (bootverbose) {
		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
			device_printf(dev, "using shared virtqueue MSIX\n");
		else
			device_printf(dev, "using per virtqueue MSIX\n");
	}

	return (0);
#else
	/*
	 * MSIX support is compiled out: nothing was allocated, so tell
	 * the caller to fall back to MSI or the legacy interrupt.
	 */
	return (1);
#endif
}
910 
911 static int
912 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
913 {
914 	device_t dev;
915 	uint16_t vector;
916 
917 	dev = sc->vtpci_dev;
918 
919 	if (offset != VIRTIO_MSI_CONFIG_VECTOR &&
920 	    offset != VIRTIO_MSI_QUEUE_VECTOR)
921 		return (EINVAL);
922 
923 	if (res_idx != -1) {
924 		/* Map from rid to host vector. */
925 		vector = sc->vtpci_intr_res[res_idx].rid - 1;
926 	} else
927 		vector = VIRTIO_MSI_NO_VECTOR;
928 
929 	/* The first resource is special; make sure it is used correctly. */
930 	if (res_idx == 0) {
931 		KASSERT(vector == 0, ("unexpected config vector"));
932 		KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR,
933 		    ("unexpected config offset"));
934 	}
935 
936 	vtpci_write_config_2(sc, offset, vector);
937 
938 	if (vtpci_read_config_2(sc, offset) != vector) {
939 		device_printf(dev, "insufficient host resources for "
940 		    "MSIX interrupts\n");
941 		return (ENODEV);
942 	}
943 
944 	return (0);
945 }
946 
947 static void
948 vtpci_free_interrupts(struct vtpci_softc *sc)
949 {
950 	device_t dev;
951 	struct vtpci_intr_resource *ires;
952 	int i;
953 
954 	dev = sc->vtpci_dev;
955 	sc->vtpci_nintr_res = 0;
956 
957 	if (sc->vtpci_flags & (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) {
958 		pci_release_msi(dev);
959 		sc->vtpci_flags &= ~(VIRTIO_PCI_FLAG_MSI |
960 		    VIRTIO_PCI_FLAG_MSIX | VIRTIO_PCI_FLAG_SHARED_MSIX);
961 	}
962 
963 	for (i = 0; i < 1 + VIRTIO_MAX_VIRTQUEUES; i++) {
964 		ires = &sc->vtpci_intr_res[i];
965 
966 		if (ires->intrhand != NULL) {
967 			bus_teardown_intr(dev, ires->irq, ires->intrhand);
968 			ires->intrhand = NULL;
969 		}
970 
971 		if (ires->irq != NULL) {
972 			bus_release_resource(dev, SYS_RES_IRQ, ires->rid,
973 			    ires->irq);
974 			ires->irq = NULL;
975 		}
976 
977 		ires->rid = -1;
978 	}
979 }
980 
981 static void
982 vtpci_free_virtqueues(struct vtpci_softc *sc)
983 {
984 	struct vtpci_virtqueue *vqx;
985 	int i;
986 
987 	sc->vtpci_nvqs = 0;
988 
989 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
990 		vqx = &sc->vtpci_vqx[i];
991 
992 		if (vqx->vq != NULL) {
993 			virtqueue_free(vqx->vq);
994 			vqx->vq = NULL;
995 		}
996 	}
997 }
998 
/*
 * Free what was allocated on the child's behalf: first the interrupts,
 * then the virtqueues.
 */
static void
vtpci_release_child_resources(struct vtpci_softc *sc)
{

	vtpci_free_interrupts(sc);

	vtpci_free_virtqueues(sc);
}
1006 
1007 static void
1008 vtpci_reset(struct vtpci_softc *sc)
1009 {
1010 
1011 	/*
1012 	 * Setting the status to RESET sets the host device to
1013 	 * the original, uninitialized state.
1014 	 */
1015 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
1016 }
1017 
1018 static int
1019 vtpci_legacy_intr(void *xsc)
1020 {
1021 	struct vtpci_softc *sc;
1022 	struct vtpci_virtqueue *vqx;
1023 	int i;
1024 	uint8_t isr;
1025 
1026 	sc = xsc;
1027 	vqx = &sc->vtpci_vqx[0];
1028 
1029 	/* Reading the ISR also clears it. */
1030 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
1031 
1032 	if (isr & VIRTIO_PCI_ISR_CONFIG)
1033 		vtpci_config_intr(sc);
1034 
1035 	if (isr & VIRTIO_PCI_ISR_INTR)
1036 		for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1037 			virtqueue_intr(vqx->vq);
1038 
1039 	return isr;
1040 }
1041 
1042 static int
1043 vtpci_vq_shared_intr(void *xsc)
1044 {
1045 	struct vtpci_softc *sc;
1046 	struct vtpci_virtqueue *vqx;
1047 	int i, rc;
1048 
1049 	rc = 0;
1050 	sc = xsc;
1051 	vqx = &sc->vtpci_vqx[0];
1052 
1053 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1054 		rc |= virtqueue_intr(vqx->vq);
1055 
1056 	return rc;
1057 }
1058 
/*
 * Handler for a per-virtqueue MSIX vector: `xvq' is the virtqueue whose
 * interrupt routine is run; its result is returned.
 */
static int
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *vq = xvq;

	return virtqueue_intr(vq);
}
1070 
1071 static int
1072 vtpci_config_intr(void *xsc)
1073 {
1074 	struct vtpci_softc *sc;
1075 	device_t child;
1076 	int rc;
1077 
1078 	rc = 0;
1079 	sc = xsc;
1080 	child = sc->vtpci_child_dev;
1081 
1082 	if (child != NULL)
1083 		rc = VIRTIO_CONFIG_CHANGE(child);
1084 
1085 	return rc;
1086 }
1087