1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/dev/virtio/pci/virtio_pci.c,v 1.3 2012/04/14 05:48:04 grehan Exp $
27  */
28 
29 /* Driver for the VirtIO PCI interface. */
30 
31 #include <sys/cdefs.h>
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/kernel.h>
37 #include <sys/module.h>
38 #include <sys/malloc.h>
39 #include <sys/serialize.h>
40 
41 #include <bus/pci/pcivar.h>
42 #include <bus/pci/pcireg.h>
43 
44 #include <sys/bus.h>
45 #include <sys/param.h>
46 #include <sys/rman.h>
47 
48 #include <dev/virtual/virtio/virtio/virtio.h>
49 #include <dev/virtual/virtio/virtio/virtqueue.h>
50 #include "virtio_pci.h"
51 #include "virtio_if.h"
52 #include "virtio_bus_if.h"
53 
/*
 * Per-device software state (softc) of the VirtIO PCI transport driver.
 */
struct vtpci_softc {
	/* The PCI device this softc belongs to; set in vtpci_attach(). */
	device_t			 vtpci_dev;
	/* I/O port resource for BAR 0; the register accessors use it. */
	struct resource			*vtpci_res;
	/* Memory resource for BAR 1; only allocated when OLD_MSI is set. */
	struct resource			*vtpci_msix_res;
	/* Feature bits stored by vtpci_negotiate_features(). */
	uint64_t			 vtpci_features;
	/* VIRTIO_PCI_FLAG_* bits, defined below. */
	uint32_t			 vtpci_flags;
	/* Return value and rid of pci_alloc_1intr(); see vtpci_alloc_msi(). */
	int				 vtpci_irq_type;
	int				 vtpci_irq_rid;
	/*
	 * The NO_* flags are set in vtpci_attach() when the mechanism is
	 * not usable; the remaining flags record what was allocated.
	 */
#define VIRTIO_PCI_FLAG_NO_MSI		 0x0001
#define VIRTIO_PCI_FLAG_MSI		 0x0002
#define VIRTIO_PCI_FLAG_NO_MSIX		 0x0010
#define VIRTIO_PCI_FLAG_MSIX		 0x0020
#define VIRTIO_PCI_FLAG_SHARED_MSIX	 0x0040

	/* The single child device and its optional feature descriptions. */
	device_t			 vtpci_child_dev;
	struct virtio_feature_desc	*vtpci_child_feat_desc;

	/*
	 * Ideally, each virtqueue that the driver provides a callback for
	 * will receive its own MSIX vector. If there are not sufficient
	 * vectors available, we will then attempt to have all the VQs
	 * share one vector. Note that when using MSIX, the configuration
	 * changed notifications must be on their own vector.
	 *
	 * If MSIX is not available, we will attempt to have the whole
	 * device share one MSI vector, and then, finally, one legacy
	 * interrupt.
	 */
	/* Number of entries of vtpci_vqx[] that are fully set up. */
	int				 vtpci_nvqs;
	struct vtpci_virtqueue {
		struct virtqueue *vq;

		/* Index into vtpci_intr_res[] below, or -1 when unused. */
		int		  ires_idx;
	} vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];

	/*
	 * When using MSIX interrupts, the first element of vtpci_intr_res[]
	 * is always the configuration changed notifications. The remaining
	 * element(s) are used for the virtqueues.
	 *
	 * With MSI and legacy interrupts, only the first element of
	 * vtpci_intr_res[] is used.
	 */
	/* Number of vtpci_intr_res[] entries to allocate. */
	int				 vtpci_nintr_res;
	struct vtpci_intr_resource {
		struct resource	*irq;		/* SYS_RES_IRQ resource */
		int		 rid;		/* rid the resource was allocated with */
		void		*intrhand;	/* cookie from bus_setup_intr() */
	} vtpci_intr_res[1 + VIRTIO_MAX_VIRTQUEUES];
};
105 
/* Device and bus interface methods (see vtpci_methods[]). */
static int	vtpci_probe(device_t);
static int	vtpci_attach(device_t);
static int	vtpci_detach(device_t);
static int	vtpci_suspend(device_t);
static int	vtpci_resume(device_t);
static int	vtpci_shutdown(device_t);
static void	vtpci_driver_added(device_t, driver_t *);
static void	vtpci_child_detached(device_t, device_t);
static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);

/* VirtIO bus interface methods and status/config register helpers. */
static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
static int	vtpci_with_feature(device_t, uint64_t);
static int	vtpci_alloc_virtqueues(device_t, int, int,
		    struct vq_alloc_info *);
static int	vtpci_setup_intr(device_t, lwkt_serialize_t);
static void	vtpci_stop(device_t);
static int	vtpci_reinit(device_t, uint64_t);
static void	vtpci_reinit_complete(device_t);
static void	vtpci_notify_virtqueue(device_t, uint16_t);
static uint8_t	vtpci_get_status(device_t);
static void	vtpci_set_status(device_t, uint8_t);
static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);

/* Child device management. */
static void	vtpci_describe_features(struct vtpci_softc *, const char *,
		    uint64_t);
static void	vtpci_probe_and_attach_child(struct vtpci_softc *);

/* Interrupt allocation. */
static int	vtpci_alloc_interrupts(struct vtpci_softc *, int, int,
		    struct vq_alloc_info *);
static int	vtpci_alloc_intr_resources(struct vtpci_softc *, int,
		    struct vq_alloc_info *);
static int	vtpci_alloc_msi(struct vtpci_softc *);
static int	vtpci_alloc_msix(struct vtpci_softc *, int);
static int	vtpci_register_msix_vector(struct vtpci_softc *, int, int);

/* Resource release and device reset. */
static void	vtpci_free_interrupts(struct vtpci_softc *);
static void	vtpci_free_virtqueues(struct vtpci_softc *);
static void	vtpci_release_child_resources(struct vtpci_softc *);
static void	vtpci_reset(struct vtpci_softc *);

/* Interrupt handlers installed by vtpci_setup_intr(). */
static int	vtpci_legacy_intr(void *);
static int	vtpci_vq_shared_intr(void *);
static int	vtpci_vq_intr(void *);
static int	vtpci_config_intr(void *);
152 
/*
 * I/O port read/write wrappers.
 *
 * Each macro accesses the resource stored in (sc)->vtpci_res at byte
 * offset (o), using a 1, 2 or 4 byte wide bus access. (sc) must be a
 * struct vtpci_softc pointer.
 */
#define vtpci_read_config_1(sc, o)	bus_read_1((sc)->vtpci_res, (o))
#define vtpci_read_config_2(sc, o)	bus_read_2((sc)->vtpci_res, (o))
#define vtpci_read_config_4(sc, o)	bus_read_4((sc)->vtpci_res, (o))
#define vtpci_write_config_1(sc, o, v)	bus_write_1((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_2(sc, o, v)	bus_write_2((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_4(sc, o, v)	bus_write_4((sc)->vtpci_res, (o), (v))
162 
/*
 * Tunables.
 *
 * hw.virtio.pci.disable_msix: when non-zero, vtpci_alloc_interrupts()
 * does not attempt MSIX and goes straight to MSI or a legacy interrupt.
 */
static int vtpci_disable_msix = 0;
TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
166 
/*
 * Method table binding the device, bus and VirtIO bus interfaces to the
 * implementations in this file. The all-zero entry terminates the table.
 */
static device_method_t vtpci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,			  vtpci_probe),
	DEVMETHOD(device_attach,		  vtpci_attach),
	DEVMETHOD(device_detach,		  vtpci_detach),
	DEVMETHOD(device_suspend,		  vtpci_suspend),
	DEVMETHOD(device_resume,		  vtpci_resume),
	DEVMETHOD(device_shutdown,		  vtpci_shutdown),

	/* Bus interface. */
	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),

	/* VirtIO bus interface. */
	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),

	{ 0, 0 }
};
196 
/* Driver description: name, method table and softc size. */
static driver_t vtpci_driver = {
	"virtio_pci",
	vtpci_methods,
	sizeof(struct vtpci_softc)
};

devclass_t vtpci_devclass;

/*
 * Register the driver on the pci bus and declare the module's version and
 * its dependencies on the pci and virtio modules.
 */
DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0);
MODULE_VERSION(virtio_pci, 1);
MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
209 
210 static int
211 vtpci_probe(device_t dev)
212 {
213 	char desc[36];
214 	const char *name;
215 
216 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
217 		return (ENXIO);
218 
219 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
220 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
221 		return (ENXIO);
222 
223 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
224 		return (ENXIO);
225 
226 	name = virtio_device_name(pci_get_subdevice(dev));
227 	if (name == NULL)
228 		name = "Unknown";
229 
230 	ksnprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
231 	device_set_desc_copy(dev, desc);
232 
233 	return (BUS_PROBE_DEFAULT);
234 }
235 
236 static int
237 vtpci_attach(device_t dev)
238 {
239 	struct vtpci_softc *sc;
240 	device_t child;
241 	int rid;
242 
243 	sc = device_get_softc(dev);
244 	sc->vtpci_dev = dev;
245 
246 	pci_enable_busmaster(dev);
247 
248 	rid = PCIR_BAR(0);
249 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
250 	    RF_ACTIVE);
251 	if (sc->vtpci_res == NULL) {
252 		device_printf(dev, "cannot map I/O space\n");
253 		return (ENXIO);
254 	}
255 
256 	if (pci_find_extcap(dev, PCIY_MSI, NULL) != 0)
257 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI;
258 	/* XXX(vsrinivas): Check out how to get MSI-X */
259 #if OLD_MSI
260 	if (pci_find_extcap(dev, PCIY_MSIX, NULL) == 0) {
261 		rid = PCIR_BAR(1);
262 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
263 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
264 	}
265 #endif
266 	if (sc->vtpci_msix_res == NULL)
267 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX;
268 
269 	vtpci_reset(sc);
270 
271 	/* Tell the host we've noticed this device. */
272 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
273 
274 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
275 		device_printf(dev, "cannot create child device\n");
276 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
277 		vtpci_detach(dev);
278 		return (ENOMEM);
279 	}
280 
281 	sc->vtpci_child_dev = child;
282 	vtpci_probe_and_attach_child(sc);
283 
284 	return (0);
285 }
286 
287 static int
288 vtpci_detach(device_t dev)
289 {
290 	struct vtpci_softc *sc;
291 	device_t child;
292 	int error;
293 
294 	sc = device_get_softc(dev);
295 
296 	if ((child = sc->vtpci_child_dev) != NULL) {
297 		error = device_delete_child(dev, child);
298 		if (error)
299 			return (error);
300 		sc->vtpci_child_dev = NULL;
301 	}
302 
303 	vtpci_reset(sc);
304 
305 	if (sc->vtpci_msix_res != NULL) {
306 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1),
307 		    sc->vtpci_msix_res);
308 		sc->vtpci_msix_res = NULL;
309 	}
310 
311 	if (sc->vtpci_res != NULL) {
312 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
313 		    sc->vtpci_res);
314 		sc->vtpci_res = NULL;
315 	}
316 
317 	return (0);
318 }
319 
320 static int
321 vtpci_suspend(device_t dev)
322 {
323 
324 	return (bus_generic_suspend(dev));
325 }
326 
327 static int
328 vtpci_resume(device_t dev)
329 {
330 
331 	return (bus_generic_resume(dev));
332 }
333 
334 static int
335 vtpci_shutdown(device_t dev)
336 {
337 
338 	(void) bus_generic_shutdown(dev);
339 	/* Forcibly stop the host device. */
340 	vtpci_stop(dev);
341 
342 	return (0);
343 }
344 
345 static void
346 vtpci_driver_added(device_t dev, driver_t *driver)
347 {
348 	struct vtpci_softc *sc;
349 
350 	sc = device_get_softc(dev);
351 
352 	vtpci_probe_and_attach_child(sc);
353 }
354 
355 static void
356 vtpci_child_detached(device_t dev, device_t child)
357 {
358 	struct vtpci_softc *sc;
359 
360 	sc = device_get_softc(dev);
361 
362 	vtpci_reset(sc);
363 	vtpci_release_child_resources(sc);
364 }
365 
366 static int
367 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
368 {
369 	struct vtpci_softc *sc;
370 
371 	sc = device_get_softc(dev);
372 
373 	if (sc->vtpci_child_dev != child)
374 		return (ENOENT);
375 
376 	switch (index) {
377 	case VIRTIO_IVAR_DEVTYPE:
378 		*result = pci_get_subdevice(dev);
379 		break;
380 	default:
381 		return (ENOENT);
382 	}
383 
384 	return (0);
385 }
386 
387 static int
388 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
389 {
390 	struct vtpci_softc *sc;
391 
392 	sc = device_get_softc(dev);
393 
394 	if (sc->vtpci_child_dev != child)
395 		return (ENOENT);
396 
397 	switch (index) {
398 	case VIRTIO_IVAR_FEATURE_DESC:
399 		sc->vtpci_child_feat_desc = (void *) value;
400 		break;
401 	default:
402 		return (ENOENT);
403 	}
404 
405 	return (0);
406 }
407 
408 static uint64_t
409 vtpci_negotiate_features(device_t dev, uint64_t child_features)
410 {
411 	struct vtpci_softc *sc;
412 	uint64_t host_features, features;
413 
414 	sc = device_get_softc(dev);
415 
416 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
417 	vtpci_describe_features(sc, "host", host_features);
418 
419 	/*
420 	 * Limit negotiated features to what the driver, virtqueue, and
421 	 * host all support.
422 	 */
423 	features = host_features & child_features;
424 	features = virtqueue_filter_features(features);
425 	sc->vtpci_features = features;
426 
427 	vtpci_describe_features(sc, "negotiated", features);
428 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
429 
430 	return (features);
431 }
432 
433 static int
434 vtpci_with_feature(device_t dev, uint64_t feature)
435 {
436 	struct vtpci_softc *sc;
437 
438 	sc = device_get_softc(dev);
439 
440 	return ((sc->vtpci_features & feature) != 0);
441 }
442 
/*
 * VirtIO bus method: allocate the interrupts and the nvqs virtqueues
 * described by vq_info[] for the child driver.
 *
 * Returns EINVAL if virtqueues already exist or nvqs is not in
 * 1..VIRTIO_MAX_VIRTQUEUES; otherwise 0, or the first error reported by a
 * helper. Nothing is released here when a step fails part-way.
 * NOTE(review): cleanup presumably relies on
 * vtpci_release_child_resources() being run when the child fails to
 * attach or detaches - confirm.
 */
static int
vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
    struct vq_alloc_info *vq_info)
{
	struct vtpci_softc *sc;
	struct vtpci_virtqueue *vqx;
	struct vq_alloc_info *info;
	int queue, error;
	uint16_t vq_size;

	sc = device_get_softc(dev);

	if (sc->vtpci_nvqs != 0 || nvqs <= 0 ||
	    nvqs > VIRTIO_MAX_VIRTQUEUES)
		return (EINVAL);

	error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info);
	if (error) {
		device_printf(dev, "cannot allocate interrupts\n");
		return (error);
	}

	/* With MSIX, interrupt resource 0 is the config change vector. */
	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
		error = vtpci_register_msix_vector(sc,
		    VIRTIO_MSI_CONFIG_VECTOR, 0);
		if (error)
			return (error);
	}

	for (queue = 0; queue < nvqs; queue++) {
		vqx = &sc->vtpci_vqx[queue];
		info = &vq_info[queue];

		/* Select the queue; the registers below then refer to it. */
		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);

		/* The queue size is read from the device, not chosen here. */
		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
		error = virtqueue_alloc(dev, queue, vq_size,
		    VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq);
		if (error)
			return (error);

		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
			error = vtpci_register_msix_vector(sc,
			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
			if (error)
				return (error);
		}

		/* Hand the ring's shifted physical address to the device. */
		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);

		/* Publish the queue to the caller and count it as set up. */
		*info->vqai_vq = vqx->vq;
		sc->vtpci_nvqs++;
	}

	return (0);
}
500 
501 static int
502 vtpci_setup_intr(device_t dev, lwkt_serialize_t slz)
503 {
504 	struct vtpci_softc *sc;
505 	struct vtpci_intr_resource *ires;
506 	struct vtpci_virtqueue *vqx;
507 	int i, flags, error;
508 
509 	sc = device_get_softc(dev);
510 	flags = INTR_MPSAFE;
511 	ires = &sc->vtpci_intr_res[0];
512 
513 	if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) {
514 		error = bus_setup_intr(dev, ires->irq, flags,
515 				       (driver_intr_t *) vtpci_legacy_intr,
516 				       sc, &ires->intrhand, slz);
517 		return (error);
518 	}
519 
520 	error = bus_setup_intr(dev, ires->irq, flags,
521 			       (driver_intr_t *) vtpci_config_intr,
522 			       sc, &ires->intrhand, slz);
523 	if (error)
524 		return (error);
525 
526 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) {
527 		ires = &sc->vtpci_intr_res[1];
528 		error = bus_setup_intr(dev, ires->irq, flags,
529 				       (driver_intr_t *) vtpci_vq_shared_intr,
530 				       sc, &ires->intrhand, slz);
531 
532 		return (error);
533 	}
534 
535 	/* Setup an interrupt handler for each virtqueue. */
536 	for (i = 0; i < sc->vtpci_nvqs; i++) {
537 		vqx = &sc->vtpci_vqx[i];
538 		if (vqx->ires_idx < 1)
539 			continue;
540 
541 		ires = &sc->vtpci_intr_res[vqx->ires_idx];
542 		error = bus_setup_intr(dev, ires->irq, flags,
543 				       (driver_intr_t *) vtpci_vq_intr,
544 				       vqx->vq, &ires->intrhand, slz);
545 		if (error)
546 			return (error);
547 	}
548 
549 	return (0);
550 }
551 
552 static void
553 vtpci_stop(device_t dev)
554 {
555 	vtpci_reset(device_get_softc(dev));
556 }
557 
/*
 * VirtIO bus method: re-initialise the device with the given features.
 *
 * Resets the device if it is not already in the reset state, replays the
 * ACK and DRIVER status steps, renegotiates features and re-programs the
 * MSIX vectors and ring addresses of the already allocated virtqueues.
 * The device is not marked DRIVER_OK here; see vtpci_reinit_complete().
 *
 * Returns 0 on success or the first error from a helper.
 */
static int
vtpci_reinit(device_t dev, uint64_t features)
{
	struct vtpci_softc *sc;
	struct vtpci_virtqueue *vqx;
	struct virtqueue *vq;
	int queue, error;
	uint16_t vq_size;

	sc = device_get_softc(dev);

	/*
	 * Redrive the device initialization. This is a bit of an abuse
	 * of the specification, but both VirtualBox and QEMU/KVM seem
	 * to play nice. We do not allow the host device to change from
	 * what was originally negotiated beyond what the guest driver
	 * changed (MSIX state should not change, number of virtqueues
	 * and their size remain the same, etc).
	 */

	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
		vtpci_stop(dev);

	/*
	 * Quickly drive the status through ACK and DRIVER. The device
	 * does not become usable again until vtpci_reinit_complete().
	 */
	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);

	/* The negotiated result is stored in the softc; not checked here. */
	vtpci_negotiate_features(dev, features);

	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
		error = vtpci_register_msix_vector(sc,
		    VIRTIO_MSI_CONFIG_VECTOR, 0);
		if (error)
			return (error);
	}

	for (queue = 0; queue < sc->vtpci_nvqs; queue++) {
		vqx = &sc->vtpci_vqx[queue];
		vq = vqx->vq;

		KASSERT(vq != NULL, ("vq %d not allocated", queue));
		/* Select the queue; the registers below then refer to it. */
		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);

		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
		error = virtqueue_reinit(vq, vq_size);
		if (error)
			return (error);

		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
			error = vtpci_register_msix_vector(sc,
			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
			if (error)
				return (error);
		}

		/* Hand the ring's shifted physical address to the device. */
		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
	}

	return (0);
}
622 
623 static void
624 vtpci_reinit_complete(device_t dev)
625 {
626 
627 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
628 }
629 
630 static void
631 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
632 {
633 	struct vtpci_softc *sc;
634 
635 	sc = device_get_softc(dev);
636 
637 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
638 }
639 
640 static uint8_t
641 vtpci_get_status(device_t dev)
642 {
643 	struct vtpci_softc *sc;
644 
645 	sc = device_get_softc(dev);
646 
647 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
648 }
649 
650 static void
651 vtpci_set_status(device_t dev, uint8_t status)
652 {
653 	struct vtpci_softc *sc;
654 
655 	sc = device_get_softc(dev);
656 
657 	if (status != VIRTIO_CONFIG_STATUS_RESET)
658 		status |= vtpci_get_status(dev);
659 
660 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
661 }
662 
663 static void
664 vtpci_read_dev_config(device_t dev, bus_size_t offset,
665     void *dst, int length)
666 {
667 	struct vtpci_softc *sc;
668 	bus_size_t off;
669 	uint8_t *d;
670 	int size;
671 
672 	sc = device_get_softc(dev);
673 	off = VIRTIO_PCI_CONFIG(sc) + offset;
674 
675 	for (d = dst; length > 0; d += size, off += size, length -= size) {
676 		if (length >= 4) {
677 			size = 4;
678 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
679 		} else if (length >= 2) {
680 			size = 2;
681 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
682 		} else {
683 			size = 1;
684 			*d = vtpci_read_config_1(sc, off);
685 		}
686 	}
687 }
688 
689 static void
690 vtpci_write_dev_config(device_t dev, bus_size_t offset,
691     void *src, int length)
692 {
693 	struct vtpci_softc *sc;
694 	bus_size_t off;
695 	uint8_t *s;
696 	int size;
697 
698 	sc = device_get_softc(dev);
699 	off = VIRTIO_PCI_CONFIG(sc) + offset;
700 
701 	for (s = src; length > 0; s += size, off += size, length -= size) {
702 		if (length >= 4) {
703 			size = 4;
704 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
705 		} else if (length >= 2) {
706 			size = 2;
707 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
708 		} else {
709 			size = 1;
710 			vtpci_write_config_1(sc, off, *s);
711 		}
712 	}
713 }
714 
715 static void
716 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
717     uint64_t features)
718 {
719 	device_t dev, child;
720 
721 	dev = sc->vtpci_dev;
722 	child = sc->vtpci_child_dev;
723 
724 	if (device_is_attached(child) && bootverbose == 0)
725 		return;
726 
727 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
728 }
729 
730 static void
731 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
732 {
733 	device_t dev, child;
734 
735 	dev = sc->vtpci_dev;
736 	child = sc->vtpci_child_dev;
737 
738 	if (child == NULL)
739 		return;
740 
741 	if (device_get_state(child) != DS_NOTPRESENT)
742 		return;
743 
744 	if (device_probe_child(dev, child) != 0)
745 		return;
746 
747 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
748 	if (DEVICE_ATTACH(child) != 0) {
749 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
750 		vtpci_reset(sc);
751 		vtpci_release_child_resources(sc);
752 
753 		/* Reset status for future attempt. */
754 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
755 	} else
756 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
757 }
758 
759 static int
760 vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs,
761     struct vq_alloc_info *vq_info)
762 {
763 	int i, nvectors, error;
764 
765 	/*
766 	 * Only allocate a vector for virtqueues that are actually
767 	 * expecting an interrupt.
768 	 */
769 	for (nvectors = 0, i = 0; i < nvqs; i++)
770 		if (vq_info[i].vqai_intr != NULL)
771 			nvectors++;
772 
773 	if (vtpci_disable_msix != 0 ||
774 	    sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX ||
775 	    flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX ||
776 	    vtpci_alloc_msix(sc, nvectors) != 0) {
777 		/*
778 		 * Use MSI interrupts if available. Otherwise, we fallback
779 		 * to legacy interrupts.
780 		 */
781 		if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 &&
782 		    vtpci_alloc_msi(sc) == 0)
783 			sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI;
784 
785 		sc->vtpci_nintr_res = 1;
786 	}
787 
788 	error = vtpci_alloc_intr_resources(sc, nvqs, vq_info);
789 
790 	return (error);
791 }
792 
793 static int
794 vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
795     struct vq_alloc_info *vq_info)
796 {
797 	device_t dev;
798 	struct resource *irq;
799 	struct vtpci_virtqueue *vqx;
800 	int i, rid, flags, res_idx;
801 
802 	dev = sc->vtpci_dev;
803 	flags = RF_ACTIVE;
804 
805 	if ((sc->vtpci_flags &
806 	    (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) {
807 		rid = 0;
808 		flags |= RF_SHAREABLE;
809 	} else
810 		rid = 1;
811 
812 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
813 		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags);
814 		if (irq == NULL)
815 			return (ENXIO);
816 
817 		sc->vtpci_intr_res[i].irq = irq;
818 		sc->vtpci_intr_res[i].rid = rid++;
819 	}
820 
821 	/*
822 	 * Map the virtqueue into the correct index in vq_intr_res[]. Note the
823 	 * first index is reserved for configuration changes notifications.
824 	 */
825 	for (i = 0, res_idx = 1; i < nvqs; i++) {
826 		vqx = &sc->vtpci_vqx[i];
827 
828 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
829 			if (vq_info[i].vqai_intr == NULL)
830 				vqx->ires_idx = -1;
831 			else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
832 				vqx->ires_idx = res_idx;
833 			else
834 				vqx->ires_idx = res_idx++;
835 		} else
836 			vqx->ires_idx = -1;
837 	}
838 
839 	return (0);
840 }
841 
842 static int
843 vtpci_alloc_msi(struct vtpci_softc *sc)
844 {
845 	device_t dev;
846 	int nmsi;
847 	u_int irq_flags;
848 
849 	dev = sc->vtpci_dev;
850 	nmsi = pci_msi_count(dev);
851 
852 	if (nmsi < 1)
853 		return (1);
854 
855 	sc->vtpci_irq_rid = 0;
856         sc->vtpci_irq_type = pci_alloc_1intr(dev, 1,
857             &sc->vtpci_irq_rid, &irq_flags);
858 
859 
860 	return (1);
861 }
862 
/*
 * Try to allocate MSIX vectors: one per interrupt-driven virtqueue plus
 * one for configuration changes, or failing that two vectors with all
 * virtqueues sharing the second one.
 *
 * Returns 0 on success, after setting VIRTIO_PCI_FLAG_MSIX (and
 * VIRTIO_PCI_FLAG_SHARED_MSIX for the shared layout) and
 * sc->vtpci_nintr_res. Returns 1 when MSIX cannot be used, so that the
 * caller falls back to MSI or a legacy interrupt.
 *
 * The allocation code is only compiled when OLD_MSI is defined. Without
 * it nothing is allocated and no flag is set, so the function reports
 * failure rather than claiming success.
 */
static int
vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
{
	/* XXX(vsrinivas): Huh? Is this how MSI-X works?*/
	/* XXX(vsrinivas): All of this was disabled... */
#ifdef OLD_MSI
	device_t dev;
	int nmsix, cnt, required;

	dev = sc->vtpci_dev;

	nmsix = pci_msix_count(dev);
	if (nmsix < 1)
		return (1);

	/* An additional vector is needed for the config changes. */
	required = nvectors + 1;
	if (nmsix >= required) {
		cnt = required;
		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required)
			goto out;

		pci_release_msi(dev);
	}

	/* Attempt shared MSIX configuration. */
	required = 2;
	if (nmsix >= required) {
		cnt = required;
		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
			sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX;
			goto out;
		}

		pci_release_msi(dev);
	}

	return (1);

out:
	sc->vtpci_nintr_res = required;
	sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX;

	if (bootverbose) {
		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
			device_printf(dev, "using shared virtqueue MSIX\n");
		else
			device_printf(dev, "using per virtqueue MSIX\n");
	}

	return (0);
#else
	/* MSIX support is compiled out: nothing was allocated. */
	return (1);
#endif
}
915 
916 static int
917 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
918 {
919 	device_t dev;
920 	uint16_t vector;
921 
922 	dev = sc->vtpci_dev;
923 
924 	if (offset != VIRTIO_MSI_CONFIG_VECTOR &&
925 	    offset != VIRTIO_MSI_QUEUE_VECTOR)
926 		return (EINVAL);
927 
928 	if (res_idx != -1) {
929 		/* Map from rid to host vector. */
930 		vector = sc->vtpci_intr_res[res_idx].rid - 1;
931 	} else
932 		vector = VIRTIO_MSI_NO_VECTOR;
933 
934 	/* The first resource is special; make sure it is used correctly. */
935 	if (res_idx == 0) {
936 		KASSERT(vector == 0, ("unexpected config vector"));
937 		KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR,
938 		    ("unexpected config offset"));
939 	}
940 
941 	vtpci_write_config_2(sc, offset, vector);
942 
943 	if (vtpci_read_config_2(sc, offset) != vector) {
944 		device_printf(dev, "insufficient host resources for "
945 		    "MSIX interrupts\n");
946 		return (ENODEV);
947 	}
948 
949 	return (0);
950 }
951 
952 static void
953 vtpci_free_interrupts(struct vtpci_softc *sc)
954 {
955 	device_t dev;
956 	struct vtpci_intr_resource *ires;
957 	int i;
958 
959 	dev = sc->vtpci_dev;
960 	sc->vtpci_nintr_res = 0;
961 
962 	if (sc->vtpci_flags & (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) {
963 		pci_release_msi(dev);
964 		sc->vtpci_flags &= ~(VIRTIO_PCI_FLAG_MSI |
965 		    VIRTIO_PCI_FLAG_MSIX | VIRTIO_PCI_FLAG_SHARED_MSIX);
966 	}
967 
968 	for (i = 0; i < 1 + VIRTIO_MAX_VIRTQUEUES; i++) {
969 		ires = &sc->vtpci_intr_res[i];
970 
971 		if (ires->intrhand != NULL) {
972 			bus_teardown_intr(dev, ires->irq, ires->intrhand);
973 			ires->intrhand = NULL;
974 		}
975 
976 		if (ires->irq != NULL) {
977 			bus_release_resource(dev, SYS_RES_IRQ, ires->rid,
978 			    ires->irq);
979 			ires->irq = NULL;
980 		}
981 
982 		ires->rid = -1;
983 	}
984 }
985 
986 static void
987 vtpci_free_virtqueues(struct vtpci_softc *sc)
988 {
989 	struct vtpci_virtqueue *vqx;
990 	int i;
991 
992 	sc->vtpci_nvqs = 0;
993 
994 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
995 		vqx = &sc->vtpci_vqx[i];
996 
997 		if (vqx->vq != NULL) {
998 			virtqueue_free(vqx->vq);
999 			vqx->vq = NULL;
1000 		}
1001 	}
1002 }
1003 
/*
 * Release the resources held on behalf of the child device: first the
 * interrupts, then the virtqueues.
 */
static void
vtpci_release_child_resources(struct vtpci_softc *sc)
{
	vtpci_free_interrupts(sc);
	vtpci_free_virtqueues(sc);
}
1011 
1012 static void
1013 vtpci_reset(struct vtpci_softc *sc)
1014 {
1015 
1016 	/*
1017 	 * Setting the status to RESET sets the host device to
1018 	 * the original, uninitialized state.
1019 	 */
1020 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
1021 }
1022 
1023 static int
1024 vtpci_legacy_intr(void *xsc)
1025 {
1026 	struct vtpci_softc *sc;
1027 	struct vtpci_virtqueue *vqx;
1028 	int i;
1029 	uint8_t isr;
1030 
1031 	sc = xsc;
1032 	vqx = &sc->vtpci_vqx[0];
1033 
1034 	/* Reading the ISR also clears it. */
1035 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
1036 
1037 	if (isr & VIRTIO_PCI_ISR_CONFIG)
1038 		vtpci_config_intr(sc);
1039 
1040 	if (isr & VIRTIO_PCI_ISR_INTR)
1041 		for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1042 			virtqueue_intr(vqx->vq);
1043 
1044 	return isr;
1045 }
1046 
1047 static int
1048 vtpci_vq_shared_intr(void *xsc)
1049 {
1050 	struct vtpci_softc *sc;
1051 	struct vtpci_virtqueue *vqx;
1052 	int i, rc;
1053 
1054 	rc = 0;
1055 	sc = xsc;
1056 	vqx = &sc->vtpci_vqx[0];
1057 
1058 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1059 		rc |= virtqueue_intr(vqx->vq);
1060 
1061 	return rc;
1062 }
1063 
/*
 * Handler for a per-virtqueue MSIX vector: the argument is the virtqueue
 * itself; returns the result of virtqueue_intr().
 */
static int
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *vq = xvq;

	return virtqueue_intr(vq);
}
1075 
1076 static int
1077 vtpci_config_intr(void *xsc)
1078 {
1079 	struct vtpci_softc *sc;
1080 	device_t child;
1081 	int rc;
1082 
1083 	rc = 0;
1084 	sc = xsc;
1085 	child = sc->vtpci_child_dev;
1086 
1087 	if (child != NULL)
1088 		rc = VIRTIO_CONFIG_CHANGE(child);
1089 
1090 	return rc;
1091 }
1092