1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/dev/virtio/pci/virtio_pci.c,v 1.3 2012/04/14 05:48:04 grehan Exp $
27  */
28 
29 /* Driver for the VirtIO PCI interface. */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/bus.h>
34 #include <sys/kernel.h>
35 #include <sys/module.h>
36 #include <sys/malloc.h>
37 #include <sys/serialize.h>
38 
39 #include <bus/pci/pcivar.h>
40 #include <bus/pci/pcireg.h>
41 
42 #include <sys/rman.h>
43 
44 #include <dev/virtual/virtio/virtio/virtio.h>
45 #include <dev/virtual/virtio/virtio/virtqueue.h>
46 #include "virtio_pci.h"
47 #include "virtio_if.h"
48 #include "virtio_bus_if.h"
49 
/*
 * Per-device state of the VirtIO PCI transport. One instance is allocated
 * by the bus framework for each attached virtio_pci device (see
 * vtpci_driver below) and retrieved with device_get_softc().
 */
struct vtpci_softc {
	/* The virtio_pci device this softc belongs to. */
	device_t			 vtpci_dev;
	/* I/O port BAR(0); target of the vtpci_{read,write}_config_* wrappers. */
	struct resource			*vtpci_res;
	/* Memory BAR holding the MSIX table; NULL if not allocated. */
	struct resource			*vtpci_msix_res;
	/* Feature bits agreed upon in vtpci_negotiate_features(). */
	uint64_t			 vtpci_features;
	/* VIRTIO_PCI_FLAG_* bits describing the interrupt mode in use. */
	uint32_t			 vtpci_flags;
#define VIRTIO_PCI_FLAG_MSI		 0x0001
#define VIRTIO_PCI_FLAG_MSIX		 0x0010
#define VIRTIO_PCI_FLAG_SHARED_MSIX	 0x0020

	/* The single child device created in vtpci_attach(). */
	device_t			 vtpci_child_dev;
	/* Feature name table supplied by the child through write_ivar. */
	struct virtio_feature_desc	*vtpci_child_feat_desc;

	/*
	 * Ideally, each virtqueue that the driver provides a callback for
	 * will receive its own MSIX vector. If there are not sufficient
	 * vectors available, we will then attempt to have all the VQs
	 * share one vector. Note that when using MSIX, the configuration
	 * changed notifications must be on their own vector.
	 *
	 * If MSIX is not available, we will attempt to have the whole
	 * device share one MSI vector, and then, finally, one legacy
	 * interrupt.
	 */
	/* Number of virtqueues successfully allocated so far. */
	int				 vtpci_nvqs;
	struct vtpci_virtqueue {
		struct virtqueue *vq;

		/* Index into vtpci_intr_res[] below, or -1 when unused. */
		int		  ires_idx;
	} vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];

	/*
	 * When using MSIX interrupts, the first element of vtpci_intr_res[]
	 * is always the configuration changed notifications. The remaining
	 * element(s) are used for the virtqueues.
	 *
	 * With MSI and legacy interrupts, only the first element of
	 * vtpci_intr_res[] is used.
	 */
	/* Number of valid entries in vtpci_intr_res[]. */
	int				 vtpci_nintr_res;
	/* Flags handed to bus_alloc_resource_any() for the IRQ resources. */
	int				 vtpci_irq_flags;
	struct vtpci_intr_resource {
		struct resource	*irq;		/* allocated SYS_RES_IRQ resource */
		int		 rid;		/* resource id of the IRQ */
		void		*intrhand;	/* cookie from bus_setup_intr() */
	} vtpci_intr_res[1 + VIRTIO_MAX_VIRTQUEUES];
};
98 
/* Device interface methods. */
static int	vtpci_probe(device_t);
static int	vtpci_attach(device_t);
static int	vtpci_detach(device_t);
static int	vtpci_suspend(device_t);
static int	vtpci_resume(device_t);
static int	vtpci_shutdown(device_t);
/* Bus interface methods. */
static void	vtpci_driver_added(device_t, driver_t *);
static void	vtpci_child_detached(device_t, device_t);
static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);

/* VirtIO bus interface methods and status/config accessors. */
static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
static int	vtpci_with_feature(device_t, uint64_t);
static int	vtpci_alloc_virtqueues(device_t, int, int,
		    struct vq_alloc_info *);
static int	vtpci_setup_intr(device_t, lwkt_serialize_t);
static void	vtpci_stop(device_t);
static int	vtpci_reinit(device_t, uint64_t);
static void	vtpci_reinit_complete(device_t);
static void	vtpci_notify_virtqueue(device_t, uint16_t);
static uint8_t	vtpci_get_status(device_t);
static void	vtpci_set_status(device_t, uint8_t);
static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);

/* Child device helpers. */
static void	vtpci_describe_features(struct vtpci_softc *, const char *,
		    uint64_t);
static void	vtpci_probe_and_attach_child(struct vtpci_softc *);

/* Interrupt resource allocation. */
static int	vtpci_alloc_interrupts(struct vtpci_softc *, int, int,
		    struct vq_alloc_info *);
static int	vtpci_alloc_intr_resources(struct vtpci_softc *, int,
		    struct vq_alloc_info *);
static int	vtpci_alloc_msix(struct vtpci_softc *, int);
static int	vtpci_register_msix_vector(struct vtpci_softc *, int, int);

/* Teardown helpers. */
static void	vtpci_free_interrupts(struct vtpci_softc *);
static void	vtpci_free_virtqueues(struct vtpci_softc *);
static void	vtpci_release_child_resources(struct vtpci_softc *);
static void	vtpci_reset(struct vtpci_softc *);

/* Interrupt handlers. */
static int	vtpci_legacy_intr(void *);
static int	vtpci_vq_shared_intr(void *);
static int	vtpci_vq_intr(void *);
static int	vtpci_config_intr(void *);
144 
/*
 * I/O port read/write wrappers.
 *
 * Each macro accesses the device's I/O port resource (sc->vtpci_res) at
 * byte offset 'o'; the _1/_2/_4 suffix is the access width in bytes.
 */
#define vtpci_read_config_1(sc, o)	bus_read_1((sc)->vtpci_res, (o))
#define vtpci_read_config_2(sc, o)	bus_read_2((sc)->vtpci_res, (o))
#define vtpci_read_config_4(sc, o)	bus_read_4((sc)->vtpci_res, (o))
#define vtpci_write_config_1(sc, o, v)	bus_write_1((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_2(sc, o, v)	bus_write_2((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_4(sc, o, v)	bus_write_4((sc)->vtpci_res, (o), (v))
154 
/*
 * Tunables.
 *
 * hw.virtio.pci.disable_msix: when non-zero, vtpci_alloc_interrupts()
 * skips MSIX and uses MSI or a legacy interrupt instead.
 */
static int vtpci_disable_msix = 0;
TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
158 
/*
 * Method table binding the device, bus and VirtIO bus interfaces to the
 * implementations in this file.
 */
static device_method_t vtpci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,			  vtpci_probe),
	DEVMETHOD(device_attach,		  vtpci_attach),
	DEVMETHOD(device_detach,		  vtpci_detach),
	DEVMETHOD(device_suspend,		  vtpci_suspend),
	DEVMETHOD(device_resume,		  vtpci_resume),
	DEVMETHOD(device_shutdown,		  vtpci_shutdown),

	/* Bus interface. */
	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),

	/* VirtIO bus interface. */
	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),

	DEVMETHOD_END
};
188 
/* Driver description: name, method table and per-device softc size. */
static driver_t vtpci_driver = {
	"virtio_pci",
	vtpci_methods,
	sizeof(struct vtpci_softc)
};

devclass_t vtpci_devclass;

/* Register on the pci bus; the module depends on the pci and virtio modules. */
DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, NULL, NULL);
MODULE_VERSION(virtio_pci, 1);
MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
201 
202 static int
203 vtpci_probe(device_t dev)
204 {
205 	char desc[36];
206 	const char *name;
207 
208 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
209 		return (ENXIO);
210 
211 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
212 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
213 		return (ENXIO);
214 
215 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
216 		return (ENXIO);
217 
218 	name = virtio_device_name(pci_get_subdevice(dev));
219 	if (name == NULL)
220 		name = "Unknown";
221 
222 	ksnprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
223 	device_set_desc_copy(dev, desc);
224 
225 	return (BUS_PROBE_DEFAULT);
226 }
227 
228 static int
229 vtpci_attach(device_t dev)
230 {
231 	struct vtpci_softc *sc;
232 	device_t child;
233 	int msix_cap, rid;
234 
235 	sc = device_get_softc(dev);
236 	sc->vtpci_dev = dev;
237 
238 	pci_enable_busmaster(dev);
239 
240 	rid = PCIR_BAR(0);
241 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
242 	    RF_ACTIVE);
243 	if (sc->vtpci_res == NULL) {
244 		device_printf(dev, "cannot map I/O space\n");
245 		return (ENXIO);
246 	}
247 
248 	if (pci_find_extcap(dev, PCIY_MSIX, &msix_cap) == 0) {
249 		uint32_t val;
250 		val = pci_read_config(dev, msix_cap + PCIR_MSIX_TABLE, 4);
251 		rid = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
252 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
253 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
254 	}
255 
256 	vtpci_reset(sc);
257 
258 	/* Tell the host we've noticed this device. */
259 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
260 
261 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
262 		device_printf(dev, "cannot create child device\n");
263 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
264 		vtpci_detach(dev);
265 		return (ENOMEM);
266 	}
267 
268 	sc->vtpci_child_dev = child;
269 	vtpci_probe_and_attach_child(sc);
270 
271 	return (0);
272 }
273 
274 static int
275 vtpci_detach(device_t dev)
276 {
277 	struct vtpci_softc *sc;
278 	device_t child;
279 	int error;
280 
281 	sc = device_get_softc(dev);
282 
283 	if ((child = sc->vtpci_child_dev) != NULL) {
284 		error = device_delete_child(dev, child);
285 		if (error)
286 			return (error);
287 		sc->vtpci_child_dev = NULL;
288 	}
289 
290 	vtpci_reset(sc);
291 
292 	if (sc->vtpci_msix_res != NULL) {
293 		bus_release_resource(dev, SYS_RES_MEMORY,
294 		    rman_get_rid(sc->vtpci_msix_res), sc->vtpci_msix_res);
295 		sc->vtpci_msix_res = NULL;
296 	}
297 
298 	if (sc->vtpci_res != NULL) {
299 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
300 		    sc->vtpci_res);
301 		sc->vtpci_res = NULL;
302 	}
303 
304 	return (0);
305 }
306 
307 static int
308 vtpci_suspend(device_t dev)
309 {
310 
311 	return (bus_generic_suspend(dev));
312 }
313 
314 static int
315 vtpci_resume(device_t dev)
316 {
317 
318 	return (bus_generic_resume(dev));
319 }
320 
321 static int
322 vtpci_shutdown(device_t dev)
323 {
324 
325 	(void) bus_generic_shutdown(dev);
326 	/* Forcibly stop the host device. */
327 	vtpci_stop(dev);
328 
329 	return (0);
330 }
331 
332 static void
333 vtpci_driver_added(device_t dev, driver_t *driver)
334 {
335 	struct vtpci_softc *sc;
336 
337 	sc = device_get_softc(dev);
338 
339 	vtpci_probe_and_attach_child(sc);
340 }
341 
342 static void
343 vtpci_child_detached(device_t dev, device_t child)
344 {
345 	struct vtpci_softc *sc;
346 
347 	sc = device_get_softc(dev);
348 
349 	vtpci_reset(sc);
350 	vtpci_release_child_resources(sc);
351 }
352 
353 static int
354 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
355 {
356 	struct vtpci_softc *sc;
357 
358 	sc = device_get_softc(dev);
359 
360 	if (sc->vtpci_child_dev != child)
361 		return (ENOENT);
362 
363 	switch (index) {
364 	case VIRTIO_IVAR_DEVTYPE:
365 		*result = pci_get_subdevice(dev);
366 		break;
367 	default:
368 		return (ENOENT);
369 	}
370 
371 	return (0);
372 }
373 
374 static int
375 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
376 {
377 	struct vtpci_softc *sc;
378 
379 	sc = device_get_softc(dev);
380 
381 	if (sc->vtpci_child_dev != child)
382 		return (ENOENT);
383 
384 	switch (index) {
385 	case VIRTIO_IVAR_FEATURE_DESC:
386 		sc->vtpci_child_feat_desc = (void *) value;
387 		break;
388 	default:
389 		return (ENOENT);
390 	}
391 
392 	return (0);
393 }
394 
395 static uint64_t
396 vtpci_negotiate_features(device_t dev, uint64_t child_features)
397 {
398 	struct vtpci_softc *sc;
399 	uint64_t host_features, features;
400 
401 	sc = device_get_softc(dev);
402 
403 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
404 	vtpci_describe_features(sc, "host", host_features);
405 
406 	/*
407 	 * Limit negotiated features to what the driver, virtqueue, and
408 	 * host all support.
409 	 */
410 	features = host_features & child_features;
411 	features = virtqueue_filter_features(features);
412 	sc->vtpci_features = features;
413 
414 	vtpci_describe_features(sc, "negotiated", features);
415 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
416 
417 	return (features);
418 }
419 
420 static int
421 vtpci_with_feature(device_t dev, uint64_t feature)
422 {
423 	struct vtpci_softc *sc;
424 
425 	sc = device_get_softc(dev);
426 
427 	return ((sc->vtpci_features & feature) != 0);
428 }
429 
430 static int
431 vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
432     struct vq_alloc_info *vq_info)
433 {
434 	struct vtpci_softc *sc;
435 	struct vtpci_virtqueue *vqx;
436 	struct vq_alloc_info *info;
437 	int queue, error;
438 	uint16_t vq_size;
439 
440 	sc = device_get_softc(dev);
441 
442 	if (sc->vtpci_nvqs != 0 || nvqs <= 0 ||
443 	    nvqs > VIRTIO_MAX_VIRTQUEUES)
444 		return (EINVAL);
445 
446 	error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info);
447 	if (error) {
448 		device_printf(dev, "cannot allocate interrupts\n");
449 		return (error);
450 	}
451 
452 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
453 		pci_enable_msix(dev);
454 		error = vtpci_register_msix_vector(sc,
455 		    VIRTIO_MSI_CONFIG_VECTOR, 0);
456 		if (error)
457 			return (error);
458 	}
459 
460 	for (queue = 0; queue < nvqs; queue++) {
461 		vqx = &sc->vtpci_vqx[queue];
462 		info = &vq_info[queue];
463 
464 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
465 
466 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
467 		error = virtqueue_alloc(dev, queue, vq_size,
468 		    VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq);
469 		if (error)
470 			return (error);
471 
472 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
473 			error = vtpci_register_msix_vector(sc,
474 			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
475 			if (error)
476 				return (error);
477 		}
478 
479 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
480 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
481 
482 		*info->vqai_vq = vqx->vq;
483 		sc->vtpci_nvqs++;
484 	}
485 
486 	return (0);
487 }
488 
489 static int
490 vtpci_setup_intr(device_t dev, lwkt_serialize_t slz)
491 {
492 	struct vtpci_softc *sc;
493 	struct vtpci_intr_resource *ires;
494 	struct vtpci_virtqueue *vqx;
495 	int i, flags, error;
496 
497 	sc = device_get_softc(dev);
498 	flags = INTR_MPSAFE;
499 	ires = &sc->vtpci_intr_res[0];
500 
501 	if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) {
502 		error = bus_setup_intr(dev, ires->irq, flags,
503 				       (driver_intr_t *) vtpci_legacy_intr,
504 				       sc, &ires->intrhand, slz);
505 		return (error);
506 	}
507 
508 	error = bus_setup_intr(dev, ires->irq, flags,
509 			       (driver_intr_t *) vtpci_config_intr,
510 			       sc, &ires->intrhand, slz);
511 	if (error)
512 		return (error);
513 
514 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) {
515 		ires = &sc->vtpci_intr_res[1];
516 		error = bus_setup_intr(dev, ires->irq, flags,
517 				       (driver_intr_t *) vtpci_vq_shared_intr,
518 				       sc, &ires->intrhand, slz);
519 
520 		return (error);
521 	}
522 
523 	/* Setup an interrupt handler for each virtqueue. */
524 	for (i = 0; i < sc->vtpci_nvqs; i++) {
525 		vqx = &sc->vtpci_vqx[i];
526 		if (vqx->ires_idx < 1)
527 			continue;
528 
529 		ires = &sc->vtpci_intr_res[vqx->ires_idx];
530 		error = bus_setup_intr(dev, ires->irq, flags,
531 				       (driver_intr_t *) vtpci_vq_intr,
532 				       vqx->vq, &ires->intrhand, slz);
533 		if (error)
534 			return (error);
535 	}
536 
537 	return (0);
538 }
539 
540 static void
541 vtpci_stop(device_t dev)
542 {
543 	vtpci_reset(device_get_softc(dev));
544 }
545 
546 static int
547 vtpci_reinit(device_t dev, uint64_t features)
548 {
549 	struct vtpci_softc *sc;
550 	struct vtpci_virtqueue *vqx;
551 	struct virtqueue *vq;
552 	int queue, error;
553 	uint16_t vq_size;
554 
555 	sc = device_get_softc(dev);
556 
557 	/*
558 	 * Redrive the device initialization. This is a bit of an abuse
559 	 * of the specification, but both VirtualBox and QEMU/KVM seem
560 	 * to play nice. We do not allow the host device to change from
561 	 * what was originally negotiated beyond what the guest driver
562 	 * changed (MSIX state should not change, number of virtqueues
563 	 * and their size remain the same, etc).
564 	 */
565 
566 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
567 		vtpci_stop(dev);
568 
569 	/*
570 	 * Quickly drive the status through ACK and DRIVER. The device
571 	 * does not become usable again until vtpci_reinit_complete().
572 	 */
573 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
574 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
575 
576 	vtpci_negotiate_features(dev, features);
577 
578 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
579 		pci_enable_msix(dev);
580 		error = vtpci_register_msix_vector(sc,
581 		    VIRTIO_MSI_CONFIG_VECTOR, 0);
582 		if (error)
583 			return (error);
584 	}
585 
586 	for (queue = 0; queue < sc->vtpci_nvqs; queue++) {
587 		vqx = &sc->vtpci_vqx[queue];
588 		vq = vqx->vq;
589 
590 		KASSERT(vq != NULL, ("vq %d not allocated", queue));
591 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
592 
593 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
594 		error = virtqueue_reinit(vq, vq_size);
595 		if (error)
596 			return (error);
597 
598 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
599 			error = vtpci_register_msix_vector(sc,
600 			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
601 			if (error)
602 				return (error);
603 		}
604 
605 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
606 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
607 	}
608 
609 	return (0);
610 }
611 
/*
 * VirtIO bus reinit_complete method: set DRIVER_OK in the device status
 * to finish the sequence started by vtpci_reinit().
 */
static void
vtpci_reinit_complete(device_t dev)
{

	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
}
618 
619 static void
620 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
621 {
622 	struct vtpci_softc *sc;
623 
624 	sc = device_get_softc(dev);
625 
626 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
627 }
628 
629 static uint8_t
630 vtpci_get_status(device_t dev)
631 {
632 	struct vtpci_softc *sc;
633 
634 	sc = device_get_softc(dev);
635 
636 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
637 }
638 
639 static void
640 vtpci_set_status(device_t dev, uint8_t status)
641 {
642 	struct vtpci_softc *sc;
643 
644 	sc = device_get_softc(dev);
645 
646 	if (status != VIRTIO_CONFIG_STATUS_RESET)
647 		status |= vtpci_get_status(dev);
648 
649 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
650 }
651 
652 static void
653 vtpci_read_dev_config(device_t dev, bus_size_t offset,
654     void *dst, int length)
655 {
656 	struct vtpci_softc *sc;
657 	bus_size_t off;
658 	uint8_t *d;
659 	int size;
660 
661 	sc = device_get_softc(dev);
662 	off = VIRTIO_PCI_CONFIG(sc) + offset;
663 
664 	for (d = dst; length > 0; d += size, off += size, length -= size) {
665 		if (length >= 4) {
666 			size = 4;
667 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
668 		} else if (length >= 2) {
669 			size = 2;
670 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
671 		} else {
672 			size = 1;
673 			*d = vtpci_read_config_1(sc, off);
674 		}
675 	}
676 }
677 
678 static void
679 vtpci_write_dev_config(device_t dev, bus_size_t offset,
680     void *src, int length)
681 {
682 	struct vtpci_softc *sc;
683 	bus_size_t off;
684 	uint8_t *s;
685 	int size;
686 
687 	sc = device_get_softc(dev);
688 	off = VIRTIO_PCI_CONFIG(sc) + offset;
689 
690 	for (s = src; length > 0; s += size, off += size, length -= size) {
691 		if (length >= 4) {
692 			size = 4;
693 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
694 		} else if (length >= 2) {
695 			size = 2;
696 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
697 		} else {
698 			size = 1;
699 			vtpci_write_config_1(sc, off, *s);
700 		}
701 	}
702 }
703 
704 static void
705 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
706     uint64_t features)
707 {
708 	device_t dev, child;
709 
710 	dev = sc->vtpci_dev;
711 	child = sc->vtpci_child_dev;
712 
713 	if (device_is_attached(child) && bootverbose == 0)
714 		return;
715 
716 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
717 }
718 
719 static void
720 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
721 {
722 	device_t dev, child;
723 	int error;
724 
725 	dev = sc->vtpci_dev;
726 	child = sc->vtpci_child_dev;
727 
728 	if (child == NULL)
729 		return;
730 
731 	if (device_get_state(child) != DS_NOTPRESENT)
732 		return;
733 
734 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
735 	error = device_probe_and_attach(child);
736 	if (error != 0 || device_get_state(child) == DS_NOTPRESENT) {
737 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
738 		vtpci_reset(sc);
739 		vtpci_release_child_resources(sc);
740 
741 		/* Reset status for future attempt. */
742 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
743 	} else
744 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
745 }
746 
747 static int
748 vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs,
749     struct vq_alloc_info *vq_info)
750 {
751 	int i, nvectors, error;
752 
753 	/*
754 	 * Only allocate a vector for virtqueues that are actually
755 	 * expecting an interrupt.
756 	 */
757 	for (nvectors = 0, i = 0; i < nvqs; i++)
758 		if (vq_info[i].vqai_intr != NULL)
759 			nvectors++;
760 
761 	if (vtpci_disable_msix != 0 ||
762 	    sc->vtpci_msix_res == NULL ||
763 	    flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX ||
764 	    vtpci_alloc_msix(sc, nvectors) != 0) {
765 		/*
766 		 * Use MSI interrupts if available. Otherwise, we fallback
767 		 * to legacy interrupts.
768 		 */
769 		sc->vtpci_intr_res[0].rid = 0;
770 		if (pci_alloc_1intr(sc->vtpci_dev, 1,
771 		    &sc->vtpci_intr_res[0].rid,
772 		    &sc->vtpci_irq_flags) == PCI_INTR_TYPE_MSI) {
773 			sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI;
774 		}
775 		sc->vtpci_nintr_res = 1;
776 	}
777 	KKASSERT(!((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSI) != 0 &&
778 		   (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) != 0));
779 
780 	error = vtpci_alloc_intr_resources(sc, nvqs, vq_info);
781 
782 	return (error);
783 }
784 
785 static int
786 vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
787     struct vq_alloc_info *vq_info)
788 {
789 	device_t dev = sc->vtpci_dev;
790 	struct resource *irq;
791 	int i;
792 
793 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
794 		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ,
795 		    &sc->vtpci_intr_res[i].rid, sc->vtpci_irq_flags);
796 		if (irq == NULL)
797 			return (ENXIO);
798 
799 		sc->vtpci_intr_res[i].irq = irq;
800 	}
801 
802 	/*
803 	 * Map the virtqueue into the correct index in vq_intr_res[]. Note the
804 	 * first index is reserved for configuration changes notifications.
805 	 */
806 	for (i = 0; i < nvqs; i++) {
807 		struct vtpci_virtqueue *vqx = &sc->vtpci_vqx[i];
808 
809 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
810 			if (vq_info[i].vqai_intr == NULL)
811 				vqx->ires_idx = -1;
812 			else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
813 				vqx->ires_idx = 1;
814 			else
815 				vqx->ires_idx = i + 1;
816 		} else {
817 			vqx->ires_idx = -1;
818 		}
819 	}
820 
821 	return (0);
822 }
823 
824 static int
825 vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
826 {
827 	device_t dev = sc->vtpci_dev;
828 	int nmsix, cnt, i, required;
829 
830 	nmsix = pci_msix_count(dev);
831 	if (nmsix < 2)
832 		return (1);
833 
834 	if (pci_setup_msix(dev) != 0) {
835 		device_printf(dev, "pci_setup_msix failed\n");
836 		return (1);
837 	}
838 
839 	/* An additional vector is needed for the config changes. */
840 	required = nvectors + 1;
841 	if (required > nmsix)
842 		required = 2;
843 
844 	for (cnt = 0; cnt < required; cnt++) {
845 		int rid;
846 		if (pci_alloc_msix_vector(dev, cnt, &rid,
847 		    (device_get_unit(dev) + cnt) % ncpus) != 0)
848 			goto err;
849 		sc->vtpci_intr_res[cnt].rid = rid;
850 	}
851 	sc->vtpci_nintr_res = required;
852 	sc->vtpci_irq_flags = RF_ACTIVE;
853 	sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX;
854 	if (nvectors + 1 > nmsix)
855 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX;
856 	if (bootverbose) {
857 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
858 			device_printf(dev, "using shared virtqueue MSIX\n");
859 		else
860 			device_printf(dev, "using per virtqueue MSIX\n");
861 	}
862 	return (0);
863 
864 err:
865 	for (i = 0; i < cnt; i++) {
866 		pci_release_msix_vector(dev, sc->vtpci_intr_res[i].rid);
867 		sc->vtpci_intr_res[i].rid = 0;
868 	}
869 	pci_teardown_msix(dev);
870 	return (1);
871 }
872 
873 static int
874 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
875 {
876 	device_t dev;
877 	uint16_t vector;
878 
879 	dev = sc->vtpci_dev;
880 
881 	if (offset != VIRTIO_MSI_CONFIG_VECTOR &&
882 	    offset != VIRTIO_MSI_QUEUE_VECTOR)
883 		return (EINVAL);
884 
885 	if (res_idx != -1) {
886 		/* Map from rid to host vector. */
887 		vector = res_idx;
888 	} else {
889 		vector = VIRTIO_MSI_NO_VECTOR;
890 	}
891 
892 	/* The first resource is special; make sure it is used correctly. */
893 	if (res_idx == 0) {
894 		KASSERT(vector == 0, ("unexpected config vector"));
895 		KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR,
896 		    ("unexpected config offset"));
897 	}
898 
899 	vtpci_write_config_2(sc, offset, vector);
900 
901 	if (vtpci_read_config_2(sc, offset) != vector) {
902 		device_printf(dev, "insufficient host resources for "
903 		    "MSIX interrupts\n");
904 		return (ENODEV);
905 	}
906 
907 	return (0);
908 }
909 
910 static void
911 vtpci_free_interrupts(struct vtpci_softc *sc)
912 {
913 	device_t dev = sc->vtpci_dev;
914 	struct vtpci_intr_resource *ires;
915 	int i;
916 
917 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
918 		ires = &sc->vtpci_intr_res[i];
919 
920 		if (ires->intrhand != NULL) {
921 			bus_teardown_intr(dev, ires->irq, ires->intrhand);
922 			ires->intrhand = NULL;
923 		}
924 		if (ires->irq != NULL) {
925 			bus_release_resource(dev, SYS_RES_IRQ, ires->rid,
926 			    ires->irq);
927 			ires->irq = NULL;
928 		}
929 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX)
930 			pci_release_msix_vector(dev, ires->rid);
931 		ires->rid = 0;
932 	}
933 	sc->vtpci_nintr_res = 0;
934 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSI) {
935 		pci_release_msi(dev);
936 		sc->vtpci_flags &= ~VIRTIO_PCI_FLAG_MSI;
937 	}
938 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
939 		pci_disable_msix(dev);
940 		pci_teardown_msix(dev);
941 		sc->vtpci_flags &=
942 		    ~(VIRTIO_PCI_FLAG_MSIX | VIRTIO_PCI_FLAG_SHARED_MSIX);
943 	}
944 
945 }
946 
947 static void
948 vtpci_free_virtqueues(struct vtpci_softc *sc)
949 {
950 	struct vtpci_virtqueue *vqx;
951 	int i;
952 
953 	sc->vtpci_nvqs = 0;
954 
955 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
956 		vqx = &sc->vtpci_vqx[i];
957 
958 		if (vqx->vq != NULL) {
959 			virtqueue_free(vqx->vq);
960 			vqx->vq = NULL;
961 		}
962 	}
963 }
964 
/*
 * Release everything that was allocated on behalf of the child device:
 * first the interrupts, then the virtqueues.
 */
static void
vtpci_release_child_resources(struct vtpci_softc *sc)
{

	vtpci_free_interrupts(sc);
	vtpci_free_virtqueues(sc);
}
972 
973 static void
974 vtpci_reset(struct vtpci_softc *sc)
975 {
976 
977 	/*
978 	 * Setting the status to RESET sets the host device to
979 	 * the original, uninitialized state.
980 	 */
981 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
982 }
983 
984 static int
985 vtpci_legacy_intr(void *xsc)
986 {
987 	struct vtpci_softc *sc;
988 	int i;
989 	uint8_t isr;
990 
991 	sc = xsc;
992 
993 	/* Reading the ISR also clears it. */
994 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
995 
996 	if (isr & VIRTIO_PCI_ISR_CONFIG)
997 		vtpci_config_intr(sc);
998 
999 	if (isr & VIRTIO_PCI_ISR_INTR) {
1000 		for (i = 0; i < sc->vtpci_nvqs; i++)
1001 			virtqueue_intr(sc->vtpci_vqx[i].vq);
1002 	}
1003 
1004 	return isr;
1005 }
1006 
1007 static int
1008 vtpci_vq_shared_intr(void *xsc)
1009 {
1010 	struct vtpci_softc *sc;
1011 	int i, rc;
1012 
1013 	rc = 0;
1014 	sc = xsc;
1015 
1016 	for (i = 0; i < sc->vtpci_nvqs; i++)
1017 		rc |= virtqueue_intr(sc->vtpci_vqx[i].vq);
1018 
1019 	return rc;
1020 }
1021 
/*
 * Interrupt handler for a virtqueue with its own MSIX vector; the argument
 * is the virtqueue itself. Returns the result of virtqueue_intr().
 */
static int
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *vq = xvq;

	return virtqueue_intr(vq);
}
1033 
1034 static int
1035 vtpci_config_intr(void *xsc)
1036 {
1037 	struct vtpci_softc *sc;
1038 	device_t child;
1039 	int rc;
1040 
1041 	rc = 0;
1042 	sc = xsc;
1043 	child = sc->vtpci_child_dev;
1044 
1045 	if (child != NULL)
1046 		rc = VIRTIO_CONFIG_CHANGE(child);
1047 
1048 	return rc;
1049 }
1050