/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/dev/virtio/block/virtio_blk.c,v 1.4 2012/04/16 18:29:12 grehan Exp $
 */

/* Driver for VirtIO block devices. */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"

struct vtblk_request {
	struct virtio_blk_outhdr	vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				vbr_ack;

	TAILQ_ENTRY(vtblk_request)	vbr_link;
};

struct vtblk_softc {
	device_t			vtblk_dev;
	struct lwkt_serialize		vtblk_slz;
	uint64_t			vtblk_features;

#define VTBLK_FLAG_READONLY		0x0002
#define VTBLK_FLAG_DETACH		0x0004
#define VTBLK_FLAG_SUSPEND		0x0008
	uint32_t			vtblk_flags;

	struct virtqueue		*vtblk_vq;
	struct sglist			*vtblk_sglist;
	struct disk			vtblk_disk;
	cdev_t				cdev;
	struct devstat			stats;

	struct bio_queue_head		vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)	vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)	vtblk_req_ready;

	int				vtblk_sector_size;
	int				vtblk_max_nsegs;
	int				vtblk_unit;
	int				vtblk_request_count;

	struct vtblk_request		vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
				       struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_alloc_disk(struct vtblk_softc *,
				 struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

static void vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
static int vtblk_execute_request(struct vtblk_softc *, struct vtblk_request *);

static int		vtblk_vq_intr(void *);
static void		vtblk_complete(void *);

static void		vtblk_stop(struct vtblk_softc *);

static void		vtblk_drain_vq(struct vtblk_softc *, int);
static void		vtblk_drain(struct vtblk_softc *);

static int		vtblk_alloc_requests(struct vtblk_softc *);
static void		vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
static void		vtblk_enqueue_request(struct vtblk_softc *,
					      struct vtblk_request *);

static struct vtblk_request *vtblk_dequeue_ready(struct vtblk_softc *);
static void		vtblk_enqueue_ready(struct vtblk_softc *,
					    struct vtblk_request *);

static void		vtblk_bio_error(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER		| \
     VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_FLUSH)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
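
/*
 * Descriptive note: a request is laid out on the virtqueue as a
 * device-readable header, zero or more data segments, and a single
 * device-writable status byte, e.g. for a read:
 *
 *	[ vbr_hdr (readable) ][ data ... (writable) ][ vbr_ack (writable) ]
 *
 * The header and status segments are the two segments counted by
 * VTBLK_MIN_SEGMENTS above.
 */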

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
	      vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		break;
	case MOD_UNLOAD:
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	sc->vtblk_unit = device_get_unit(dev);

	lwkt_serialize_init(&sc->vtblk_slz);

	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
				  sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, &sc->vtblk_slz);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	lwkt_serialize_exit(&sc->vtblk_slz);

	vtblk_drain(sc);

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* TODO Wait for any inflight IO to complete? */
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	/* TODO Resume IO? */
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{
	return (0);
}

static int
vtblk_open(struct dev_open_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_dump(struct dev_dump_args *ap)
{
	/* XXX */
	return (ENXIO);
}

static int
vtblk_strategy(struct dev_strategy_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;

	sc = dev->si_drv1;
	if (sc == NULL) {
		vtblk_bio_error(bio, EINVAL);
		return (EINVAL);
	}

	/*
	 * Fail any write or flush if the device is read-only. There does
	 * not seem to be a better way to report the read-only status to
	 * the disk layer above us.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) &&
	    (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
		vtblk_bio_error(bio, EROFS);
		return (EROFS);
	}

	lwkt_serialize_enter(&sc->vtblk_slz);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
		devstat_start_transaction(&sc->stats);
		bioqdisksort(&sc->vtblk_bioq, bio);
		vtblk_startio(sc);
	} else {
		vtblk_bio_error(bio, ENXIO);
	}
	lwkt_serialize_exit(&sc->vtblk_slz);
	return (0);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

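	/*
	 * A MAXPHYS-sized transfer can touch at most
	 * MAXPHYS / PAGE_SIZE + 1 pages when it is not page aligned, so
	 * there is no benefit in accepting a larger seg_max from the
	 * host; without SEG_MAX, assume a single data segment.
	 */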
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
	} else {
		nsegs += 1;
	}

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
			   vtblk_vq_intr, sc, &sc->vtblk_vq,
			   "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	struct disk_info info;

	/* Construct the disk_info. */
	bzero(&info, sizeof(info));

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = DEV_BSIZE;

	info.d_media_blksize = sc->vtblk_sector_size;
	info.d_media_blocks = blkcfg->capacity;

	info.d_ncylinders = blkcfg->geometry.cylinders;
	info.d_nheads = blkcfg->geometry.heads;
	info.d_secpertrack = blkcfg->geometry.sectors;

	info.d_secpercyl = info.d_secpertrack * info.d_nheads;

	devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
			  DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
			  DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
			  DEVSTAT_PRIORITY_DISK);

	/* Attach a generic disk device to ourselves. */
	sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
			       &vbd_disk_ops);

	sc->cdev->si_drv1 = sc;
	disk_setdiskinfo(&sc->vtblk_disk, &info);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

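	/*
	 * Requests that could not be enqueued earlier (the ready list)
	 * are retried before new bios are pulled off the bio queue. If
	 * the virtqueue fills up mid-stream, park the request back on
	 * the ready list and stop until completions free descriptors.
	 */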
	while (!virtqueue_full(vq)) {
		if ((req = vtblk_dequeue_ready(sc)) == NULL)
			req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			vtblk_enqueue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq, &sc->vtblk_slz);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bio = bioq_takefirst(bioq);
	req->vbr_bp = bio;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;
	bp = bio->bio_buf;

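	/*
	 * The virtio block protocol always expresses the starting sector
	 * in 512-byte (DEV_BSIZE) units, independent of any block size
	 * negotiated via VIRTIO_BLK_F_BLK_SIZE.
	 */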
	switch (bp->b_cmd) {
	case BUF_CMD_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BUF_CMD_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	case BUF_CMD_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	if (bp->b_flags & B_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}

static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bio;
	struct buf *bp;
	int writable, error;

	sg = sc->vtblk_sglist;
	bio = req->vbr_bp;
	bp = bio->bio_buf;
	writable = 0;

	/*
	 * sglist is live throughout this subroutine.
	 */
	sglist_reset(sg);

	error = sglist_append(sg, &req->vbr_hdr,
			      sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
		error = sglist_append(sg, bp->b_data, bp->b_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BUF_CMD_READ means the host writes into our buffer. */
		if (bp->b_cmd == BUF_CMD_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

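	/*
	 * The sglist now holds the device-readable segments (the header,
	 * plus the data for writes) followed by the device-writable ones
	 * (the data for reads, plus the status byte); virtqueue_enqueue()
	 * is told how many of each, in that order.
	 */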
	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
				  sg->sg_nseg - writable, writable);

	sglist_reset(sg);

	return (error);
}

static int
vtblk_vq_intr(void *xsc)
{
	vtblk_complete(xsc);

	return (1);
}

static void
vtblk_complete(void *arg)
{
	struct vtblk_softc *sc;
	struct vtblk_request *req;
	struct virtqueue *vq;
	struct bio *bio;
	struct buf *bp;

	sc = arg;
	vq = sc->vtblk_vq;

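	/*
	 * Keep the serialize handler and virtqueue interrupts disabled
	 * while completions are drained below. They are re-enabled at
	 * the bottom, and the queue is then re-checked to close the race
	 * with the host posting further completions in the meantime.
	 */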
	lwkt_serialize_handler_disable(&sc->vtblk_slz);
	virtqueue_disable_intr(sc->vtblk_vq);
	ASSERT_SERIALIZED(&sc->vtblk_slz);

retry:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		return;

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bio = req->vbr_bp;
		bp = bio->bio_buf;

		if (req->vbr_ack == VIRTIO_BLK_S_OK) {
			bp->b_resid = 0;
		} else {
			bp->b_flags |= B_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
				bp->b_error = ENOTSUP;
			else
				bp->b_error = EIO;
		}

		devstat_end_transaction_buf(&sc->stats, bp);

		lwkt_serialize_exit(&sc->vtblk_slz);
		/*
		 * Dropping the serializer around biodone() cannot let
		 * further device interrupts in: the serialize handler and
		 * the virtqueue interrupt were both disabled at the top
		 * of vtblk_complete(). It does, however, allow concurrent
		 * vtblk_strategy()/vtblk_startio() command dispatches.
		 */
		biodone(bio);
		lwkt_serialize_enter(&sc->vtblk_slz);

		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		/*
		 * If the host posted new completions between draining the
		 * queue and re-enabling interrupts, process them now
		 * rather than waiting for the next interrupt; the dequeue
		 * loop above drops the serializer around biodone() so I/O
		 * dispatch is not blocked for too long.
		 */
		virtqueue_disable_intr(vq);
		goto retry;
	}
	lwkt_serialize_handler_enable(&sc->vtblk_slz);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{
	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_bio_error(req->vbr_bp, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
		vtblk_bio_error(req->vbr_bp, ENXIO);
		vtblk_enqueue_request(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bio = bioq_takefirst(bioq);
		vtblk_bio_error(bio, ENXIO);
	}

	vtblk_free_requests(sc);
}

static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = kmalloc(sizeof(struct vtblk_request), M_DEVBUF, M_WAITOK);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		kfree(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	bzero(req, sizeof(struct vtblk_request));
	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static void
vtblk_bio_error(struct bio *bio, int error)
{
	struct buf *bp = bio->bio_buf;

	bp->b_flags |= B_ERROR;
	bp->b_error = error;
	biodone(bio);
}