/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $
 */

/* Driver for VirtIO block devices. */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"
#include "virtio_if.h"

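/*
 * A preallocated request tracks a single bio through the virtqueue:
 * vbr_hdr is read by the host, vbr_ack is written back by the host with
 * the request status, and vbr_barrier marks a request that must wait
 * for the queue to drain when the host lacks VIRTIO_BLK_F_BARRIER.
 */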
struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;
	uint8_t				 vbr_barrier;

	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct lwkt_serialize	 vtblk_slz;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010
#define VTBLK_FLAG_BARRIER	0x0020
#define VTBLK_FLAG_WC_CONFIG	0x0040

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		 vtblk_disk;
	cdev_t			 cdev;
	struct devstat		 stats;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;
	struct vtblk_request	*vtblk_req_ordered;

	int			 vtblk_sector_size;
	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_vq_intr(void *);
static void	vtblk_complete(void *);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static struct vtblk_request *vtblk_dequeue_ready(struct vtblk_softc *);
static void	vtblk_enqueue_ready(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_request_error(struct vtblk_request *);
static void	vtblk_finish_bio(struct bio *, int);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

/* Tunables. */
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
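
/*
 * The cache mode can be forced from loader.conf(5); a hypothetical
 * example forcing writethrough globally, but writeback on unit 1 (the
 * per-unit knob is read by vtblk_tunable_int() below):
 *
 *	hw.vtblk.writecache_mode="0"
 *	hw.vtblk.1.writecache_mode="1"
 */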

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER		| \
     VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_WCE			| \
     VIRTIO_BLK_F_CONFIG_WCE)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
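
/*
 * Each request is laid out in the virtqueue as a descriptor chain of
 * the form (built in vtblk_execute_request()):
 *
 *	[ virtio_blk_outhdr ]	header, device-readable
 *	[ data segment(s)   ]	device-writable for reads
 *	[ ack/status byte   ]	device-writable
 */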

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		break;
	case MOD_UNLOAD:
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	lwkt_serialize_init(&sc->vtblk_slz);

	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;

	vtblk_setup_sysctl(sc);

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
				  sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, &sc->vtblk_slz);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	lwkt_serialize_exit(&sc->vtblk_slz);

	vtblk_drain(sc);

	if (sc->cdev != NULL) {
		disk_destroy(&sc->vtblk_disk);
		sc->cdev = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
#if 0 /* XXX Resume IO? */
	vtblk_startio(sc);
#endif
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_open(struct dev_open_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}
442 vtblk_dump(struct dev_dump_args *ap)
443 {
444 	struct vtblk_softc *sc;
445 	cdev_t dev = ap->a_head.a_dev;
446         uint64_t buf_start, buf_len;
447         int error;
448 
449 	sc = dev->si_drv1;
450 	if (sc == NULL)
451 		return (ENXIO);
452 
453         buf_start = ap->a_offset;
454         buf_len = ap->a_length;
455 
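	/*
	 * dump(9) calls us repeatedly with consecutive buffers; a
	 * zero-length call signals the end of the dump and is turned
	 * into a cache flush. Note the serializer is not entered here:
	 * the device is stopped and polled directly while dumping (see
	 * vtblk_prepare_dump()).
	 */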
	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (buf_len > 0)
		error = vtblk_write_dump(sc, ap->a_virtual, buf_start,
		    buf_len);
	else if (buf_len == 0)
		error = vtblk_flush_dump(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

	return (error);
}

static int
vtblk_strategy(struct dev_strategy_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;

	sc = dev->si_drv1;
	if (sc == NULL) {
		vtblk_finish_bio(bio, EINVAL);
		return (EINVAL);
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our readonly'ness to GEOM above.
	 *
	 * XXX: Is that true in DFly?
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) &&
	    (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
		vtblk_finish_bio(bio, EROFS);
		return (EINVAL);
	}

	lwkt_serialize_enter(&sc->vtblk_slz);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
		devstat_start_transaction(&sc->stats);
		bioqdisksort(&sc->vtblk_bioq, bio);
		vtblk_startio(sc);
	} else {
		vtblk_finish_bio(bio, ENXIO);
	}
	lwkt_serialize_exit(&sc->vtblk_slz);
	return (0);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

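	/*
	 * A data buffer of at most MAXPHYS bytes can span at most
	 * MAXPHYS / PAGE_SIZE + 1 pages when it is not page-aligned, so
	 * there is no point in asking for more data segments than that,
	 * even if the host would accept more.
	 */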
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
	} else {
		nsegs += 1;
	}

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, writeback), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->writeback;
	} else {
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);
	}

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	struct disk_info info;

	/* Construct the disk_info. */
	bzero(&info, sizeof(info));

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = DEV_BSIZE;

	info.d_media_blksize = sc->vtblk_sector_size;
	info.d_media_blocks = blkcfg->capacity;

	info.d_ncylinders = blkcfg->geometry.cylinders;
	info.d_nheads = blkcfg->geometry.heads;
	info.d_secpertrack = blkcfg->geometry.sectors;
	info.d_secpercyl = info.d_secpertrack * info.d_nheads;

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;

	devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
			  DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
			  DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
			  DEVSTAT_PRIORITY_DISK);

	/* Attach a generic disk device to ourselves. */
	sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
			       &vbd_disk_ops);

	sc->cdev->si_drv1 = sc;
	sc->cdev->si_iosize_max = MAXPHYS;
	disk_setdiskinfo(&sc->vtblk_disk, &info);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		if ((req = vtblk_dequeue_ready(sc)) == NULL)
			req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			vtblk_enqueue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq, &sc->vtblk_slz);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bio = bioq_takefirst(bioq);
	req->vbr_bp = bio;
	req->vbr_ack = -1;
	req->vbr_barrier = 0;
	req->vbr_hdr.ioprio = 1;
	bp = bio->bio_buf;

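	/*
	 * The sector field of the request header is always in units of
	 * 512-byte sectors (DEV_BSIZE), independent of the block size
	 * advertised by the device.
	 */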
	switch (bp->b_cmd) {
	case BUF_CMD_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BUF_CMD_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	case BUF_CMD_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	if (bp->b_flags & B_ORDERED) {
		if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0)
			req->vbr_barrier = 1;
		else
			req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
	}

	return (req);
}

static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bio;
	struct buf *bp;
	int ordered, writable, error;

	sg = sc->vtblk_sglist;
	bio = req->vbr_bp;
	bp = bio->bio_buf;
	ordered = 0;
	writable = 0;

	if (sc->vtblk_req_ordered != NULL)
		return (EBUSY);

	if (req->vbr_barrier) {
		/*
		 * This request will be executed once all
		 * the in-flight requests are completed.
		 */
		if (!virtqueue_empty(sc->vtblk_vq))
			return (EBUSY);
		ordered = 1;
	}

	/*
	 * The working sglist is only referenced within this function;
	 * reset it before building each request.
	 */
	sglist_reset(sg);

	error = sglist_append(sg, &req->vbr_hdr,
			      sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
		error = sglist_append(sg, bp->b_data, bp->b_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BUF_CMD_READ means the host writes into our buffer. */
		if (bp->b_cmd == BUF_CMD_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

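	/*
	 * Device-readable descriptors (the header, plus the data of a
	 * write) must come before the device-writable ones (the data of
	 * a read, plus the status byte); hence the readable/writable
	 * split passed to virtqueue_enqueue().
	 */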
	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
				  sg->sg_nseg - writable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	sglist_reset(sg);

	return (error);
}

static int
vtblk_vq_intr(void *xsc)
{

	vtblk_complete(xsc);

	return (1);
}

static void
vtblk_complete(void *arg)
{
	struct vtblk_softc *sc;
	struct vtblk_request *req;
	struct virtqueue *vq;
	struct bio *bio;
	struct buf *bp;

	sc = arg;
	vq = sc->vtblk_vq;

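	/*
	 * Keep further interrupts off while completions are processed;
	 * they are re-enabled below, and the retry loop catches entries
	 * that slip in just before that.
	 */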
	lwkt_serialize_handler_disable(&sc->vtblk_slz);
	virtqueue_disable_intr(sc->vtblk_vq);
	ASSERT_SERIALIZED(&sc->vtblk_slz);

retry:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		return;

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bio = req->vbr_bp;
		bp = bio->bio_buf;

		if (sc->vtblk_req_ordered != NULL) {
			/* This should be the only outstanding request. */
			KKASSERT(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		if (req->vbr_ack == VIRTIO_BLK_S_OK) {
			bp->b_resid = 0;
		} else {
			bp->b_flags |= B_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
				bp->b_error = ENOTSUP;
			else
				bp->b_error = EIO;
		}

		devstat_end_transaction_buf(&sc->stats, bp);

		lwkt_serialize_exit(&sc->vtblk_slz);
		/*
		 * Dropping the serializer around biodone() cannot let in
		 * further device interrupts, since the interrupt handler
		 * was disabled above; it does allow concurrent
		 * vtblk_strategy()/vtblk_startio() dispatches to proceed.
		 */
		biodone(bio);
		lwkt_serialize_enter(&sc->vtblk_slz);

		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		/*
		 * If new virtqueue entries appeared immediately after
		 * enabling interrupts, process them now. The serializer
		 * is released and retaken around each biodone() above to
		 * avoid blocking I/O dispatch for too long.
		 */
		virtqueue_disable_intr(vq);
		goto retry;
	}
	lwkt_serialize_handler_enable(&sc->vtblk_slz);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
		panic("%s: cannot reinit VirtIO block device during dump",
		    device_get_nameunit(dev));
	}

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / DEV_BSIZE;

	req->vbr_bp = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_WRITE;
	bp.b_data = virtual;
	bp.b_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

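	/*
	 * Polling is only used by the dump path, which sends a single
	 * request at a time, so the virtqueue is expected to be empty
	 * before a new request is enqueued.
	 */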
	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq, NULL);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_finish_bio(req->vbr_bp, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
		vtblk_finish_bio(req->vbr_bp, ENXIO);
		vtblk_enqueue_request(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_finish_bio(bp, ENXIO);
	}

	vtblk_free_requests(sc);
}

static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full.
	 * Each request consumes VTBLK_MIN_SEGMENTS or more descriptors,
	 * and this driver does not negotiate indirect descriptors, so
	 * scale the request count down accordingly.
	 */
	nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		/* Rely on at least 8 byte alignment from kmalloc(). */
		req = kmalloc(sizeof(struct vtblk_request), M_DEVBUF, M_WAITOK);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		kfree(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}
static void
vtblk_finish_bio(struct bio *bp, int error)
{

	/* Propagate the error to the buf before completing the bio. */
	if (error) {
		bp->bio_buf->b_error = error;
		bp->bio_buf->b_flags |= B_ERROR;
	}
	biodone(bp);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
	    "I", "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	ksnprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}