xref: /freebsd/sys/dev/virtio/block/virtio_blk.c (revision c1d255d3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /* Driver for VirtIO block devices. */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/bio.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/msan.h>
41 #include <sys/sglist.h>
42 #include <sys/sysctl.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/queue.h>
46 
47 #include <geom/geom.h>
48 #include <geom/geom_disk.h>
49 
50 #include <machine/bus.h>
51 #include <machine/resource.h>
52 #include <sys/bus.h>
53 #include <sys/rman.h>
54 
55 #include <dev/virtio/virtio.h>
56 #include <dev/virtio/virtqueue.h>
57 #include <dev/virtio/block/virtio_blk.h>
58 
59 #include "virtio_if.h"
60 
/*
 * Driver-side state for one block request.
 *
 * vbr_hdr is the request header handed to the device as the first
 * scatter/gather segment, vbr_bp is the bio being serviced, and vbr_ack
 * is the one-byte status the device writes back (preset to -1 before
 * submission and decoded by vtblk_request_error()).  vbr_link chains the
 * request onto the softc's free or ready list.
 */
61 struct vtblk_request {
62 	struct virtio_blk_outhdr	 vbr_hdr;
63 	struct bio			*vbr_bp;
64 	uint8_t				 vbr_ack;
65 	TAILQ_ENTRY(vtblk_request)	 vbr_link;
66 };
67 
/*
 * Write cache modes recorded in vtblk_softc.vtblk_write_cache.
 * VTBLK_CACHE_MAX is not a mode itself; presumably it bounds the valid
 * mode values -- its users are outside this part of the file.
 */
68 enum vtblk_cache_mode {
69 	VTBLK_CACHE_WRITETHROUGH,
70 	VTBLK_CACHE_WRITEBACK,
71 	VTBLK_CACHE_MAX
72 };
73 
/*
 * Per-device driver state (the newbus softc).
 *
 * vtblk_mtx is taken around the queue manipulation done by the I/O
 * paths.  vtblk_flags holds the VTBLK_FLAG_* bits: INDIRECT, BARRIER and
 * WCE_CONFIG mirror negotiated features (see vtblk_setup_features()),
 * while DETACH and SUSPEND track driver state.
 *
 * vtblk_sglist is a single scatter/gather list that is reset and reused
 * for every request.  Incoming bios wait on vtblk_bioq; request
 * structures cycle between vtblk_req_free and vtblk_req_ready, and
 * vtblk_req_ordered points at the in-flight ordered request while
 * barriers are emulated by the driver.  vtblk_request_count counts the
 * preallocated requests, and vtblk_max_nsegs is the per-request segment
 * limit computed at attach time.
 *
 * The vtblk_dump_* members are presumably private to the kernel dump
 * path -- confirm against the dump helpers later in the file.
 */
74 struct vtblk_softc {
75 	device_t		 vtblk_dev;
76 	struct mtx		 vtblk_mtx;
77 	uint64_t		 vtblk_features;
78 	uint32_t		 vtblk_flags;
79 #define VTBLK_FLAG_INDIRECT	0x0001
80 #define VTBLK_FLAG_DETACH	0x0002
81 #define VTBLK_FLAG_SUSPEND	0x0004
82 #define VTBLK_FLAG_BARRIER	0x0008
83 #define VTBLK_FLAG_WCE_CONFIG	0x0010
84 
85 	struct virtqueue	*vtblk_vq;
86 	struct sglist		*vtblk_sglist;
87 	struct disk		*vtblk_disk;
88 
89 	struct bio_queue_head	 vtblk_bioq;
90 	TAILQ_HEAD(, vtblk_request)
91 				 vtblk_req_free;
92 	TAILQ_HEAD(, vtblk_request)
93 				 vtblk_req_ready;
94 	struct vtblk_request	*vtblk_req_ordered;
95 
96 	int			 vtblk_max_nsegs;
97 	int			 vtblk_request_count;
98 	enum vtblk_cache_mode	 vtblk_write_cache;
99 
100 	struct bio_queue	 vtblk_dump_queue;
101 	struct vtblk_request	 vtblk_dump_request;
102 };
103 
/*
 * Human-readable names for the block device feature bits.  The table is
 * registered with virtio_set_feature_desc() in vtblk_attach() and ends
 * with a { 0, NULL } sentinel entry.
 */
104 static struct virtio_feature_desc vtblk_feature_desc[] = {
105 	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
106 	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
107 	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
108 	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
109 	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
110 	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
111 	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
112 	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
113 	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
114 	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
115 	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
116 	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
117 	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},
118 
119 	{ 0, NULL }
120 };
121 
/*
 * Forward declarations for the file-local functions, grouped roughly as:
 * module event handler, newbus/VirtIO bus methods, disk(9) callbacks,
 * attach-time setup helpers, request allocation and queueing, completion
 * and drain handling, I/O start/finish, device configuration and control,
 * kernel dump support, write cache control, and sysctl/tunable helpers.
 */
122 static int	vtblk_modevent(module_t, int, void *);
123 
124 static int	vtblk_probe(device_t);
125 static int	vtblk_attach(device_t);
126 static int	vtblk_detach(device_t);
127 static int	vtblk_suspend(device_t);
128 static int	vtblk_resume(device_t);
129 static int	vtblk_shutdown(device_t);
130 static int	vtblk_attach_completed(device_t);
131 static int	vtblk_config_change(device_t);
132 
133 static int	vtblk_open(struct disk *);
134 static int	vtblk_close(struct disk *);
135 static int	vtblk_ioctl(struct disk *, u_long, void *, int,
136 		    struct thread *);
137 static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
138 static void	vtblk_strategy(struct bio *);
139 
140 static int	vtblk_negotiate_features(struct vtblk_softc *);
141 static int	vtblk_setup_features(struct vtblk_softc *);
142 static int	vtblk_maximum_segments(struct vtblk_softc *,
143 		    struct virtio_blk_config *);
144 static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
145 static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
146 static void	vtblk_alloc_disk(struct vtblk_softc *,
147 		    struct virtio_blk_config *);
148 static void	vtblk_create_disk(struct vtblk_softc *);
149 
150 static int	vtblk_request_prealloc(struct vtblk_softc *);
151 static void	vtblk_request_free(struct vtblk_softc *);
152 static struct vtblk_request *
153 		vtblk_request_dequeue(struct vtblk_softc *);
154 static void	vtblk_request_enqueue(struct vtblk_softc *,
155 		    struct vtblk_request *);
156 static struct vtblk_request *
157 		vtblk_request_next_ready(struct vtblk_softc *);
158 static void	vtblk_request_requeue_ready(struct vtblk_softc *,
159 		    struct vtblk_request *);
160 static struct vtblk_request *
161 		vtblk_request_next(struct vtblk_softc *);
162 static struct vtblk_request *
163 		vtblk_request_bio(struct vtblk_softc *);
164 static int	vtblk_request_execute(struct vtblk_softc *,
165 		    struct vtblk_request *);
166 static int	vtblk_request_error(struct vtblk_request *);
167 
168 static void	vtblk_queue_completed(struct vtblk_softc *,
169 		    struct bio_queue *);
170 static void	vtblk_done_completed(struct vtblk_softc *,
171 		    struct bio_queue *);
172 static void	vtblk_drain_vq(struct vtblk_softc *);
173 static void	vtblk_drain(struct vtblk_softc *);
174 
175 static void	vtblk_startio(struct vtblk_softc *);
176 static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
177 
178 static void	vtblk_read_config(struct vtblk_softc *,
179 		    struct virtio_blk_config *);
180 static void	vtblk_ident(struct vtblk_softc *);
181 static int	vtblk_poll_request(struct vtblk_softc *,
182 		    struct vtblk_request *);
183 static int	vtblk_quiesce(struct vtblk_softc *);
184 static void	vtblk_vq_intr(void *);
185 static void	vtblk_stop(struct vtblk_softc *);
186 
187 static void	vtblk_dump_quiesce(struct vtblk_softc *);
188 static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
189 static int	vtblk_dump_flush(struct vtblk_softc *);
190 static void	vtblk_dump_complete(struct vtblk_softc *);
191 
192 static void	vtblk_set_write_cache(struct vtblk_softc *, int);
193 static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
194 		    struct virtio_blk_config *);
195 static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
196 
197 static void	vtblk_setup_sysctl(struct vtblk_softc *);
198 static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
199 
/*
 * Byte-order helpers.  vtblk_modern() is true when VIRTIO_F_VERSION_1 was
 * negotiated; that result is passed, together with the value, to the
 * generic virtio_htogNN()/virtio_gtohNN() conversion routines.
 */
200 #define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
201 #define vtblk_htog16(_sc, _val)	virtio_htog16(vtblk_modern(_sc), _val)
202 #define vtblk_htog32(_sc, _val)	virtio_htog32(vtblk_modern(_sc), _val)
203 #define vtblk_htog64(_sc, _val)	virtio_htog64(vtblk_modern(_sc), _val)
204 #define vtblk_gtoh16(_sc, _val)	virtio_gtoh16(vtblk_modern(_sc), _val)
205 #define vtblk_gtoh32(_sc, _val)	virtio_gtoh32(vtblk_modern(_sc), _val)
206 #define vtblk_gtoh64(_sc, _val)	virtio_gtoh64(vtblk_modern(_sc), _val)
207 
/*
 * Global loader tunables.  Their consumers are outside this part of the
 * file; judging by the names, hw.vtblk.no_ident presumably suppresses the
 * device identification request and hw.vtblk.writecache_mode presumably
 * overrides the write cache mode, with -1 meaning "no override" --
 * TODO confirm against vtblk_ident() and vtblk_write_cache_enabled().
 */
208 /* Tunables. */
209 static int vtblk_no_ident = 0;
210 TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
211 static int vtblk_writecache_mode = -1;
212 TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
213 
/*
 * Feature bits the driver offers during negotiation.  The modern set is
 * the common set; the legacy set additionally offers
 * VIRTIO_BLK_F_BARRIER.  vtblk_negotiate_features() picks one of the two
 * depending on virtio_bus_is_modern().
 */
214 #define VTBLK_COMMON_FEATURES \
215     (VIRTIO_BLK_F_SIZE_MAX		| \
216      VIRTIO_BLK_F_SEG_MAX		| \
217      VIRTIO_BLK_F_GEOMETRY		| \
218      VIRTIO_BLK_F_RO			| \
219      VIRTIO_BLK_F_BLK_SIZE		| \
220      VIRTIO_BLK_F_FLUSH			| \
221      VIRTIO_BLK_F_TOPOLOGY		| \
222      VIRTIO_BLK_F_CONFIG_WCE		| \
223      VIRTIO_BLK_F_DISCARD		| \
224      VIRTIO_RING_F_INDIRECT_DESC)
225 
226 #define VTBLK_MODERN_FEATURES	(VTBLK_COMMON_FEATURES)
227 #define VTBLK_LEGACY_FEATURES	(VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)
228 
/*
 * Wrappers around the softc mutex, which is initialized as an MTX_DEF
 * mutex.  The two assert variants check that the mutex is, respectively
 * is not, owned by the caller.
 *
 * NOTE(review): VTBLK_MTX() expands to an unparenthesized '&' expression.
 * That is harmless for the wrappers below, which only pass it as a
 * function argument, but take care before using it in other expressions.
 */
229 #define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
230 #define VTBLK_LOCK_INIT(_sc, _name) \
231 				mtx_init(VTBLK_MTX((_sc)), (_name), \
232 				    "VirtIO Block Lock", MTX_DEF)
233 #define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
234 #define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
235 #define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
236 #define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
237 #define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
238 				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
239 
/*
 * VTBLK_DISK_NAME is the name given to the disk(9) device.  VTBLK_BSIZE
 * is the fixed 512-byte unit used to convert the device capacity and bio
 * offsets to and from sectors.  VTBLK_QUIESCE_TIMEOUT is 30 seconds
 * expressed in ticks; its user is outside this part of the file.
 */
240 #define VTBLK_DISK_NAME		"vtbd"
241 #define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
242 #define VTBLK_BSIZE		512
243 
244 /*
245  * Each block request uses at least two segments - one for the header
246  * and one for the status.
247  */
248 #define VTBLK_MIN_SEGMENTS	2
249 
/*
 * Newbus method table: the standard device lifecycle entry points plus
 * the two VirtIO bus callbacks (attach completion and configuration
 * change notification).
 */
250 static device_method_t vtblk_methods[] = {
251 	/* Device methods. */
252 	DEVMETHOD(device_probe,		vtblk_probe),
253 	DEVMETHOD(device_attach,	vtblk_attach),
254 	DEVMETHOD(device_detach,	vtblk_detach),
255 	DEVMETHOD(device_suspend,	vtblk_suspend),
256 	DEVMETHOD(device_resume,	vtblk_resume),
257 	DEVMETHOD(device_shutdown,	vtblk_shutdown),
258 
259 	/* VirtIO methods. */
260 	DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
261 	DEVMETHOD(virtio_config_change,	vtblk_config_change),
262 
263 	DEVMETHOD_END
264 };
265 
/*
 * Driver description: driver name, method table and the size of the
 * per-device softc.  vtblk_devclass is the devclass handle passed to
 * VIRTIO_DRIVER_MODULE() below.
 */
266 static driver_t vtblk_driver = {
267 	"vtblk",
268 	vtblk_methods,
269 	sizeof(struct vtblk_softc)
270 };
271 static devclass_t vtblk_devclass;
272 
/*
 * Module glue: register the driver with vtblk_modevent() as its event
 * handler, declare module version 1 and a dependency on version 1 of the
 * virtio module, and publish PNP information for VIRTIO_ID_BLOCK devices.
 */
273 VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_devclass,
274     vtblk_modevent, 0);
275 MODULE_VERSION(virtio_blk, 1);
276 MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
277 
278 VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");
279 
280 static int
281 vtblk_modevent(module_t mod, int type, void *unused)
282 {
283 	int error;
284 
285 	error = 0;
286 
287 	switch (type) {
288 	case MOD_LOAD:
289 	case MOD_QUIESCE:
290 	case MOD_UNLOAD:
291 	case MOD_SHUTDOWN:
292 		break;
293 	default:
294 		error = EOPNOTSUPP;
295 		break;
296 	}
297 
298 	return (error);
299 }
300 
301 static int
302 vtblk_probe(device_t dev)
303 {
304 	return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
305 }
306 
307 static int
308 vtblk_attach(device_t dev)
309 {
310 	struct vtblk_softc *sc;
311 	struct virtio_blk_config blkcfg;
312 	int error;
313 
314 	sc = device_get_softc(dev);
315 	sc->vtblk_dev = dev;
316 	virtio_set_feature_desc(dev, vtblk_feature_desc);
317 
318 	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
319 	bioq_init(&sc->vtblk_bioq);
320 	TAILQ_INIT(&sc->vtblk_dump_queue);
321 	TAILQ_INIT(&sc->vtblk_req_free);
322 	TAILQ_INIT(&sc->vtblk_req_ready);
323 
324 	vtblk_setup_sysctl(sc);
325 
326 	error = vtblk_setup_features(sc);
327 	if (error) {
328 		device_printf(dev, "cannot setup features\n");
329 		goto fail;
330 	}
331 
332 	vtblk_read_config(sc, &blkcfg);
333 
334 	/*
335 	 * With the current sglist(9) implementation, it is not easy
336 	 * for us to support a maximum segment size as adjacent
337 	 * segments are coalesced. For now, just make sure it's larger
338 	 * than the maximum supported transfer size.
339 	 */
340 	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
341 		if (blkcfg.size_max < maxphys) {
342 			error = ENOTSUP;
343 			device_printf(dev, "host requires unsupported "
344 			    "maximum segment size feature\n");
345 			goto fail;
346 		}
347 	}
348 
349 	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
350 	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
351 		error = EINVAL;
352 		device_printf(dev, "fewer than minimum number of segments "
353 		    "allowed: %d\n", sc->vtblk_max_nsegs);
354 		goto fail;
355 	}
356 
357 	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
358 	if (sc->vtblk_sglist == NULL) {
359 		error = ENOMEM;
360 		device_printf(dev, "cannot allocate sglist\n");
361 		goto fail;
362 	}
363 
364 	error = vtblk_alloc_virtqueue(sc);
365 	if (error) {
366 		device_printf(dev, "cannot allocate virtqueue\n");
367 		goto fail;
368 	}
369 
370 	error = vtblk_request_prealloc(sc);
371 	if (error) {
372 		device_printf(dev, "cannot preallocate requests\n");
373 		goto fail;
374 	}
375 
376 	vtblk_alloc_disk(sc, &blkcfg);
377 
378 	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
379 	if (error) {
380 		device_printf(dev, "cannot setup virtqueue interrupt\n");
381 		goto fail;
382 	}
383 
384 	virtqueue_enable_intr(sc->vtblk_vq);
385 
386 fail:
387 	if (error)
388 		vtblk_detach(dev);
389 
390 	return (error);
391 }
392 
393 static int
394 vtblk_detach(device_t dev)
395 {
396 	struct vtblk_softc *sc;
397 
398 	sc = device_get_softc(dev);
399 
400 	VTBLK_LOCK(sc);
401 	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
402 	if (device_is_attached(dev))
403 		vtblk_stop(sc);
404 	VTBLK_UNLOCK(sc);
405 
406 	vtblk_drain(sc);
407 
408 	if (sc->vtblk_disk != NULL) {
409 		disk_destroy(sc->vtblk_disk);
410 		sc->vtblk_disk = NULL;
411 	}
412 
413 	if (sc->vtblk_sglist != NULL) {
414 		sglist_free(sc->vtblk_sglist);
415 		sc->vtblk_sglist = NULL;
416 	}
417 
418 	VTBLK_LOCK_DESTROY(sc);
419 
420 	return (0);
421 }
422 
423 static int
424 vtblk_suspend(device_t dev)
425 {
426 	struct vtblk_softc *sc;
427 	int error;
428 
429 	sc = device_get_softc(dev);
430 
431 	VTBLK_LOCK(sc);
432 	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
433 	/* XXX BMV: virtio_stop(), etc needed here? */
434 	error = vtblk_quiesce(sc);
435 	if (error)
436 		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
437 	VTBLK_UNLOCK(sc);
438 
439 	return (error);
440 }
441 
442 static int
443 vtblk_resume(device_t dev)
444 {
445 	struct vtblk_softc *sc;
446 
447 	sc = device_get_softc(dev);
448 
449 	VTBLK_LOCK(sc);
450 	/* XXX BMV: virtio_reinit(), etc needed here? */
451 	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
452 	vtblk_startio(sc);
453 	VTBLK_UNLOCK(sc);
454 
455 	return (0);
456 }
457 
458 static int
459 vtblk_shutdown(device_t dev)
460 {
461 
462 	return (0);
463 }
464 
465 static int
466 vtblk_attach_completed(device_t dev)
467 {
468 	struct vtblk_softc *sc;
469 
470 	sc = device_get_softc(dev);
471 
472 	/*
473 	 * Create disk after attach as VIRTIO_BLK_T_GET_ID can only be
474 	 * processed after the device acknowledged
475 	 * VIRTIO_CONFIG_STATUS_DRIVER_OK.
476 	 */
477 	vtblk_create_disk(sc);
478 	return (0);
479 }
480 
481 static int
482 vtblk_config_change(device_t dev)
483 {
484 	struct vtblk_softc *sc;
485 	struct virtio_blk_config blkcfg;
486 	uint64_t capacity;
487 
488 	sc = device_get_softc(dev);
489 
490 	vtblk_read_config(sc, &blkcfg);
491 
492 	/* Capacity is always in 512-byte units. */
493 	capacity = blkcfg.capacity * VTBLK_BSIZE;
494 
495 	if (sc->vtblk_disk->d_mediasize != capacity)
496 		vtblk_resize_disk(sc, capacity);
497 
498 	return (0);
499 }
500 
501 static int
502 vtblk_open(struct disk *dp)
503 {
504 	struct vtblk_softc *sc;
505 
506 	if ((sc = dp->d_drv1) == NULL)
507 		return (ENXIO);
508 
509 	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
510 }
511 
512 static int
513 vtblk_close(struct disk *dp)
514 {
515 	struct vtblk_softc *sc;
516 
517 	if ((sc = dp->d_drv1) == NULL)
518 		return (ENXIO);
519 
520 	return (0);
521 }
522 
523 static int
524 vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
525     struct thread *td)
526 {
527 	struct vtblk_softc *sc;
528 
529 	if ((sc = dp->d_drv1) == NULL)
530 		return (ENXIO);
531 
532 	return (ENOTTY);
533 }
534 
535 static int
536 vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
537     size_t length)
538 {
539 	struct disk *dp;
540 	struct vtblk_softc *sc;
541 	int error;
542 
543 	dp = arg;
544 	error = 0;
545 
546 	if ((sc = dp->d_drv1) == NULL)
547 		return (ENXIO);
548 
549 	VTBLK_LOCK(sc);
550 
551 	vtblk_dump_quiesce(sc);
552 
553 	if (length > 0)
554 		error = vtblk_dump_write(sc, virtual, offset, length);
555 	if (error || (virtual == NULL && offset == 0))
556 		vtblk_dump_complete(sc);
557 
558 	VTBLK_UNLOCK(sc);
559 
560 	return (error);
561 }
562 
563 static void
564 vtblk_strategy(struct bio *bp)
565 {
566 	struct vtblk_softc *sc;
567 
568 	if ((sc = bp->bio_disk->d_drv1) == NULL) {
569 		vtblk_bio_done(NULL, bp, EINVAL);
570 		return;
571 	}
572 
573 	if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
574 	    (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
575 		vtblk_bio_done(sc, bp, EOPNOTSUPP);
576 		return;
577 	}
578 
579 	VTBLK_LOCK(sc);
580 
581 	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
582 		VTBLK_UNLOCK(sc);
583 		vtblk_bio_done(sc, bp, ENXIO);
584 		return;
585 	}
586 
587 	bioq_insert_tail(&sc->vtblk_bioq, bp);
588 	vtblk_startio(sc);
589 
590 	VTBLK_UNLOCK(sc);
591 }
592 
593 static int
594 vtblk_negotiate_features(struct vtblk_softc *sc)
595 {
596 	device_t dev;
597 	uint64_t features;
598 
599 	dev = sc->vtblk_dev;
600 	features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
601 	    VTBLK_LEGACY_FEATURES;
602 
603 	sc->vtblk_features = virtio_negotiate_features(dev, features);
604 	return (virtio_finalize_features(dev));
605 }
606 
607 static int
608 vtblk_setup_features(struct vtblk_softc *sc)
609 {
610 	device_t dev;
611 	int error;
612 
613 	dev = sc->vtblk_dev;
614 
615 	error = vtblk_negotiate_features(sc);
616 	if (error)
617 		return (error);
618 
619 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
620 		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
621 	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
622 		sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;
623 
624 	/* Legacy. */
625 	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
626 		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
627 
628 	return (0);
629 }
630 
631 static int
632 vtblk_maximum_segments(struct vtblk_softc *sc,
633     struct virtio_blk_config *blkcfg)
634 {
635 	device_t dev;
636 	int nsegs;
637 
638 	dev = sc->vtblk_dev;
639 	nsegs = VTBLK_MIN_SEGMENTS;
640 
641 	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
642 		nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
643 		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
644 			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
645 	} else
646 		nsegs += 1;
647 
648 	return (nsegs);
649 }
650 
651 static int
652 vtblk_alloc_virtqueue(struct vtblk_softc *sc)
653 {
654 	device_t dev;
655 	struct vq_alloc_info vq_info;
656 
657 	dev = sc->vtblk_dev;
658 
659 	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
660 	    vtblk_vq_intr, sc, &sc->vtblk_vq,
661 	    "%s request", device_get_nameunit(dev));
662 
663 	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
664 }
665 
666 static void
667 vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
668 {
669 	device_t dev;
670 	struct disk *dp;
671 	int error;
672 
673 	dev = sc->vtblk_dev;
674 	dp = sc->vtblk_disk;
675 
676 	dp->d_mediasize = new_capacity;
677 	if (bootverbose) {
678 		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
679 		    (uintmax_t) dp->d_mediasize >> 20,
680 		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
681 		    dp->d_sectorsize);
682 	}
683 
684 	error = disk_resize(dp, M_NOWAIT);
685 	if (error) {
686 		device_printf(dev,
687 		    "disk_resize(9) failed, error: %d\n", error);
688 	}
689 }
690 
691 static void
692 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
693 {
694 	device_t dev;
695 	struct disk *dp;
696 
697 	dev = sc->vtblk_dev;
698 
699 	sc->vtblk_disk = dp = disk_alloc();
700 	dp->d_open = vtblk_open;
701 	dp->d_close = vtblk_close;
702 	dp->d_ioctl = vtblk_ioctl;
703 	dp->d_strategy = vtblk_strategy;
704 	dp->d_name = VTBLK_DISK_NAME;
705 	dp->d_unit = device_get_unit(dev);
706 	dp->d_drv1 = sc;
707 	dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
708 	dp->d_hba_vendor = virtio_get_vendor(dev);
709 	dp->d_hba_device = virtio_get_device(dev);
710 	dp->d_hba_subvendor = virtio_get_subvendor(dev);
711 	dp->d_hba_subdevice = virtio_get_subdevice(dev);
712 
713 	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
714 		dp->d_flags |= DISKFLAG_WRITE_PROTECT;
715 	else {
716 		if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
717 			dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
718 		dp->d_dump = vtblk_dump;
719 	}
720 
721 	/* Capacity is always in 512-byte units. */
722 	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
723 
724 	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
725 		dp->d_sectorsize = blkcfg->blk_size;
726 	else
727 		dp->d_sectorsize = VTBLK_BSIZE;
728 
729 	/*
730 	 * The VirtIO maximum I/O size is given in terms of segments.
731 	 * However, FreeBSD limits I/O size by logical buffer size, not
732 	 * by physically contiguous pages. Therefore, we have to assume
733 	 * no pages are contiguous. This may impose an artificially low
734 	 * maximum I/O size. But in practice, since QEMU advertises 128
735 	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
736 	 * which is typically greater than maxphys. Eventually we should
737 	 * just advertise maxphys and split buffers that are too big.
738 	 *
739 	 * Note we must subtract one additional segment in case of non
740 	 * page aligned buffers.
741 	 */
742 	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
743 	    PAGE_SIZE;
744 	if (dp->d_maxsize < PAGE_SIZE)
745 		dp->d_maxsize = PAGE_SIZE; /* XXX */
746 
747 	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
748 		dp->d_fwsectors = blkcfg->geometry.sectors;
749 		dp->d_fwheads = blkcfg->geometry.heads;
750 	}
751 
752 	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
753 	    blkcfg->topology.physical_block_exp > 0) {
754 		dp->d_stripesize = dp->d_sectorsize *
755 		    (1 << blkcfg->topology.physical_block_exp);
756 		dp->d_stripeoffset = (dp->d_stripesize -
757 		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
758 		    dp->d_stripesize;
759 	}
760 
761 	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
762 		dp->d_flags |= DISKFLAG_CANDELETE;
763 		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
764 	}
765 
766 	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
767 		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
768 	else
769 		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
770 }
771 
772 static void
773 vtblk_create_disk(struct vtblk_softc *sc)
774 {
775 	struct disk *dp;
776 
777 	dp = sc->vtblk_disk;
778 
779 	vtblk_ident(sc);
780 
781 	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
782 	    (uintmax_t) dp->d_mediasize >> 20,
783 	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
784 	    dp->d_sectorsize);
785 
786 	disk_create(dp, DISK_VERSION);
787 }
788 
789 static int
790 vtblk_request_prealloc(struct vtblk_softc *sc)
791 {
792 	struct vtblk_request *req;
793 	int i, nreqs;
794 
795 	nreqs = virtqueue_size(sc->vtblk_vq);
796 
797 	/*
798 	 * Preallocate sufficient requests to keep the virtqueue full. Each
799 	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
800 	 * the number allocated when indirect descriptors are not available.
801 	 */
802 	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
803 		nreqs /= VTBLK_MIN_SEGMENTS;
804 
805 	for (i = 0; i < nreqs; i++) {
806 		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
807 		if (req == NULL)
808 			return (ENOMEM);
809 
810 		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
811 		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
812 
813 		sc->vtblk_request_count++;
814 		vtblk_request_enqueue(sc, req);
815 	}
816 
817 	return (0);
818 }
819 
820 static void
821 vtblk_request_free(struct vtblk_softc *sc)
822 {
823 	struct vtblk_request *req;
824 
825 	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
826 
827 	while ((req = vtblk_request_dequeue(sc)) != NULL) {
828 		sc->vtblk_request_count--;
829 		free(req, M_DEVBUF);
830 	}
831 
832 	KASSERT(sc->vtblk_request_count == 0,
833 	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
834 }
835 
836 static struct vtblk_request *
837 vtblk_request_dequeue(struct vtblk_softc *sc)
838 {
839 	struct vtblk_request *req;
840 
841 	req = TAILQ_FIRST(&sc->vtblk_req_free);
842 	if (req != NULL) {
843 		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
844 		bzero(req, sizeof(struct vtblk_request));
845 	}
846 
847 	return (req);
848 }
849 
850 static void
851 vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
852 {
853 
854 	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
855 }
856 
857 static struct vtblk_request *
858 vtblk_request_next_ready(struct vtblk_softc *sc)
859 {
860 	struct vtblk_request *req;
861 
862 	req = TAILQ_FIRST(&sc->vtblk_req_ready);
863 	if (req != NULL)
864 		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
865 
866 	return (req);
867 }
868 
869 static void
870 vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
871 {
872 
873 	/* NOTE: Currently, there will be at most one request in the queue. */
874 	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
875 }
876 
877 static struct vtblk_request *
878 vtblk_request_next(struct vtblk_softc *sc)
879 {
880 	struct vtblk_request *req;
881 
882 	req = vtblk_request_next_ready(sc);
883 	if (req != NULL)
884 		return (req);
885 
886 	return (vtblk_request_bio(sc));
887 }
888 
889 static struct vtblk_request *
890 vtblk_request_bio(struct vtblk_softc *sc)
891 {
892 	struct bio_queue_head *bioq;
893 	struct vtblk_request *req;
894 	struct bio *bp;
895 
896 	bioq = &sc->vtblk_bioq;
897 
898 	if (bioq_first(bioq) == NULL)
899 		return (NULL);
900 
901 	req = vtblk_request_dequeue(sc);
902 	if (req == NULL)
903 		return (NULL);
904 
905 	bp = bioq_takefirst(bioq);
906 	req->vbr_bp = bp;
907 	req->vbr_ack = -1;
908 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
909 
910 	switch (bp->bio_cmd) {
911 	case BIO_FLUSH:
912 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
913 		req->vbr_hdr.sector = 0;
914 		break;
915 	case BIO_READ:
916 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
917 		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
918 		break;
919 	case BIO_WRITE:
920 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
921 		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
922 		break;
923 	case BIO_DELETE:
924 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
925 		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
926 		break;
927 	default:
928 		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
929 	}
930 
931 	if (bp->bio_flags & BIO_ORDERED)
932 		req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);
933 
934 	return (req);
935 }
936 
937 static int
938 vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
939 {
940 	struct virtqueue *vq;
941 	struct sglist *sg;
942 	struct bio *bp;
943 	int ordered, readable, writable, error;
944 
945 	vq = sc->vtblk_vq;
946 	sg = sc->vtblk_sglist;
947 	bp = req->vbr_bp;
948 	ordered = 0;
949 	writable = 0;
950 
951 	/*
952 	 * Some hosts (such as bhyve) do not implement the barrier feature,
953 	 * so we emulate it in the driver by allowing the barrier request
954 	 * to be the only one in flight.
955 	 */
956 	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
957 		if (sc->vtblk_req_ordered != NULL)
958 			return (EBUSY);
959 		if (bp->bio_flags & BIO_ORDERED) {
960 			if (!virtqueue_empty(vq))
961 				return (EBUSY);
962 			ordered = 1;
963 			req->vbr_hdr.type &= vtblk_gtoh32(sc,
964 				~VIRTIO_BLK_T_BARRIER);
965 		}
966 	}
967 
968 	sglist_reset(sg);
969 	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
970 
971 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
972 		error = sglist_append_bio(sg, bp);
973 		if (error || sg->sg_nseg == sg->sg_maxseg) {
974 			panic("%s: bio %p data buffer too big %d",
975 			    __func__, bp, error);
976 		}
977 
978 		/* BIO_READ means the host writes into our buffer. */
979 		if (bp->bio_cmd == BIO_READ)
980 			writable = sg->sg_nseg - 1;
981 	} else if (bp->bio_cmd == BIO_DELETE) {
982 		struct virtio_blk_discard_write_zeroes *discard;
983 
984 		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
985 		if (discard == NULL)
986 			return (ENOMEM);
987 
988 		bp->bio_driver1 = discard;
989 		discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
990 		discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
991 		error = sglist_append(sg, discard, sizeof(*discard));
992 		if (error || sg->sg_nseg == sg->sg_maxseg) {
993 			panic("%s: bio %p data buffer too big %d",
994 			    __func__, bp, error);
995 		}
996 	}
997 
998 	writable++;
999 	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
1000 	readable = sg->sg_nseg - writable;
1001 
1002 	error = virtqueue_enqueue(vq, req, sg, readable, writable);
1003 	if (error == 0 && ordered)
1004 		sc->vtblk_req_ordered = req;
1005 
1006 	return (error);
1007 }
1008 
1009 static int
1010 vtblk_request_error(struct vtblk_request *req)
1011 {
1012 	int error;
1013 
1014 	switch (req->vbr_ack) {
1015 	case VIRTIO_BLK_S_OK:
1016 		error = 0;
1017 		break;
1018 	case VIRTIO_BLK_S_UNSUPP:
1019 		error = ENOTSUP;
1020 		break;
1021 	default:
1022 		error = EIO;
1023 		break;
1024 	}
1025 
1026 	return (error);
1027 }
1028 
1029 static void
1030 vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1031 {
1032 	struct vtblk_request *req;
1033 	struct bio *bp;
1034 
1035 	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
1036 		if (sc->vtblk_req_ordered != NULL) {
1037 			MPASS(sc->vtblk_req_ordered == req);
1038 			sc->vtblk_req_ordered = NULL;
1039 		}
1040 
1041 		bp = req->vbr_bp;
1042 		bp->bio_error = vtblk_request_error(req);
1043 		TAILQ_INSERT_TAIL(queue, bp, bio_queue);
1044 
1045 		vtblk_request_enqueue(sc, req);
1046 	}
1047 }
1048 
1049 static void
1050 vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1051 {
1052 	struct bio *bp, *tmp;
1053 
1054 	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
1055 		if (bp->bio_error != 0)
1056 			disk_err(bp, "hard error", -1, 1);
1057 		vtblk_bio_done(sc, bp, bp->bio_error);
1058 	}
1059 }
1060 
1061 static void
1062 vtblk_drain_vq(struct vtblk_softc *sc)
1063 {
1064 	struct virtqueue *vq;
1065 	struct vtblk_request *req;
1066 	int last;
1067 
1068 	vq = sc->vtblk_vq;
1069 	last = 0;
1070 
1071 	while ((req = virtqueue_drain(vq, &last)) != NULL) {
1072 		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1073 		vtblk_request_enqueue(sc, req);
1074 	}
1075 
1076 	sc->vtblk_req_ordered = NULL;
1077 	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
1078 }
1079 
1080 static void
1081 vtblk_drain(struct vtblk_softc *sc)
1082 {
1083 	struct bio_queue_head *bioq;
1084 	struct vtblk_request *req;
1085 	struct bio *bp;
1086 
1087 	bioq = &sc->vtblk_bioq;
1088 
1089 	if (sc->vtblk_vq != NULL) {
1090 		struct bio_queue queue;
1091 
1092 		TAILQ_INIT(&queue);
1093 		vtblk_queue_completed(sc, &queue);
1094 		vtblk_done_completed(sc, &queue);
1095 
1096 		vtblk_drain_vq(sc);
1097 	}
1098 
1099 	while ((req = vtblk_request_next_ready(sc)) != NULL) {
1100 		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1101 		vtblk_request_enqueue(sc, req);
1102 	}
1103 
1104 	while (bioq_first(bioq) != NULL) {
1105 		bp = bioq_takefirst(bioq);
1106 		vtblk_bio_done(sc, bp, ENXIO);
1107 	}
1108 
1109 	vtblk_request_free(sc);
1110 }
1111 
1112 static void
1113 vtblk_startio(struct vtblk_softc *sc)
1114 {
1115 	struct virtqueue *vq;
1116 	struct vtblk_request *req;
1117 	int enq;
1118 
1119 	VTBLK_LOCK_ASSERT(sc);
1120 	vq = sc->vtblk_vq;
1121 	enq = 0;
1122 
1123 	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1124 		return;
1125 
1126 	while (!virtqueue_full(vq)) {
1127 		req = vtblk_request_next(sc);
1128 		if (req == NULL)
1129 			break;
1130 
1131 		if (vtblk_request_execute(sc, req) != 0) {
1132 			vtblk_request_requeue_ready(sc, req);
1133 			break;
1134 		}
1135 
1136 		enq++;
1137 	}
1138 
1139 	if (enq > 0)
1140 		virtqueue_notify(vq);
1141 }
1142 
1143 static void
1144 vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
1145 {
1146 
1147 	/* Because of GEOM direct dispatch, we cannot hold any locks. */
1148 	if (sc != NULL)
1149 		VTBLK_LOCK_ASSERT_NOTOWNED(sc);
1150 
1151 	if (error) {
1152 		bp->bio_resid = bp->bio_bcount;
1153 		bp->bio_error = error;
1154 		bp->bio_flags |= BIO_ERROR;
1155 	} else {
1156 		kmsan_mark_bio(bp, KMSAN_STATE_INITED);
1157 	}
1158 
1159 	if (bp->bio_driver1 != NULL) {
1160 		free(bp->bio_driver1, M_DEVBUF);
1161 		bp->bio_driver1 = NULL;
1162 	}
1163 
1164 	biodone(bp);
1165 }
1166 
1167 #define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
1168 	if (virtio_with_feature(_dev, _feature)) {			\
1169 		virtio_read_device_config(_dev,				\
1170 		    offsetof(struct virtio_blk_config, _field),		\
1171 		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
1172 	}
1173 
1174 static void
1175 vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
1176 {
1177 	device_t dev;
1178 
1179 	dev = sc->vtblk_dev;
1180 
1181 	bzero(blkcfg, sizeof(struct virtio_blk_config));
1182 
1183 	/* The capacity is always available. */
1184 	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
1185 	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));
1186 
1187 	/* Read the configuration if the feature was negotiated. */
1188 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
1189 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
1190 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1191 	    geometry.cylinders, blkcfg);
1192 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1193 	    geometry.heads, blkcfg);
1194 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1195 	    geometry.sectors, blkcfg);
1196 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
1197 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1198 	    topology.physical_block_exp, blkcfg);
1199 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1200 	    topology.alignment_offset, blkcfg);
1201 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1202 	    topology.min_io_size, blkcfg);
1203 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1204 	    topology.opt_io_size, blkcfg);
1205 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
1206 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
1207 	    blkcfg);
1208 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
1209 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
1210 	    blkcfg);
1211 }
1212 
1213 #undef VTBLK_GET_CONFIG
1214 
1215 static void
1216 vtblk_ident(struct vtblk_softc *sc)
1217 {
1218 	struct bio buf;
1219 	struct disk *dp;
1220 	struct vtblk_request *req;
1221 	int len, error;
1222 
1223 	dp = sc->vtblk_disk;
1224 	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);
1225 
1226 	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
1227 		return;
1228 
1229 	req = vtblk_request_dequeue(sc);
1230 	if (req == NULL)
1231 		return;
1232 
1233 	req->vbr_ack = -1;
1234 	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
1235 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1236 	req->vbr_hdr.sector = 0;
1237 
1238 	req->vbr_bp = &buf;
1239 	g_reset_bio(&buf);
1240 
1241 	buf.bio_cmd = BIO_READ;
1242 	buf.bio_data = dp->d_ident;
1243 	buf.bio_bcount = len;
1244 
1245 	VTBLK_LOCK(sc);
1246 	error = vtblk_poll_request(sc, req);
1247 	VTBLK_UNLOCK(sc);
1248 
1249 	vtblk_request_enqueue(sc, req);
1250 
1251 	if (error) {
1252 		device_printf(sc->vtblk_dev,
1253 		    "error getting device identifier: %d\n", error);
1254 	}
1255 }
1256 
1257 static int
1258 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
1259 {
1260 	struct virtqueue *vq;
1261 	int error;
1262 
1263 	vq = sc->vtblk_vq;
1264 
1265 	if (!virtqueue_empty(vq))
1266 		return (EBUSY);
1267 
1268 	error = vtblk_request_execute(sc, req);
1269 	if (error)
1270 		return (error);
1271 
1272 	virtqueue_notify(vq);
1273 	virtqueue_poll(vq, NULL);
1274 
1275 	error = vtblk_request_error(req);
1276 	if (error && bootverbose) {
1277 		device_printf(sc->vtblk_dev,
1278 		    "%s: IO error: %d\n", __func__, error);
1279 	}
1280 
1281 	return (error);
1282 }
1283 
1284 static int
1285 vtblk_quiesce(struct vtblk_softc *sc)
1286 {
1287 	int error;
1288 
1289 	VTBLK_LOCK_ASSERT(sc);
1290 	error = 0;
1291 
1292 	while (!virtqueue_empty(sc->vtblk_vq)) {
1293 		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1294 		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1295 			error = EBUSY;
1296 			break;
1297 		}
1298 	}
1299 
1300 	return (error);
1301 }
1302 
1303 static void
1304 vtblk_vq_intr(void *xsc)
1305 {
1306 	struct vtblk_softc *sc;
1307 	struct virtqueue *vq;
1308 	struct bio_queue queue;
1309 
1310 	sc = xsc;
1311 	vq = sc->vtblk_vq;
1312 	TAILQ_INIT(&queue);
1313 
1314 	VTBLK_LOCK(sc);
1315 
1316 again:
1317 	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
1318 		goto out;
1319 
1320 	vtblk_queue_completed(sc, &queue);
1321 	vtblk_startio(sc);
1322 
1323 	if (virtqueue_enable_intr(vq) != 0) {
1324 		virtqueue_disable_intr(vq);
1325 		goto again;
1326 	}
1327 
1328 	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1329 		wakeup(&sc->vtblk_vq);
1330 
1331 out:
1332 	VTBLK_UNLOCK(sc);
1333 	vtblk_done_completed(sc, &queue);
1334 }
1335 
1336 static void
1337 vtblk_stop(struct vtblk_softc *sc)
1338 {
1339 
1340 	virtqueue_disable_intr(sc->vtblk_vq);
1341 	virtio_stop(sc->vtblk_dev);
1342 }
1343 
1344 static void
1345 vtblk_dump_quiesce(struct vtblk_softc *sc)
1346 {
1347 
1348 	/*
1349 	 * Spin here until all the requests in-flight at the time of the
1350 	 * dump are completed and queued. The queued requests will be
1351 	 * biodone'd once the dump is finished.
1352 	 */
1353 	while (!virtqueue_empty(sc->vtblk_vq))
1354 		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
1355 }
1356 
1357 static int
1358 vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1359     size_t length)
1360 {
1361 	struct bio buf;
1362 	struct vtblk_request *req;
1363 
1364 	req = &sc->vtblk_dump_request;
1365 	req->vbr_ack = -1;
1366 	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
1367 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1368 	req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);
1369 
1370 	req->vbr_bp = &buf;
1371 	g_reset_bio(&buf);
1372 
1373 	buf.bio_cmd = BIO_WRITE;
1374 	buf.bio_data = virtual;
1375 	buf.bio_bcount = length;
1376 
1377 	return (vtblk_poll_request(sc, req));
1378 }
1379 
1380 static int
1381 vtblk_dump_flush(struct vtblk_softc *sc)
1382 {
1383 	struct bio buf;
1384 	struct vtblk_request *req;
1385 
1386 	req = &sc->vtblk_dump_request;
1387 	req->vbr_ack = -1;
1388 	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
1389 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1390 	req->vbr_hdr.sector = 0;
1391 
1392 	req->vbr_bp = &buf;
1393 	g_reset_bio(&buf);
1394 
1395 	buf.bio_cmd = BIO_FLUSH;
1396 
1397 	return (vtblk_poll_request(sc, req));
1398 }
1399 
1400 static void
1401 vtblk_dump_complete(struct vtblk_softc *sc)
1402 {
1403 
1404 	vtblk_dump_flush(sc);
1405 
1406 	VTBLK_UNLOCK(sc);
1407 	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
1408 	VTBLK_LOCK(sc);
1409 }
1410 
1411 static void
1412 vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
1413 {
1414 
1415 	/* Set either writeback (1) or writethrough (0) mode. */
1416 	virtio_write_dev_config_1(sc->vtblk_dev,
1417 	    offsetof(struct virtio_blk_config, wce), wc);
1418 }
1419 
1420 static int
1421 vtblk_write_cache_enabled(struct vtblk_softc *sc,
1422     struct virtio_blk_config *blkcfg)
1423 {
1424 	int wc;
1425 
1426 	if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
1427 		wc = vtblk_tunable_int(sc, "writecache_mode",
1428 		    vtblk_writecache_mode);
1429 		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1430 			vtblk_set_write_cache(sc, wc);
1431 		else
1432 			wc = blkcfg->wce;
1433 	} else
1434 		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);
1435 
1436 	return (wc);
1437 }
1438 
1439 static int
1440 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
1441 {
1442 	struct vtblk_softc *sc;
1443 	int wc, error;
1444 
1445 	sc = oidp->oid_arg1;
1446 	wc = sc->vtblk_write_cache;
1447 
1448 	error = sysctl_handle_int(oidp, &wc, 0, req);
1449 	if (error || req->newptr == NULL)
1450 		return (error);
1451 	if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
1452 		return (EPERM);
1453 	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
1454 		return (EINVAL);
1455 
1456 	VTBLK_LOCK(sc);
1457 	sc->vtblk_write_cache = wc;
1458 	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
1459 	VTBLK_UNLOCK(sc);
1460 
1461 	return (0);
1462 }
1463 
1464 static void
1465 vtblk_setup_sysctl(struct vtblk_softc *sc)
1466 {
1467 	device_t dev;
1468 	struct sysctl_ctx_list *ctx;
1469 	struct sysctl_oid *tree;
1470 	struct sysctl_oid_list *child;
1471 
1472 	dev = sc->vtblk_dev;
1473 	ctx = device_get_sysctl_ctx(dev);
1474 	tree = device_get_sysctl_tree(dev);
1475 	child = SYSCTL_CHILDREN(tree);
1476 
1477 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
1478 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1479 	    vtblk_write_cache_sysctl, "I",
1480 	    "Write cache mode (writethrough (0) or writeback (1))");
1481 }
1482 
1483 static int
1484 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1485 {
1486 	char path[64];
1487 
1488 	snprintf(path, sizeof(path),
1489 	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1490 	TUNABLE_INT_FETCH(path, &def);
1491 
1492 	return (def);
1493 }
1494