1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /* Driver for VirtIO block devices. */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bio.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/msan.h>
38 #include <sys/sglist.h>
39 #include <sys/sysctl.h>
40 #include <sys/lock.h>
41 #include <sys/mutex.h>
42 #include <sys/queue.h>
43
44 #include <geom/geom.h>
45 #include <geom/geom_disk.h>
46
47 #include <machine/bus.h>
48 #include <machine/resource.h>
49 #include <sys/bus.h>
50 #include <sys/rman.h>
51
52 #include <dev/virtio/virtio.h>
53 #include <dev/virtio/virtqueue.h>
54 #include <dev/virtio/block/virtio_blk.h>
55
56 #include "virtio_if.h"
57
/*
 * Driver-side state for one block request.  Requests are preallocated by
 * vtblk_request_prealloc() and recycled through the softc's free list.
 *
 * NOTE: field order matters.  vtblk_request_dequeue() zeroes everything
 * from vbr_hdr to the end of the structure, so fields that must survive
 * reuse (vbr_sc, vbr_mapp) have to stay in front of vbr_hdr.
 */
struct vtblk_request {
	struct vtblk_softc		*vbr_sc;	/* owning device */
	bus_dmamap_t			 vbr_mapp;	/* busdma map; NULL bypasses busdma */

	/* Fields after this point are zeroed for each request. */
	struct virtio_blk_outhdr	 vbr_hdr;	/* request header sent to the host */
	struct bio			*vbr_bp;	/* bio being serviced */
	uint8_t				 vbr_ack;	/* status byte written by the host */
	uint8_t				 vbr_requeue_on_error;
	uint8_t				 vbr_busdma_wait; /* set when the busdma load was deferred */
	int				 vbr_error;	/* reported by vtblk_request_execute() */
	TAILQ_ENTRY(vtblk_request)	 vbr_link;	/* free/ready list linkage */
};
71
/*
 * Write cache modes tracked in vtblk_softc.vtblk_write_cache.
 * VTBLK_CACHE_MAX is the count of real modes, not a mode itself.
 */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};
77
/*
 * Per-device driver state.  This is the newbus softc: its size is handed
 * to the bus through vtblk_driver and it is fetched with device_get_softc().
 */
struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;		/* see VTBLK_LOCK() */
	uint64_t		 vtblk_features;	/* result of feature negotiation */
	uint32_t		 vtblk_flags;		/* VTBLK_FLAG_* bits below */
#define VTBLK_FLAG_INDIRECT	0x0001	/* VIRTIO_RING_F_INDIRECT_DESC negotiated */
#define VTBLK_FLAG_DETACH	0x0002	/* set by vtblk_detach(); new I/O gets ENXIO */
#define VTBLK_FLAG_SUSPEND	0x0004	/* set while suspended */
#define VTBLK_FLAG_BARRIER	0x0008	/* VIRTIO_BLK_F_BARRIER negotiated */
#define VTBLK_FLAG_WCE_CONFIG	0x0010	/* VIRTIO_BLK_F_CONFIG_WCE negotiated */
#define VTBLK_FLAG_BUSDMA_WAIT	0x0020	/* a busdma load returned EINPROGRESS */
#define VTBLK_FLAG_BUSDMA_ALIGN	0x0040	/* single data segment: page-align DMA */

	struct virtqueue	*vtblk_vq;		/* the single request virtqueue */
	struct sglist		*vtblk_sglist;		/* scratch list, vtblk_max_nsegs entries */
	bus_dma_tag_t		 vtblk_dmat;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;		/* bios queued by vtblk_strategy() */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;	/* idle preallocated requests */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;	/* built requests awaiting (re)submission */
	struct vtblk_request	*vtblk_req_ordered;	/* non-NULL holds off new requests
							   when barriers are emulated */

	int			 vtblk_max_nsegs;	/* header + data + status segments */
	int			 vtblk_request_count;	/* number of preallocated requests */
	enum vtblk_cache_mode	 vtblk_write_cache;

	/* NOTE(review): presumably only used by the kernel dump path - confirm. */
	struct bio_queue	 vtblk_dump_queue;
	struct vtblk_request	 vtblk_dump_request;
};
110
/*
 * Feature bit to display name mapping, handed to the VirtIO bus with
 * virtio_set_feature_desc() in vtblk_attach().  The { 0, NULL } entry
 * terminates the table.
 */
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},

	{ 0, NULL }
};
128
/* Module event handler. */
static int	vtblk_modevent(module_t, int, void *);

/* Device and VirtIO bus methods (wired up in vtblk_methods). */
static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);
static int	vtblk_attach_completed(device_t);
static int	vtblk_config_change(device_t);

/* Disk callbacks (installed on the struct disk by vtblk_alloc_disk()). */
static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, off_t, size_t);
static void	vtblk_strategy(struct bio *);

/* Attach-time setup helpers. */
static int	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_setup_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

/* Request allocation, queueing and submission. */
static int	vtblk_request_prealloc(struct vtblk_softc *);
static void	vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_dequeue(struct vtblk_softc *);
static void	vtblk_request_enqueue(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next_ready(struct vtblk_softc *);
static void	vtblk_request_requeue_ready(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_bio(struct vtblk_softc *);
static int	vtblk_request_execute(struct vtblk_request *, int);
static void	vtblk_request_execute_cb(void *,
		    bus_dma_segment_t *, int, int);
static int	vtblk_request_error(struct vtblk_request *);

/* Completion handling and draining. */
static void	vtblk_queue_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_done_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_drain_vq(struct vtblk_softc *);
static void	vtblk_drain(struct vtblk_softc *);

/* I/O start and bio completion. */
static void	vtblk_startio(struct vtblk_softc *);
static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

/* Device configuration, identification, polling and interrupt. */
static void	vtblk_read_config(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_ident(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_vq_intr(void *);
static void	vtblk_stop(struct vtblk_softc *);

/* Helpers used by vtblk_dump(). */
static void	vtblk_dump_quiesce(struct vtblk_softc *);
static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_dump_flush(struct vtblk_softc *);
static void	vtblk_dump_complete(struct vtblk_softc *);

/* Write cache control. */
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

/* Sysctl and tunable plumbing. */
static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
207
/*
 * Byte-order helpers.  vtblk_modern() is true when VIRTIO_F_VERSION_1 is
 * among the negotiated features; the htog/gtoh wrappers pass that answer
 * to the generic virtio_htogNN()/virtio_gtohNN() conversion routines.
 * The gtoh variants are what this driver uses when filling in fields the
 * device will read (e.g. the request header in vtblk_request_bio()).
 */
#define vtblk_modern(_sc)	(((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
#define vtblk_htog16(_sc, _val)	virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val)	virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val)	virtio_htog64(vtblk_modern(_sc), _val)
#define vtblk_gtoh16(_sc, _val)	virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val)	virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val)	virtio_gtoh64(vtblk_modern(_sc), _val)

/*
 * Tunables.
 * NOTE(review): the consumers of these two variables are not in this part
 * of the file; from the names, no_ident presumably suppresses the ident
 * query and writecache_mode (-1 by default) presumably selects a
 * vtblk_cache_mode override - confirm against vtblk_ident() and
 * vtblk_write_cache_enabled().
 */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
221
/*
 * Feature bits the driver offers during negotiation.  The common set is
 * offered to both transports; legacy devices are additionally offered
 * the barrier feature (see vtblk_negotiate_features()).
 */
#define VTBLK_COMMON_FEATURES \
	(VIRTIO_BLK_F_SIZE_MAX		| \
	 VIRTIO_BLK_F_SEG_MAX		| \
	 VIRTIO_BLK_F_GEOMETRY		| \
	 VIRTIO_BLK_F_RO		| \
	 VIRTIO_BLK_F_BLK_SIZE		| \
	 VIRTIO_BLK_F_FLUSH		| \
	 VIRTIO_BLK_F_TOPOLOGY		| \
	 VIRTIO_BLK_F_CONFIG_WCE	| \
	 VIRTIO_BLK_F_DISCARD		| \
	 VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES	(VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES	(VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)

/*
 * Softc mutex helpers.
 *
 * VTBLK_MTX() yields a pointer to the softc mutex.  The expansion is
 * fully parenthesized so that it behaves as a single primary expression
 * wherever it is used; without the outer parentheses a postfix operator
 * applied to the macro would bind to the member instead of the address.
 */
#define VTBLK_MTX(_sc)		(&(_sc)->vtblk_mtx)
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

/* Name prefix given to the disk created for each device. */
#define VTBLK_DISK_NAME		"vtbd"
/* Quiesce timeout: 30 * hz ticks. */
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
/* Unit, in bytes, of device capacity and request sector numbers. */
#define VTBLK_BSIZE		512

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
257
/* Method table binding the newbus and VirtIO bus entry points. */
static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};

/* Driver description: name, method table and per-device softc size. */
static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};

/*
 * Register the driver module (version 1, depends on the "virtio" module
 * at version 1) with vtblk_modevent() as its event handler, and publish
 * PNP info matching VIRTIO_ID_BLOCK.
 */
VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");
285
286 static int
vtblk_modevent(module_t mod,int type,void * unused)287 vtblk_modevent(module_t mod, int type, void *unused)
288 {
289 int error;
290
291 error = 0;
292
293 switch (type) {
294 case MOD_LOAD:
295 case MOD_QUIESCE:
296 case MOD_UNLOAD:
297 case MOD_SHUTDOWN:
298 break;
299 default:
300 error = EOPNOTSUPP;
301 break;
302 }
303
304 return (error);
305 }
306
307 static int
vtblk_probe(device_t dev)308 vtblk_probe(device_t dev)
309 {
310 return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
311 }
312
313 static int
vtblk_attach(device_t dev)314 vtblk_attach(device_t dev)
315 {
316 struct vtblk_softc *sc;
317 struct virtio_blk_config blkcfg;
318 int error;
319
320 sc = device_get_softc(dev);
321 sc->vtblk_dev = dev;
322 virtio_set_feature_desc(dev, vtblk_feature_desc);
323
324 VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
325 bioq_init(&sc->vtblk_bioq);
326 TAILQ_INIT(&sc->vtblk_dump_queue);
327 TAILQ_INIT(&sc->vtblk_req_free);
328 TAILQ_INIT(&sc->vtblk_req_ready);
329
330 vtblk_setup_sysctl(sc);
331
332 error = vtblk_setup_features(sc);
333 if (error) {
334 device_printf(dev, "cannot setup features\n");
335 goto fail;
336 }
337
338 vtblk_read_config(sc, &blkcfg);
339
340 /*
341 * With the current sglist(9) implementation, it is not easy
342 * for us to support a maximum segment size as adjacent
343 * segments are coalesced. For now, just make sure it's larger
344 * than the maximum supported transfer size.
345 */
346 if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
347 if (blkcfg.size_max < maxphys) {
348 error = ENOTSUP;
349 device_printf(dev, "host requires unsupported "
350 "maximum segment size feature\n");
351 goto fail;
352 }
353 }
354
355 sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
356 if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
357 error = EINVAL;
358 device_printf(dev, "fewer than minimum number of segments "
359 "allowed: %d\n", sc->vtblk_max_nsegs);
360 goto fail;
361 }
362
363 sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
364 if (sc->vtblk_sglist == NULL) {
365 error = ENOMEM;
366 device_printf(dev, "cannot allocate sglist\n");
367 goto fail;
368 }
369
370 /*
371 * If vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1, the device only
372 * supports a single data segment; in that case we need busdma to
373 * align to a page boundary so we can send a *contiguous* page size
374 * request to the host.
375 */
376 if (sc->vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1)
377 sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_ALIGN;
378 error = bus_dma_tag_create(
379 bus_get_dma_tag(dev), /* parent */
380 (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE : 1,
381 0, /* boundary */
382 BUS_SPACE_MAXADDR, /* lowaddr */
383 BUS_SPACE_MAXADDR, /* highaddr */
384 NULL, NULL, /* filter, filterarg */
385 maxphys, /* max request size */
386 sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS, /* max # segments */
387 maxphys, /* maxsegsize */
388 0, /* flags */
389 busdma_lock_mutex, /* lockfunc */
390 &sc->vtblk_mtx, /* lockarg */
391 &sc->vtblk_dmat);
392 if (error) {
393 device_printf(dev, "cannot create bus dma tag\n");
394 goto fail;
395 }
396
397 #ifdef __powerpc__
398 /*
399 * Virtio uses physical addresses rather than bus addresses, so we
400 * need to ask busdma to skip the iommu physical->bus mapping. At
401 * present, this is only a thing on the powerpc architectures.
402 */
403 bus_dma_tag_set_iommu(sc->vtblk_dmat, NULL, NULL);
404 #endif
405
406 error = vtblk_alloc_virtqueue(sc);
407 if (error) {
408 device_printf(dev, "cannot allocate virtqueue\n");
409 goto fail;
410 }
411
412 error = vtblk_request_prealloc(sc);
413 if (error) {
414 device_printf(dev, "cannot preallocate requests\n");
415 goto fail;
416 }
417
418 vtblk_alloc_disk(sc, &blkcfg);
419
420 error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
421 if (error) {
422 device_printf(dev, "cannot setup virtqueue interrupt\n");
423 goto fail;
424 }
425
426 virtqueue_enable_intr(sc->vtblk_vq);
427
428 fail:
429 if (error)
430 vtblk_detach(dev);
431
432 return (error);
433 }
434
435 static int
vtblk_detach(device_t dev)436 vtblk_detach(device_t dev)
437 {
438 struct vtblk_softc *sc;
439
440 sc = device_get_softc(dev);
441
442 VTBLK_LOCK(sc);
443 sc->vtblk_flags |= VTBLK_FLAG_DETACH;
444 if (device_is_attached(dev))
445 vtblk_stop(sc);
446 VTBLK_UNLOCK(sc);
447
448 vtblk_drain(sc);
449
450 if (sc->vtblk_disk != NULL) {
451 disk_destroy(sc->vtblk_disk);
452 sc->vtblk_disk = NULL;
453 }
454
455 if (sc->vtblk_dmat != NULL) {
456 bus_dma_tag_destroy(sc->vtblk_dmat);
457 sc->vtblk_dmat = NULL;
458 }
459
460 if (sc->vtblk_sglist != NULL) {
461 sglist_free(sc->vtblk_sglist);
462 sc->vtblk_sglist = NULL;
463 }
464
465 VTBLK_LOCK_DESTROY(sc);
466
467 return (0);
468 }
469
470 static int
vtblk_suspend(device_t dev)471 vtblk_suspend(device_t dev)
472 {
473 struct vtblk_softc *sc;
474 int error;
475
476 sc = device_get_softc(dev);
477
478 VTBLK_LOCK(sc);
479 sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
480 /* XXX BMV: virtio_stop(), etc needed here? */
481 error = vtblk_quiesce(sc);
482 if (error)
483 sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
484 VTBLK_UNLOCK(sc);
485
486 return (error);
487 }
488
489 static int
vtblk_resume(device_t dev)490 vtblk_resume(device_t dev)
491 {
492 struct vtblk_softc *sc;
493
494 sc = device_get_softc(dev);
495
496 VTBLK_LOCK(sc);
497 /* XXX BMV: virtio_reinit(), etc needed here? */
498 sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
499 vtblk_startio(sc);
500 VTBLK_UNLOCK(sc);
501
502 return (0);
503 }
504
505 static int
vtblk_shutdown(device_t dev)506 vtblk_shutdown(device_t dev)
507 {
508
509 return (0);
510 }
511
512 static int
vtblk_attach_completed(device_t dev)513 vtblk_attach_completed(device_t dev)
514 {
515 struct vtblk_softc *sc;
516
517 sc = device_get_softc(dev);
518
519 /*
520 * Create disk after attach as VIRTIO_BLK_T_GET_ID can only be
521 * processed after the device acknowledged
522 * VIRTIO_CONFIG_STATUS_DRIVER_OK.
523 */
524 vtblk_create_disk(sc);
525 return (0);
526 }
527
528 static int
vtblk_config_change(device_t dev)529 vtblk_config_change(device_t dev)
530 {
531 struct vtblk_softc *sc;
532 struct virtio_blk_config blkcfg;
533 uint64_t capacity;
534
535 sc = device_get_softc(dev);
536
537 vtblk_read_config(sc, &blkcfg);
538
539 /* Capacity is always in 512-byte units. */
540 capacity = blkcfg.capacity * VTBLK_BSIZE;
541
542 if (sc->vtblk_disk->d_mediasize != capacity)
543 vtblk_resize_disk(sc, capacity);
544
545 return (0);
546 }
547
548 static int
vtblk_open(struct disk * dp)549 vtblk_open(struct disk *dp)
550 {
551 struct vtblk_softc *sc;
552
553 if ((sc = dp->d_drv1) == NULL)
554 return (ENXIO);
555
556 return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
557 }
558
559 static int
vtblk_close(struct disk * dp)560 vtblk_close(struct disk *dp)
561 {
562 struct vtblk_softc *sc;
563
564 if ((sc = dp->d_drv1) == NULL)
565 return (ENXIO);
566
567 return (0);
568 }
569
570 static int
vtblk_ioctl(struct disk * dp,u_long cmd,void * addr,int flag,struct thread * td)571 vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
572 struct thread *td)
573 {
574 struct vtblk_softc *sc;
575
576 if ((sc = dp->d_drv1) == NULL)
577 return (ENXIO);
578
579 return (ENOTTY);
580 }
581
582 static int
vtblk_dump(void * arg,void * virtual,off_t offset,size_t length)583 vtblk_dump(void *arg, void *virtual, off_t offset, size_t length)
584 {
585 struct disk *dp;
586 struct vtblk_softc *sc;
587 int error;
588
589 dp = arg;
590 error = 0;
591
592 if ((sc = dp->d_drv1) == NULL)
593 return (ENXIO);
594
595 VTBLK_LOCK(sc);
596
597 vtblk_dump_quiesce(sc);
598
599 if (length > 0)
600 error = vtblk_dump_write(sc, virtual, offset, length);
601 if (error || (virtual == NULL && offset == 0))
602 vtblk_dump_complete(sc);
603
604 VTBLK_UNLOCK(sc);
605
606 return (error);
607 }
608
609 static void
vtblk_strategy(struct bio * bp)610 vtblk_strategy(struct bio *bp)
611 {
612 struct vtblk_softc *sc;
613
614 if ((sc = bp->bio_disk->d_drv1) == NULL) {
615 vtblk_bio_done(NULL, bp, EINVAL);
616 return;
617 }
618
619 if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
620 (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
621 vtblk_bio_done(sc, bp, EOPNOTSUPP);
622 return;
623 }
624
625 VTBLK_LOCK(sc);
626
627 if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
628 VTBLK_UNLOCK(sc);
629 vtblk_bio_done(sc, bp, ENXIO);
630 return;
631 }
632
633 bioq_insert_tail(&sc->vtblk_bioq, bp);
634 vtblk_startio(sc);
635
636 VTBLK_UNLOCK(sc);
637 }
638
639 static int
vtblk_negotiate_features(struct vtblk_softc * sc)640 vtblk_negotiate_features(struct vtblk_softc *sc)
641 {
642 device_t dev;
643 uint64_t features;
644
645 dev = sc->vtblk_dev;
646 features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
647 VTBLK_LEGACY_FEATURES;
648
649 sc->vtblk_features = virtio_negotiate_features(dev, features);
650 return (virtio_finalize_features(dev));
651 }
652
653 static int
vtblk_setup_features(struct vtblk_softc * sc)654 vtblk_setup_features(struct vtblk_softc *sc)
655 {
656 device_t dev;
657 int error;
658
659 dev = sc->vtblk_dev;
660
661 error = vtblk_negotiate_features(sc);
662 if (error)
663 return (error);
664
665 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
666 sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
667 if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
668 sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;
669
670 /* Legacy. */
671 if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
672 sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
673
674 return (0);
675 }
676
677 static int
vtblk_maximum_segments(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)678 vtblk_maximum_segments(struct vtblk_softc *sc,
679 struct virtio_blk_config *blkcfg)
680 {
681 device_t dev;
682 int nsegs;
683
684 dev = sc->vtblk_dev;
685 nsegs = VTBLK_MIN_SEGMENTS;
686
687 if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
688 nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
689 if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
690 nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
691 } else
692 nsegs += 1;
693
694 return (nsegs);
695 }
696
697 static int
vtblk_alloc_virtqueue(struct vtblk_softc * sc)698 vtblk_alloc_virtqueue(struct vtblk_softc *sc)
699 {
700 device_t dev;
701 struct vq_alloc_info vq_info;
702
703 dev = sc->vtblk_dev;
704
705 VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
706 vtblk_vq_intr, sc, &sc->vtblk_vq,
707 "%s request", device_get_nameunit(dev));
708
709 return (virtio_alloc_virtqueues(dev, 1, &vq_info));
710 }
711
712 static void
vtblk_resize_disk(struct vtblk_softc * sc,uint64_t new_capacity)713 vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
714 {
715 device_t dev;
716 struct disk *dp;
717 int error;
718
719 dev = sc->vtblk_dev;
720 dp = sc->vtblk_disk;
721
722 dp->d_mediasize = new_capacity;
723 if (bootverbose) {
724 device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
725 (uintmax_t) dp->d_mediasize >> 20,
726 (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
727 dp->d_sectorsize);
728 }
729
730 error = disk_resize(dp, M_NOWAIT);
731 if (error) {
732 device_printf(dev,
733 "disk_resize(9) failed, error: %d\n", error);
734 }
735 }
736
737 static void
vtblk_alloc_disk(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)738 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
739 {
740 device_t dev;
741 struct disk *dp;
742
743 dev = sc->vtblk_dev;
744
745 sc->vtblk_disk = dp = disk_alloc();
746 dp->d_open = vtblk_open;
747 dp->d_close = vtblk_close;
748 dp->d_ioctl = vtblk_ioctl;
749 dp->d_strategy = vtblk_strategy;
750 dp->d_name = VTBLK_DISK_NAME;
751 dp->d_unit = device_get_unit(dev);
752 dp->d_drv1 = sc;
753 dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
754 dp->d_hba_vendor = virtio_get_vendor(dev);
755 dp->d_hba_device = virtio_get_device(dev);
756 dp->d_hba_subvendor = virtio_get_subvendor(dev);
757 dp->d_hba_subdevice = virtio_get_subdevice(dev);
758
759 if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
760 dp->d_flags |= DISKFLAG_WRITE_PROTECT;
761 else {
762 if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
763 dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
764 dp->d_dump = vtblk_dump;
765 }
766
767 /* Capacity is always in 512-byte units. */
768 dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
769
770 if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
771 dp->d_sectorsize = blkcfg->blk_size;
772 else
773 dp->d_sectorsize = VTBLK_BSIZE;
774
775 /*
776 * The VirtIO maximum I/O size is given in terms of segments.
777 * However, FreeBSD limits I/O size by logical buffer size, not
778 * by physically contiguous pages. Therefore, we have to assume
779 * no pages are contiguous. This may impose an artificially low
780 * maximum I/O size. But in practice, since QEMU advertises 128
781 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
782 * which is typically greater than maxphys. Eventually we should
783 * just advertise maxphys and split buffers that are too big.
784 *
785 * If we're not asking busdma to align data to page boundaries, the
786 * maximum I/O size is reduced by PAGE_SIZE in order to accommodate
787 * unaligned I/Os.
788 */
789 dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS) *
790 PAGE_SIZE;
791 if ((sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) == 0)
792 dp->d_maxsize -= PAGE_SIZE;
793
794 if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
795 dp->d_fwsectors = blkcfg->geometry.sectors;
796 dp->d_fwheads = blkcfg->geometry.heads;
797 }
798
799 if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
800 blkcfg->topology.physical_block_exp > 0) {
801 dp->d_stripesize = dp->d_sectorsize *
802 (1 << blkcfg->topology.physical_block_exp);
803 dp->d_stripeoffset = (dp->d_stripesize -
804 blkcfg->topology.alignment_offset * dp->d_sectorsize) %
805 dp->d_stripesize;
806 }
807
808 if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
809 dp->d_flags |= DISKFLAG_CANDELETE;
810 dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
811 }
812
813 if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
814 sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
815 else
816 sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
817 }
818
819 static void
vtblk_create_disk(struct vtblk_softc * sc)820 vtblk_create_disk(struct vtblk_softc *sc)
821 {
822 struct disk *dp;
823
824 dp = sc->vtblk_disk;
825
826 vtblk_ident(sc);
827
828 device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
829 (uintmax_t) dp->d_mediasize >> 20,
830 (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
831 dp->d_sectorsize);
832
833 disk_create(dp, DISK_VERSION);
834 }
835
836 static int
vtblk_request_prealloc(struct vtblk_softc * sc)837 vtblk_request_prealloc(struct vtblk_softc *sc)
838 {
839 struct vtblk_request *req;
840 int i, nreqs;
841
842 nreqs = virtqueue_size(sc->vtblk_vq);
843
844 /*
845 * Preallocate sufficient requests to keep the virtqueue full. Each
846 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
847 * the number allocated when indirect descriptors are not available.
848 */
849 if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
850 nreqs /= VTBLK_MIN_SEGMENTS;
851
852 for (i = 0; i < nreqs; i++) {
853 req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
854 if (req == NULL)
855 return (ENOMEM);
856
857 req->vbr_sc = sc;
858 if (bus_dmamap_create(sc->vtblk_dmat, 0, &req->vbr_mapp)) {
859 free(req, M_DEVBUF);
860 return (ENOMEM);
861 }
862
863 MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
864 MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
865
866 sc->vtblk_request_count++;
867 vtblk_request_enqueue(sc, req);
868 }
869
870 return (0);
871 }
872
873 static void
vtblk_request_free(struct vtblk_softc * sc)874 vtblk_request_free(struct vtblk_softc *sc)
875 {
876 struct vtblk_request *req;
877
878 MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
879
880 while ((req = vtblk_request_dequeue(sc)) != NULL) {
881 sc->vtblk_request_count--;
882 bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
883 free(req, M_DEVBUF);
884 }
885
886 KASSERT(sc->vtblk_request_count == 0,
887 ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
888 }
889
890 static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc * sc)891 vtblk_request_dequeue(struct vtblk_softc *sc)
892 {
893 struct vtblk_request *req;
894
895 req = TAILQ_FIRST(&sc->vtblk_req_free);
896 if (req != NULL) {
897 TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
898 bzero(&req->vbr_hdr, sizeof(struct vtblk_request) -
899 offsetof(struct vtblk_request, vbr_hdr));
900 }
901
902 return (req);
903 }
904
905 static void
vtblk_request_enqueue(struct vtblk_softc * sc,struct vtblk_request * req)906 vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
907 {
908
909 TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
910 }
911
912 static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc * sc)913 vtblk_request_next_ready(struct vtblk_softc *sc)
914 {
915 struct vtblk_request *req;
916
917 req = TAILQ_FIRST(&sc->vtblk_req_ready);
918 if (req != NULL)
919 TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
920
921 return (req);
922 }
923
924 static void
vtblk_request_requeue_ready(struct vtblk_softc * sc,struct vtblk_request * req)925 vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
926 {
927
928 /* NOTE: Currently, there will be at most one request in the queue. */
929 TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
930 }
931
932 static struct vtblk_request *
vtblk_request_next(struct vtblk_softc * sc)933 vtblk_request_next(struct vtblk_softc *sc)
934 {
935 struct vtblk_request *req;
936
937 req = vtblk_request_next_ready(sc);
938 if (req != NULL)
939 return (req);
940
941 return (vtblk_request_bio(sc));
942 }
943
944 static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc * sc)945 vtblk_request_bio(struct vtblk_softc *sc)
946 {
947 struct bio_queue_head *bioq;
948 struct vtblk_request *req;
949 struct bio *bp;
950
951 bioq = &sc->vtblk_bioq;
952
953 if (bioq_first(bioq) == NULL)
954 return (NULL);
955
956 req = vtblk_request_dequeue(sc);
957 if (req == NULL)
958 return (NULL);
959
960 bp = bioq_takefirst(bioq);
961 req->vbr_bp = bp;
962 req->vbr_ack = -1;
963 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
964
965 switch (bp->bio_cmd) {
966 case BIO_FLUSH:
967 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
968 req->vbr_hdr.sector = 0;
969 break;
970 case BIO_READ:
971 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
972 req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
973 break;
974 case BIO_WRITE:
975 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
976 req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
977 break;
978 case BIO_DELETE:
979 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
980 req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
981 break;
982 default:
983 panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
984 }
985
986 if (bp->bio_flags & BIO_ORDERED)
987 req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);
988
989 return (req);
990 }
991
992 static int
vtblk_request_execute(struct vtblk_request * req,int flags)993 vtblk_request_execute(struct vtblk_request *req, int flags)
994 {
995 struct vtblk_softc *sc = req->vbr_sc;
996 struct bio *bp = req->vbr_bp;
997 int error = 0;
998
999 /*
1000 * Call via bus_dmamap_load_bio or directly depending on whether we
1001 * have a buffer we need to map. If we don't have a busdma map,
1002 * try to perform the I/O directly and hope that it works (this will
1003 * happen when dumping).
1004 */
1005 if ((req->vbr_mapp != NULL) &&
1006 (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
1007 error = bus_dmamap_load_bio(sc->vtblk_dmat, req->vbr_mapp,
1008 req->vbr_bp, vtblk_request_execute_cb, req, flags);
1009 if (error == EINPROGRESS) {
1010 req->vbr_busdma_wait = 1;
1011 sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_WAIT;
1012 }
1013 } else {
1014 vtblk_request_execute_cb(req, NULL, 0, 0);
1015 }
1016
1017 return (error ? error : req->vbr_error);
1018 }
1019
1020 static void
vtblk_request_execute_cb(void * callback_arg,bus_dma_segment_t * segs,int nseg,int error)1021 vtblk_request_execute_cb(void * callback_arg, bus_dma_segment_t * segs,
1022 int nseg, int error)
1023 {
1024 struct vtblk_request *req;
1025 struct vtblk_softc *sc;
1026 struct virtqueue *vq;
1027 struct sglist *sg;
1028 struct bio *bp;
1029 int ordered, readable, writable, i;
1030
1031 req = (struct vtblk_request *)callback_arg;
1032 sc = req->vbr_sc;
1033 vq = sc->vtblk_vq;
1034 sg = sc->vtblk_sglist;
1035 bp = req->vbr_bp;
1036 ordered = 0;
1037 writable = 0;
1038
1039 /*
1040 * If we paused request queueing while we waited for busdma to call us
1041 * asynchronously, unpause it now; this request made it through so we
1042 * don't need to worry about others getting ahead of us. (Note that we
1043 * hold the device mutex so nothing will happen until after we return
1044 * anyway.)
1045 */
1046 if (req->vbr_busdma_wait)
1047 sc->vtblk_flags &= ~VTBLK_FLAG_BUSDMA_WAIT;
1048
1049 /* Fail on errors from busdma. */
1050 if (error)
1051 goto out1;
1052
1053 /*
1054 * Some hosts (such as bhyve) do not implement the barrier feature,
1055 * so we emulate it in the driver by allowing the barrier request
1056 * to be the only one in flight.
1057 */
1058 if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
1059 if (sc->vtblk_req_ordered != NULL) {
1060 error = EBUSY;
1061 goto out;
1062 }
1063 if (bp->bio_flags & BIO_ORDERED) {
1064 if (!virtqueue_empty(vq)) {
1065 error = EBUSY;
1066 goto out;
1067 }
1068 ordered = 1;
1069 req->vbr_hdr.type &= vtblk_gtoh32(sc,
1070 ~VIRTIO_BLK_T_BARRIER);
1071 }
1072 }
1073
1074 sglist_reset(sg);
1075 sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
1076
1077 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
1078 /*
1079 * We cast bus_addr_t to vm_paddr_t here; since we skip the
1080 * iommu mapping (see vtblk_attach) this should be safe.
1081 */
1082 for (i = 0; i < nseg; i++) {
1083 error = sglist_append_phys(sg,
1084 (vm_paddr_t)segs[i].ds_addr, segs[i].ds_len);
1085 if (error || sg->sg_nseg == sg->sg_maxseg) {
1086 panic("%s: bio %p data buffer too big %d",
1087 __func__, bp, error);
1088 }
1089 }
1090
1091 /* Special handling for dump, which bypasses busdma. */
1092 if (req->vbr_mapp == NULL) {
1093 error = sglist_append_bio(sg, bp);
1094 if (error || sg->sg_nseg == sg->sg_maxseg) {
1095 panic("%s: bio %p data buffer too big %d",
1096 __func__, bp, error);
1097 }
1098 }
1099
1100 /* BIO_READ means the host writes into our buffer. */
1101 if (bp->bio_cmd == BIO_READ)
1102 writable = sg->sg_nseg - 1;
1103 } else if (bp->bio_cmd == BIO_DELETE) {
1104 struct virtio_blk_discard_write_zeroes *discard;
1105
1106 discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
1107 if (discard == NULL) {
1108 error = ENOMEM;
1109 goto out;
1110 }
1111
1112 bp->bio_driver1 = discard;
1113 discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
1114 discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
1115 error = sglist_append(sg, discard, sizeof(*discard));
1116 if (error || sg->sg_nseg == sg->sg_maxseg) {
1117 panic("%s: bio %p data buffer too big %d",
1118 __func__, bp, error);
1119 }
1120 }
1121
1122 writable++;
1123 sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
1124 readable = sg->sg_nseg - writable;
1125
1126 if (req->vbr_mapp != NULL) {
1127 switch (bp->bio_cmd) {
1128 case BIO_READ:
1129 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1130 BUS_DMASYNC_PREREAD);
1131 break;
1132 case BIO_WRITE:
1133 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1134 BUS_DMASYNC_PREWRITE);
1135 break;
1136 }
1137 }
1138
1139 error = virtqueue_enqueue(vq, req, sg, readable, writable);
1140 if (error == 0 && ordered)
1141 sc->vtblk_req_ordered = req;
1142
1143 /*
1144 * If we were called asynchronously, we need to notify the queue that
1145 * we've added a new request, since the notification from startio was
1146 * performed already.
1147 */
1148 if (error == 0 && req->vbr_busdma_wait)
1149 virtqueue_notify(vq);
1150
1151 out:
1152 if (error && (req->vbr_mapp != NULL))
1153 bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1154 out1:
1155 if (error && req->vbr_requeue_on_error)
1156 vtblk_request_requeue_ready(sc, req);
1157 req->vbr_error = error;
1158 }
1159
1160 static int
vtblk_request_error(struct vtblk_request * req)1161 vtblk_request_error(struct vtblk_request *req)
1162 {
1163 int error;
1164
1165 switch (req->vbr_ack) {
1166 case VIRTIO_BLK_S_OK:
1167 error = 0;
1168 break;
1169 case VIRTIO_BLK_S_UNSUPP:
1170 error = ENOTSUP;
1171 break;
1172 default:
1173 error = EIO;
1174 break;
1175 }
1176
1177 return (error);
1178 }
1179
1180 static struct bio *
vtblk_queue_complete_one(struct vtblk_softc * sc,struct vtblk_request * req)1181 vtblk_queue_complete_one(struct vtblk_softc *sc, struct vtblk_request *req)
1182 {
1183 struct bio *bp;
1184
1185 if (sc->vtblk_req_ordered != NULL) {
1186 MPASS(sc->vtblk_req_ordered == req);
1187 sc->vtblk_req_ordered = NULL;
1188 }
1189
1190 bp = req->vbr_bp;
1191 if (req->vbr_mapp != NULL) {
1192 switch (bp->bio_cmd) {
1193 case BIO_READ:
1194 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1195 BUS_DMASYNC_POSTREAD);
1196 bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1197 break;
1198 case BIO_WRITE:
1199 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1200 BUS_DMASYNC_POSTWRITE);
1201 bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1202 break;
1203 }
1204 }
1205 bp->bio_error = vtblk_request_error(req);
1206 return (bp);
1207 }
1208
1209 static void
vtblk_queue_completed(struct vtblk_softc * sc,struct bio_queue * queue)1210 vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1211 {
1212 struct vtblk_request *req;
1213 struct bio *bp;
1214
1215 while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
1216 bp = vtblk_queue_complete_one(sc, req);
1217
1218 TAILQ_INSERT_TAIL(queue, bp, bio_queue);
1219 vtblk_request_enqueue(sc, req);
1220 }
1221 }
1222
1223 static void
vtblk_done_completed(struct vtblk_softc * sc,struct bio_queue * queue)1224 vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1225 {
1226 struct bio *bp, *tmp;
1227
1228 TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
1229 if (bp->bio_error != 0)
1230 disk_err(bp, "hard error", -1, 1);
1231 vtblk_bio_done(sc, bp, bp->bio_error);
1232 }
1233 }
1234
1235 static void
vtblk_drain_vq(struct vtblk_softc * sc)1236 vtblk_drain_vq(struct vtblk_softc *sc)
1237 {
1238 struct virtqueue *vq;
1239 struct vtblk_request *req;
1240 int last;
1241
1242 vq = sc->vtblk_vq;
1243 last = 0;
1244
1245 while ((req = virtqueue_drain(vq, &last)) != NULL) {
1246 vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1247 vtblk_request_enqueue(sc, req);
1248 }
1249
1250 sc->vtblk_req_ordered = NULL;
1251 KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
1252 }
1253
1254 static void
vtblk_drain(struct vtblk_softc * sc)1255 vtblk_drain(struct vtblk_softc *sc)
1256 {
1257 struct bio_queue_head *bioq;
1258 struct vtblk_request *req;
1259 struct bio *bp;
1260
1261 bioq = &sc->vtblk_bioq;
1262
1263 if (sc->vtblk_vq != NULL) {
1264 struct bio_queue queue;
1265
1266 TAILQ_INIT(&queue);
1267 vtblk_queue_completed(sc, &queue);
1268 vtblk_done_completed(sc, &queue);
1269
1270 vtblk_drain_vq(sc);
1271 }
1272
1273 while ((req = vtblk_request_next_ready(sc)) != NULL) {
1274 vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1275 vtblk_request_enqueue(sc, req);
1276 }
1277
1278 while (bioq_first(bioq) != NULL) {
1279 bp = bioq_takefirst(bioq);
1280 vtblk_bio_done(sc, bp, ENXIO);
1281 }
1282
1283 vtblk_request_free(sc);
1284 }
1285
1286 static void
vtblk_startio(struct vtblk_softc * sc)1287 vtblk_startio(struct vtblk_softc *sc)
1288 {
1289 struct virtqueue *vq;
1290 struct vtblk_request *req;
1291 int enq;
1292
1293 VTBLK_LOCK_ASSERT(sc);
1294 vq = sc->vtblk_vq;
1295 enq = 0;
1296
1297 if (sc->vtblk_flags & (VTBLK_FLAG_SUSPEND | VTBLK_FLAG_BUSDMA_WAIT))
1298 return;
1299
1300 while (!virtqueue_full(vq)) {
1301 req = vtblk_request_next(sc);
1302 if (req == NULL)
1303 break;
1304
1305 req->vbr_requeue_on_error = 1;
1306 if (vtblk_request_execute(req, BUS_DMA_WAITOK))
1307 break;
1308
1309 enq++;
1310 }
1311
1312 if (enq > 0)
1313 virtqueue_notify(vq);
1314 }
1315
1316 static void
vtblk_bio_done(struct vtblk_softc * sc,struct bio * bp,int error)1317 vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
1318 {
1319
1320 /* Because of GEOM direct dispatch, we cannot hold any locks. */
1321 if (sc != NULL)
1322 VTBLK_LOCK_ASSERT_NOTOWNED(sc);
1323
1324 if (error) {
1325 bp->bio_resid = bp->bio_bcount;
1326 bp->bio_error = error;
1327 bp->bio_flags |= BIO_ERROR;
1328 } else {
1329 kmsan_mark_bio(bp, KMSAN_STATE_INITED);
1330 }
1331
1332 if (bp->bio_driver1 != NULL) {
1333 free(bp->bio_driver1, M_DEVBUF);
1334 bp->bio_driver1 = NULL;
1335 }
1336
1337 biodone(bp);
1338 }
1339
1340 #define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg) \
1341 if (virtio_with_feature(_dev, _feature)) { \
1342 virtio_read_device_config(_dev, \
1343 offsetof(struct virtio_blk_config, _field), \
1344 &(_cfg)->_field, sizeof((_cfg)->_field)); \
1345 }
1346
1347 static void
vtblk_read_config(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)1348 vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
1349 {
1350 device_t dev;
1351
1352 dev = sc->vtblk_dev;
1353
1354 bzero(blkcfg, sizeof(struct virtio_blk_config));
1355
1356 /* The capacity is always available. */
1357 virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
1358 capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));
1359
1360 /* Read the configuration if the feature was negotiated. */
1361 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
1362 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
1363 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1364 geometry.cylinders, blkcfg);
1365 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1366 geometry.heads, blkcfg);
1367 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1368 geometry.sectors, blkcfg);
1369 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
1370 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1371 topology.physical_block_exp, blkcfg);
1372 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1373 topology.alignment_offset, blkcfg);
1374 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1375 topology.min_io_size, blkcfg);
1376 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1377 topology.opt_io_size, blkcfg);
1378 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
1379 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
1380 blkcfg);
1381 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
1382 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
1383 blkcfg);
1384 }
1385
1386 #undef VTBLK_GET_CONFIG
1387
1388 static void
vtblk_ident(struct vtblk_softc * sc)1389 vtblk_ident(struct vtblk_softc *sc)
1390 {
1391 struct bio buf;
1392 struct disk *dp;
1393 struct vtblk_request *req;
1394 int len, error;
1395
1396 dp = sc->vtblk_disk;
1397 len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);
1398
1399 if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
1400 return;
1401
1402 req = vtblk_request_dequeue(sc);
1403 if (req == NULL)
1404 return;
1405
1406 req->vbr_ack = -1;
1407 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
1408 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1409 req->vbr_hdr.sector = 0;
1410
1411 req->vbr_bp = &buf;
1412 g_reset_bio(&buf);
1413
1414 buf.bio_cmd = BIO_READ;
1415 buf.bio_data = dp->d_ident;
1416 buf.bio_bcount = len;
1417
1418 VTBLK_LOCK(sc);
1419 error = vtblk_poll_request(sc, req);
1420 VTBLK_UNLOCK(sc);
1421
1422 if (error) {
1423 device_printf(sc->vtblk_dev,
1424 "error getting device identifier: %d\n", error);
1425 }
1426 }
1427
1428 static int
vtblk_poll_request(struct vtblk_softc * sc,struct vtblk_request * req)1429 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
1430 {
1431 struct vtblk_request *req1 __diagused;
1432 struct virtqueue *vq;
1433 struct bio *bp;
1434 int error;
1435
1436 vq = sc->vtblk_vq;
1437
1438 if (!virtqueue_empty(vq))
1439 return (EBUSY);
1440
1441 error = vtblk_request_execute(req, BUS_DMA_NOWAIT);
1442 if (error)
1443 return (error);
1444
1445 virtqueue_notify(vq);
1446 req1 = virtqueue_poll(vq, NULL);
1447 KASSERT(req == req1,
1448 ("%s: polling completed %p not %p", __func__, req1, req));
1449
1450 bp = vtblk_queue_complete_one(sc, req);
1451 error = bp->bio_error;
1452 if (error && bootverbose) {
1453 device_printf(sc->vtblk_dev,
1454 "%s: IO error: %d\n", __func__, error);
1455 }
1456 if (req != &sc->vtblk_dump_request)
1457 vtblk_request_enqueue(sc, req);
1458
1459 return (error);
1460 }
1461
1462 static int
vtblk_quiesce(struct vtblk_softc * sc)1463 vtblk_quiesce(struct vtblk_softc *sc)
1464 {
1465 int error;
1466
1467 VTBLK_LOCK_ASSERT(sc);
1468 error = 0;
1469
1470 while (!virtqueue_empty(sc->vtblk_vq)) {
1471 if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1472 VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1473 error = EBUSY;
1474 break;
1475 }
1476 }
1477
1478 return (error);
1479 }
1480
1481 static void
vtblk_vq_intr(void * xsc)1482 vtblk_vq_intr(void *xsc)
1483 {
1484 struct vtblk_softc *sc;
1485 struct virtqueue *vq;
1486 struct bio_queue queue;
1487
1488 sc = xsc;
1489 vq = sc->vtblk_vq;
1490 TAILQ_INIT(&queue);
1491
1492 VTBLK_LOCK(sc);
1493
1494 again:
1495 if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
1496 goto out;
1497
1498 vtblk_queue_completed(sc, &queue);
1499 vtblk_startio(sc);
1500
1501 if (virtqueue_enable_intr(vq) != 0) {
1502 virtqueue_disable_intr(vq);
1503 goto again;
1504 }
1505
1506 if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1507 wakeup(&sc->vtblk_vq);
1508
1509 out:
1510 VTBLK_UNLOCK(sc);
1511 vtblk_done_completed(sc, &queue);
1512 }
1513
1514 static void
vtblk_stop(struct vtblk_softc * sc)1515 vtblk_stop(struct vtblk_softc *sc)
1516 {
1517
1518 virtqueue_disable_intr(sc->vtblk_vq);
1519 virtio_stop(sc->vtblk_dev);
1520 }
1521
1522 static void
vtblk_dump_quiesce(struct vtblk_softc * sc)1523 vtblk_dump_quiesce(struct vtblk_softc *sc)
1524 {
1525
1526 /*
1527 * Spin here until all the requests in-flight at the time of the
1528 * dump are completed and queued. The queued requests will be
1529 * biodone'd once the dump is finished.
1530 */
1531 while (!virtqueue_empty(sc->vtblk_vq))
1532 vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
1533 }
1534
1535 static int
vtblk_dump_write(struct vtblk_softc * sc,void * virtual,off_t offset,size_t length)1536 vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1537 size_t length)
1538 {
1539 struct bio buf;
1540 struct vtblk_request *req;
1541
1542 req = &sc->vtblk_dump_request;
1543 req->vbr_sc = sc;
1544 req->vbr_ack = -1;
1545 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
1546 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1547 req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);
1548
1549 req->vbr_bp = &buf;
1550 g_reset_bio(&buf);
1551
1552 buf.bio_cmd = BIO_WRITE;
1553 buf.bio_data = virtual;
1554 buf.bio_bcount = length;
1555
1556 return (vtblk_poll_request(sc, req));
1557 }
1558
1559 static int
vtblk_dump_flush(struct vtblk_softc * sc)1560 vtblk_dump_flush(struct vtblk_softc *sc)
1561 {
1562 struct bio buf;
1563 struct vtblk_request *req;
1564
1565 req = &sc->vtblk_dump_request;
1566 req->vbr_sc = sc;
1567 req->vbr_ack = -1;
1568 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
1569 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1570 req->vbr_hdr.sector = 0;
1571
1572 req->vbr_bp = &buf;
1573 g_reset_bio(&buf);
1574
1575 buf.bio_cmd = BIO_FLUSH;
1576
1577 return (vtblk_poll_request(sc, req));
1578 }
1579
1580 static void
vtblk_dump_complete(struct vtblk_softc * sc)1581 vtblk_dump_complete(struct vtblk_softc *sc)
1582 {
1583
1584 vtblk_dump_flush(sc);
1585
1586 VTBLK_UNLOCK(sc);
1587 vtblk_done_completed(sc, &sc->vtblk_dump_queue);
1588 VTBLK_LOCK(sc);
1589 }
1590
1591 static void
vtblk_set_write_cache(struct vtblk_softc * sc,int wc)1592 vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
1593 {
1594
1595 /* Set either writeback (1) or writethrough (0) mode. */
1596 virtio_write_dev_config_1(sc->vtblk_dev,
1597 offsetof(struct virtio_blk_config, wce), wc);
1598 }
1599
1600 static int
vtblk_write_cache_enabled(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)1601 vtblk_write_cache_enabled(struct vtblk_softc *sc,
1602 struct virtio_blk_config *blkcfg)
1603 {
1604 int wc;
1605
1606 if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
1607 wc = vtblk_tunable_int(sc, "writecache_mode",
1608 vtblk_writecache_mode);
1609 if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1610 vtblk_set_write_cache(sc, wc);
1611 else
1612 wc = blkcfg->wce;
1613 } else
1614 wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);
1615
1616 return (wc);
1617 }
1618
1619 static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)1620 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
1621 {
1622 struct vtblk_softc *sc;
1623 int wc, error;
1624
1625 sc = oidp->oid_arg1;
1626 wc = sc->vtblk_write_cache;
1627
1628 error = sysctl_handle_int(oidp, &wc, 0, req);
1629 if (error || req->newptr == NULL)
1630 return (error);
1631 if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
1632 return (EPERM);
1633 if (wc < 0 || wc >= VTBLK_CACHE_MAX)
1634 return (EINVAL);
1635
1636 VTBLK_LOCK(sc);
1637 sc->vtblk_write_cache = wc;
1638 vtblk_set_write_cache(sc, sc->vtblk_write_cache);
1639 VTBLK_UNLOCK(sc);
1640
1641 return (0);
1642 }
1643
1644 static void
vtblk_setup_sysctl(struct vtblk_softc * sc)1645 vtblk_setup_sysctl(struct vtblk_softc *sc)
1646 {
1647 device_t dev;
1648 struct sysctl_ctx_list *ctx;
1649 struct sysctl_oid *tree;
1650 struct sysctl_oid_list *child;
1651
1652 dev = sc->vtblk_dev;
1653 ctx = device_get_sysctl_ctx(dev);
1654 tree = device_get_sysctl_tree(dev);
1655 child = SYSCTL_CHILDREN(tree);
1656
1657 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
1658 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1659 vtblk_write_cache_sysctl, "I",
1660 "Write cache mode (writethrough (0) or writeback (1))");
1661 }
1662
1663 static int
vtblk_tunable_int(struct vtblk_softc * sc,const char * knob,int def)1664 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1665 {
1666 char path[64];
1667
1668 snprintf(path, sizeof(path),
1669 "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1670 TUNABLE_INT_FETCH(path, &def);
1671
1672 return (def);
1673 }
1674