/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $
 */

/* Driver for VirtIO block devices. */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"

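/*
 * A request carries the virtio block header, the one-byte status ("ack")
 * that the host writes back, and a pointer to the bio being serviced.
 * The header is 16-byte aligned so it can never straddle a page boundary
 * and therefore always maps to a single sglist segment.
 */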
struct vtblk_request {
	struct virtio_blk_outhdr	vbr_hdr __aligned(16);
	struct bio			*vbr_bio;
	uint8_t				 vbr_ack;

	SLIST_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct lwkt_serialize	 vtblk_slz;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010
#define VTBLK_FLAG_WC_CONFIG	0x0020

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		 vtblk_disk;
	cdev_t			 cdev;
	struct devstat		 stats;

	struct bio_queue_head	 vtblk_bioq;
	SLIST_HEAD(, vtblk_request)
				 vtblk_req_free;

	int			 vtblk_sector_size;
	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_alloc_intr(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);
static void	vtblk_vq_intr(void *);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_request_error(struct vtblk_request *);
static void	vtblk_finish_bio(struct bio *, int);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

/* Tunables. */
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_WCE			| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_RING_F_INDIRECT_DESC)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, NULL, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

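/*
 * Attach: negotiate features, read the device configuration, size and
 * allocate the working sglist, interrupt, virtqueue and request pool,
 * then create the disk device.  On any failure we fall through to
 * vtblk_detach() to unwind whatever was already set up.
 */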
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	lwkt_serialize_init(&sc->vtblk_slz);

	bioq_init(&sc->vtblk_bioq);
	SLIST_INIT(&sc->vtblk_req_free);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;

	vtblk_setup_sysctl(sc);

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
	    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_INTWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_intr(sc);
	if (error) {
		device_printf(dev, "cannot allocate interrupt\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = virtio_bind_intr(sc->vtblk_dev, 0, 0, vtblk_vq_intr, sc);
	if (error) {
		device_printf(dev, "cannot assign virtqueue to interrupt\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	error = virtio_setup_intr(dev, 0, &sc->vtblk_slz);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

	vtblk_alloc_disk(sc, &blkcfg);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	virtio_teardown_intr(dev, 0);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	lwkt_serialize_exit(&sc->vtblk_slz);

	vtblk_drain(sc);

	if (sc->cdev != NULL) {
		disk_destroy(&sc->vtblk_disk);
		sc->cdev = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
#if 0 /* XXX Resume IO? */
	vtblk_startio(sc);
#endif
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_open(struct dev_open_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

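/*
 * Kernel dump entry point.  The first call quiesces the device via
 * vtblk_prepare_dump(); subsequent calls write dump buffers (or flush,
 * when the length is zero) synchronously through vtblk_poll_request().
 */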
static int
vtblk_dump(struct dev_dump_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	uint64_t buf_start, buf_len;
	int error;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	buf_start = ap->a_offset;
	buf_len = ap->a_length;

//	lwkt_serialize_enter(&sc->vtblk_slz);

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (buf_len > 0)
		error = vtblk_write_dump(sc, ap->a_virtual, buf_start,
		    buf_len);
	else if (buf_len == 0)
		error = vtblk_flush_dump(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

//	lwkt_serialize_exit(&sc->vtblk_slz);

	return (error);
}

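/*
 * Strategy entry point.  Reject writes to read-only devices, otherwise
 * queue the bio and try to dispatch it immediately while holding the
 * per-device serializer.
 */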
static int
vtblk_strategy(struct dev_strategy_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	sc = dev->si_drv1;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;

	if (sc == NULL) {
		vtblk_finish_bio(bio, EINVAL);
		return EINVAL;
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our readonly'ness to GEOM above.
	 *
	 * XXX: Is that true in DFly?
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
		vtblk_finish_bio(bio, EROFS);
		return (EINVAL);
	}

	lwkt_serialize_enter(&sc->vtblk_slz);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
		bioqdisksort(&sc->vtblk_bioq, bio);
		vtblk_startio(sc);
		lwkt_serialize_exit(&sc->vtblk_slz);
	} else {
		lwkt_serialize_exit(&sc->vtblk_slz);
		vtblk_finish_bio(bio, ENXIO);
	}
	return 0;
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

/*
 * Calculate the maximum number of DMA segments supported. Note
 * that the in/out header is encoded in the segment list. We
 * assume that VTBLK_MIN_SEGMENTS covers that part of it so
 * we add it into the desired total. If the SEG_MAX feature
 * is not specified we have to just assume that the host can
 * handle the maximum number of segments required for a
 * MAXPHYS-sized request.
 *
 * The additional + 1 is in case a MAXPHYS-sized buffer crosses
 * a page boundary.
 */
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs = MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1 + nsegs);
	} else {
		nsegs = MAXPHYS / PAGE_SIZE + 1 + nsegs;
	}
	if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
		nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);

	return (nsegs);
}

static int
vtblk_alloc_intr(struct vtblk_softc *sc)
{
	int cnt = 1;
	int error;

	error = virtio_intr_alloc(sc->vtblk_dev, &cnt, 0, NULL);
	if (error != 0)
		return (error);
	else if (cnt != 1)
		return (ENXIO);

	return (0);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    &sc->vtblk_vq, "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 1, &vq_info));
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, writeback), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->writeback;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	struct disk_info info;

	/* construct the disk_info */
	bzero(&info, sizeof(info));

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = 512;

	/* blkcfg->capacity is always expressed in 512 byte sectors. */
	info.d_media_blksize = 512;
	info.d_media_blocks = blkcfg->capacity;

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_GEOMETRY)) {
		info.d_ncylinders = blkcfg->geometry.cylinders;
		info.d_nheads = blkcfg->geometry.heads;
		info.d_secpertrack = blkcfg->geometry.sectors;
		info.d_secpercyl = info.d_secpertrack * info.d_nheads;
	} else {
		/* Fabricate a geometry */
		info.d_secpertrack = 1024;
		info.d_nheads = 1;
		info.d_secpercyl = info.d_secpertrack * info.d_nheads;
		info.d_ncylinders =
		    (u_int)(info.d_media_blocks / info.d_secpercyl);
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;

	devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
	    DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
	    DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
	    DEVSTAT_PRIORITY_DISK);

	/* attach a generic disk device to ourselves */
	sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
	    &vbd_disk_ops);

	sc->cdev->si_drv1 = sc;
	sc->cdev->si_iosize_max = MAXPHYS;
	disk_setdiskinfo(&sc->vtblk_disk, &info);
	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE)) {
		device_printf(sc->vtblk_dev, "Block size: %u\n",
		    sc->vtblk_sector_size);
	}
	device_printf(sc->vtblk_dev,
	    "%juMB (%ju 512 byte sectors: %dH %dS/T %dC)\n",
	    ((uintmax_t)blkcfg->capacity * 512) / (1024*1024),
	    (uintmax_t)blkcfg->capacity, blkcfg->geometry.heads,
	    blkcfg->geometry.sectors, blkcfg->geometry.cylinders);
}

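/*
 * Pull bios off the queue and post them to the virtqueue until it fills
 * up or the queue runs dry, then notify the host once for the whole
 * batch.  Called with the serializer held.
 */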
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			bioqdisksort(&sc->vtblk_bioq, req->vbr_bio);
			vtblk_enqueue_request(sc, req);
			break;
		}
		devstat_start_transaction(&sc->stats);

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq, &sc->vtblk_slz);
}

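/*
 * Take the next bio off the queue and translate it into a preallocated
 * request: the virtio_blk header type and starting sector are derived
 * from the buf command and the bio offset.
 */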
static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bio = bioq_takefirst(bioq);
	req->vbr_bio = bio;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;
	bp = bio->bio_buf;

	switch (bp->b_cmd) {
	case BUF_CMD_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BUF_CMD_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	case BUF_CMD_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	return (req);
}

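/*
 * Build the descriptor chain for a request in the shared sglist and
 * enqueue it: the header, the data buffer (if any), and the one-byte
 * ack.  The ack segment, and the data buffer for reads, form the
 * device-writable part of the chain.
 */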
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bio;
	struct buf *bp;
	int writable, error;

	sg = sc->vtblk_sglist;
	bio = req->vbr_bio;
	bp = bio->bio_buf;
	writable = 0;

	/*
	 * sglist is live throughout this subroutine.
	 */
	error = sglist_append(sg, &req->vbr_hdr,
	    sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
		error = sglist_append(sg, bp->b_data, bp->b_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BUF_CMD_READ means the host writes into our buffer. */
		if (bp->b_cmd == BUF_CMD_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
	    sg->sg_nseg - writable, writable);

	sglist_reset(sg);

	return (error);
}

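/*
 * Virtqueue completion interrupt.  Completed requests are dequeued and
 * their bios finished with the serializer temporarily dropped, new I/O
 * is started, and the queue is re-checked after interrupts are
 * re-enabled to close the race with late completions.
 */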
static void
vtblk_vq_intr(void *arg)
{
	struct vtblk_softc *sc = arg;
	struct virtqueue *vq = sc->vtblk_vq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (!virtqueue_pending(vq))
		return;

	lwkt_serialize_handler_disable(&sc->vtblk_slz);
	virtqueue_disable_intr(sc->vtblk_vq);

retry:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		return;

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bio = req->vbr_bio;
		bp = bio->bio_buf;

		if (req->vbr_ack == VIRTIO_BLK_S_OK) {
			bp->b_resid = 0;
		} else {
			bp->b_flags |= B_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP) {
				bp->b_error = ENOTSUP;
			} else {
				bp->b_error = EIO;
			}
		}

		devstat_end_transaction_buf(&sc->stats, bio->bio_buf);

		lwkt_serialize_exit(&sc->vtblk_slz);
		/*
		 * Unlocking the controller around biodone() does not allow
		 * processing further device interrupts; when we queued
		 * vtblk_vq_intr, we disabled interrupts. It will allow
		 * concurrent vtblk_strategy/_startio command dispatches.
		 */
		biodone(bio);
		lwkt_serialize_enter(&sc->vtblk_slz);

		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		/*
		 * If new virtqueue entries appeared immediately after
		 * enabling interrupts, process them now. Release and
		 * retake softcontroller lock to try to avoid blocking
		 * I/O dispatch for too long.
		 */
		virtqueue_disable_intr(vq);
		goto retry;
	}
	lwkt_serialize_handler_enable(&sc->vtblk_slz);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
		panic("%s: cannot reinit VirtIO block device during dump",
		    device_get_nameunit(dev));
	}

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bio = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_WRITE;
	bp.b_data = virtual;
	bp.b_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bio = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq, NULL);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_finish_bio(req->vbr_bio, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct bio *bio;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while (bioq_first(bioq) != NULL) {
		bio = bioq_takefirst(bioq);
		vtblk_finish_bio(bio, ENXIO);
	}

	vtblk_free_requests(sc);
}

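/*
 * Preallocate the request pool.  Requests are allocated 16-byte aligned
 * so the 16-byte header cannot cross a page boundary, keeping both the
 * header and the one-byte ack within a single sglist segment each (the
 * KKASSERTs below check this).
 */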
static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = contigmalloc(sizeof(struct vtblk_request), M_DEVBUF,
		    M_WAITOK, 0, BUS_SPACE_MAXADDR, 16, 0);
		if (req == NULL)
			return (ENOMEM);

		KKASSERT(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr))
		    == 1);
		KKASSERT(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack))
		    == 1);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		contigfree(req, sizeof(struct vtblk_request), M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = SLIST_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		SLIST_REMOVE_HEAD(&sc->vtblk_req_free, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	SLIST_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

static void
vtblk_finish_bio(struct bio *bio, int error)
{
	struct buf *bp = bio->bio_buf;

	/* Record the error on the buf so the caller sees the failure. */
	if (error) {
		bp->b_error = error;
		bp->b_flags |= B_ERROR;
	}

	biodone(bio);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
	    "I", "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	ksnprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}