/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $
 */

/* Driver for VirtIO block devices. */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"
#include "virtio_if.h"

struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr __aligned(16);
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;
	uint8_t				 vbr_barrier;

	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct lwkt_serialize	 vtblk_slz;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010
#define VTBLK_FLAG_BARRIER	0x0020
#define VTBLK_FLAG_WC_CONFIG	0x0040

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		 vtblk_disk;
	cdev_t			 cdev;
	struct devstat		 stats;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;
	struct vtblk_request	*vtblk_req_ordered;

	int			 vtblk_sector_size;
	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct vtblk_request	 vtblk_dump_request;
};
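/*
 * Human-readable names for the feature bits, presumably used by the
 * virtio core when printing the negotiated feature set at attach time.
 */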
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *	vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_vq_intr(void *);
static void	vtblk_complete(void *);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *	vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static struct vtblk_request *	vtblk_dequeue_ready(struct vtblk_softc *);
static void	vtblk_enqueue_ready(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_request_error(struct vtblk_request *);
static void	vtblk_finish_bio(struct bio *, int);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

/* Tunables. */
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
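/*
 * Example (a sketch of the usual DragonFly tunable mechanism): the
 * global write cache default can be set from the loader, e.g. in
 * /boot/loader.conf:
 *
 *	hw.vtblk.writecache_mode="0"	# force writethrough
 *
 * A value of -1 (the default) defers to what the device config reports.
 */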
/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER	| \
     VIRTIO_BLK_F_SIZE_MAX	| \
     VIRTIO_BLK_F_SEG_MAX	| \
     VIRTIO_BLK_F_GEOMETRY	| \
     VIRTIO_BLK_F_RO		| \
     VIRTIO_BLK_F_BLK_SIZE	| \
     VIRTIO_BLK_F_WCE		| \
     VIRTIO_BLK_F_CONFIG_WCE	| \
     VIRTIO_RING_F_INDIRECT_DESC)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		break;
	case MOD_UNLOAD:
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}
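/*
 * Attach sequence: negotiate features, size the segment list, allocate
 * the virtqueue and a pool of preallocated requests, create the disk,
 * and only then enable the virtqueue interrupt. Any failure unwinds
 * through vtblk_detach().
 */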
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	lwkt_serialize_init(&sc->vtblk_slz);

	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;

	vtblk_setup_sysctl(sc);

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
	    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's at
	 * least as large as the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_INTWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, &sc->vtblk_slz);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	lwkt_serialize_exit(&sc->vtblk_slz);

	vtblk_drain(sc);

	if (sc->cdev != NULL) {
		disk_destroy(&sc->vtblk_disk);
		sc->cdev = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
#if 0 /* XXX Resume IO? */
	vtblk_startio(sc);
#endif
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_open(struct dev_open_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}
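/*
 * Kernel core dump support. vtblk_dump() runs from panic context, so
 * it cannot rely on interrupts or the serializer (note the disabled
 * lwkt_serialize calls below); requests are issued one at a time and
 * the virtqueue is polled for completion.
 */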
static int
vtblk_dump(struct dev_dump_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	uint64_t buf_start, buf_len;
	int error;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	buf_start = ap->a_offset;
	buf_len = ap->a_length;

//	lwkt_serialize_enter(&sc->vtblk_slz);

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (buf_len > 0)
		error = vtblk_write_dump(sc, ap->a_virtual, buf_start,
		    buf_len);
	else if (buf_len == 0)
		error = vtblk_flush_dump(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

//	lwkt_serialize_exit(&sc->vtblk_slz);

	return (error);
}

static int
vtblk_strategy(struct dev_strategy_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;

	sc = dev->si_drv1;
	if (sc == NULL) {
		vtblk_finish_bio(bio, EINVAL);
		return EINVAL;
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our readonly'ness to GEOM above.
	 *
	 * XXX: Is that true in DFly?
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) &&
	    (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
		vtblk_finish_bio(bio, EROFS);
		return (EINVAL);
	}

	lwkt_serialize_enter(&sc->vtblk_slz);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
		devstat_start_transaction(&sc->stats);
		bioqdisksort(&sc->vtblk_bioq, bio);
		vtblk_startio(sc);
		lwkt_serialize_exit(&sc->vtblk_slz);
	} else {
		lwkt_serialize_exit(&sc->vtblk_slz);
		vtblk_finish_bio(bio, ENXIO);
	}
	return 0;
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}
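/*
 * Worked example (assuming the common 4KB PAGE_SIZE and a 128KB
 * MAXPHYS): a misaligned MAXPHYS transfer can touch 128/4 + 1 = 33
 * pages, so vtblk_maximum_segments() below allows up to
 * 33 + VTBLK_MIN_SEGMENTS = 35 descriptors per request.
 */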
/*
 * Calculate the maximum number of DMA segments supported. Note
 * that the in/out header is encoded in the segment list. We
 * assume that VTBLK_MIN_SEGMENTS covers that part of it so
 * we add it into the desired total. If the SEG_MAX feature
 * is not specified we have to just assume that the host can
 * handle the maximum number of segments required for a MAXPHYS
 * sized request.
 *
 * The additional + 1 is in case a MAXPHYS-sized buffer crosses
 * a page boundary.
 */
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs = MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1 + nsegs);
	} else {
		nsegs = MAXPHYS / PAGE_SIZE + 1 + nsegs;
	}
	if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
		nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, writeback), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->writeback;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	struct disk_info info;

	/* construct the disk_info */
	bzero(&info, sizeof(info));

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = DEV_BSIZE;

	info.d_media_blksize = sc->vtblk_sector_size;
	info.d_media_blocks = blkcfg->capacity;

	info.d_ncylinders = blkcfg->geometry.cylinders;
	info.d_nheads = blkcfg->geometry.heads;
	info.d_secpertrack = blkcfg->geometry.sectors;
	info.d_secpercyl = info.d_secpertrack * info.d_nheads;

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;

	devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
	    DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
	    DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
	    DEVSTAT_PRIORITY_DISK);

	/* attach a generic disk device to ourselves */
	sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
	    &vbd_disk_ops);

	sc->cdev->si_drv1 = sc;
	sc->cdev->si_iosize_max = MAXPHYS;
	disk_setdiskinfo(&sc->vtblk_disk, &info);
}
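/*
 * Dispatch as many requests as the virtqueue allows. Requests that
 * were previously deferred (the "ready" queue) are retried before new
 * bios are pulled off the bio queue; a request that cannot be enqueued
 * yet, e.g. an emulated barrier waiting for the queue to drain, is put
 * back on the ready queue and dispatch stops.
 */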
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		if ((req = vtblk_dequeue_ready(sc)) == NULL)
			req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			vtblk_enqueue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq, &sc->vtblk_slz);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bio = bioq_takefirst(bioq);
	req->vbr_bp = bio;
	req->vbr_ack = -1;
	req->vbr_barrier = 0;
	req->vbr_hdr.ioprio = 1;
	bp = bio->bio_buf;

	switch (bp->b_cmd) {
	case BUF_CMD_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BUF_CMD_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	case BUF_CMD_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	if (bp->b_flags & B_ORDERED) {
		if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0)
			req->vbr_barrier = 1;
		else
			req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
	}

	return (req);
}
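/*
 * Each request is handed to the host as three regions of the shared
 * segment list: the read-only outhdr, the data buffer (device-writable
 * for reads), and the single device-writable ack byte, in that order.
 */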
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bio;
	struct buf *bp;
	int ordered, writable, error;

	sg = sc->vtblk_sglist;
	bio = req->vbr_bp;
	bp = bio->bio_buf;
	ordered = 0;
	writable = 0;

	if (sc->vtblk_req_ordered != NULL)
		return (EBUSY);

	if (req->vbr_barrier) {
		/*
		 * This request will be executed once all
		 * the in-flight requests are completed.
		 */
		if (!virtqueue_empty(sc->vtblk_vq))
			return (EBUSY);
		ordered = 1;
	}

	/*
	 * sglist is live throughout this subroutine.
	 */
	sglist_reset(sg);

	error = sglist_append(sg, &req->vbr_hdr,
	    sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
		error = sglist_append(sg, bp->b_data, bp->b_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BUF_CMD_READ means the host writes into our buffer. */
		if (bp->b_cmd == BUF_CMD_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
	    sg->sg_nseg - writable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	sglist_reset(sg);

	return (error);
}

static int
vtblk_vq_intr(void *xsc)
{
	vtblk_complete(xsc);

	return (1);
}

static void
vtblk_complete(void *arg)
{
	struct vtblk_softc *sc;
	struct vtblk_request *req;
	struct virtqueue *vq;
	struct bio *bio;
	struct buf *bp;

	sc = arg;
	vq = sc->vtblk_vq;

	lwkt_serialize_handler_disable(&sc->vtblk_slz);
	virtqueue_disable_intr(sc->vtblk_vq);
	ASSERT_SERIALIZED(&sc->vtblk_slz);

retry:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		return;

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bio = req->vbr_bp;
		bp = bio->bio_buf;

		if (sc->vtblk_req_ordered != NULL) {
			/* This should be the only outstanding request. */
			KKASSERT(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		if (req->vbr_ack == VIRTIO_BLK_S_OK)
			bp->b_resid = 0;
		else {
			bp->b_flags |= B_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
				bp->b_error = ENOTSUP;
			else
				bp->b_error = EIO;
		}

		devstat_end_transaction_buf(&sc->stats, bio->bio_buf);

		lwkt_serialize_exit(&sc->vtblk_slz);
		/*
		 * Dropping the serializer around biodone() cannot let in
		 * further device interrupts: they were disabled above when
		 * vtblk_complete() was queued. It does, however, allow
		 * concurrent vtblk_strategy()/vtblk_startio() dispatches.
		 */
		biodone(bio);
		lwkt_serialize_enter(&sc->vtblk_slz);

		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		/*
		 * If new virtqueue entries appeared immediately after
		 * enabling interrupts, process them now. Release and
		 * retake the softc serializer to try to avoid blocking
		 * I/O dispatch for too long.
		 */
		virtqueue_disable_intr(vq);
		goto retry;
	}
	lwkt_serialize_handler_enable(&sc->vtblk_slz);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}
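/*
 * Switch the device into dump mode: reset it (discarding any in-flight
 * requests), then reinitialize it with interrupts left disabled so the
 * dump path can poll for completions.
 */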
static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
		panic("%s: cannot reinit VirtIO block device during dump",
		    device_get_nameunit(dev));
	}

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bp = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_WRITE;
	bp.b_data = virtual;
	bp.b_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq, NULL);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_finish_bio(req->vbr_bp, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
		vtblk_finish_bio(req->vbr_bp, ENXIO);
		vtblk_enqueue_request(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_finish_bio(bp, ENXIO);
	}

	vtblk_free_requests(sc);
}
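/*
 * For example (assuming a 128-entry virtqueue): with indirect
 * descriptors each request occupies a single ring slot, so 128
 * requests are preallocated below; without them a request needs at
 * least VTBLK_MIN_SEGMENTS descriptors, so only 128 / 2 = 64 are.
 */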
static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors, so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = contigmalloc(sizeof(struct vtblk_request), M_DEVBUF,
		    M_WAITOK, 0, BUS_SPACE_MAXADDR, 16, 0);
		if (req == NULL)
			return (ENOMEM);

		KKASSERT(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr))
		    == 1);
		KKASSERT(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack))
		    == 1);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		contigfree(req, sizeof(struct vtblk_request), M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

static void
vtblk_finish_bio(struct bio *bio, int error)
{
	struct buf *bp = bio->bio_buf;

	bp->b_error = error;
	bp->b_flags |= B_ERROR;
	biodone(bio);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
	    "I", "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	ksnprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}
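/*
 * Note: the per-device tunable path built above is
 * "hw.vtblk.<unit>.<knob>", e.g. hw.vtblk.0.writecache_mode for a
 * hypothetical unit 0. The matching runtime sysctl created in
 * vtblk_setup_sysctl() presumably appears under the device's sysctl
 * tree as dev.vtblk.0.writecache_mode, and writing it only succeeds
 * when the host offered VIRTIO_BLK_F_CONFIG_WCE (EPERM otherwise).
 */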