1 /*- 2 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $ 27 */ 28 29 /* Driver for VirtIO block devices. */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/kernel.h> 34 #include <sys/bio.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/sglist.h> 38 #include <sys/sysctl.h> 39 #include <sys/queue.h> 40 #include <sys/serialize.h> 41 #include <sys/buf2.h> 42 #include <sys/rman.h> 43 #include <sys/disk.h> 44 #include <sys/devicestat.h> 45 46 #include <dev/virtual/virtio/virtio/virtio.h> 47 #include <dev/virtual/virtio/virtio/virtqueue.h> 48 #include "virtio_blk.h" 49 #include "virtio_if.h" 50 51 struct vtblk_request { 52 struct virtio_blk_outhdr vbr_hdr __aligned(16); 53 struct bio *vbr_bio; 54 uint8_t vbr_ack; 55 56 SLIST_ENTRY(vtblk_request) vbr_link; 57 }; 58 59 enum vtblk_cache_mode { 60 VTBLK_CACHE_WRITETHROUGH, 61 VTBLK_CACHE_WRITEBACK, 62 VTBLK_CACHE_MAX 63 }; 64 65 struct vtblk_softc { 66 device_t vtblk_dev; 67 struct lwkt_serialize vtblk_slz; 68 uint64_t vtblk_features; 69 uint32_t vtblk_flags; 70 #define VTBLK_FLAG_INDIRECT 0x0001 71 #define VTBLK_FLAG_READONLY 0x0002 72 #define VTBLK_FLAG_DETACH 0x0004 73 #define VTBLK_FLAG_SUSPEND 0x0008 74 #define VTBLK_FLAG_DUMPING 0x0010 75 #define VTBLK_FLAG_WC_CONFIG 0x0020 76 77 struct virtqueue *vtblk_vq; 78 struct sglist *vtblk_sglist; 79 struct disk vtblk_disk; 80 cdev_t cdev; 81 struct devstat stats; 82 83 struct bio_queue_head vtblk_bioq; 84 SLIST_HEAD(, vtblk_request) 85 vtblk_req_free; 86 87 int vtblk_sector_size; 88 int vtblk_max_nsegs; 89 int vtblk_request_count; 90 enum vtblk_cache_mode vtblk_write_cache; 91 92 struct vtblk_request vtblk_dump_request; 93 }; 94 95 static struct virtio_feature_desc vtblk_feature_desc[] = { 96 { VIRTIO_BLK_F_BARRIER, "HostBarrier" }, 97 { VIRTIO_BLK_F_SIZE_MAX, "MaxSegSize" }, 98 { VIRTIO_BLK_F_SEG_MAX, "MaxNumSegs" }, 99 { VIRTIO_BLK_F_GEOMETRY, "DiskGeometry" }, 100 { VIRTIO_BLK_F_RO, "ReadOnly" }, 101 { VIRTIO_BLK_F_BLK_SIZE, "BlockSize" }, 102 { VIRTIO_BLK_F_SCSI, "SCSICmds" }, 103 { VIRTIO_BLK_F_WCE, "WriteCache" }, 104 { VIRTIO_BLK_F_TOPOLOGY, "Topology" }, 105 { VIRTIO_BLK_F_CONFIG_WCE, "ConfigWCE" }, 106 107 { 0, NULL } 108 }; 109 110 static int vtblk_probe(device_t); 111 static int vtblk_attach(device_t); 112 static int vtblk_detach(device_t); 113 static int vtblk_suspend(device_t); 114 static int vtblk_resume(device_t); 115 static int vtblk_shutdown(device_t); 116 117 static void vtblk_negotiate_features(struct vtblk_softc *); 118 static int vtblk_maximum_segments(struct vtblk_softc *, 119 struct virtio_blk_config *); 120 static int vtblk_alloc_virtqueue(struct vtblk_softc *); 121 static void vtblk_set_write_cache(struct vtblk_softc *, int); 122 static int vtblk_write_cache_enabled(struct vtblk_softc *sc, 123 struct virtio_blk_config *); 124 static int vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS); 125 static void vtblk_alloc_disk(struct vtblk_softc *, 126 struct virtio_blk_config *); 127 /* 128 * Interface to the device switch. 129 */ 130 static d_open_t vtblk_open; 131 static d_strategy_t vtblk_strategy; 132 static d_dump_t vtblk_dump; 133 134 static struct dev_ops vbd_disk_ops = { 135 { "vbd", 200, D_DISK | D_MPSAFE }, 136 .d_open = vtblk_open, 137 .d_close = nullclose, 138 .d_read = physread, 139 .d_write = physwrite, 140 .d_strategy = vtblk_strategy, 141 .d_dump = vtblk_dump, 142 }; 143 144 static void vtblk_startio(struct vtblk_softc *); 145 static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *); 146 static int vtblk_execute_request(struct vtblk_softc *, 147 struct vtblk_request *); 148 149 static int vtblk_vq_intr(void *); 150 static void vtblk_complete(void *); 151 152 static void vtblk_stop(struct vtblk_softc *); 153 154 static void vtblk_prepare_dump(struct vtblk_softc *); 155 static int vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t); 156 static int vtblk_flush_dump(struct vtblk_softc *); 157 static int vtblk_poll_request(struct vtblk_softc *, 158 struct vtblk_request *); 159 160 static void vtblk_drain_vq(struct vtblk_softc *, int); 161 static void vtblk_drain(struct vtblk_softc *); 162 163 static int vtblk_alloc_requests(struct vtblk_softc *); 164 static void vtblk_free_requests(struct vtblk_softc *); 165 static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *); 166 static void vtblk_enqueue_request(struct vtblk_softc *, 167 struct vtblk_request *); 168 169 static int vtblk_request_error(struct vtblk_request *); 170 static void vtblk_finish_bio(struct bio *, int); 171 172 static void vtblk_setup_sysctl(struct vtblk_softc *); 173 static int vtblk_tunable_int(struct vtblk_softc *, const char *, int); 174 175 /* Tunables. */ 176 static int vtblk_writecache_mode = -1; 177 TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode); 178 179 /* Features desired/implemented by this driver. */ 180 #define VTBLK_FEATURES \ 181 (VIRTIO_BLK_F_SIZE_MAX | \ 182 VIRTIO_BLK_F_SEG_MAX | \ 183 VIRTIO_BLK_F_GEOMETRY | \ 184 VIRTIO_BLK_F_RO | \ 185 VIRTIO_BLK_F_BLK_SIZE | \ 186 VIRTIO_BLK_F_WCE | \ 187 VIRTIO_BLK_F_CONFIG_WCE | \ 188 VIRTIO_RING_F_INDIRECT_DESC) 189 190 /* 191 * Each block request uses at least two segments - one for the header 192 * and one for the status. 193 */ 194 #define VTBLK_MIN_SEGMENTS 2 195 196 static device_method_t vtblk_methods[] = { 197 /* Device methods. */ 198 DEVMETHOD(device_probe, vtblk_probe), 199 DEVMETHOD(device_attach, vtblk_attach), 200 DEVMETHOD(device_detach, vtblk_detach), 201 DEVMETHOD(device_suspend, vtblk_suspend), 202 DEVMETHOD(device_resume, vtblk_resume), 203 DEVMETHOD(device_shutdown, vtblk_shutdown), 204 205 DEVMETHOD_END 206 }; 207 208 static driver_t vtblk_driver = { 209 "vtblk", 210 vtblk_methods, 211 sizeof(struct vtblk_softc) 212 }; 213 static devclass_t vtblk_devclass; 214 215 DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, NULL, NULL); 216 MODULE_VERSION(virtio_blk, 1); 217 MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1); 218 219 static int 220 vtblk_probe(device_t dev) 221 { 222 223 if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK) 224 return (ENXIO); 225 226 device_set_desc(dev, "VirtIO Block Adapter"); 227 228 return (BUS_PROBE_DEFAULT); 229 } 230 231 static int 232 vtblk_attach(device_t dev) 233 { 234 struct vtblk_softc *sc; 235 struct virtio_blk_config blkcfg; 236 int error; 237 238 sc = device_get_softc(dev); 239 sc->vtblk_dev = dev; 240 241 lwkt_serialize_init(&sc->vtblk_slz); 242 243 bioq_init(&sc->vtblk_bioq); 244 SLIST_INIT(&sc->vtblk_req_free); 245 246 virtio_set_feature_desc(dev, vtblk_feature_desc); 247 vtblk_negotiate_features(sc); 248 249 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) 250 sc->vtblk_flags |= VTBLK_FLAG_INDIRECT; 251 if (virtio_with_feature(dev, VIRTIO_BLK_F_RO)) 252 sc->vtblk_flags |= VTBLK_FLAG_READONLY; 253 if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE)) 254 sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG; 255 256 vtblk_setup_sysctl(sc); 257 258 /* Get local copy of config. */ 259 virtio_read_device_config(dev, 0, &blkcfg, 260 sizeof(struct virtio_blk_config)); 261 262 /* 263 * With the current sglist(9) implementation, it is not easy 264 * for us to support a maximum segment size as adjacent 265 * segments are coalesced. For now, just make sure it's larger 266 * than the maximum supported transfer size. 267 */ 268 if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) { 269 if (blkcfg.size_max < MAXPHYS) { 270 error = ENOTSUP; 271 device_printf(dev, "host requires unsupported " 272 "maximum segment size feature\n"); 273 goto fail; 274 } 275 } 276 277 sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg); 278 if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { 279 error = EINVAL; 280 device_printf(dev, "fewer than minimum number of segments " 281 "allowed: %d\n", sc->vtblk_max_nsegs); 282 goto fail; 283 } 284 285 /* 286 * Allocate working sglist. The number of segments may be too 287 * large to safely store on the stack. 288 */ 289 sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_INTWAIT); 290 if (sc->vtblk_sglist == NULL) { 291 error = ENOMEM; 292 device_printf(dev, "cannot allocate sglist\n"); 293 goto fail; 294 } 295 296 error = vtblk_alloc_virtqueue(sc); 297 if (error) { 298 device_printf(dev, "cannot allocate virtqueue\n"); 299 goto fail; 300 } 301 302 error = vtblk_alloc_requests(sc); 303 if (error) { 304 device_printf(dev, "cannot preallocate requests\n"); 305 goto fail; 306 } 307 308 error = virtio_setup_intr(dev, &sc->vtblk_slz); 309 if (error) { 310 device_printf(dev, "cannot setup virtqueue interrupt\n"); 311 goto fail; 312 } 313 314 virtqueue_enable_intr(sc->vtblk_vq); 315 316 vtblk_alloc_disk(sc, &blkcfg); 317 318 fail: 319 if (error) 320 vtblk_detach(dev); 321 322 return (error); 323 } 324 325 static int 326 vtblk_detach(device_t dev) 327 { 328 struct vtblk_softc *sc; 329 330 sc = device_get_softc(dev); 331 332 lwkt_serialize_enter(&sc->vtblk_slz); 333 sc->vtblk_flags |= VTBLK_FLAG_DETACH; 334 if (device_is_attached(dev)) 335 vtblk_stop(sc); 336 lwkt_serialize_exit(&sc->vtblk_slz); 337 338 vtblk_drain(sc); 339 340 if (sc->cdev != NULL) { 341 disk_destroy(&sc->vtblk_disk); 342 sc->cdev = NULL; 343 } 344 345 if (sc->vtblk_sglist != NULL) { 346 sglist_free(sc->vtblk_sglist); 347 sc->vtblk_sglist = NULL; 348 } 349 350 return (0); 351 } 352 353 static int 354 vtblk_suspend(device_t dev) 355 { 356 struct vtblk_softc *sc; 357 358 sc = device_get_softc(dev); 359 360 lwkt_serialize_enter(&sc->vtblk_slz); 361 sc->vtblk_flags |= VTBLK_FLAG_SUSPEND; 362 /* XXX BMV: virtio_stop(), etc needed here? */ 363 lwkt_serialize_exit(&sc->vtblk_slz); 364 365 return (0); 366 } 367 368 static int 369 vtblk_resume(device_t dev) 370 { 371 struct vtblk_softc *sc; 372 373 sc = device_get_softc(dev); 374 375 lwkt_serialize_enter(&sc->vtblk_slz); 376 /* XXX BMV: virtio_reinit(), etc needed here? */ 377 sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND; 378 #if 0 /* XXX Resume IO? */ 379 vtblk_startio(sc); 380 #endif 381 lwkt_serialize_exit(&sc->vtblk_slz); 382 383 return (0); 384 } 385 386 static int 387 vtblk_shutdown(device_t dev) 388 { 389 390 return (0); 391 } 392 393 static int 394 vtblk_open(struct dev_open_args *ap) 395 { 396 struct vtblk_softc *sc; 397 cdev_t dev = ap->a_head.a_dev; 398 sc = dev->si_drv1; 399 if (sc == NULL) 400 return (ENXIO); 401 402 return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0); 403 } 404 405 static int 406 vtblk_dump(struct dev_dump_args *ap) 407 { 408 struct vtblk_softc *sc; 409 cdev_t dev = ap->a_head.a_dev; 410 uint64_t buf_start, buf_len; 411 int error; 412 413 sc = dev->si_drv1; 414 if (sc == NULL) 415 return (ENXIO); 416 417 buf_start = ap->a_offset; 418 buf_len = ap->a_length; 419 420 // lwkt_serialize_enter(&sc->vtblk_slz); 421 422 if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) { 423 vtblk_prepare_dump(sc); 424 sc->vtblk_flags |= VTBLK_FLAG_DUMPING; 425 } 426 427 if (buf_len > 0) 428 error = vtblk_write_dump(sc, ap->a_virtual, buf_start, 429 buf_len); 430 else if (buf_len == 0) 431 error = vtblk_flush_dump(sc); 432 else { 433 error = EINVAL; 434 sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING; 435 } 436 437 // lwkt_serialize_exit(&sc->vtblk_slz); 438 439 return (error); 440 } 441 442 static int 443 vtblk_strategy(struct dev_strategy_args *ap) 444 { 445 struct vtblk_softc *sc; 446 cdev_t dev = ap->a_head.a_dev; 447 sc = dev->si_drv1; 448 struct bio *bio = ap->a_bio; 449 struct buf *bp = bio->bio_buf; 450 451 if (sc == NULL) { 452 vtblk_finish_bio(bio, EINVAL); 453 return EINVAL; 454 } 455 456 /* 457 * Fail any write if RO. Unfortunately, there does not seem to 458 * be a better way to report our readonly'ness to GEOM above. 459 * 460 * XXX: Is that true in DFly? 461 */ 462 if (sc->vtblk_flags & VTBLK_FLAG_READONLY && 463 (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_FLUSH)) { 464 vtblk_finish_bio(bio, EROFS); 465 return (EINVAL); 466 } 467 468 lwkt_serialize_enter(&sc->vtblk_slz); 469 if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) { 470 bioqdisksort(&sc->vtblk_bioq, bio); 471 vtblk_startio(sc); 472 lwkt_serialize_exit(&sc->vtblk_slz); 473 } else { 474 lwkt_serialize_exit(&sc->vtblk_slz); 475 vtblk_finish_bio(bio, ENXIO); 476 } 477 return 0; 478 } 479 480 static void 481 vtblk_negotiate_features(struct vtblk_softc *sc) 482 { 483 device_t dev; 484 uint64_t features; 485 486 dev = sc->vtblk_dev; 487 features = VTBLK_FEATURES; 488 489 sc->vtblk_features = virtio_negotiate_features(dev, features); 490 } 491 492 /* 493 * Calculate the maximum number of DMA segment supported. Note 494 * that the in/out header is encoded in the segment list. We 495 * assume that VTBLK_MIN_SEGMENTS covers that part of it so 496 * we add it into the desired total. If the SEG_MAX feature 497 * is not specified we have to just assume that the host can 498 * handle the maximum number of segments required for a MAXPHYS 499 * sized request. 500 * 501 * The additional + 1 is in case a MAXPHYS-sized buffer crosses 502 * a page boundary. 503 */ 504 static int 505 vtblk_maximum_segments(struct vtblk_softc *sc, 506 struct virtio_blk_config *blkcfg) 507 { 508 device_t dev; 509 int nsegs; 510 511 dev = sc->vtblk_dev; 512 nsegs = VTBLK_MIN_SEGMENTS; 513 514 if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) { 515 nsegs = MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1 + nsegs); 516 } else { 517 nsegs = MAXPHYS / PAGE_SIZE + 1 + nsegs; 518 } 519 if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT) 520 nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT); 521 522 return (nsegs); 523 } 524 525 static int 526 vtblk_alloc_virtqueue(struct vtblk_softc *sc) 527 { 528 device_t dev; 529 struct vq_alloc_info vq_info; 530 531 dev = sc->vtblk_dev; 532 533 VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs, 534 vtblk_vq_intr, sc, &sc->vtblk_vq, 535 "%s request", device_get_nameunit(dev)); 536 537 return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info)); 538 } 539 540 static void 541 vtblk_set_write_cache(struct vtblk_softc *sc, int wc) 542 { 543 544 /* Set either writeback (1) or writethrough (0) mode. */ 545 virtio_write_dev_config_1(sc->vtblk_dev, 546 offsetof(struct virtio_blk_config, writeback), wc); 547 } 548 549 static int 550 vtblk_write_cache_enabled(struct vtblk_softc *sc, 551 struct virtio_blk_config *blkcfg) 552 { 553 int wc; 554 555 if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) { 556 wc = vtblk_tunable_int(sc, "writecache_mode", 557 vtblk_writecache_mode); 558 if (wc >= 0 && wc < VTBLK_CACHE_MAX) 559 vtblk_set_write_cache(sc, wc); 560 else 561 wc = blkcfg->writeback; 562 } else 563 wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE); 564 565 return (wc); 566 } 567 568 static int 569 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS) 570 { 571 struct vtblk_softc *sc; 572 int wc, error; 573 574 sc = oidp->oid_arg1; 575 wc = sc->vtblk_write_cache; 576 577 error = sysctl_handle_int(oidp, &wc, 0, req); 578 if (error || req->newptr == NULL) 579 return (error); 580 if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0) 581 return (EPERM); 582 if (wc < 0 || wc >= VTBLK_CACHE_MAX) 583 return (EINVAL); 584 585 lwkt_serialize_enter(&sc->vtblk_slz); 586 sc->vtblk_write_cache = wc; 587 vtblk_set_write_cache(sc, sc->vtblk_write_cache); 588 lwkt_serialize_exit(&sc->vtblk_slz); 589 590 return (0); 591 } 592 593 static void 594 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg) 595 { 596 597 struct disk_info info; 598 599 /* construct the disk_info */ 600 bzero(&info, sizeof(info)); 601 602 if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE)) 603 sc->vtblk_sector_size = blkcfg->blk_size; 604 else 605 sc->vtblk_sector_size = DEV_BSIZE; 606 607 info.d_media_blksize = sc->vtblk_sector_size; 608 info.d_media_blocks = blkcfg->capacity; 609 610 info.d_ncylinders = blkcfg->geometry.cylinders; 611 info.d_nheads = blkcfg->geometry.heads; 612 info.d_secpertrack = blkcfg->geometry.sectors; 613 614 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 615 616 if (vtblk_write_cache_enabled(sc, blkcfg) != 0) 617 sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK; 618 else 619 sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH; 620 621 devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev), 622 DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, 623 DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, 624 DEVSTAT_PRIORITY_DISK); 625 626 /* attach a generic disk device to ourselves */ 627 sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk, 628 &vbd_disk_ops); 629 630 sc->cdev->si_drv1 = sc; 631 sc->cdev->si_iosize_max = MAXPHYS; 632 disk_setdiskinfo(&sc->vtblk_disk, &info); 633 } 634 635 static void 636 vtblk_startio(struct vtblk_softc *sc) 637 { 638 struct virtqueue *vq; 639 struct vtblk_request *req; 640 int enq; 641 642 vq = sc->vtblk_vq; 643 enq = 0; 644 645 ASSERT_SERIALIZED(&sc->vtblk_slz); 646 647 if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND) 648 return; 649 650 while (!virtqueue_full(vq)) { 651 req = vtblk_bio_request(sc); 652 if (req == NULL) 653 break; 654 655 if (vtblk_execute_request(sc, req) != 0) { 656 bioqdisksort(&sc->vtblk_bioq, req->vbr_bio); 657 vtblk_enqueue_request(sc, req); 658 break; 659 } 660 devstat_start_transaction(&sc->stats); 661 662 enq++; 663 } 664 665 if (enq > 0) 666 virtqueue_notify(vq, &sc->vtblk_slz); 667 } 668 669 static struct vtblk_request * 670 vtblk_bio_request(struct vtblk_softc *sc) 671 { 672 struct bio_queue_head *bioq; 673 struct vtblk_request *req; 674 struct bio *bio; 675 struct buf *bp; 676 677 bioq = &sc->vtblk_bioq; 678 679 if (bioq_first(bioq) == NULL) 680 return (NULL); 681 682 req = vtblk_dequeue_request(sc); 683 if (req == NULL) 684 return (NULL); 685 686 bio = bioq_takefirst(bioq); 687 req->vbr_bio = bio; 688 req->vbr_ack = -1; 689 req->vbr_hdr.ioprio = 1; 690 bp = bio->bio_buf; 691 692 switch (bp->b_cmd) { 693 case BUF_CMD_FLUSH: 694 req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; 695 break; 696 case BUF_CMD_READ: 697 req->vbr_hdr.type = VIRTIO_BLK_T_IN; 698 req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE; 699 break; 700 case BUF_CMD_WRITE: 701 req->vbr_hdr.type = VIRTIO_BLK_T_OUT; 702 req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE; 703 break; 704 default: 705 KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd)); 706 req->vbr_hdr.type = -1; 707 break; 708 } 709 710 return (req); 711 } 712 713 static int 714 vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req) 715 { 716 struct sglist *sg; 717 struct bio *bio; 718 struct buf *bp; 719 int writable, error; 720 721 sg = sc->vtblk_sglist; 722 bio = req->vbr_bio; 723 bp = bio->bio_buf; 724 writable = 0; 725 726 /* 727 * sglist is live throughout this subroutine. 728 */ 729 sglist_reset(sg); 730 731 error = sglist_append(sg, &req->vbr_hdr, 732 sizeof(struct virtio_blk_outhdr)); 733 KASSERT(error == 0, ("error adding header to sglist")); 734 KASSERT(sg->sg_nseg == 1, 735 ("header spanned multiple segments: %d", sg->sg_nseg)); 736 737 if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) { 738 error = sglist_append(sg, bp->b_data, bp->b_bcount); 739 KASSERT(error == 0, ("error adding buffer to sglist")); 740 741 /* BUF_CMD_READ means the host writes into our buffer. */ 742 if (bp->b_cmd == BUF_CMD_READ) 743 writable += sg->sg_nseg - 1; 744 } 745 746 error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); 747 KASSERT(error == 0, ("error adding ack to sglist")); 748 writable++; 749 750 KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS, 751 ("fewer than min segments: %d", sg->sg_nseg)); 752 753 error = virtqueue_enqueue(sc->vtblk_vq, req, sg, 754 sg->sg_nseg - writable, writable); 755 756 sglist_reset(sg); 757 758 return (error); 759 } 760 761 static int 762 vtblk_vq_intr(void *xsc) 763 { 764 vtblk_complete(xsc); 765 766 return (1); 767 } 768 769 static void 770 vtblk_complete(void *arg) 771 { 772 struct vtblk_softc *sc; 773 struct vtblk_request *req; 774 struct virtqueue *vq; 775 struct bio *bio; 776 struct buf *bp; 777 778 sc = arg; 779 vq = sc->vtblk_vq; 780 781 lwkt_serialize_handler_disable(&sc->vtblk_slz); 782 virtqueue_disable_intr(sc->vtblk_vq); 783 ASSERT_SERIALIZED(&sc->vtblk_slz); 784 785 retry: 786 if (sc->vtblk_flags & VTBLK_FLAG_DETACH) 787 return; 788 789 while ((req = virtqueue_dequeue(vq, NULL)) != NULL) { 790 bio = req->vbr_bio; 791 bp = bio->bio_buf; 792 793 if (req->vbr_ack == VIRTIO_BLK_S_OK) 794 bp->b_resid = 0; 795 else { 796 bp->b_flags |= B_ERROR; 797 if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP) { 798 bp->b_error = ENOTSUP; 799 } else { 800 bp->b_error = EIO; 801 } 802 } 803 804 devstat_end_transaction_buf(&sc->stats, bio->bio_buf); 805 806 lwkt_serialize_exit(&sc->vtblk_slz); 807 /* 808 * Unlocking the controller around biodone() does not allow 809 * processing further device interrupts; when we queued 810 * vtblk_complete, we disabled interrupts. It will allow 811 * concurrent vtblk_strategy/_startio command dispatches. 812 */ 813 biodone(bio); 814 lwkt_serialize_enter(&sc->vtblk_slz); 815 816 vtblk_enqueue_request(sc, req); 817 } 818 819 vtblk_startio(sc); 820 821 if (virtqueue_enable_intr(vq) != 0) { 822 /* 823 * If new virtqueue entries appeared immediately after 824 * enabling interrupts, process them now. Release and 825 * retake softcontroller lock to try to avoid blocking 826 * I/O dispatch for too long. 827 */ 828 virtqueue_disable_intr(vq); 829 goto retry; 830 } 831 lwkt_serialize_handler_enable(&sc->vtblk_slz); 832 } 833 834 static void 835 vtblk_stop(struct vtblk_softc *sc) 836 { 837 838 virtqueue_disable_intr(sc->vtblk_vq); 839 virtio_stop(sc->vtblk_dev); 840 } 841 842 static void 843 vtblk_prepare_dump(struct vtblk_softc *sc) 844 { 845 device_t dev; 846 struct virtqueue *vq; 847 848 dev = sc->vtblk_dev; 849 vq = sc->vtblk_vq; 850 851 vtblk_stop(sc); 852 853 /* 854 * Drain all requests caught in-flight in the virtqueue, 855 * skipping biodone(). When dumping, only one request is 856 * outstanding at a time, and we just poll the virtqueue 857 * for the response. 858 */ 859 vtblk_drain_vq(sc, 1); 860 861 if (virtio_reinit(dev, sc->vtblk_features) != 0) { 862 panic("%s: cannot reinit VirtIO block device during dump", 863 device_get_nameunit(dev)); 864 } 865 866 virtqueue_disable_intr(vq); 867 virtio_reinit_complete(dev); 868 } 869 870 static int 871 vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset, 872 size_t length) 873 { 874 struct bio bio; 875 struct buf bp; 876 struct vtblk_request *req; 877 878 req = &sc->vtblk_dump_request; 879 req->vbr_ack = -1; 880 req->vbr_hdr.type = VIRTIO_BLK_T_OUT; 881 req->vbr_hdr.ioprio = 1; 882 req->vbr_hdr.sector = offset / 512; 883 884 req->vbr_bio = &bio; 885 bzero(&bio, sizeof(struct bio)); 886 bzero(&buf, sizeof(struct buf)); 887 888 bio.bio_buf = &bp; 889 bp.b_cmd = BUF_CMD_WRITE; 890 bp.b_data = virtual; 891 bp.b_bcount = length; 892 893 return (vtblk_poll_request(sc, req)); 894 } 895 896 static int 897 vtblk_flush_dump(struct vtblk_softc *sc) 898 { 899 struct bio bio; 900 struct buf bp; 901 struct vtblk_request *req; 902 903 req = &sc->vtblk_dump_request; 904 req->vbr_ack = -1; 905 req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; 906 req->vbr_hdr.ioprio = 1; 907 req->vbr_hdr.sector = 0; 908 909 req->vbr_bio = &bio; 910 bzero(&bio, sizeof(struct bio)); 911 bzero(&bp, sizeof(struct buf)); 912 913 bio.bio_buf = &bp; 914 bp.b_cmd = BUF_CMD_FLUSH; 915 916 return (vtblk_poll_request(sc, req)); 917 } 918 919 static int 920 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req) 921 { 922 struct virtqueue *vq; 923 int error; 924 925 vq = sc->vtblk_vq; 926 927 if (!virtqueue_empty(vq)) 928 return (EBUSY); 929 930 error = vtblk_execute_request(sc, req); 931 if (error) 932 return (error); 933 934 virtqueue_notify(vq, NULL); 935 virtqueue_poll(vq, NULL); 936 937 error = vtblk_request_error(req); 938 if (error && bootverbose) { 939 device_printf(sc->vtblk_dev, 940 "%s: IO error: %d\n", __func__, error); 941 } 942 943 return (error); 944 } 945 946 static void 947 vtblk_drain_vq(struct vtblk_softc *sc, int skip_done) 948 { 949 struct virtqueue *vq; 950 struct vtblk_request *req; 951 int last; 952 953 vq = sc->vtblk_vq; 954 last = 0; 955 956 while ((req = virtqueue_drain(vq, &last)) != NULL) { 957 if (!skip_done) 958 vtblk_finish_bio(req->vbr_bio, ENXIO); 959 960 vtblk_enqueue_request(sc, req); 961 } 962 963 KASSERT(virtqueue_empty(vq), ("virtqueue not empty")); 964 } 965 966 static void 967 vtblk_drain(struct vtblk_softc *sc) 968 { 969 struct bio_queue_head *bioq; 970 struct bio *bio; 971 972 bioq = &sc->vtblk_bioq; 973 974 if (sc->vtblk_vq != NULL) 975 vtblk_drain_vq(sc, 0); 976 977 while (bioq_first(bioq) != NULL) { 978 bio = bioq_takefirst(bioq); 979 vtblk_finish_bio(bio, ENXIO); 980 } 981 982 vtblk_free_requests(sc); 983 } 984 985 static int 986 vtblk_alloc_requests(struct vtblk_softc *sc) 987 { 988 struct vtblk_request *req; 989 int i, nreqs; 990 991 nreqs = virtqueue_size(sc->vtblk_vq); 992 993 /* 994 * Preallocate sufficient requests to keep the virtqueue full. Each 995 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce 996 * the number allocated when indirect descriptors are not available. 997 */ 998 if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0) 999 nreqs /= VTBLK_MIN_SEGMENTS; 1000 1001 for (i = 0; i < nreqs; i++) { 1002 req = contigmalloc(sizeof(struct vtblk_request), M_DEVBUF, 1003 M_WAITOK, 0, BUS_SPACE_MAXADDR, 16, 0); 1004 if (req == NULL) 1005 return (ENOMEM); 1006 1007 KKASSERT(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) 1008 == 1); 1009 KKASSERT(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) 1010 == 1); 1011 1012 sc->vtblk_request_count++; 1013 vtblk_enqueue_request(sc, req); 1014 } 1015 1016 return (0); 1017 } 1018 1019 static void 1020 vtblk_free_requests(struct vtblk_softc *sc) 1021 { 1022 struct vtblk_request *req; 1023 1024 while ((req = vtblk_dequeue_request(sc)) != NULL) { 1025 sc->vtblk_request_count--; 1026 contigfree(req, sizeof(struct vtblk_request), M_DEVBUF); 1027 } 1028 1029 KASSERT(sc->vtblk_request_count == 0, ("leaked requests")); 1030 } 1031 1032 static struct vtblk_request * 1033 vtblk_dequeue_request(struct vtblk_softc *sc) 1034 { 1035 struct vtblk_request *req; 1036 1037 req = SLIST_FIRST(&sc->vtblk_req_free); 1038 if (req != NULL) 1039 SLIST_REMOVE_HEAD(&sc->vtblk_req_free, vbr_link); 1040 1041 return (req); 1042 } 1043 1044 static void 1045 vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req) 1046 { 1047 1048 bzero(req, sizeof(struct vtblk_request)); 1049 SLIST_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link); 1050 } 1051 1052 static int 1053 vtblk_request_error(struct vtblk_request *req) 1054 { 1055 int error; 1056 1057 switch (req->vbr_ack) { 1058 case VIRTIO_BLK_S_OK: 1059 error = 0; 1060 break; 1061 case VIRTIO_BLK_S_UNSUPP: 1062 error = ENOTSUP; 1063 break; 1064 default: 1065 error = EIO; 1066 break; 1067 } 1068 1069 return (error); 1070 } 1071 1072 static void 1073 vtblk_finish_bio(struct bio *bio, int error) 1074 { 1075 1076 biodone(bio); 1077 } 1078 1079 static void 1080 vtblk_setup_sysctl(struct vtblk_softc *sc) 1081 { 1082 device_t dev; 1083 struct sysctl_ctx_list *ctx; 1084 struct sysctl_oid *tree; 1085 struct sysctl_oid_list *child; 1086 1087 dev = sc->vtblk_dev; 1088 ctx = device_get_sysctl_ctx(dev); 1089 tree = device_get_sysctl_tree(dev); 1090 child = SYSCTL_CHILDREN(tree); 1091 1092 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode", 1093 CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl, 1094 "I", "Write cache mode (writethrough (0) or writeback (1))"); 1095 } 1096 1097 static int 1098 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def) 1099 { 1100 char path[64]; 1101 1102 ksnprintf(path, sizeof(path), 1103 "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob); 1104 TUNABLE_INT_FETCH(path, &def); 1105 1106 return (def); 1107 } 1108