/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $
 */

/* Driver for VirtIO block devices. */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"

struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr __aligned(16);
	struct bio			*vbr_bio;
	uint8_t				 vbr_ack;

	SLIST_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct lwkt_serialize	 vtblk_slz;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010
#define VTBLK_FLAG_WC_CONFIG	0x0020

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		 vtblk_disk;
	cdev_t			 cdev;
	struct devstat		 stats;

	struct bio_queue_head	 vtblk_bioq;
	SLIST_HEAD(, vtblk_request)
				 vtblk_req_free;

	int			 vtblk_sector_size;
	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_alloc_intr(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);
static void	vtblk_vq_intr(void *);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_request_error(struct vtblk_request *);
static void	vtblk_finish_bio(struct bio *, int);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

/* Tunables. */
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX | \
     VIRTIO_BLK_F_SEG_MAX | \
     VIRTIO_BLK_F_GEOMETRY | \
     VIRTIO_BLK_F_RO | \
     VIRTIO_BLK_F_BLK_SIZE | \
     VIRTIO_BLK_F_WCE | \
     VIRTIO_BLK_F_CONFIG_WCE | \
     VIRTIO_RING_F_INDIRECT_DESC)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
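
/*
 * Rough picture of how a single request is laid out on the ring (a
 * sketch only; the data buffer in the middle may span several physical
 * segments, and is absent entirely for flush requests):
 *
 *	header (struct virtio_blk_outhdr)  - device-readable
 *	data buffer segments               - readable for writes,
 *	                                     writable for reads
 *	status byte (vbr_ack)              - device-writable
 */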

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, NULL, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	lwkt_serialize_init(&sc->vtblk_slz);

	bioq_init(&sc->vtblk_bioq);
	SLIST_INIT(&sc->vtblk_req_free);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;

	vtblk_setup_sysctl(sc);

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
	    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_INTWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_intr(sc);
	if (error) {
		device_printf(dev, "cannot allocate interrupt\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = virtio_bind_intr(sc->vtblk_dev, 0, 0, vtblk_vq_intr, sc);
	if (error) {
		device_printf(dev, "cannot assign virtqueue to interrupt\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	error = virtio_setup_intr(dev, 0, &sc->vtblk_slz);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

	vtblk_alloc_disk(sc, &blkcfg);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	virtio_teardown_intr(dev, 0);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	lwkt_serialize_exit(&sc->vtblk_slz);

	vtblk_drain(sc);

	if (sc->cdev != NULL) {
		disk_destroy(&sc->vtblk_disk);
		sc->cdev = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
#if 0 /* XXX Resume IO? */
	vtblk_startio(sc);
#endif
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_open(struct dev_open_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_dump(struct dev_dump_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	uint64_t buf_start, buf_len;
	int error;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	buf_start = ap->a_offset;
	buf_len = ap->a_length;

//	lwkt_serialize_enter(&sc->vtblk_slz);

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (buf_len > 0)
		error = vtblk_write_dump(sc, ap->a_virtual, buf_start,
		    buf_len);
	else if (buf_len == 0)
		error = vtblk_flush_dump(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

//	lwkt_serialize_exit(&sc->vtblk_slz);

	return (error);
}

static int
vtblk_strategy(struct dev_strategy_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;

	sc = dev->si_drv1;
	if (sc == NULL) {
		vtblk_finish_bio(bio, EINVAL);
		return (EINVAL);
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our readonly'ness to GEOM above.
	 *
	 * XXX: Is that true in DFly?
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) &&
	    (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
		vtblk_finish_bio(bio, EROFS);
		return (EINVAL);
	}

	lwkt_serialize_enter(&sc->vtblk_slz);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
		bioqdisksort(&sc->vtblk_bioq, bio);
		vtblk_startio(sc);
		lwkt_serialize_exit(&sc->vtblk_slz);
	} else {
		lwkt_serialize_exit(&sc->vtblk_slz);
		vtblk_finish_bio(bio, ENXIO);
	}
	return (0);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

/*
 * Calculate the maximum number of DMA segments supported. Note
 * that the in/out header is encoded in the segment list. We
 * assume that VTBLK_MIN_SEGMENTS covers that part of it, so
 * we add it into the desired total. If the SEG_MAX feature
 * is not specified, we have to just assume that the host can
 * handle the maximum number of segments required for a MAXPHYS
 * sized request.
 *
 * The additional + 1 is in case a MAXPHYS-sized buffer crosses
 * a page boundary.
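 *
 * For instance, assuming the usual 128 KB MAXPHYS and 4 KB pages,
 * this works out to 128K / 4K + 1 + VTBLK_MIN_SEGMENTS =
 * 32 + 1 + 2 = 35 segments per request, before any SEG_MAX or
 * indirect-descriptor clamping below.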
 */
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs = MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1 + nsegs);
	} else {
		nsegs = MAXPHYS / PAGE_SIZE + 1 + nsegs;
	}
	if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
		nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);

	return (nsegs);
}

static int
vtblk_alloc_intr(struct vtblk_softc *sc)
{
	int cnt = 1;
	int error;

	error = virtio_intr_alloc(sc->vtblk_dev, &cnt, 0, NULL);
	if (error != 0)
		return (error);
	else if (cnt != 1)
		return (ENXIO);

	return (0);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    &sc->vtblk_vq, "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 1, &vq_info));
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, writeback), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->writeback;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	struct disk_info info;

	/* construct the disk_info */
	bzero(&info, sizeof(info));

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = 512;

	/*
	 * blkcfg->capacity is always expressed in 512 byte sectors.
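	 * For example, a 10 GiB backing device would report a capacity
	 * of 20971520 here, even if a larger logical block size was
	 * negotiated via VIRTIO_BLK_F_BLK_SIZE (an illustrative figure,
	 * not a value read from any particular host).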
	 */
	info.d_media_blksize = 512;
	info.d_media_blocks = blkcfg->capacity;

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_GEOMETRY)) {
		info.d_ncylinders = blkcfg->geometry.cylinders;
		info.d_nheads = blkcfg->geometry.heads;
		info.d_secpertrack = blkcfg->geometry.sectors;
		info.d_secpercyl = info.d_secpertrack * info.d_nheads;
	} else {
		/* Fabricate a geometry */
		info.d_secpertrack = 1024;
		info.d_nheads = 1;
		info.d_secpercyl = info.d_secpertrack * info.d_nheads;
		info.d_ncylinders =
		    (u_int)(info.d_media_blocks / info.d_secpercyl);
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;

	devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
	    DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
	    DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
	    DEVSTAT_PRIORITY_DISK);

	/* attach a generic disk device to ourselves */
	sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
	    &vbd_disk_ops);

	sc->cdev->si_drv1 = sc;
	sc->cdev->si_iosize_max = MAXPHYS;
	disk_setdiskinfo(&sc->vtblk_disk, &info);
	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE)) {
		device_printf(sc->vtblk_dev, "Block size: %u\n",
		    sc->vtblk_sector_size);
	}
	device_printf(sc->vtblk_dev,
	    "%juMB (%ju 512 byte sectors: %dH %dS/T %dC)\n",
	    ((uintmax_t)blkcfg->capacity * 512) / (1024*1024),
	    (uintmax_t)blkcfg->capacity, blkcfg->geometry.heads,
	    blkcfg->geometry.sectors, blkcfg->geometry.cylinders);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			bioqdisksort(&sc->vtblk_bioq, req->vbr_bio);
			vtblk_enqueue_request(sc, req);
			break;
		}
		devstat_start_transaction(&sc->stats);

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq, &sc->vtblk_slz);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bio = bioq_takefirst(bioq);
	req->vbr_bio = bio;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;
	bp = bio->bio_buf;

	switch (bp->b_cmd) {
	case BUF_CMD_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BUF_CMD_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	case BUF_CMD_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	return (req);
}
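
/*
 * Map a request onto the working sglist and enqueue it on the virtqueue.
 * The device-readable segments (the header, and the data buffer for
 * writes) are appended first, followed by the device-writable ones (the
 * data buffer for reads, and the ack byte), which is why the readable
 * count passed to virtqueue_enqueue() is sg_nseg - writable.
 */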
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bio;
	struct buf *bp;
	int writable, error;

	sg = sc->vtblk_sglist;
	bio = req->vbr_bio;
	bp = bio->bio_buf;
	writable = 0;

	/*
	 * sglist is live throughout this subroutine.
	 */
	error = sglist_append(sg, &req->vbr_hdr,
	    sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
		error = sglist_append(sg, bp->b_data, bp->b_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BUF_CMD_READ means the host writes into our buffer. */
		if (bp->b_cmd == BUF_CMD_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
	    sg->sg_nseg - writable, writable);

	sglist_reset(sg);

	return (error);
}

static void
vtblk_vq_intr(void *arg)
{
	struct vtblk_softc *sc = arg;
	struct virtqueue *vq = sc->vtblk_vq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (!virtqueue_pending(vq))
		return;

	lwkt_serialize_handler_disable(&sc->vtblk_slz);
	virtqueue_disable_intr(sc->vtblk_vq);

retry:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		return;

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bio = req->vbr_bio;
		bp = bio->bio_buf;

		if (req->vbr_ack == VIRTIO_BLK_S_OK) {
			bp->b_resid = 0;
		} else {
			bp->b_flags |= B_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
				bp->b_error = ENOTSUP;
			else
				bp->b_error = EIO;
		}

		devstat_end_transaction_buf(&sc->stats, bio->bio_buf);

		lwkt_serialize_exit(&sc->vtblk_slz);
		/*
		 * Dropping the serializer around biodone() does not allow
		 * further device interrupts to be processed, since the
		 * interrupt handler was disabled above when this routine
		 * was queued. It does, however, allow concurrent
		 * vtblk_strategy()/vtblk_startio() command dispatches.
		 */
		biodone(bio);
		lwkt_serialize_enter(&sc->vtblk_slz);

		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		/*
		 * If new virtqueue entries appeared immediately after
		 * enabling interrupts, process them now. Release and
		 * retake the softc serializer to try to avoid blocking
		 * I/O dispatch for too long.
		 */
		virtqueue_disable_intr(vq);
		goto retry;
	}
	lwkt_serialize_handler_enable(&sc->vtblk_slz);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
		panic("%s: cannot reinit VirtIO block device during dump",
		    device_get_nameunit(dev));
	}

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bio = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_WRITE;
	bp.b_data = virtual;
	bp.b_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bio = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq, NULL);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_finish_bio(req->vbr_bio, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct bio *bio;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while (bioq_first(bioq) != NULL) {
		bio = bioq_takefirst(bioq);
		vtblk_finish_bio(bio, ENXIO);
	}

	vtblk_free_requests(sc);
}

static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors, so reduce
	 * the number allocated when indirect descriptors are not available.
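	 * For example, with a typical 128-entry virtqueue and no
	 * indirect descriptor support, this preallocates 128 / 2 = 64
	 * requests (128 is only an illustrative queue size; the real
	 * value comes from the host).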
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = contigmalloc(sizeof(struct vtblk_request), M_DEVBUF,
		    M_WAITOK, 0, BUS_SPACE_MAXADDR, 16, 0);
		if (req == NULL)
			return (ENOMEM);

		KKASSERT(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr))
		    == 1);
		KKASSERT(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack))
		    == 1);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		contigfree(req, sizeof(struct vtblk_request), M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = SLIST_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		SLIST_REMOVE_HEAD(&sc->vtblk_req_free, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	SLIST_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

static void
vtblk_finish_bio(struct bio *bio, int error)
{
	struct buf *bp = bio->bio_buf;

	/* Record the error on the buf before completing the bio. */
	if (error) {
		bp->b_error = error;
		bp->b_flags |= B_ERROR;
	}

	biodone(bio);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
	    "I", "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	ksnprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}