1 /*- 2 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $ 27 */ 28 29 /* Driver for VirtIO block devices. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"
#include "virtio_if.h"

/*
 * One outstanding block request: the device-readable header, the bio
 * being serviced, and the one-byte status the host writes back.  Idle
 * requests live on the softc's vtblk_req_free list.
 */
struct vtblk_request {
	/* Header is placed first in the descriptor chain; 16-byte
	 * alignment keeps it in a single sglist segment (see the
	 * KKASSERTs in vtblk_alloc_requests). */
	struct virtio_blk_outhdr vbr_hdr __aligned(16);
	struct bio *vbr_bio;		/* bio this request services */
	uint8_t vbr_ack;		/* status byte written by the host */

	SLIST_ENTRY(vtblk_request) vbr_link;	/* free-list linkage */
};

/* Write cache modes exposed through the writecache_mode sysctl/tunable. */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

/* Per-device soft state; protected by vtblk_slz unless noted. */
struct vtblk_softc {
	device_t vtblk_dev;			/* bus device handle */
	struct lwkt_serialize vtblk_slz;	/* serializes queue + flags */
	uint64_t vtblk_features;		/* negotiated feature bits */
	uint32_t vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001	/* indirect descriptors negotiated */
#define VTBLK_FLAG_READONLY	0x0002	/* host exports device read-only */
#define VTBLK_FLAG_DETACH	0x0004	/* detach in progress; fail new I/O */
#define VTBLK_FLAG_SUSPEND	0x0008	/* suspended; hold off dispatch */
#define VTBLK_FLAG_DUMPING	0x0010	/* reinitialized for kernel dump */
#define VTBLK_FLAG_WC_CONFIG	0x0020	/* host allows writeback toggle */

	struct virtqueue *vtblk_vq;	/* single request virtqueue */
	struct sglist *vtblk_sglist;	/* scratch sglist for enqueueing */
	struct disk vtblk_disk;		/* generic disk attachment */
	cdev_t cdev;			/* device node; NULL until created */
	struct devstat stats;

	struct bio_queue_head vtblk_bioq;	/* bios awaiting dispatch */
	SLIST_HEAD(, vtblk_request)
			vtblk_req_free;		/* preallocated requests */

	int vtblk_sector_size;
	int vtblk_max_nsegs;		/* max sglist segments per request */
	int vtblk_request_count;	/* number preallocated (leak check) */
	enum vtblk_cache_mode vtblk_write_cache;

	/* Statically reserved so dumping never needs an allocation. */
	struct vtblk_request vtblk_dump_request;
};

/* Human-readable names for negotiated feature bits (bootverbose). */
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_vq_intr(void *);
static void	vtblk_complete(void *);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_request_error(struct vtblk_request *);
static void	vtblk_finish_bio(struct bio *, int);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

/* Tunables. */
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_WCE			| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_RING_F_INDIRECT_DESC)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, NULL, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

/*
 * Claim any virtio device the bus identifies as a block device.
 */
static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

/*
 * Attach: negotiate features, read the device config, size and allocate
 * the sglist/virtqueue/request pool, hook up the interrupt, and create
 * the disk.  The ordering follows the virtio initialization protocol
 * and must not be rearranged.  On any failure we fall through to
 * vtblk_detach() for cleanup; on success error is 0 and the fail:
 * label is a no-op.
 */
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	lwkt_serialize_init(&sc->vtblk_slz);

	bioq_init(&sc->vtblk_bioq);
	SLIST_INIT(&sc->vtblk_req_free);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	/* Cache the negotiated capabilities we consult on hot paths. */
	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;

	vtblk_setup_sysctl(sc);

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
	    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_INTWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	error = virtio_setup_intr(dev, &sc->vtblk_slz);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

	vtblk_alloc_disk(sc, &blkcfg);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

/*
 * Detach: mark the device dying under the serializer so no new I/O is
 * queued, stop the device, fail all pending/in-flight bios, then tear
 * down the disk and sglist.  Also used as the error path of attach,
 * so every resource check is NULL-tolerant.
 */
static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	lwkt_serialize_exit(&sc->vtblk_slz);

	vtblk_drain(sc);

	if (sc->cdev != NULL) {
		disk_destroy(&sc->vtblk_disk);
		sc->cdev = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	return (0);
}

static
int 354 vtblk_suspend(device_t dev) 355 { 356 struct vtblk_softc *sc; 357 358 sc = device_get_softc(dev); 359 360 lwkt_serialize_enter(&sc->vtblk_slz); 361 sc->vtblk_flags |= VTBLK_FLAG_SUSPEND; 362 /* XXX BMV: virtio_stop(), etc needed here? */ 363 lwkt_serialize_exit(&sc->vtblk_slz); 364 365 return (0); 366 } 367 368 static int 369 vtblk_resume(device_t dev) 370 { 371 struct vtblk_softc *sc; 372 373 sc = device_get_softc(dev); 374 375 lwkt_serialize_enter(&sc->vtblk_slz); 376 /* XXX BMV: virtio_reinit(), etc needed here? */ 377 sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND; 378 #if 0 /* XXX Resume IO? */ 379 vtblk_startio(sc); 380 #endif 381 lwkt_serialize_exit(&sc->vtblk_slz); 382 383 return (0); 384 } 385 386 static int 387 vtblk_shutdown(device_t dev) 388 { 389 390 return (0); 391 } 392 393 static int 394 vtblk_open(struct dev_open_args *ap) 395 { 396 struct vtblk_softc *sc; 397 cdev_t dev = ap->a_head.a_dev; 398 sc = dev->si_drv1; 399 if (sc == NULL) 400 return (ENXIO); 401 402 return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? 
ENXIO : 0); 403 } 404 405 static int 406 vtblk_dump(struct dev_dump_args *ap) 407 { 408 struct vtblk_softc *sc; 409 cdev_t dev = ap->a_head.a_dev; 410 uint64_t buf_start, buf_len; 411 int error; 412 413 sc = dev->si_drv1; 414 if (sc == NULL) 415 return (ENXIO); 416 417 buf_start = ap->a_offset; 418 buf_len = ap->a_length; 419 420 // lwkt_serialize_enter(&sc->vtblk_slz); 421 422 if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) { 423 vtblk_prepare_dump(sc); 424 sc->vtblk_flags |= VTBLK_FLAG_DUMPING; 425 } 426 427 if (buf_len > 0) 428 error = vtblk_write_dump(sc, ap->a_virtual, buf_start, 429 buf_len); 430 else if (buf_len == 0) 431 error = vtblk_flush_dump(sc); 432 else { 433 error = EINVAL; 434 sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING; 435 } 436 437 // lwkt_serialize_exit(&sc->vtblk_slz); 438 439 return (error); 440 } 441 442 static int 443 vtblk_strategy(struct dev_strategy_args *ap) 444 { 445 struct vtblk_softc *sc; 446 cdev_t dev = ap->a_head.a_dev; 447 sc = dev->si_drv1; 448 struct bio *bio = ap->a_bio; 449 struct buf *bp = bio->bio_buf; 450 451 if (sc == NULL) { 452 vtblk_finish_bio(bio, EINVAL); 453 return EINVAL; 454 } 455 456 /* 457 * Fail any write if RO. Unfortunately, there does not seem to 458 * be a better way to report our readonly'ness to GEOM above. 459 * 460 * XXX: Is that true in DFly? 
461 */ 462 if (sc->vtblk_flags & VTBLK_FLAG_READONLY && 463 (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_FLUSH)) { 464 vtblk_finish_bio(bio, EROFS); 465 return (EINVAL); 466 } 467 468 lwkt_serialize_enter(&sc->vtblk_slz); 469 if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) { 470 bioqdisksort(&sc->vtblk_bioq, bio); 471 vtblk_startio(sc); 472 lwkt_serialize_exit(&sc->vtblk_slz); 473 } else { 474 lwkt_serialize_exit(&sc->vtblk_slz); 475 vtblk_finish_bio(bio, ENXIO); 476 } 477 return 0; 478 } 479 480 static void 481 vtblk_negotiate_features(struct vtblk_softc *sc) 482 { 483 device_t dev; 484 uint64_t features; 485 486 dev = sc->vtblk_dev; 487 features = VTBLK_FEATURES; 488 489 sc->vtblk_features = virtio_negotiate_features(dev, features); 490 } 491 492 /* 493 * Calculate the maximum number of DMA segment supported. Note 494 * that the in/out header is encoded in the segment list. We 495 * assume that VTBLK_MIN_SEGMENTS covers that part of it so 496 * we add it into the desired total. If the SEG_MAX feature 497 * is not specified we have to just assume that the host can 498 * handle the maximum number of segments required for a MAXPHYS 499 * sized request. 500 * 501 * The additional + 1 is in case a MAXPHYS-sized buffer crosses 502 * a page boundary. 
503 */ 504 static int 505 vtblk_maximum_segments(struct vtblk_softc *sc, 506 struct virtio_blk_config *blkcfg) 507 { 508 device_t dev; 509 int nsegs; 510 511 dev = sc->vtblk_dev; 512 nsegs = VTBLK_MIN_SEGMENTS; 513 514 if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) { 515 nsegs = MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1 + nsegs); 516 } else { 517 nsegs = MAXPHYS / PAGE_SIZE + 1 + nsegs; 518 } 519 if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT) 520 nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT); 521 522 return (nsegs); 523 } 524 525 static int 526 vtblk_alloc_virtqueue(struct vtblk_softc *sc) 527 { 528 device_t dev; 529 struct vq_alloc_info vq_info; 530 531 dev = sc->vtblk_dev; 532 533 VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs, 534 vtblk_vq_intr, sc, &sc->vtblk_vq, 535 "%s request", device_get_nameunit(dev)); 536 537 return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info)); 538 } 539 540 static void 541 vtblk_set_write_cache(struct vtblk_softc *sc, int wc) 542 { 543 544 /* Set either writeback (1) or writethrough (0) mode. 
*/ 545 virtio_write_dev_config_1(sc->vtblk_dev, 546 offsetof(struct virtio_blk_config, writeback), wc); 547 } 548 549 static int 550 vtblk_write_cache_enabled(struct vtblk_softc *sc, 551 struct virtio_blk_config *blkcfg) 552 { 553 int wc; 554 555 if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) { 556 wc = vtblk_tunable_int(sc, "writecache_mode", 557 vtblk_writecache_mode); 558 if (wc >= 0 && wc < VTBLK_CACHE_MAX) 559 vtblk_set_write_cache(sc, wc); 560 else 561 wc = blkcfg->writeback; 562 } else 563 wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE); 564 565 return (wc); 566 } 567 568 static int 569 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS) 570 { 571 struct vtblk_softc *sc; 572 int wc, error; 573 574 sc = oidp->oid_arg1; 575 wc = sc->vtblk_write_cache; 576 577 error = sysctl_handle_int(oidp, &wc, 0, req); 578 if (error || req->newptr == NULL) 579 return (error); 580 if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0) 581 return (EPERM); 582 if (wc < 0 || wc >= VTBLK_CACHE_MAX) 583 return (EINVAL); 584 585 lwkt_serialize_enter(&sc->vtblk_slz); 586 sc->vtblk_write_cache = wc; 587 vtblk_set_write_cache(sc, sc->vtblk_write_cache); 588 lwkt_serialize_exit(&sc->vtblk_slz); 589 590 return (0); 591 } 592 593 static void 594 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg) 595 { 596 struct disk_info info; 597 598 /* construct the disk_info */ 599 bzero(&info, sizeof(info)); 600 601 if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE)) 602 sc->vtblk_sector_size = blkcfg->blk_size; 603 else 604 sc->vtblk_sector_size = 512; 605 606 /* blkcfg->capacity is always expressed in 512 byte sectors. 
*/ 607 info.d_media_blksize = 512; 608 info.d_media_blocks = blkcfg->capacity; 609 610 if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_GEOMETRY)) { 611 info.d_ncylinders = blkcfg->geometry.cylinders; 612 info.d_nheads = blkcfg->geometry.heads; 613 info.d_secpertrack = blkcfg->geometry.sectors; 614 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 615 } else { 616 /* Fabricate a geometry */ 617 info.d_secpertrack = 1024; 618 info.d_nheads = 1; 619 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 620 info.d_ncylinders = 621 (u_int)(info.d_media_blocks / info.d_secpercyl); 622 } 623 624 if (vtblk_write_cache_enabled(sc, blkcfg) != 0) 625 sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK; 626 else 627 sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH; 628 629 devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev), 630 DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, 631 DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, 632 DEVSTAT_PRIORITY_DISK); 633 634 /* attach a generic disk device to ourselves */ 635 sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk, 636 &vbd_disk_ops); 637 638 sc->cdev->si_drv1 = sc; 639 sc->cdev->si_iosize_max = MAXPHYS; 640 disk_setdiskinfo(&sc->vtblk_disk, &info); 641 if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE)) { 642 device_printf(sc->vtblk_dev, "Block size: %u\n", 643 sc->vtblk_sector_size); 644 } 645 device_printf(sc->vtblk_dev, 646 "%juMB (%ju 512 byte sectors: %dH %dS/T %dC)\n", 647 ((uintmax_t)blkcfg->capacity * 512) / (1024*1024), 648 (uintmax_t)blkcfg->capacity, blkcfg->geometry.heads, 649 blkcfg->geometry.sectors, blkcfg->geometry.cylinders); 650 } 651 652 static void 653 vtblk_startio(struct vtblk_softc *sc) 654 { 655 struct virtqueue *vq; 656 struct vtblk_request *req; 657 int enq; 658 659 vq = sc->vtblk_vq; 660 enq = 0; 661 662 ASSERT_SERIALIZED(&sc->vtblk_slz); 663 664 if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND) 665 return; 666 667 while (!virtqueue_full(vq)) { 668 req = 
vtblk_bio_request(sc); 669 if (req == NULL) 670 break; 671 672 if (vtblk_execute_request(sc, req) != 0) { 673 bioqdisksort(&sc->vtblk_bioq, req->vbr_bio); 674 vtblk_enqueue_request(sc, req); 675 break; 676 } 677 devstat_start_transaction(&sc->stats); 678 679 enq++; 680 } 681 682 if (enq > 0) 683 virtqueue_notify(vq, &sc->vtblk_slz); 684 } 685 686 static struct vtblk_request * 687 vtblk_bio_request(struct vtblk_softc *sc) 688 { 689 struct bio_queue_head *bioq; 690 struct vtblk_request *req; 691 struct bio *bio; 692 struct buf *bp; 693 694 bioq = &sc->vtblk_bioq; 695 696 if (bioq_first(bioq) == NULL) 697 return (NULL); 698 699 req = vtblk_dequeue_request(sc); 700 if (req == NULL) 701 return (NULL); 702 703 bio = bioq_takefirst(bioq); 704 req->vbr_bio = bio; 705 req->vbr_ack = -1; 706 req->vbr_hdr.ioprio = 1; 707 bp = bio->bio_buf; 708 709 switch (bp->b_cmd) { 710 case BUF_CMD_FLUSH: 711 req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; 712 break; 713 case BUF_CMD_READ: 714 req->vbr_hdr.type = VIRTIO_BLK_T_IN; 715 req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE; 716 break; 717 case BUF_CMD_WRITE: 718 req->vbr_hdr.type = VIRTIO_BLK_T_OUT; 719 req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE; 720 break; 721 default: 722 KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd)); 723 req->vbr_hdr.type = -1; 724 break; 725 } 726 727 return (req); 728 } 729 730 static int 731 vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req) 732 { 733 struct sglist *sg; 734 struct bio *bio; 735 struct buf *bp; 736 int writable, error; 737 738 sg = sc->vtblk_sglist; 739 bio = req->vbr_bio; 740 bp = bio->bio_buf; 741 writable = 0; 742 743 /* 744 * sglist is live throughout this subroutine. 
745 */ 746 sglist_reset(sg); 747 748 error = sglist_append(sg, &req->vbr_hdr, 749 sizeof(struct virtio_blk_outhdr)); 750 KASSERT(error == 0, ("error adding header to sglist")); 751 KASSERT(sg->sg_nseg == 1, 752 ("header spanned multiple segments: %d", sg->sg_nseg)); 753 754 if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) { 755 error = sglist_append(sg, bp->b_data, bp->b_bcount); 756 KASSERT(error == 0, ("error adding buffer to sglist")); 757 758 /* BUF_CMD_READ means the host writes into our buffer. */ 759 if (bp->b_cmd == BUF_CMD_READ) 760 writable += sg->sg_nseg - 1; 761 } 762 763 error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); 764 KASSERT(error == 0, ("error adding ack to sglist")); 765 writable++; 766 767 KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS, 768 ("fewer than min segments: %d", sg->sg_nseg)); 769 770 error = virtqueue_enqueue(sc->vtblk_vq, req, sg, 771 sg->sg_nseg - writable, writable); 772 773 sglist_reset(sg); 774 775 return (error); 776 } 777 778 static int 779 vtblk_vq_intr(void *xsc) 780 { 781 vtblk_complete(xsc); 782 783 return (1); 784 } 785 786 static void 787 vtblk_complete(void *arg) 788 { 789 struct vtblk_softc *sc; 790 struct vtblk_request *req; 791 struct virtqueue *vq; 792 struct bio *bio; 793 struct buf *bp; 794 795 sc = arg; 796 vq = sc->vtblk_vq; 797 798 lwkt_serialize_handler_disable(&sc->vtblk_slz); 799 virtqueue_disable_intr(sc->vtblk_vq); 800 ASSERT_SERIALIZED(&sc->vtblk_slz); 801 802 retry: 803 if (sc->vtblk_flags & VTBLK_FLAG_DETACH) 804 return; 805 806 while ((req = virtqueue_dequeue(vq, NULL)) != NULL) { 807 bio = req->vbr_bio; 808 bp = bio->bio_buf; 809 810 if (req->vbr_ack == VIRTIO_BLK_S_OK) 811 bp->b_resid = 0; 812 else { 813 bp->b_flags |= B_ERROR; 814 if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP) { 815 bp->b_error = ENOTSUP; 816 } else { 817 bp->b_error = EIO; 818 } 819 } 820 821 devstat_end_transaction_buf(&sc->stats, bio->bio_buf); 822 823 lwkt_serialize_exit(&sc->vtblk_slz); 824 /* 825 * Unlocking the 
controller around biodone() does not allow 826 * processing further device interrupts; when we queued 827 * vtblk_complete, we disabled interrupts. It will allow 828 * concurrent vtblk_strategy/_startio command dispatches. 829 */ 830 biodone(bio); 831 lwkt_serialize_enter(&sc->vtblk_slz); 832 833 vtblk_enqueue_request(sc, req); 834 } 835 836 vtblk_startio(sc); 837 838 if (virtqueue_enable_intr(vq) != 0) { 839 /* 840 * If new virtqueue entries appeared immediately after 841 * enabling interrupts, process them now. Release and 842 * retake softcontroller lock to try to avoid blocking 843 * I/O dispatch for too long. 844 */ 845 virtqueue_disable_intr(vq); 846 goto retry; 847 } 848 lwkt_serialize_handler_enable(&sc->vtblk_slz); 849 } 850 851 static void 852 vtblk_stop(struct vtblk_softc *sc) 853 { 854 855 virtqueue_disable_intr(sc->vtblk_vq); 856 virtio_stop(sc->vtblk_dev); 857 } 858 859 static void 860 vtblk_prepare_dump(struct vtblk_softc *sc) 861 { 862 device_t dev; 863 struct virtqueue *vq; 864 865 dev = sc->vtblk_dev; 866 vq = sc->vtblk_vq; 867 868 vtblk_stop(sc); 869 870 /* 871 * Drain all requests caught in-flight in the virtqueue, 872 * skipping biodone(). When dumping, only one request is 873 * outstanding at a time, and we just poll the virtqueue 874 * for the response. 
875 */ 876 vtblk_drain_vq(sc, 1); 877 878 if (virtio_reinit(dev, sc->vtblk_features) != 0) { 879 panic("%s: cannot reinit VirtIO block device during dump", 880 device_get_nameunit(dev)); 881 } 882 883 virtqueue_disable_intr(vq); 884 virtio_reinit_complete(dev); 885 } 886 887 static int 888 vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset, 889 size_t length) 890 { 891 struct bio bio; 892 struct buf bp; 893 struct vtblk_request *req; 894 895 req = &sc->vtblk_dump_request; 896 req->vbr_ack = -1; 897 req->vbr_hdr.type = VIRTIO_BLK_T_OUT; 898 req->vbr_hdr.ioprio = 1; 899 req->vbr_hdr.sector = offset / 512; 900 901 req->vbr_bio = &bio; 902 bzero(&bio, sizeof(struct bio)); 903 bzero(&buf, sizeof(struct buf)); 904 905 bio.bio_buf = &bp; 906 bp.b_cmd = BUF_CMD_WRITE; 907 bp.b_data = virtual; 908 bp.b_bcount = length; 909 910 return (vtblk_poll_request(sc, req)); 911 } 912 913 static int 914 vtblk_flush_dump(struct vtblk_softc *sc) 915 { 916 struct bio bio; 917 struct buf bp; 918 struct vtblk_request *req; 919 920 req = &sc->vtblk_dump_request; 921 req->vbr_ack = -1; 922 req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; 923 req->vbr_hdr.ioprio = 1; 924 req->vbr_hdr.sector = 0; 925 926 req->vbr_bio = &bio; 927 bzero(&bio, sizeof(struct bio)); 928 bzero(&bp, sizeof(struct buf)); 929 930 bio.bio_buf = &bp; 931 bp.b_cmd = BUF_CMD_FLUSH; 932 933 return (vtblk_poll_request(sc, req)); 934 } 935 936 static int 937 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req) 938 { 939 struct virtqueue *vq; 940 int error; 941 942 vq = sc->vtblk_vq; 943 944 if (!virtqueue_empty(vq)) 945 return (EBUSY); 946 947 error = vtblk_execute_request(sc, req); 948 if (error) 949 return (error); 950 951 virtqueue_notify(vq, NULL); 952 virtqueue_poll(vq, NULL); 953 954 error = vtblk_request_error(req); 955 if (error && bootverbose) { 956 device_printf(sc->vtblk_dev, 957 "%s: IO error: %d\n", __func__, error); 958 } 959 960 return (error); 961 } 962 963 static void 964 
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done) 965 { 966 struct virtqueue *vq; 967 struct vtblk_request *req; 968 int last; 969 970 vq = sc->vtblk_vq; 971 last = 0; 972 973 while ((req = virtqueue_drain(vq, &last)) != NULL) { 974 if (!skip_done) 975 vtblk_finish_bio(req->vbr_bio, ENXIO); 976 977 vtblk_enqueue_request(sc, req); 978 } 979 980 KASSERT(virtqueue_empty(vq), ("virtqueue not empty")); 981 } 982 983 static void 984 vtblk_drain(struct vtblk_softc *sc) 985 { 986 struct bio_queue_head *bioq; 987 struct bio *bio; 988 989 bioq = &sc->vtblk_bioq; 990 991 if (sc->vtblk_vq != NULL) 992 vtblk_drain_vq(sc, 0); 993 994 while (bioq_first(bioq) != NULL) { 995 bio = bioq_takefirst(bioq); 996 vtblk_finish_bio(bio, ENXIO); 997 } 998 999 vtblk_free_requests(sc); 1000 } 1001 1002 static int 1003 vtblk_alloc_requests(struct vtblk_softc *sc) 1004 { 1005 struct vtblk_request *req; 1006 int i, nreqs; 1007 1008 nreqs = virtqueue_size(sc->vtblk_vq); 1009 1010 /* 1011 * Preallocate sufficient requests to keep the virtqueue full. Each 1012 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce 1013 * the number allocated when indirect descriptors are not available. 
1014 */ 1015 if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0) 1016 nreqs /= VTBLK_MIN_SEGMENTS; 1017 1018 for (i = 0; i < nreqs; i++) { 1019 req = contigmalloc(sizeof(struct vtblk_request), M_DEVBUF, 1020 M_WAITOK, 0, BUS_SPACE_MAXADDR, 16, 0); 1021 if (req == NULL) 1022 return (ENOMEM); 1023 1024 KKASSERT(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) 1025 == 1); 1026 KKASSERT(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) 1027 == 1); 1028 1029 sc->vtblk_request_count++; 1030 vtblk_enqueue_request(sc, req); 1031 } 1032 1033 return (0); 1034 } 1035 1036 static void 1037 vtblk_free_requests(struct vtblk_softc *sc) 1038 { 1039 struct vtblk_request *req; 1040 1041 while ((req = vtblk_dequeue_request(sc)) != NULL) { 1042 sc->vtblk_request_count--; 1043 contigfree(req, sizeof(struct vtblk_request), M_DEVBUF); 1044 } 1045 1046 KASSERT(sc->vtblk_request_count == 0, ("leaked requests")); 1047 } 1048 1049 static struct vtblk_request * 1050 vtblk_dequeue_request(struct vtblk_softc *sc) 1051 { 1052 struct vtblk_request *req; 1053 1054 req = SLIST_FIRST(&sc->vtblk_req_free); 1055 if (req != NULL) 1056 SLIST_REMOVE_HEAD(&sc->vtblk_req_free, vbr_link); 1057 1058 return (req); 1059 } 1060 1061 static void 1062 vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req) 1063 { 1064 1065 bzero(req, sizeof(struct vtblk_request)); 1066 SLIST_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link); 1067 } 1068 1069 static int 1070 vtblk_request_error(struct vtblk_request *req) 1071 { 1072 int error; 1073 1074 switch (req->vbr_ack) { 1075 case VIRTIO_BLK_S_OK: 1076 error = 0; 1077 break; 1078 case VIRTIO_BLK_S_UNSUPP: 1079 error = ENOTSUP; 1080 break; 1081 default: 1082 error = EIO; 1083 break; 1084 } 1085 1086 return (error); 1087 } 1088 1089 static void 1090 vtblk_finish_bio(struct bio *bio, int error) 1091 { 1092 1093 biodone(bio); 1094 } 1095 1096 static void 1097 vtblk_setup_sysctl(struct vtblk_softc *sc) 1098 { 1099 device_t dev; 1100 struct sysctl_ctx_list *ctx; 1101 
struct sysctl_oid *tree; 1102 struct sysctl_oid_list *child; 1103 1104 dev = sc->vtblk_dev; 1105 ctx = device_get_sysctl_ctx(dev); 1106 tree = device_get_sysctl_tree(dev); 1107 child = SYSCTL_CHILDREN(tree); 1108 1109 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode", 1110 CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl, 1111 "I", "Write cache mode (writethrough (0) or writeback (1))"); 1112 } 1113 1114 static int 1115 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def) 1116 { 1117 char path[64]; 1118 1119 ksnprintf(path, sizeof(path), 1120 "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob); 1121 TUNABLE_INT_FETCH(path, &def); 1122 1123 return (def); 1124 } 1125