1 /* 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/proc.h> 38 #include <sys/sysctl.h> 39 #include <sys/buf.h> 40 #include <sys/conf.h> 41 #include <sys/disklabel.h> 42 #include <sys/disklabel32.h> 43 #include <sys/disklabel64.h> 44 #include <sys/diskslice.h> 45 #include <sys/diskmbr.h> 46 #include <sys/disk.h> 47 #include <sys/malloc.h> 48 #include <sys/device.h> 49 #include <sys/devfs.h> 50 #include <sys/thread.h> 51 #include <sys/queue.h> 52 #include <sys/lock.h> 53 #include <sys/stat.h> 54 #include <sys/uuid.h> 55 56 #include <sys/dmsg.h> 57 58 #include <sys/buf2.h> 59 #include <sys/mplock2.h> 60 #include <sys/msgport2.h> 61 #include <sys/thread2.h> 62 63 struct dios_open { 64 int openrd; 65 int openwr; 66 }; 67 68 struct dios_io { 69 int count; 70 int eof; 71 }; 72 73 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg"); 74 75 static int disk_iocom_reconnect(struct disk *dp, struct file *fp); 76 static int disk_rcvdmsg(kdmsg_msg_t *msg); 77 78 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg); 79 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg); 80 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg); 81 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg); 82 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg); 83 static void diskiodone(struct bio *bio); 84 85 void 86 disk_iocom_init(struct disk *dp) 87 { 88 kdmsg_iocom_init(&dp->d_iocom, dp, 89 KDMSG_IOCOMF_AUTOCONN | 90 KDMSG_IOCOMF_AUTOSPAN | 91 KDMSG_IOCOMF_AUTOCIRC, 92 M_DMSG_DISK, disk_rcvdmsg); 93 } 94 95 void 96 disk_iocom_update(struct disk *dp) 97 { 98 } 99 100 void 101 disk_iocom_uninit(struct disk *dp) 102 { 103 kdmsg_iocom_uninit(&dp->d_iocom); 104 } 105 106 int 107 disk_iocom_ioctl(struct disk *dp, int cmd, void *data) 108 { 109 struct file *fp; 110 struct disk_ioc_recluster *recl; 111 int error; 112 113 switch(cmd) { 114 case DIOCRECLUSTER: 115 recl = data; 116 fp = holdfp(curproc->p_fd, recl->fd, -1); 117 if (fp) { 118 error = disk_iocom_reconnect(dp, fp); 119 } else { 120 error = EINVAL; 121 } 122 break; 123 default: 124 error = EOPNOTSUPP; 125 break; 126 } 127 return error; 128 } 129 130 static 131 int 132 disk_iocom_reconnect(struct disk *dp, struct file *fp) 133 { 134 char devname[64]; 135 136 ksnprintf(devname, sizeof(devname), "%s%d", 137 dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev)); 138 139 kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname); 140 141 dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER; 142 dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 143 dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK; 144 dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK; 145 dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1; 146 ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label, 147 sizeof(dp->d_iocom.auto_lnk_conn.cl_label), 148 "%s/%s", hostname, devname); 149 if (dp->d_info.d_serialno) { 150 ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label, 151 sizeof(dp->d_iocom.auto_lnk_conn.fs_label), 152 "%s", dp->d_info.d_serialno); 153 } 154 155 dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER; 156 dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1; 157 dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK; 158 dp->d_iocom.auto_lnk_span.media.block.bytes = 159 dp->d_info.d_media_size; 160 dp->d_iocom.auto_lnk_span.media.block.blksize = 161 dp->d_info.d_media_blksize; 162 ksnprintf(dp->d_iocom.auto_lnk_span.cl_label, 163 sizeof(dp->d_iocom.auto_lnk_span.cl_label), 164 "%s/%s", hostname, devname); 165 if (dp->d_info.d_serialno) { 166 ksnprintf(dp->d_iocom.auto_lnk_span.fs_label, 167 sizeof(dp->d_iocom.auto_lnk_span.fs_label), 168 "%s", dp->d_info.d_serialno); 169 } 170 171 kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL); 172 173 return (0); 174 } 175 176 int 177 disk_rcvdmsg(kdmsg_msg_t *msg) 178 { 179 struct disk *dp = msg->iocom->handle; 180 181 /* 182 * Handle debug messages (these might not be in transactions) 183 */ 184 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) { 185 case DMSG_DBG_SHELL: 186 /* 187 * Execute shell command (not supported atm) 188 */ 189 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 190 return(0); 191 case DMSG_DBG_SHELL | DMSGF_REPLY: 192 if (msg->aux_data) { 193 msg->aux_data[msg->aux_size - 1] = 0; 194 kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data); 195 } 196 return(0); 197 } 198 199 /* 200 * All remaining messages must be in a transaction 201 * 202 * NOTE! We are switching on the first message's command. The 203 * actual message command within the transaction may be 204 * different (if streaming within a transaction). 205 */ 206 if (msg->state == NULL) { 207 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 208 return(0); 209 } 210 211 switch(msg->state->rxcmd & DMSGF_CMDSWMASK) { 212 case DMSG_BLK_OPEN: 213 case DMSG_BLK_CLOSE: 214 disk_blk_open(dp, msg); 215 break; 216 case DMSG_BLK_READ: 217 disk_blk_read(dp, msg); 218 break; 219 case DMSG_BLK_WRITE: 220 disk_blk_write(dp, msg); 221 break; 222 case DMSG_BLK_FLUSH: 223 disk_blk_flush(dp, msg); 224 break; 225 case DMSG_BLK_FREEBLKS: 226 disk_blk_freeblks(dp, msg); 227 break; 228 default: 229 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) { 230 if (msg->any.head.cmd & DMSGF_DELETE) 231 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 232 else 233 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP); 234 } 235 break; 236 } 237 return (0); 238 } 239 240 static 241 void 242 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg) 243 { 244 struct dios_open *openst; 245 int error = DMSG_ERR_NOSUPP; 246 int fflags; 247 248 openst = msg->state->any.any; 249 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) { 250 if (openst == NULL) { 251 openst = kmalloc(sizeof(*openst), M_DEVBUF, 252 M_WAITOK | M_ZERO); 253 msg->state->any.any = openst; 254 } 255 fflags = 0; 256 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 257 fflags = FREAD; 258 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 259 fflags |= FWRITE; 260 error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred); 261 if (error) { 262 error = DMSG_ERR_IO; 263 } else { 264 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 265 ++openst->openrd; 266 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 267 ++openst->openwr; 268 } 269 } 270 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE && 271 openst) { 272 fflags = 0; 273 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) && 274 openst->openrd) { 275 fflags = FREAD; 276 } 277 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) && 278 openst->openwr) { 279 fflags |= FWRITE; 280 } 281 error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR); 282 if (error) { 283 error = DMSG_ERR_IO; 284 } else { 285 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 286 --openst->openrd; 287 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 288 --openst->openwr; 289 } 290 } 291 if (msg->any.head.cmd & DMSGF_DELETE) { 292 if (openst) { 293 while (openst->openrd && openst->openwr) { 294 --openst->openrd; 295 --openst->openwr; 296 dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR); 297 } 298 while (openst->openrd) { 299 --openst->openrd; 300 dev_dclose(dp->d_rawdev, FREAD, S_IFCHR); 301 } 302 while (openst->openwr) { 303 --openst->openwr; 304 dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR); 305 } 306 kfree(openst, M_DEVBUF); 307 msg->state->any.any = NULL; 308 } 309 kdmsg_msg_reply(msg, error); 310 } else { 311 kdmsg_msg_result(msg, error); 312 } 313 } 314 315 static 316 void 317 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg) 318 { 319 struct dios_io *iost; 320 struct buf *bp; 321 struct bio *bio; 322 int error = DMSG_ERR_NOSUPP; 323 int reterr = 1; 324 325 /* 326 * Only DMSG_BLK_READ commands imply read ops. 327 */ 328 iost = msg->state->any.any; 329 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) { 330 if (msg->any.blk_read.bytes < DEV_BSIZE || 331 msg->any.blk_read.bytes > MAXPHYS) { 332 error = DMSG_ERR_PARAM; 333 goto done; 334 } 335 if (iost == NULL) { 336 iost = kmalloc(sizeof(*iost), M_DEVBUF, 337 M_WAITOK | M_ZERO); 338 msg->state->any.any = iost; 339 } 340 reterr = 0; 341 bp = geteblk(msg->any.blk_read.bytes); 342 bio = &bp->b_bio1; 343 bp->b_cmd = BUF_CMD_READ; 344 bp->b_bcount = msg->any.blk_read.bytes; 345 bp->b_resid = bp->b_bcount; 346 bio->bio_offset = msg->any.blk_read.offset; 347 bio->bio_caller_info1.ptr = msg->state; 348 bio->bio_done = diskiodone; 349 /* kdmsg_state_hold(msg->state); */ 350 351 atomic_add_int(&iost->count, 1); 352 if (msg->any.head.cmd & DMSGF_DELETE) 353 iost->eof = 1; 354 BUF_KERNPROC(bp); 355 dev_dstrategy(dp->d_rawdev, bio); 356 } 357 done: 358 if (reterr) { 359 if (msg->any.head.cmd & DMSGF_DELETE) { 360 if (iost && iost->count == 0) { 361 kfree(iost, M_DEVBUF); 362 msg->state->any.any = NULL; 363 } 364 kdmsg_msg_reply(msg, error); 365 } else { 366 kdmsg_msg_result(msg, error); 367 } 368 } 369 } 370 371 static 372 void 373 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg) 374 { 375 struct dios_io *iost; 376 struct buf *bp; 377 struct bio *bio; 378 int error = DMSG_ERR_NOSUPP; 379 int reterr = 1; 380 381 /* 382 * Only DMSG_BLK_WRITE commands imply read ops. 383 */ 384 iost = msg->state->any.any; 385 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) { 386 if (msg->any.blk_write.bytes < DEV_BSIZE || 387 msg->any.blk_write.bytes > MAXPHYS) { 388 error = DMSG_ERR_PARAM; 389 goto done; 390 } 391 if (iost == NULL) { 392 iost = kmalloc(sizeof(*iost), M_DEVBUF, 393 M_WAITOK | M_ZERO); 394 msg->state->any.any = iost; 395 } 396 397 /* 398 * Issue WRITE. Short data implies zeros. Try to optimize 399 * the buffer cache buffer for the case where we can just 400 * use the message's data pointer. 401 */ 402 reterr = 0; 403 if (msg->aux_size >= msg->any.blk_write.bytes) 404 bp = getpbuf(NULL); 405 else 406 bp = geteblk(msg->any.blk_write.bytes); 407 bio = &bp->b_bio1; 408 bp->b_cmd = BUF_CMD_WRITE; 409 bp->b_bcount = msg->any.blk_write.bytes; 410 bp->b_resid = bp->b_bcount; 411 if (msg->aux_size >= msg->any.blk_write.bytes) { 412 bp->b_data = msg->aux_data; 413 } else { 414 bcopy(msg->aux_data, bp->b_data, msg->aux_size); 415 bzero(bp->b_data + msg->aux_size, 416 msg->any.blk_write.bytes - msg->aux_size); 417 } 418 bio->bio_offset = msg->any.blk_write.offset; 419 bio->bio_caller_info1.ptr = msg->state; 420 bio->bio_done = diskiodone; 421 /* kdmsg_state_hold(msg->state); */ 422 423 atomic_add_int(&iost->count, 1); 424 if (msg->any.head.cmd & DMSGF_DELETE) 425 iost->eof = 1; 426 BUF_KERNPROC(bp); 427 dev_dstrategy(dp->d_rawdev, bio); 428 } 429 done: 430 if (reterr) { 431 if (msg->any.head.cmd & DMSGF_DELETE) { 432 if (iost && iost->count == 0) { 433 kfree(iost, M_DEVBUF); 434 msg->state->any.any = NULL; 435 } 436 kdmsg_msg_reply(msg, error); 437 } else { 438 kdmsg_msg_result(msg, error); 439 } 440 } 441 } 442 443 static 444 void 445 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg) 446 { 447 struct dios_io *iost; 448 struct buf *bp; 449 struct bio *bio; 450 int error = DMSG_ERR_NOSUPP; 451 int reterr = 1; 452 453 /* 454 * Only DMSG_BLK_FLUSH commands imply read ops. 455 */ 456 iost = msg->state->any.any; 457 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) { 458 if (iost == NULL) { 459 iost = kmalloc(sizeof(*iost), M_DEVBUF, 460 M_WAITOK | M_ZERO); 461 msg->state->any.any = iost; 462 } 463 reterr = 0; 464 bp = getpbuf(NULL); 465 bio = &bp->b_bio1; 466 bp->b_cmd = BUF_CMD_FLUSH; 467 bp->b_bcount = msg->any.blk_flush.bytes; 468 bp->b_resid = 0; 469 bio->bio_offset = msg->any.blk_flush.offset; 470 bio->bio_caller_info1.ptr = msg->state; 471 bio->bio_done = diskiodone; 472 /* kdmsg_state_hold(msg->state); */ 473 474 atomic_add_int(&iost->count, 1); 475 if (msg->any.head.cmd & DMSGF_DELETE) 476 iost->eof = 1; 477 BUF_KERNPROC(bp); 478 dev_dstrategy(dp->d_rawdev, bio); 479 } 480 if (reterr) { 481 if (msg->any.head.cmd & DMSGF_DELETE) { 482 if (iost && iost->count == 0) { 483 kfree(iost, M_DEVBUF); 484 msg->state->any.any = NULL; 485 } 486 kdmsg_msg_reply(msg, error); 487 } else { 488 kdmsg_msg_result(msg, error); 489 } 490 } 491 } 492 493 static 494 void 495 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg) 496 { 497 struct dios_io *iost; 498 struct buf *bp; 499 struct bio *bio; 500 int error = DMSG_ERR_NOSUPP; 501 int reterr = 1; 502 503 /* 504 * Only DMSG_BLK_FREEBLKS commands imply read ops. 505 */ 506 iost = msg->state->any.any; 507 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) { 508 if (iost == NULL) { 509 iost = kmalloc(sizeof(*iost), M_DEVBUF, 510 M_WAITOK | M_ZERO); 511 msg->state->any.any = iost; 512 } 513 reterr = 0; 514 bp = getpbuf(NULL); 515 bio = &bp->b_bio1; 516 bp->b_cmd = BUF_CMD_FREEBLKS; 517 bp->b_bcount = msg->any.blk_freeblks.bytes; 518 bp->b_resid = 0; 519 bio->bio_offset = msg->any.blk_freeblks.offset; 520 bio->bio_caller_info1.ptr = msg->state; 521 bio->bio_done = diskiodone; 522 /* kdmsg_state_hold(msg->state); */ 523 524 atomic_add_int(&iost->count, 1); 525 if (msg->any.head.cmd & DMSGF_DELETE) 526 iost->eof = 1; 527 BUF_KERNPROC(bp); 528 dev_dstrategy(dp->d_rawdev, bio); 529 } 530 if (reterr) { 531 if (msg->any.head.cmd & DMSGF_DELETE) { 532 if (iost && iost->count == 0) { 533 kfree(iost, M_DEVBUF); 534 msg->state->any.any = NULL; 535 } 536 kdmsg_msg_reply(msg, error); 537 } else { 538 kdmsg_msg_result(msg, error); 539 } 540 } 541 } 542 543 static 544 void 545 diskiodone(struct bio *bio) 546 { 547 struct buf *bp = bio->bio_buf; 548 kdmsg_state_t *state = bio->bio_caller_info1.ptr; 549 kdmsg_msg_t *rmsg; 550 struct dios_io *iost = state->any.any; 551 int error; 552 int resid = 0; 553 int bytes; 554 uint32_t cmd; 555 void *data; 556 557 cmd = DMSG_LNK_ERROR; 558 data = NULL; 559 bytes = 0; 560 561 switch(bp->b_cmd) { 562 case BUF_CMD_READ: 563 cmd = DMSG_LNK_ERROR; 564 data = bp->b_data; 565 bytes = bp->b_bcount; 566 /* fall through */ 567 case BUF_CMD_WRITE: 568 if (bp->b_flags & B_ERROR) { 569 error = bp->b_error; 570 } else { 571 error = 0; 572 resid = bp->b_resid; 573 } 574 break; 575 case BUF_CMD_FLUSH: 576 case BUF_CMD_FREEBLKS: 577 if (bp->b_flags & B_ERROR) 578 error = bp->b_error; 579 else 580 error = 0; 581 break; 582 default: 583 panic("diskiodone: Unknown bio cmd = %d\n", 584 bio->bio_buf->b_cmd); 585 error = 0; /* avoid compiler warning */ 586 break; /* NOT REACHED */ 587 } 588 589 /* 590 * Convert error to DMSG_ERR_* code. 591 */ 592 if (error) 593 error = DMSG_ERR_IO; 594 595 /* 596 * Convert LNK_ERROR or BLK_ERROR if non-zero resid. READS will 597 * have already converted cmd to BLK_ERROR and set up data to return. 598 */ 599 if (resid && cmd == DMSG_LNK_ERROR) 600 cmd = DMSG_BLK_ERROR; 601 /* XXX txcmd is delayed so this won't work for streaming */ 602 if ((state->txcmd & DMSGF_CREATE) == 0) /* assume serialized */ 603 cmd |= DMSGF_CREATE; 604 if (iost->eof) { 605 if (atomic_fetchadd_int(&iost->count, -1) == 1) 606 cmd |= DMSGF_DELETE; 607 } else { 608 atomic_add_int(&iost->count, -1); 609 } 610 cmd |= DMSGF_REPLY; 611 612 /* 613 * Allocate a basic or extended reply. Be careful not to populate 614 * extended header fields unless we allocated an extended reply. 615 */ 616 rmsg = kdmsg_msg_alloc_state(state, cmd, NULL, 0); 617 if (data) { 618 rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT); 619 rmsg->aux_size = bytes; 620 rmsg->flags |= KDMSG_FLAG_AUXALLOC; 621 bcopy(data, rmsg->aux_data, bytes); 622 } 623 rmsg->any.blk_error.head.error = error; 624 if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR) 625 rmsg->any.blk_error.resid = resid; 626 bio->bio_caller_info1.ptr = NULL; 627 /* kdmsg_state_drop(state); */ 628 kdmsg_msg_write(rmsg); 629 if (bp->b_flags & B_PAGING) { 630 relpbuf(bio->bio_buf, NULL); 631 } else { 632 bp->b_flags |= B_INVAL | B_AGE; 633 brelse(bp); 634 } 635 } 636