1 /* 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/proc.h> 38 #include <sys/sysctl.h> 39 #include <sys/buf.h> 40 #include <sys/conf.h> 41 #include <sys/disklabel.h> 42 #include <sys/disklabel32.h> 43 #include <sys/disklabel64.h> 44 #include <sys/diskslice.h> 45 #include <sys/diskmbr.h> 46 #include <sys/disk.h> 47 #include <sys/malloc.h> 48 #include <sys/device.h> 49 #include <sys/devfs.h> 50 #include <sys/thread.h> 51 #include <sys/queue.h> 52 #include <sys/lock.h> 53 #include <sys/stat.h> 54 #include <sys/uuid.h> 55 #include <sys/dmsg.h> 56 57 #include <sys/buf2.h> 58 #include <sys/mplock2.h> 59 #include <sys/msgport2.h> 60 #include <sys/thread2.h> 61 62 struct dios_open { 63 int openrd; 64 int openwr; 65 }; 66 67 struct dios_io { 68 int count; 69 int eof; 70 }; 71 72 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg"); 73 74 static int disk_iocom_reconnect(struct disk *dp, struct file *fp); 75 static int disk_rcvdmsg(kdmsg_msg_t *msg); 76 77 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg); 78 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg); 79 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg); 80 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg); 81 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg); 82 static void diskiodone(struct bio *bio); 83 84 void 85 disk_iocom_init(struct disk *dp) 86 { 87 kdmsg_iocom_init(&dp->d_iocom, dp, 88 KDMSG_IOCOMF_AUTOCONN | 89 KDMSG_IOCOMF_AUTORXSPAN | 90 KDMSG_IOCOMF_AUTOTXSPAN, 91 M_DMSG_DISK, disk_rcvdmsg); 92 } 93 94 void 95 disk_iocom_update(struct disk *dp) 96 { 97 } 98 99 void 100 disk_iocom_uninit(struct disk *dp) 101 { 102 kdmsg_iocom_uninit(&dp->d_iocom); 103 } 104 105 int 106 disk_iocom_ioctl(struct disk *dp, int cmd, void *data) 107 { 108 struct file *fp; 109 struct disk_ioc_recluster *recl; 110 int error; 111 112 switch(cmd) { 113 case DIOCRECLUSTER: 114 recl = data; 115 fp = holdfp(curproc->p_fd, recl->fd, -1); 116 if (fp) { 117 error = disk_iocom_reconnect(dp, fp); 118 } else { 119 error = EINVAL; 120 } 121 break; 122 default: 123 error = EOPNOTSUPP; 124 break; 125 } 126 return error; 127 } 128 129 static 130 int 131 disk_iocom_reconnect(struct disk *dp, struct file *fp) 132 { 133 char devname[64]; 134 135 ksnprintf(devname, sizeof(devname), "%s%d", 136 dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev)); 137 138 kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname); 139 140 dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER; 141 dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 142 dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK; 143 dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK; 144 dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1; 145 ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label, 146 sizeof(dp->d_iocom.auto_lnk_conn.cl_label), 147 "%s/%s", hostname, devname); 148 if (dp->d_info.d_serialno) { 149 ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label, 150 sizeof(dp->d_iocom.auto_lnk_conn.fs_label), 151 "%s", dp->d_info.d_serialno); 152 } 153 154 dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER; 155 dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1; 156 dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK; 157 dp->d_iocom.auto_lnk_span.media.block.bytes = 158 dp->d_info.d_media_size; 159 dp->d_iocom.auto_lnk_span.media.block.blksize = 160 dp->d_info.d_media_blksize; 161 ksnprintf(dp->d_iocom.auto_lnk_span.cl_label, 162 sizeof(dp->d_iocom.auto_lnk_span.cl_label), 163 "%s/%s", hostname, devname); 164 if (dp->d_info.d_serialno) { 165 ksnprintf(dp->d_iocom.auto_lnk_span.fs_label, 166 sizeof(dp->d_iocom.auto_lnk_span.fs_label), 167 "%s", dp->d_info.d_serialno); 168 } 169 170 kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL); 171 172 return (0); 173 } 174 175 int 176 disk_rcvdmsg(kdmsg_msg_t *msg) 177 { 178 struct disk *dp = msg->state->iocom->handle; 179 180 /* 181 * Handle debug messages (these might not be in transactions) 182 */ 183 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) { 184 case DMSG_DBG_SHELL: 185 /* 186 * Execute shell command (not supported atm) 187 */ 188 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 189 return(0); 190 case DMSG_DBG_SHELL | DMSGF_REPLY: 191 if (msg->aux_data) { 192 msg->aux_data[msg->aux_size - 1] = 0; 193 kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data); 194 } 195 return(0); 196 } 197 198 /* 199 * All remaining messages must be in a transaction 200 * 201 * NOTE! We are switching on the first message's command. The 202 * actual message command within the transaction may be 203 * different (if streaming within a transaction). 204 */ 205 if (msg->state == &msg->state->iocom->state0) { 206 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 207 return(0); 208 } 209 210 switch(msg->state->rxcmd & DMSGF_CMDSWMASK) { 211 case DMSG_BLK_OPEN: 212 disk_blk_open(dp, msg); 213 break; 214 case DMSG_BLK_READ: 215 disk_blk_read(dp, msg); 216 break; 217 case DMSG_BLK_WRITE: 218 disk_blk_write(dp, msg); 219 break; 220 case DMSG_BLK_FLUSH: 221 disk_blk_flush(dp, msg); 222 break; 223 case DMSG_BLK_FREEBLKS: 224 disk_blk_freeblks(dp, msg); 225 break; 226 default: 227 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) { 228 if (msg->any.head.cmd & DMSGF_DELETE) 229 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 230 else 231 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP); 232 } 233 break; 234 } 235 return (0); 236 } 237 238 static 239 void 240 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg) 241 { 242 struct dios_open *openst; 243 int error = DMSG_ERR_NOSUPP; 244 int fflags; 245 246 openst = msg->state->any.any; 247 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) { 248 if (openst == NULL) { 249 openst = kmalloc(sizeof(*openst), M_DEVBUF, 250 M_WAITOK | M_ZERO); 251 msg->state->any.any = openst; 252 } 253 fflags = 0; 254 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 255 fflags = FREAD; 256 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 257 fflags |= FWRITE; 258 error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL); 259 if (error) { 260 error = DMSG_ERR_IO; 261 } else { 262 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 263 ++openst->openrd; 264 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 265 ++openst->openwr; 266 } 267 } 268 #if 0 269 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE && 270 openst) { 271 fflags = 0; 272 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) && 273 openst->openrd) { 274 fflags = FREAD; 275 } 276 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) && 277 openst->openwr) { 278 fflags |= FWRITE; 279 } 280 error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL); 281 if (error) { 282 error = DMSG_ERR_IO; 283 } else { 284 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 285 --openst->openrd; 286 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 287 --openst->openwr; 288 } 289 } 290 #endif 291 if (msg->any.head.cmd & DMSGF_DELETE) { 292 if (openst) { 293 while (openst->openrd && openst->openwr) { 294 --openst->openrd; 295 --openst->openwr; 296 dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL); 297 } 298 while (openst->openrd) { 299 --openst->openrd; 300 dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL); 301 } 302 while (openst->openwr) { 303 --openst->openwr; 304 dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL); 305 } 306 kfree(openst, M_DEVBUF); 307 msg->state->any.any = NULL; 308 } 309 kdmsg_msg_reply(msg, error); 310 } else { 311 kdmsg_msg_result(msg, error); 312 } 313 } 314 315 static 316 void 317 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg) 318 { 319 struct dios_io *iost; 320 struct buf *bp; 321 struct bio *bio; 322 int error = DMSG_ERR_NOSUPP; 323 int reterr = 1; 324 325 /* 326 * Only DMSG_BLK_READ commands imply read ops. 327 */ 328 iost = msg->state->any.any; 329 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) { 330 if (msg->any.blk_read.bytes < DEV_BSIZE || 331 msg->any.blk_read.bytes > MAXPHYS) { 332 error = DMSG_ERR_PARAM; 333 goto done; 334 } 335 if (iost == NULL) { 336 iost = kmalloc(sizeof(*iost), M_DEVBUF, 337 M_WAITOK | M_ZERO); 338 msg->state->any.any = iost; 339 } 340 reterr = 0; 341 bp = geteblk(msg->any.blk_read.bytes); 342 bio = &bp->b_bio1; 343 bp->b_cmd = BUF_CMD_READ; 344 bp->b_bcount = msg->any.blk_read.bytes; 345 bp->b_resid = bp->b_bcount; 346 bio->bio_offset = msg->any.blk_read.offset; 347 bio->bio_caller_info1.ptr = msg->state; 348 bio->bio_done = diskiodone; 349 /* kdmsg_state_hold(msg->state); */ 350 351 atomic_add_int(&iost->count, 1); 352 if (msg->any.head.cmd & DMSGF_DELETE) 353 iost->eof = 1; 354 BUF_KERNPROC(bp); 355 dev_dstrategy(dp->d_rawdev, bio); 356 } 357 done: 358 if (reterr) { 359 if (msg->any.head.cmd & DMSGF_DELETE) { 360 if (iost && iost->count == 0) { 361 kfree(iost, M_DEVBUF); 362 msg->state->any.any = NULL; 363 } 364 kdmsg_msg_reply(msg, error); 365 } else { 366 kdmsg_msg_result(msg, error); 367 } 368 } 369 } 370 371 static 372 void 373 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg) 374 { 375 struct dios_io *iost; 376 struct buf *bp; 377 struct bio *bio; 378 int error = DMSG_ERR_NOSUPP; 379 int reterr = 1; 380 381 /* 382 * Only DMSG_BLK_WRITE commands imply read ops. 383 */ 384 iost = msg->state->any.any; 385 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) { 386 if (msg->any.blk_write.bytes < DEV_BSIZE || 387 msg->any.blk_write.bytes > MAXPHYS) { 388 error = DMSG_ERR_PARAM; 389 goto done; 390 } 391 if (iost == NULL) { 392 iost = kmalloc(sizeof(*iost), M_DEVBUF, 393 M_WAITOK | M_ZERO); 394 msg->state->any.any = iost; 395 } 396 397 /* 398 * Issue WRITE. Short data implies zeros. Try to optimize 399 * the buffer cache buffer for the case where we can just 400 * use the message's data pointer. 401 */ 402 reterr = 0; 403 if (msg->aux_size >= msg->any.blk_write.bytes) 404 bp = getpbuf(NULL); 405 else 406 bp = geteblk(msg->any.blk_write.bytes); 407 bio = &bp->b_bio1; 408 bp->b_cmd = BUF_CMD_WRITE; 409 bp->b_bcount = msg->any.blk_write.bytes; 410 bp->b_resid = bp->b_bcount; 411 if (msg->aux_size >= msg->any.blk_write.bytes) { 412 bp->b_data = msg->aux_data; 413 } else { 414 bcopy(msg->aux_data, bp->b_data, msg->aux_size); 415 bzero(bp->b_data + msg->aux_size, 416 msg->any.blk_write.bytes - msg->aux_size); 417 } 418 bio->bio_offset = msg->any.blk_write.offset; 419 bio->bio_caller_info1.ptr = msg->state; 420 bio->bio_done = diskiodone; 421 /* kdmsg_state_hold(msg->state); */ 422 423 atomic_add_int(&iost->count, 1); 424 if (msg->any.head.cmd & DMSGF_DELETE) 425 iost->eof = 1; 426 BUF_KERNPROC(bp); 427 dev_dstrategy(dp->d_rawdev, bio); 428 } 429 done: 430 if (reterr) { 431 if (msg->any.head.cmd & DMSGF_DELETE) { 432 if (iost && iost->count == 0) { 433 kfree(iost, M_DEVBUF); 434 msg->state->any.any = NULL; 435 } 436 kdmsg_msg_reply(msg, error); 437 } else { 438 kdmsg_msg_result(msg, error); 439 } 440 } 441 } 442 443 static 444 void 445 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg) 446 { 447 struct dios_io *iost; 448 struct buf *bp; 449 struct bio *bio; 450 int error = DMSG_ERR_NOSUPP; 451 int reterr = 1; 452 453 /* 454 * Only DMSG_BLK_FLUSH commands imply read ops. 455 */ 456 iost = msg->state->any.any; 457 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) { 458 if (iost == NULL) { 459 iost = kmalloc(sizeof(*iost), M_DEVBUF, 460 M_WAITOK | M_ZERO); 461 msg->state->any.any = iost; 462 } 463 reterr = 0; 464 bp = getpbuf(NULL); 465 bio = &bp->b_bio1; 466 bp->b_cmd = BUF_CMD_FLUSH; 467 bp->b_bcount = msg->any.blk_flush.bytes; 468 bp->b_resid = 0; 469 bio->bio_offset = msg->any.blk_flush.offset; 470 bio->bio_caller_info1.ptr = msg->state; 471 bio->bio_done = diskiodone; 472 /* kdmsg_state_hold(msg->state); */ 473 474 atomic_add_int(&iost->count, 1); 475 if (msg->any.head.cmd & DMSGF_DELETE) 476 iost->eof = 1; 477 BUF_KERNPROC(bp); 478 dev_dstrategy(dp->d_rawdev, bio); 479 } 480 if (reterr) { 481 if (msg->any.head.cmd & DMSGF_DELETE) { 482 if (iost && iost->count == 0) { 483 kfree(iost, M_DEVBUF); 484 msg->state->any.any = NULL; 485 } 486 kdmsg_msg_reply(msg, error); 487 } else { 488 kdmsg_msg_result(msg, error); 489 } 490 } 491 } 492 493 static 494 void 495 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg) 496 { 497 struct dios_io *iost; 498 struct buf *bp; 499 struct bio *bio; 500 int error = DMSG_ERR_NOSUPP; 501 int reterr = 1; 502 503 /* 504 * Only DMSG_BLK_FREEBLKS commands imply read ops. 505 */ 506 iost = msg->state->any.any; 507 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) { 508 if (iost == NULL) { 509 iost = kmalloc(sizeof(*iost), M_DEVBUF, 510 M_WAITOK | M_ZERO); 511 msg->state->any.any = iost; 512 } 513 reterr = 0; 514 bp = getpbuf(NULL); 515 bio = &bp->b_bio1; 516 bp->b_cmd = BUF_CMD_FREEBLKS; 517 bp->b_bcount = msg->any.blk_freeblks.bytes; 518 bp->b_resid = 0; 519 bio->bio_offset = msg->any.blk_freeblks.offset; 520 bio->bio_caller_info1.ptr = msg->state; 521 bio->bio_done = diskiodone; 522 /* kdmsg_state_hold(msg->state); */ 523 524 atomic_add_int(&iost->count, 1); 525 if (msg->any.head.cmd & DMSGF_DELETE) 526 iost->eof = 1; 527 BUF_KERNPROC(bp); 528 dev_dstrategy(dp->d_rawdev, bio); 529 } 530 if (reterr) { 531 if (msg->any.head.cmd & DMSGF_DELETE) { 532 if (iost && iost->count == 0) { 533 kfree(iost, M_DEVBUF); 534 msg->state->any.any = NULL; 535 } 536 kdmsg_msg_reply(msg, error); 537 } else { 538 kdmsg_msg_result(msg, error); 539 } 540 } 541 } 542 543 static 544 void 545 diskiodone(struct bio *bio) 546 { 547 struct buf *bp = bio->bio_buf; 548 kdmsg_state_t *state = bio->bio_caller_info1.ptr; 549 kdmsg_msg_t *rmsg; 550 struct dios_io *iost = state->any.any; 551 int error; 552 int resid = 0; 553 int bytes; 554 uint32_t cmd; 555 void *data; 556 557 cmd = DMSG_LNK_ERROR; 558 data = NULL; 559 bytes = 0; 560 561 switch(bp->b_cmd) { 562 case BUF_CMD_READ: 563 cmd = DMSG_LNK_ERROR; 564 data = bp->b_data; 565 bytes = bp->b_bcount; 566 /* fall through */ 567 case BUF_CMD_WRITE: 568 if (bp->b_flags & B_ERROR) { 569 error = bp->b_error; 570 } else { 571 error = 0; 572 resid = bp->b_resid; 573 } 574 break; 575 case BUF_CMD_FLUSH: 576 case BUF_CMD_FREEBLKS: 577 if (bp->b_flags & B_ERROR) 578 error = bp->b_error; 579 else 580 error = 0; 581 break; 582 default: 583 panic("diskiodone: Unknown bio cmd = %d\n", 584 bio->bio_buf->b_cmd); 585 error = 0; /* avoid compiler warning */ 586 break; /* NOT REACHED */ 587 } 588 589 /* 590 * Convert error to DMSG_ERR_* code. 591 */ 592 if (error) 593 error = DMSG_ERR_IO; 594 595 /* 596 * Convert LNK_ERROR or BLK_ERROR if non-zero resid. READS will 597 * have already converted cmd to BLK_ERROR and set up data to return. 598 */ 599 if (resid && cmd == DMSG_LNK_ERROR) 600 cmd = DMSG_BLK_ERROR; 601 /* XXX txcmd is delayed so this won't work for streaming */ 602 if ((state->txcmd & DMSGF_CREATE) == 0) /* assume serialized */ 603 cmd |= DMSGF_CREATE; 604 if (iost->eof) { 605 if (atomic_fetchadd_int(&iost->count, -1) == 1) 606 cmd |= DMSGF_DELETE; 607 } else { 608 atomic_add_int(&iost->count, -1); 609 } 610 cmd |= DMSGF_REPLY; 611 612 /* 613 * Allocate a basic or extended reply. Be careful not to populate 614 * extended header fields unless we allocated an extended reply. 615 */ 616 rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0); 617 if (data) { 618 rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT); 619 rmsg->aux_size = bytes; 620 rmsg->flags |= KDMSG_FLAG_AUXALLOC; 621 bcopy(data, rmsg->aux_data, bytes); 622 } 623 rmsg->any.blk_error.head.error = error; 624 if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR) 625 rmsg->any.blk_error.resid = resid; 626 bio->bio_caller_info1.ptr = NULL; 627 /* kdmsg_state_drop(state); */ 628 kdmsg_msg_write(rmsg); 629 if (bp->b_flags & B_PAGING) { 630 relpbuf(bio->bio_buf, NULL); 631 } else { 632 bp->b_flags |= B_INVAL | B_AGE; 633 brelse(bp); 634 } 635 } 636