/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/kernel.h> 34 #include <sys/sysctl.h> 35 #include <sys/module.h> 36 #include <sys/malloc.h> 37 #include <sys/conf.h> 38 #include <sys/bio.h> 39 #include <sys/buf.h> 40 #include <sys/vnode.h> 41 #include <sys/queue.h> 42 #include <sys/device.h> 43 #include <sys/tree.h> 44 #include <sys/syslink_rpc.h> 45 #include <sys/proc.h> 46 #include <sys/dsched.h> 47 #include <sys/devfs.h> 48 49 #include <machine/stdarg.h> 50 51 #include <sys/mplock2.h> 52 53 /* 54 * system link descriptors identify the command in the 55 * arguments structure. 56 */ 57 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc) 58 59 #define DEVOP_DESC_INIT(name) \ 60 struct syslink_desc DDESCNAME(name) = { \ 61 __offsetof(struct dev_ops, __CONCAT(d_, name)), \ 62 #name } 63 64 DEVOP_DESC_INIT(default); 65 DEVOP_DESC_INIT(open); 66 DEVOP_DESC_INIT(close); 67 DEVOP_DESC_INIT(read); 68 DEVOP_DESC_INIT(write); 69 DEVOP_DESC_INIT(ioctl); 70 DEVOP_DESC_INIT(dump); 71 DEVOP_DESC_INIT(psize); 72 DEVOP_DESC_INIT(mmap); 73 DEVOP_DESC_INIT(mmap_single); 74 DEVOP_DESC_INIT(strategy); 75 DEVOP_DESC_INIT(kqfilter); 76 DEVOP_DESC_INIT(revoke); 77 DEVOP_DESC_INIT(clone); 78 79 /* 80 * Misc default ops 81 */ 82 struct dev_ops dead_dev_ops; 83 84 static d_open_t noopen; 85 static d_close_t noclose; 86 static d_read_t noread; 87 static d_write_t nowrite; 88 static d_ioctl_t noioctl; 89 static d_mmap_t nommap; 90 static d_mmap_single_t nommap_single; 91 static d_strategy_t nostrategy; 92 static d_dump_t nodump; 93 static d_psize_t nopsize; 94 static d_kqfilter_t nokqfilter; 95 static d_clone_t noclone; 96 static d_revoke_t norevoke; 97 98 struct dev_ops default_dev_ops = { 99 { "null" }, 100 .d_default = NULL, /* must be NULL */ 101 .d_open = noopen, 102 .d_close = noclose, 103 .d_read = noread, 104 .d_write = nowrite, 105 .d_ioctl = noioctl, 106 .d_mmap = nommap, 107 .d_mmap_single = nommap_single, 108 .d_strategy = nostrategy, 109 
.d_dump = nodump, 110 .d_psize = nopsize, 111 .d_kqfilter = nokqfilter, 112 .d_revoke = norevoke, 113 .d_clone = noclone 114 }; 115 116 static __inline 117 int 118 dev_needmplock(cdev_t dev) 119 { 120 return((dev->si_ops->head.flags & D_MPSAFE) == 0); 121 } 122 123 static __inline 124 int 125 dev_nokvabio(cdev_t dev) 126 { 127 return((dev->si_ops->head.flags & D_KVABIO) == 0); 128 } 129 130 /************************************************************************ 131 * GENERAL DEVICE API FUNCTIONS * 132 ************************************************************************ 133 * 134 * The MPSAFEness of these depends on dev->si_ops->head.flags 135 */ 136 int 137 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred, 138 struct file *fp) 139 { 140 struct dev_open_args ap; 141 int needmplock = dev_needmplock(dev); 142 int error; 143 144 ap.a_head.a_desc = &dev_open_desc; 145 ap.a_head.a_dev = dev; 146 ap.a_oflags = oflags; 147 ap.a_devtype = devtype; 148 ap.a_cred = cred; 149 ap.a_fp = fp; 150 151 if (needmplock) 152 get_mplock(); 153 error = dev->si_ops->d_open(&ap); 154 if (needmplock) 155 rel_mplock(); 156 return (error); 157 } 158 159 int 160 dev_dclose(cdev_t dev, int fflag, int devtype, struct file *fp) 161 { 162 struct dev_close_args ap; 163 int needmplock = dev_needmplock(dev); 164 int error; 165 166 ap.a_head.a_desc = &dev_close_desc; 167 ap.a_head.a_dev = dev; 168 ap.a_fflag = fflag; 169 ap.a_devtype = devtype; 170 ap.a_fp = fp; 171 172 if (needmplock) 173 get_mplock(); 174 error = dev->si_ops->d_close(&ap); 175 if (needmplock) 176 rel_mplock(); 177 return (error); 178 } 179 180 int 181 dev_dread(cdev_t dev, struct uio *uio, int ioflag, struct file *fp) 182 { 183 struct dev_read_args ap; 184 int needmplock = dev_needmplock(dev); 185 int error; 186 187 ap.a_head.a_desc = &dev_read_desc; 188 ap.a_head.a_dev = dev; 189 ap.a_uio = uio; 190 ap.a_ioflag = ioflag; 191 ap.a_fp = fp; 192 193 if (needmplock) 194 get_mplock(); 195 error = 
dev->si_ops->d_read(&ap); 196 if (needmplock) 197 rel_mplock(); 198 if (error == 0) 199 dev->si_lastread = time_uptime; 200 return (error); 201 } 202 203 int 204 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag, struct file *fp) 205 { 206 struct dev_write_args ap; 207 int needmplock = dev_needmplock(dev); 208 int error; 209 210 dev->si_lastwrite = time_uptime; 211 ap.a_head.a_desc = &dev_write_desc; 212 ap.a_head.a_dev = dev; 213 ap.a_uio = uio; 214 ap.a_ioflag = ioflag; 215 ap.a_fp = fp; 216 217 if (needmplock) 218 get_mplock(); 219 error = dev->si_ops->d_write(&ap); 220 if (needmplock) 221 rel_mplock(); 222 return (error); 223 } 224 225 int 226 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred, 227 struct sysmsg *msg, struct file *fp) 228 { 229 struct dev_ioctl_args ap; 230 int needmplock = dev_needmplock(dev); 231 int error; 232 233 ap.a_head.a_desc = &dev_ioctl_desc; 234 ap.a_head.a_dev = dev; 235 ap.a_cmd = cmd; 236 ap.a_data = data; 237 ap.a_fflag = fflag; 238 ap.a_cred = cred; 239 ap.a_sysmsg = msg; 240 ap.a_fp = fp; 241 242 if (needmplock) 243 get_mplock(); 244 error = dev->si_ops->d_ioctl(&ap); 245 if (needmplock) 246 rel_mplock(); 247 return (error); 248 } 249 250 int64_t 251 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot, struct file *fp) 252 { 253 struct dev_mmap_args ap; 254 int needmplock = dev_needmplock(dev); 255 int error; 256 257 ap.a_head.a_desc = &dev_mmap_desc; 258 ap.a_head.a_dev = dev; 259 ap.a_offset = offset; 260 ap.a_nprot = nprot; 261 ap.a_fp = fp; 262 263 if (needmplock) 264 get_mplock(); 265 error = dev->si_ops->d_mmap(&ap); 266 if (needmplock) 267 rel_mplock(); 268 269 if (error == 0) 270 return(ap.a_result); 271 return(-1); 272 } 273 274 int 275 dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size, 276 struct vm_object **object, int nprot, struct file *fp) 277 { 278 struct dev_mmap_single_args ap; 279 int needmplock = dev_needmplock(dev); 280 int error; 281 282 ap.a_head.a_desc = 
&dev_mmap_single_desc; 283 ap.a_head.a_dev = dev; 284 ap.a_offset = offset; 285 ap.a_size = size; 286 ap.a_object = object; 287 ap.a_nprot = nprot; 288 ap.a_fp = fp; 289 290 if (needmplock) 291 get_mplock(); 292 error = dev->si_ops->d_mmap_single(&ap); 293 if (needmplock) 294 rel_mplock(); 295 296 return(error); 297 } 298 299 int 300 dev_dclone(cdev_t dev) 301 { 302 struct dev_clone_args ap; 303 int needmplock = dev_needmplock(dev); 304 int error; 305 306 ap.a_head.a_desc = &dev_clone_desc; 307 ap.a_head.a_dev = dev; 308 309 if (needmplock) 310 get_mplock(); 311 error = dev->si_ops->d_clone(&ap); 312 if (needmplock) 313 rel_mplock(); 314 return (error); 315 } 316 317 int 318 dev_drevoke(cdev_t dev) 319 { 320 struct dev_revoke_args ap; 321 int needmplock = dev_needmplock(dev); 322 int error; 323 324 ap.a_head.a_desc = &dev_revoke_desc; 325 ap.a_head.a_dev = dev; 326 327 if (needmplock) 328 get_mplock(); 329 error = dev->si_ops->d_revoke(&ap); 330 if (needmplock) 331 rel_mplock(); 332 333 return (error); 334 } 335 336 /* 337 * Core device strategy call, used to issue I/O on a device. There are 338 * two versions, a non-chained version and a chained version. The chained 339 * version reuses a BIO set up by vn_strategy(). The only difference is 340 * that, for now, we do not push a new tracking structure when chaining 341 * from vn_strategy. XXX this will ultimately have to change. 342 */ 343 void 344 dev_dstrategy(cdev_t dev, struct bio *bio) 345 { 346 struct dev_strategy_args ap; 347 struct bio_track *track; 348 struct buf *bp = bio->bio_buf; 349 int needmplock = dev_needmplock(dev); 350 351 /* 352 * If the device doe snot support KVABIO and the buffer is using 353 * KVABIO, we must synchronize b_data to all cpus before dispatching. 
354 */ 355 if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO)) 356 bkvasync_all(bp); 357 358 ap.a_head.a_desc = &dev_strategy_desc; 359 ap.a_head.a_dev = dev; 360 ap.a_bio = bio; 361 362 KKASSERT(bio->bio_track == NULL); 363 KKASSERT(bp->b_cmd != BUF_CMD_DONE); 364 if (bp->b_cmd == BUF_CMD_READ) 365 track = &dev->si_track_read; 366 else 367 track = &dev->si_track_write; 368 bio_track_ref(track); 369 bio->bio_track = track; 370 dsched_buf_enter(bp); /* might stack */ 371 372 KKASSERT((bio->bio_flags & BIO_DONE) == 0); 373 if (needmplock) 374 get_mplock(); 375 (void)dev->si_ops->d_strategy(&ap); 376 if (needmplock) 377 rel_mplock(); 378 } 379 380 void 381 dev_dstrategy_chain(cdev_t dev, struct bio *bio) 382 { 383 struct dev_strategy_args ap; 384 struct buf *bp = bio->bio_buf; 385 int needmplock = dev_needmplock(dev); 386 387 /* 388 * If the device doe snot support KVABIO and the buffer is using 389 * KVABIO, we must synchronize b_data to all cpus before dispatching. 390 */ 391 if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO)) 392 bkvasync_all(bp); 393 394 ap.a_head.a_desc = &dev_strategy_desc; 395 ap.a_head.a_dev = dev; 396 ap.a_bio = bio; 397 398 KKASSERT(bio->bio_track != NULL); 399 KKASSERT((bio->bio_flags & BIO_DONE) == 0); 400 if (needmplock) 401 get_mplock(); 402 (void)dev->si_ops->d_strategy(&ap); 403 if (needmplock) 404 rel_mplock(); 405 } 406 407 /* 408 * note: the disk layer is expected to set count, blkno, and secsize before 409 * forwarding the message. 
410 */ 411 int 412 dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset, 413 size_t length) 414 { 415 struct dev_dump_args ap; 416 int needmplock = dev_needmplock(dev); 417 int error; 418 419 ap.a_head.a_desc = &dev_dump_desc; 420 ap.a_head.a_dev = dev; 421 ap.a_count = 0; 422 ap.a_blkno = 0; 423 ap.a_secsize = 0; 424 ap.a_virtual = virtual; 425 ap.a_physical = physical; 426 ap.a_offset = offset; 427 ap.a_length = length; 428 429 if (needmplock) 430 get_mplock(); 431 error = dev->si_ops->d_dump(&ap); 432 if (needmplock) 433 rel_mplock(); 434 return (error); 435 } 436 437 int64_t 438 dev_dpsize(cdev_t dev) 439 { 440 struct dev_psize_args ap; 441 int needmplock = dev_needmplock(dev); 442 int error; 443 444 ap.a_head.a_desc = &dev_psize_desc; 445 ap.a_head.a_dev = dev; 446 447 if (needmplock) 448 get_mplock(); 449 error = dev->si_ops->d_psize(&ap); 450 if (needmplock) 451 rel_mplock(); 452 453 if (error == 0) 454 return (ap.a_result); 455 return(-1); 456 } 457 458 /* 459 * Pass-thru to the device kqfilter. 460 * 461 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions 462 * which return 0 do not have to bother setting a_result. 
463 */ 464 int 465 dev_dkqfilter(cdev_t dev, struct knote *kn, struct file *fp) 466 { 467 struct dev_kqfilter_args ap; 468 int needmplock = dev_needmplock(dev); 469 int error; 470 471 ap.a_head.a_desc = &dev_kqfilter_desc; 472 ap.a_head.a_dev = dev; 473 ap.a_kn = kn; 474 ap.a_result = 0; 475 ap.a_fp = fp; 476 477 if (needmplock) 478 get_mplock(); 479 error = dev->si_ops->d_kqfilter(&ap); 480 if (needmplock) 481 rel_mplock(); 482 483 if (error == 0) 484 return(ap.a_result); 485 return(ENODEV); 486 } 487 488 /************************************************************************ 489 * DEVICE HELPER FUNCTIONS * 490 ************************************************************************/ 491 492 /* 493 * MPSAFE 494 */ 495 int 496 dev_drefs(cdev_t dev) 497 { 498 return(dev->si_sysref.refcnt); 499 } 500 501 /* 502 * MPSAFE 503 */ 504 const char * 505 dev_dname(cdev_t dev) 506 { 507 return(dev->si_ops->head.name); 508 } 509 510 /* 511 * MPSAFE 512 */ 513 int 514 dev_dflags(cdev_t dev) 515 { 516 return(dev->si_ops->head.flags); 517 } 518 519 /* 520 * MPSAFE 521 */ 522 int 523 dev_dmaj(cdev_t dev) 524 { 525 return(dev->si_ops->head.maj); 526 } 527 528 /* 529 * Used when forwarding a request through layers. The caller adjusts 530 * ap->a_head.a_dev and then calls this function. 531 */ 532 int 533 dev_doperate(struct dev_generic_args *ap) 534 { 535 int (*func)(struct dev_generic_args *); 536 int needmplock = dev_needmplock(ap->a_dev); 537 int error; 538 539 func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset); 540 541 if (needmplock) 542 get_mplock(); 543 error = func(ap); 544 if (needmplock) 545 rel_mplock(); 546 547 return (error); 548 } 549 550 /* 551 * Used by the console intercept code only. Issue an operation through 552 * a foreign ops structure allowing the ops structure associated 553 * with the device to remain intact. 
554 */ 555 int 556 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap) 557 { 558 int (*func)(struct dev_generic_args *); 559 int needmplock = ((ops->head.flags & D_MPSAFE) == 0); 560 int error; 561 562 func = *(void **)((char *)ops + ap->a_desc->sd_offset); 563 564 if (needmplock) 565 get_mplock(); 566 error = func(ap); 567 if (needmplock) 568 rel_mplock(); 569 570 return (error); 571 } 572 573 /* 574 * Convert a template dev_ops into the real thing by filling in 575 * uninitialized fields. 576 */ 577 void 578 compile_dev_ops(struct dev_ops *ops) 579 { 580 int offset; 581 582 for (offset = offsetof(struct dev_ops, dev_ops_first_field); 583 offset <= offsetof(struct dev_ops, dev_ops_last_field); 584 offset += sizeof(void *) 585 ) { 586 void **func_p = (void **)((char *)ops + offset); 587 void **def_p = (void **)((char *)&default_dev_ops + offset); 588 if (*func_p == NULL) { 589 if (ops->d_default) 590 *func_p = ops->d_default; 591 else 592 *func_p = *def_p; 593 } 594 } 595 } 596 597 /************************************************************************ 598 * MAJOR/MINOR SPACE FUNCTION * 599 ************************************************************************/ 600 601 /* 602 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>). 603 * 604 * Disk devices typically register their major, e.g. 'ad0', and then call 605 * into the disk label management code which overloads its own onto e.g. 'ad0' 606 * to support all the various slice and partition combinations. 607 * 608 * The mask/match supplied in this call are a full 32 bits and the same 609 * mask and match must be specified in a later dev_ops_remove() call to 610 * match this add. However, the match value for the minor number should never 611 * have any bits set in the major number's bit range (8-15). The mask value 612 * may be conveniently specified as -1 without creating any major number 613 * interference. 
614 */ 615 616 static 617 int 618 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b) 619 { 620 if (a->maj < b->maj) 621 return(-1); 622 else if (a->maj > b->maj) 623 return(1); 624 return(0); 625 } 626 627 RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj); 628 629 struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead); 630 631 int 632 dev_ops_remove_all(struct dev_ops *ops) 633 { 634 return devfs_destroy_dev_by_ops(ops, -1); 635 } 636 637 int 638 dev_ops_remove_minor(struct dev_ops *ops, int minor) 639 { 640 return devfs_destroy_dev_by_ops(ops, minor); 641 } 642 643 struct dev_ops * 644 dev_ops_intercept(cdev_t dev, struct dev_ops *iops) 645 { 646 struct dev_ops *oops = dev->si_ops; 647 648 compile_dev_ops(iops); 649 iops->head.maj = oops->head.maj; 650 iops->head.data = oops->head.data; 651 iops->head.flags = oops->head.flags; 652 dev->si_ops = iops; 653 dev->si_flags |= SI_INTERCEPTED; 654 655 return (oops); 656 } 657 658 void 659 dev_ops_restore(cdev_t dev, struct dev_ops *oops) 660 { 661 struct dev_ops *iops = dev->si_ops; 662 663 dev->si_ops = oops; 664 dev->si_flags &= ~SI_INTERCEPTED; 665 iops->head.maj = 0; 666 iops->head.data = NULL; 667 iops->head.flags = 0; 668 } 669 670 /************************************************************************ 671 * DEFAULT DEV OPS FUNCTIONS * 672 ************************************************************************/ 673 674 675 /* 676 * Unsupported devswitch functions (e.g. for writing to read-only device). 677 * XXX may belong elsewhere. 
678 */ 679 static int 680 norevoke(struct dev_revoke_args *ap) 681 { 682 /* take no action */ 683 return(0); 684 } 685 686 static int 687 noclone(struct dev_clone_args *ap) 688 { 689 /* take no action */ 690 return (0); /* allow the clone */ 691 } 692 693 static int 694 noopen(struct dev_open_args *ap) 695 { 696 return (ENODEV); 697 } 698 699 static int 700 noclose(struct dev_close_args *ap) 701 { 702 return (ENODEV); 703 } 704 705 static int 706 noread(struct dev_read_args *ap) 707 { 708 return (ENODEV); 709 } 710 711 static int 712 nowrite(struct dev_write_args *ap) 713 { 714 return (ENODEV); 715 } 716 717 static int 718 noioctl(struct dev_ioctl_args *ap) 719 { 720 return (ENODEV); 721 } 722 723 static int 724 nokqfilter(struct dev_kqfilter_args *ap) 725 { 726 return (ENODEV); 727 } 728 729 static int 730 nommap(struct dev_mmap_args *ap) 731 { 732 return (ENODEV); 733 } 734 735 static int 736 nommap_single(struct dev_mmap_single_args *ap) 737 { 738 return (ENODEV); 739 } 740 741 static int 742 nostrategy(struct dev_strategy_args *ap) 743 { 744 struct bio *bio = ap->a_bio; 745 746 bio->bio_buf->b_flags |= B_ERROR; 747 bio->bio_buf->b_error = EOPNOTSUPP; 748 biodone(bio); 749 return(0); 750 } 751 752 static int 753 nopsize(struct dev_psize_args *ap) 754 { 755 ap->a_result = 0; 756 return(0); 757 } 758 759 static int 760 nodump(struct dev_dump_args *ap) 761 { 762 return (ENODEV); 763 } 764 765 /* 766 * XXX this is probably bogus. Any device that uses it isn't checking the 767 * minor number. 768 */ 769 int 770 nullopen(struct dev_open_args *ap) 771 { 772 return (0); 773 } 774 775 int 776 nullclose(struct dev_close_args *ap) 777 { 778 return (0); 779 } 780 781