/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <sys/dsched.h>
#include <sys/devfs.h>
#include <sys/file.h>

#include <machine/stdarg.h>

#include <sys/mplock2.h>

/*
 * system link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name)	__CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
	    #name }
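/*
 * For example, DEVOP_DESC_INIT(open) expands to:
 *
 *	struct syslink_desc dev_open_desc = {
 *		__offsetof(struct dev_ops, d_open),
 *		"open" };
 */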
DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(mmap_single);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);

/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

static d_open_t		noopen;
static d_close_t	noclose;
static d_read_t		noread;
static d_write_t	nowrite;
static d_ioctl_t	noioctl;
static d_mmap_t		nommap;
static d_mmap_single_t	nommap_single;
static d_strategy_t	nostrategy;
static d_dump_t		nodump;
static d_psize_t	nopsize;
static d_kqfilter_t	nokqfilter;
static d_clone_t	noclone;
static d_revoke_t	norevoke;

struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_mmap_single = nommap_single,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};

static __inline
int
dev_needmplock(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}

static __inline
int
dev_nokvabio(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_KVABIO) == 0);
}

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred,
	  struct file **fpp, struct vnode *vp)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;
	ap.a_fpp = fpp;
	if (ap.a_fpp)
		(*ap.a_fpp)->f_data = vp;

	/*
	 * vref(vp) is being done in vop_stdopen()
	 *
	 * If a non-null vp is passed-in, the caller must also issue a
	 * vop_stdopen()
	 *
	 * NOTE: d_open() may replace *ap.a_fpp
	 */
	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dclose(cdev_t dev, int fflag, int devtype, struct file *fp)
{
	struct dev_close_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_close(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
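/*
 * Illustrative sketch (not part of this file): a driver whose ops set
 * D_MPSAFE in head.flags is dispatched without the get_mplock()/
 * rel_mplock() bracketing seen in the wrappers above.  The "mydev"
 * names below are hypothetical.
 */
#if 0
static d_open_t		mydev_open;	/* hypothetical driver methods */
static d_close_t	mydev_close;

static struct dev_ops mydev_ops = {
	{ "mydev", 0, D_MPSAFE },
	.d_open = mydev_open,
	.d_close = mydev_close
};
#endif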
int
dev_dread(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_read_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_read(&ap);
	if (needmplock)
		rel_mplock();
	if (error == 0)
		dev->si_lastread = time_uptime;
	return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_write_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	dev->si_lastwrite = time_uptime;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_write(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
	   struct sysmsg *msg, struct file *fp)
{
	struct dev_ioctl_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	ap.a_sysmsg = msg;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_ioctl(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot, struct file *fp)
{
	struct dev_mmap_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(-1);
}

int
dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size,
		 struct vm_object **object, int nprot, struct file *fp)
{
	struct dev_mmap_single_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_single_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_size = size;
	ap.a_object = object;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap_single(&ap);
	if (needmplock)
		rel_mplock();

	return(error);
}

int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_clone(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_revoke(&ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	struct buf *bp = bio->bio_buf;
	int needmplock = dev_needmplock(dev);

	/*
	 * If the device does not support KVABIO and the buffer is using
	 * KVABIO, we must synchronize b_data to all cpus before dispatching.
	 */
	if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO))
		bkvasync_all(bp);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bp->b_cmd != BUF_CMD_DONE);
	if (bp->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;
	dsched_buf_enter(bp);	/* might stack */

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct buf *bp = bio->bio_buf;
	int needmplock = dev_needmplock(dev);

	/*
	 * If the device does not support KVABIO and the buffer is using
	 * KVABIO, we must synchronize b_data to all cpus before dispatching.
	 */
	if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO))
		bkvasync_all(bp);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}
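/*
 * Illustrative sketch (hypothetical helper, modeled on the physio-style
 * synchronous pattern): issue a one-buffer read through dev_dstrategy()
 * and wait for completion.  Buffer acquisition, mapping, and error
 * handling details are elided; the caller supplies 'bp'.
 */
#if 0
static int
example_sync_read(cdev_t dev, struct buf *bp, off_t offset, int bcount)
{
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bcount = bcount;
	bp->b_bio1.bio_offset = offset;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	dev_dstrategy(dev, &bp->b_bio1);
	return (biowait(&bp->b_bio1, "exread"));
}
#endif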
/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
	  size_t length)
{
	struct dev_dump_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	ap.a_virtual = virtual;
	ap.a_physical = physical;
	ap.a_offset = offset;
	ap.a_length = length;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_dump(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_psize(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return(-1);
}
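/*
 * Illustrative use (hypothetical caller): dev_dpsize() returns the media
 * size reported by d_psize(), conventionally in DEV_BSIZE blocks, or -1
 * if the d_psize method returns an error.  Devices left with the default
 * nopsize() method report a size of 0.
 *
 *	int64_t blocks = dev_dpsize(dev);
 *	if (blocks > 0)
 *		bytes = (off_t)blocks * DEV_BSIZE;
 */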
/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *	 which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn, struct file *fp)
{
	struct dev_kqfilter_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	ap.a_result = 0;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_kqfilter(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(ENODEV);
}

/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/

/*
 * MPSAFE
 */
int
dev_drefs(cdev_t dev)
{
	return(dev->si_sysref.refcnt);
}

/*
 * MPSAFE
 */
const char *
dev_dname(cdev_t dev)
{
	return(dev->si_ops->head.name);
}

/*
 * MPSAFE
 */
int
dev_dflags(cdev_t dev)
{
	return(dev->si_ops->head.flags);
}

/*
 * MPSAFE
 */
int
dev_dmaj(cdev_t dev)
{
	return(dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = dev_needmplock(ap->a_dev);
	int error;

	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
	int error;

	func = *(void **)((char *)ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
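/*
 * Illustrative sketch (hypothetical template): after compile_dev_ops()
 * runs, every NULL method slot points at the template's d_default if one
 * was given, otherwise at the corresponding default_dev_ops entry.
 */
#if 0
static d_open_t example_open;		/* hypothetical driver method */

static struct dev_ops example_ops = {
	{ "example" },
	.d_open = example_open		/* only d_open supplied */
};

/*
 * After compile_dev_ops(&example_ops), example_ops.d_read == noread,
 * example_ops.d_write == nowrite, etc., since no d_default was given.
 */
#endif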
/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g.
 * 'ad0' to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should
 * never have any bits set in the major number's bit range (8-15).  The
 * mask value may be conveniently specified as -1 without creating any
 * major number interference.
 */

static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}

struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}
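/*
 * Illustrative sketch (hypothetical caller, console-intercept style):
 * dev_ops_intercept() swaps in a foreign ops vector and returns the
 * original, which must later be handed back to dev_ops_restore().
 */
#if 0
static void
example_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops;

	oops = dev_ops_intercept(dev, iops);
	/* operations on dev now route through iops */
	dev_ops_restore(dev, oops);
}
#endif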
/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to a read-only device).
 * XXX may belong elsewhere.
 */
static int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

static int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

static int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

static int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

static int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

static int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

static int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

static int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

static int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

static int
nommap_single(struct dev_mmap_single_args *ap)
{
	return (ENODEV);
}

static int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

static int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

static int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}
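/*
 * Illustrative sketch (hypothetical driver): a device that needs no
 * open/close processing can point at nullopen/nullclose so those calls
 * simply succeed, instead of inheriting the noopen/noclose defaults,
 * which fail with ENODEV.
 */
#if 0
static d_read_t trivial_read;		/* hypothetical driver method */

static struct dev_ops trivial_ops = {
	{ "trivial" },
	.d_open = nullopen,
	.d_close = nullclose,
	.d_read = trivial_read
};
#endif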