/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <sys/dsched.h>
#include <sys/devfs.h>
#include <sys/file.h>

#include <machine/stdarg.h>

#include <sys/mplock2.h>
/*
 * System link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name)	__CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
	    #name }

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(mmap_single);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);
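/*
 * For illustration only (a sketch, not compiled here): the macro above
 * ties each descriptor to the byte offset of the matching vector inside
 * struct dev_ops, which is what lets dev_doperate() resolve an operation
 * generically.  DEVOP_DESC_INIT(open) expands to roughly:
 *
 *	struct syslink_desc dev_open_desc = {
 *		__offsetof(struct dev_ops, d_open),
 *		"open"
 *	};
 */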
/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

static d_open_t		noopen;
static d_close_t	noclose;
static d_read_t		noread;
static d_write_t	nowrite;
static d_ioctl_t	noioctl;
static d_mmap_t		nommap;
static d_mmap_single_t	nommap_single;
static d_strategy_t	nostrategy;
static d_dump_t		nodump;
static d_psize_t	nopsize;
static d_kqfilter_t	nokqfilter;
static d_clone_t	noclone;
static d_revoke_t	norevoke;

struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_mmap_single = nommap_single,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};

static __inline
int
dev_needmplock(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}

static __inline
int
dev_nokvabio(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_KVABIO) == 0);
}

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred,
	  struct file *fp, struct vnode *vp)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;
	ap.a_fp = fp;
	if (ap.a_fp)
		ap.a_fp->f_data = vp;
	/*
	 * vref(vp) is done in vop_stdopen().  If a non-NULL vp is
	 * passed in, the caller must also issue a vop_stdopen().
	 */

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dclose(cdev_t dev, int fflag, int devtype, struct file *fp)
{
	struct dev_close_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_close(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dread(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_read_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_read(&ap);
	if (needmplock)
		rel_mplock();
	if (error == 0)
		dev->si_lastread = time_uptime;
	return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_write_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	dev->si_lastwrite = time_uptime;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_write(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
	   struct sysmsg *msg, struct file *fp)
{
	struct dev_ioctl_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	ap.a_sysmsg = msg;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_ioctl(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot, struct file *fp)
{
	struct dev_mmap_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(-1);
}

int
dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size,
		 struct vm_object **object, int nprot, struct file *fp)
{
	struct dev_mmap_single_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_single_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_size = size;
	ap.a_object = object;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap_single(&ap);
	if (needmplock)
		rel_mplock();

	return(error);
}

int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_clone(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_revoke(&ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
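/*
 * For illustration only (a hypothetical "mydev" driver, not part of this
 * file): a driver that is fully MP-safe sets D_MPSAFE in its head flags,
 * which makes dev_needmplock() return 0 so the wrappers above dispatch
 * without the get_mplock()/rel_mplock() bracketing.  Vectors left NULL
 * are filled in from default_dev_ops by compile_dev_ops() below.
 *
 *	static d_open_t		mydev_open;
 *	static d_read_t		mydev_read;
 *	static d_write_t	mydev_write;
 *
 *	static struct dev_ops mydev_ops = {
 *		{ "mydev", 0, D_MPSAFE },
 *		.d_open = mydev_open,
 *		.d_read = mydev_read,
 *		.d_write = mydev_write
 *	};
 */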
/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	struct buf *bp = bio->bio_buf;
	int needmplock = dev_needmplock(dev);

	/*
	 * If the device does not support KVABIO and the buffer is using
	 * KVABIO, we must synchronize b_data to all CPUs before dispatching.
	 */
	if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO))
		bkvasync_all(bp);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bp->b_cmd != BUF_CMD_DONE);
	if (bp->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;
	dsched_buf_enter(bp);	/* might stack */

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct buf *bp = bio->bio_buf;
	int needmplock = dev_needmplock(dev);

	/*
	 * If the device does not support KVABIO and the buffer is using
	 * KVABIO, we must synchronize b_data to all CPUs before dispatching.
	 */
	if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO))
		bkvasync_all(bp);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

/*
 * NOTE: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
	  size_t length)
{
	struct dev_dump_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	ap.a_virtual = virtual;
	ap.a_physical = physical;
	ap.a_offset = offset;
	ap.a_length = length;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_dump(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_psize(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return(-1);
}
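/*
 * For illustration only (a hypothetical stacking driver, not part of
 * this file): code layered above another device typically pushes a new
 * BIO onto the chain and issues it through the non-chained entry point
 * above, which installs fresh tracking on the lower device:
 *
 *	static void
 *	mylayer_start(cdev_t lower_dev, struct bio *obio)
 *	{
 *		struct bio *nbio;
 *
 *		nbio = push_bio(obio);
 *		nbio->bio_offset = obio->bio_offset;
 *		nbio->bio_done = mylayer_done;
 *		dev_dstrategy(lower_dev, nbio);
 *	}
 */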
/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *	 which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn, struct file *fp)
{
	struct dev_kqfilter_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	ap.a_result = 0;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_kqfilter(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(ENODEV);
}

/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/

/*
 * MPSAFE
 */
int
dev_drefs(cdev_t dev)
{
	return(dev->si_sysref.refcnt);
}

/*
 * MPSAFE
 */
const char *
dev_dname(cdev_t dev)
{
	return(dev->si_ops->head.name);
}

/*
 * MPSAFE
 */
int
dev_dflags(cdev_t dev)
{
	return(dev->si_ops->head.flags);
}

/*
 * MPSAFE
 */
int
dev_dmaj(cdev_t dev)
{
	return(dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = dev_needmplock(ap->a_dev);
	int error;

	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
	int error;

	func = *(void **)((char *)ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
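/*
 * For illustration only (a hypothetical pass-through layer, not part of
 * this file): a layered vector can forward its request to an underlying
 * device by retargeting the generic head and calling dev_doperate(),
 * which re-resolves the vector in the lower ops via the descriptor's
 * sd_offset.  mylayer_lower_dev() is assumed to return the stacked-on
 * device:
 *
 *	static int
 *	mylayer_read(struct dev_read_args *ap)
 *	{
 *		ap->a_head.a_dev = mylayer_lower_dev(ap->a_head.a_dev);
 *		return (dev_doperate(&ap->a_head));
 *	}
 */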
/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTIONS			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g.
 * 'ad0' to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should
 * never have any bits set in the major number's bit range (8-15).  The
 * mask value may be conveniently specified as -1 without creating any
 * major number interference.
 */

static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}

struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}

/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */
static int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

static int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

static int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

static int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

static int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

static int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

static int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

static int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

static int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

static int
nommap_single(struct dev_mmap_single_args *ap)
{
	return (ENODEV);
}

static int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

static int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

static int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}
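/*
 * For illustration only (a hypothetical console grab, not part of this
 * file): an intercept temporarily swaps a foreign ops vector onto a
 * device.  dev_ops_intercept() copies the major, data, and flags so
 * MP-lock behavior and lookups stay consistent, and returns the old ops
 * for the matching dev_ops_restore():
 *
 *	static struct dev_ops grab_ops = {
 *		{ "grab" },
 *		.d_write = grab_write
 *	};
 *	struct dev_ops *saved_ops;
 *
 *	saved_ops = dev_ops_intercept(dev, &grab_ops);
 *	(issue redirected operations here)
 *	dev_ops_restore(dev, saved_ops);
 */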
/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}