/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *        The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <sys/dsched.h>
#include <sys/devfs.h>

#include <machine/stdarg.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

/*
 * System link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)                                           \
        struct syslink_desc DDESCNAME(name) = {                         \
                __offsetof(struct dev_ops, __CONCAT(d_, name)),         \
                #name }
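
/*
 * For reference, DEVOP_DESC_INIT(open) expands to roughly:
 *
 *        struct syslink_desc dev_open_desc = {
 *                __offsetof(struct dev_ops, d_open), "open" };
 *
 * i.e. each descriptor records the byte offset of its method within
 * struct dev_ops plus the method name; dev_doperate() below uses
 * sd_offset to pull the matching function pointer out of dev->si_ops.
 */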

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(mmap_single);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);

/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

static d_open_t noopen;
static d_close_t noclose;
static d_read_t noread;
static d_write_t nowrite;
static d_ioctl_t noioctl;
static d_mmap_t nommap;
static d_mmap_single_t nommap_single;
static d_strategy_t nostrategy;
static d_dump_t nodump;
static d_psize_t nopsize;
static d_kqfilter_t nokqfilter;
static d_clone_t noclone;
static d_revoke_t norevoke;

struct dev_ops default_dev_ops = {
        { "null" },
        .d_default = NULL,      /* must be NULL */
        .d_open = noopen,
        .d_close = noclose,
        .d_read = noread,
        .d_write = nowrite,
        .d_ioctl = noioctl,
        .d_mmap = nommap,
        .d_mmap_single = nommap_single,
        .d_strategy = nostrategy,
        .d_dump = nodump,
        .d_psize = nopsize,
        .d_kqfilter = nokqfilter,
        .d_revoke = norevoke,
        .d_clone = noclone
};

static __inline
int
dev_needmplock(cdev_t dev)
{
        return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}

static __inline
int
dev_nokvabio(cdev_t dev)
{
        return((dev->si_ops->head.flags & D_KVABIO) == 0);
}

/************************************************************************
 *                  GENERAL DEVICE API FUNCTIONS                        *
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
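
/*
 * Hypothetical driver template for illustration (this "foo" driver is
 * not part of this file): setting D_MPSAFE in head.flags makes
 * dev_needmplock() return false, so the wrappers below dispatch the
 * driver's methods without taking the MP lock:
 *
 *        static struct dev_ops foo_ops = {
 *                { "foo", 0, D_MPSAFE },
 *                .d_open = foo_open,
 *                .d_read = foo_read,
 *                .d_write = foo_write,
 *        };
 *
 * Fields left NULL are filled in from d_default or default_dev_ops by
 * compile_dev_ops() below.
 */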

int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred,
          struct file *fp)
{
        struct dev_open_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_open_desc;
        ap.a_head.a_dev = dev;
        ap.a_oflags = oflags;
        ap.a_devtype = devtype;
        ap.a_cred = cred;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_open(&ap);
        if (needmplock)
                rel_mplock();
        return (error);
}

int
dev_dclose(cdev_t dev, int fflag, int devtype, struct file *fp)
{
        struct dev_close_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_close_desc;
        ap.a_head.a_dev = dev;
        ap.a_fflag = fflag;
        ap.a_devtype = devtype;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_close(&ap);
        if (needmplock)
                rel_mplock();
        return (error);
}

int
dev_dread(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
        struct dev_read_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_read_desc;
        ap.a_head.a_dev = dev;
        ap.a_uio = uio;
        ap.a_ioflag = ioflag;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_read(&ap);
        if (needmplock)
                rel_mplock();
        if (error == 0)
                dev->si_lastread = time_uptime;
        return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
        struct dev_write_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        dev->si_lastwrite = time_uptime;
        ap.a_head.a_desc = &dev_write_desc;
        ap.a_head.a_dev = dev;
        ap.a_uio = uio;
        ap.a_ioflag = ioflag;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_write(&ap);
        if (needmplock)
                rel_mplock();
        return (error);
}

int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
           struct sysmsg *msg, struct file *fp)
{
        struct dev_ioctl_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_ioctl_desc;
        ap.a_head.a_dev = dev;
        ap.a_cmd = cmd;
        ap.a_data = data;
        ap.a_fflag = fflag;
        ap.a_cred = cred;
        ap.a_sysmsg = msg;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_ioctl(&ap);
        if (needmplock)
                rel_mplock();
        return (error);
}

int64_t
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot, struct file *fp)
{
        struct dev_mmap_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_mmap_desc;
        ap.a_head.a_dev = dev;
        ap.a_offset = offset;
        ap.a_nprot = nprot;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_mmap(&ap);
        if (needmplock)
                rel_mplock();

        if (error == 0)
                return(ap.a_result);
        return(-1);
}

int
dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size,
                 struct vm_object **object, int nprot, struct file *fp)
{
        struct dev_mmap_single_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_mmap_single_desc;
        ap.a_head.a_dev = dev;
        ap.a_offset = offset;
        ap.a_size = size;
        ap.a_object = object;
        ap.a_nprot = nprot;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_mmap_single(&ap);
        if (needmplock)
                rel_mplock();

        return(error);
}

int
dev_dclone(cdev_t dev)
{
        struct dev_clone_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_clone_desc;
        ap.a_head.a_dev = dev;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_clone(&ap);
        if (needmplock)
                rel_mplock();
        return (error);
}

int
dev_drevoke(cdev_t dev)
{
        struct dev_revoke_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_revoke_desc;
        ap.a_head.a_dev = dev;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_revoke(&ap);
        if (needmplock)
                rel_mplock();

        return (error);
}
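
/*
 * Receiving-side sketch (hypothetical "foo" driver, not part of this
 * file): each driver method gets the args structure marshalled by the
 * wrappers above and simply unpacks it:
 *
 *        static int
 *        foo_ioctl(struct dev_ioctl_args *ap)
 *        {
 *                cdev_t dev = ap->a_head.a_dev;
 *
 *                switch (ap->a_cmd) {
 *                ...
 *                }
 *                return (EINVAL);
 *        }
 */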

/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
        struct dev_strategy_args ap;
        struct bio_track *track;
        struct buf *bp = bio->bio_buf;
        int needmplock = dev_needmplock(dev);

        /*
         * If the device does not support KVABIO and the buffer is using
         * KVABIO, we must synchronize b_data to all cpus before dispatching.
         */
        if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO))
                bkvasync_all(bp);

        ap.a_head.a_desc = &dev_strategy_desc;
        ap.a_head.a_dev = dev;
        ap.a_bio = bio;

        KKASSERT(bio->bio_track == NULL);
        KKASSERT(bp->b_cmd != BUF_CMD_DONE);
        if (bp->b_cmd == BUF_CMD_READ)
                track = &dev->si_track_read;
        else
                track = &dev->si_track_write;
        bio_track_ref(track);
        bio->bio_track = track;
        dsched_buf_enter(bp);   /* might stack */

        KKASSERT((bio->bio_flags & BIO_DONE) == 0);
        if (needmplock)
                get_mplock();
        (void)dev->si_ops->d_strategy(&ap);
        if (needmplock)
                rel_mplock();
}

void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
        struct dev_strategy_args ap;
        struct buf *bp = bio->bio_buf;
        int needmplock = dev_needmplock(dev);

        /*
         * If the device does not support KVABIO and the buffer is using
         * KVABIO, we must synchronize b_data to all cpus before dispatching.
         */
        if (dev_nokvabio(dev) && (bp->b_flags & B_KVABIO))
                bkvasync_all(bp);

        ap.a_head.a_desc = &dev_strategy_desc;
        ap.a_head.a_dev = dev;
        ap.a_bio = bio;

        KKASSERT(bio->bio_track != NULL);
        KKASSERT((bio->bio_flags & BIO_DONE) == 0);
        if (needmplock)
                get_mplock();
        (void)dev->si_ops->d_strategy(&ap);
        if (needmplock)
                rel_mplock();
}
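
/*
 * Caller-pattern sketch (matching the comment above): fresh I/O issued
 * directly against a cdev goes through dev_dstrategy(), which installs
 * the device's read or write bio_track itself.  A layer forwarding a
 * BIO whose track was already set up (e.g. by vn_strategy()) uses
 * dev_dstrategy_chain() so the existing track is reused.
 */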

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
          size_t length)
{
        struct dev_dump_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_dump_desc;
        ap.a_head.a_dev = dev;
        ap.a_count = 0;
        ap.a_blkno = 0;
        ap.a_secsize = 0;
        ap.a_virtual = virtual;
        ap.a_physical = physical;
        ap.a_offset = offset;
        ap.a_length = length;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_dump(&ap);
        if (needmplock)
                rel_mplock();
        return (error);
}

int64_t
dev_dpsize(cdev_t dev)
{
        struct dev_psize_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_psize_desc;
        ap.a_head.a_dev = dev;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_psize(&ap);
        if (needmplock)
                rel_mplock();

        if (error == 0)
                return (ap.a_result);
        return(-1);
}

/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *       which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn, struct file *fp)
{
        struct dev_kqfilter_args ap;
        int needmplock = dev_needmplock(dev);
        int error;

        ap.a_head.a_desc = &dev_kqfilter_desc;
        ap.a_head.a_dev = dev;
        ap.a_kn = kn;
        ap.a_result = 0;
        ap.a_fp = fp;

        if (needmplock)
                get_mplock();
        error = dev->si_ops->d_kqfilter(&ap);
        if (needmplock)
                rel_mplock();

        if (error == 0)
                return(ap.a_result);
        return(ENODEV);
}

/************************************************************************
 *                      DEVICE HELPER FUNCTIONS                         *
 ************************************************************************/

/*
 * MPSAFE
 */
int
dev_drefs(cdev_t dev)
{
        return(dev->si_sysref.refcnt);
}

/*
 * MPSAFE
 */
const char *
dev_dname(cdev_t dev)
{
        return(dev->si_ops->head.name);
}

/*
 * MPSAFE
 */
int
dev_dflags(cdev_t dev)
{
        return(dev->si_ops->head.flags);
}

/*
 * MPSAFE
 */
int
dev_dmaj(cdev_t dev)
{
        return(dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
        int (*func)(struct dev_generic_args *);
        int needmplock = dev_needmplock(ap->a_dev);
        int error;

        func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

        if (needmplock)
                get_mplock();
        error = func(ap);
        if (needmplock)
                rel_mplock();

        return (error);
}

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
        int (*func)(struct dev_generic_args *);
        int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
        int error;

        func = *(void **)((char *)ops + ap->a_desc->sd_offset);

        if (needmplock)
                get_mplock();
        error = func(ap);
        if (needmplock)
                rel_mplock();

        return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
        int offset;

        for (offset = offsetof(struct dev_ops, dev_ops_first_field);
             offset <= offsetof(struct dev_ops, dev_ops_last_field);
             offset += sizeof(void *)
        ) {
                void **func_p = (void **)((char *)ops + offset);
                void **def_p = (void **)((char *)&default_dev_ops + offset);
                if (*func_p == NULL) {
                        if (ops->d_default)
                                *func_p = ops->d_default;
                        else
                                *func_p = *def_p;
                }
        }
}
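
/*
 * Illustrative template (hypothetical "bar" driver): only the methods a
 * driver implements need to be listed; compile_dev_ops() fills each
 * remaining NULL slot from d_default, or from default_dev_ops when
 * d_default is NULL:
 *
 *        static struct dev_ops bar_ops = {
 *                { "bar" },
 *                .d_open = bar_open,
 *                .d_read = bar_read,
 *        };
 *
 *        compile_dev_ops(&bar_ops);
 *        // now bar_ops.d_write == nowrite, bar_ops.d_ioctl == noioctl, ...
 *
 * compile_dev_ops() is also run by dev_ops_intercept() below before an
 * intercept ops structure is installed.
 */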

/************************************************************************
 *                      MAJOR/MINOR SPACE FUNCTIONS                     *
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */

static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
        if (a->maj < b->maj)
                return(-1);
        else if (a->maj > b->maj)
                return(1);
        return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_remove_all(struct dev_ops *ops)
{
        return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
        return devfs_destroy_dev_by_ops(ops, minor);
}

struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
        struct dev_ops *oops = dev->si_ops;

        compile_dev_ops(iops);
        iops->head.maj = oops->head.maj;
        iops->head.data = oops->head.data;
        iops->head.flags = oops->head.flags;
        dev->si_ops = iops;
        dev->si_flags |= SI_INTERCEPTED;

        return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
        struct dev_ops *iops = dev->si_ops;

        dev->si_ops = oops;
        dev->si_flags &= ~SI_INTERCEPTED;
        iops->head.maj = 0;
        iops->head.data = NULL;
        iops->head.flags = 0;
}
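
/*
 * Pairing sketch for the console intercept (names hypothetical): the
 * caller saves the returned ops pointer and hands it back later:
 *
 *        struct dev_ops *saved;
 *
 *        saved = dev_ops_intercept(dev, &intercept_ops);
 *        ...
 *        dev_ops_restore(dev, saved);
 *
 * While installed, intercept_ops inherits maj/data/flags from the
 * original ops; dev_ops_restore() clears those fields again.
 */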

/************************************************************************
 *                      DEFAULT DEV OPS FUNCTIONS                       *
 ************************************************************************/


/*
 * Unsupported devswitch functions (e.g. for writing to a read-only device).
 * XXX may belong elsewhere.
 */
static int
norevoke(struct dev_revoke_args *ap)
{
        /* take no action */
        return(0);
}

static int
noclone(struct dev_clone_args *ap)
{
        /* take no action */
        return (0);     /* allow the clone */
}

static int
noopen(struct dev_open_args *ap)
{
        return (ENODEV);
}

static int
noclose(struct dev_close_args *ap)
{
        return (ENODEV);
}

static int
noread(struct dev_read_args *ap)
{
        return (ENODEV);
}

static int
nowrite(struct dev_write_args *ap)
{
        return (ENODEV);
}

static int
noioctl(struct dev_ioctl_args *ap)
{
        return (ENODEV);
}

static int
nokqfilter(struct dev_kqfilter_args *ap)
{
        return (ENODEV);
}

static int
nommap(struct dev_mmap_args *ap)
{
        return (ENODEV);
}

static int
nommap_single(struct dev_mmap_single_args *ap)
{
        return (ENODEV);
}

static int
nostrategy(struct dev_strategy_args *ap)
{
        struct bio *bio = ap->a_bio;

        bio->bio_buf->b_flags |= B_ERROR;
        bio->bio_buf->b_error = EOPNOTSUPP;
        biodone(bio);
        return(0);
}

static int
nopsize(struct dev_psize_args *ap)
{
        ap->a_result = 0;
        return(0);
}

static int
nodump(struct dev_dump_args *ap)
{
        return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
        return (0);
}

int
nullclose(struct dev_close_args *ap)
{
        return (0);
}