/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <sys/dsched.h>
#include <sys/devfs.h>

#include <machine/stdarg.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

/*
 * system link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	struct syslink_desc DDESCNAME(name) = {				\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
		#name }

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(mmap_single);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);
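
/*
 * For reference, DEVOP_DESC_INIT(open) above expands to roughly the
 * following (illustrative sketch only, derived from the macro):
 *
 *	struct syslink_desc dev_open_desc = {
 *		__offsetof(struct dev_ops, d_open),
 *		"open"
 *	};
 *
 * Each descriptor thus records the offset of its function pointer within
 * struct dev_ops together with the operation name.
 */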

/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

static d_open_t		noopen;
static d_close_t	noclose;
static d_read_t		noread;
static d_write_t	nowrite;
static d_ioctl_t	noioctl;
static d_mmap_t		nommap;
static d_mmap_single_t	nommap_single;
static d_strategy_t	nostrategy;
static d_dump_t		nodump;
static d_psize_t	nopsize;
static d_kqfilter_t	nokqfilter;
static d_clone_t	noclone;
static d_revoke_t	norevoke;

struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_mmap_single = nommap_single,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};

static __inline
int
dev_needmplock(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred,
	  struct file *fp)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dclose(cdev_t dev, int fflag, int devtype, struct file *fp)
{
	struct dev_close_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_close(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dread(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_read_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_read(&ap);
	if (needmplock)
		rel_mplock();
	if (error == 0)
		dev->si_lastread = time_uptime;
	return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_write_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	dev->si_lastwrite = time_uptime;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_write(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
	   struct sysmsg *msg, struct file *fp)
{
	struct dev_ioctl_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	ap.a_sysmsg = msg;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_ioctl(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot, struct file *fp)
{
	struct dev_mmap_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(-1);
}

int
dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size,
		 struct vm_object **object, int nprot, struct file *fp)
{
	struct dev_mmap_single_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_single_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_size = size;
	ap.a_object = object;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap_single(&ap);
	if (needmplock)
		rel_mplock();

	return(error);
}

int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_clone(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_revoke(&ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;
	dsched_buf_enter(bio->bio_buf);	/* might stack */

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
	  size_t length)
{
	struct dev_dump_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	ap.a_virtual = virtual;
	ap.a_physical = physical;
	ap.a_offset = offset;
	ap.a_length = length;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_dump(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_psize(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return(-1);
}

/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *	 which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn, struct file *fp)
{
	struct dev_kqfilter_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	ap.a_result = 0;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_kqfilter(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(ENODEV);
}

/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/

/*
 * MPSAFE
 */
int
dev_drefs(cdev_t dev)
{
	return(dev->si_sysref.refcnt);
}

/*
 * MPSAFE
 */
const char *
dev_dname(cdev_t dev)
{
	return(dev->si_ops->head.name);
}

/*
 * MPSAFE
 */
int
dev_dflags(cdev_t dev)
{
	return(dev->si_ops->head.flags);
}

/*
 * MPSAFE
 */
int
dev_dmaj(cdev_t dev)
{
	return(dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = dev_needmplock(ap->a_dev);
	int error;

	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
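
/*
 * Illustrative sketch of the forwarding pattern described above, using a
 * hypothetical layering driver (all names below are made up for
 * illustration and are not part of this file):
 *
 *	static int
 *	mylayer_ioctl(struct dev_ioctl_args *ap)
 *	{
 *		... translate or filter the request ...
 *		ap->a_head.a_dev = mylayer_lower_dev;
 *		return (dev_doperate(&ap->a_head));
 *	}
 *
 * The descriptor already stored in a_head.a_desc selects the same
 * operation on the lower device's dev_ops.
 */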

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
	int error;

	func = *(void **)((char *)ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
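
/*
 * For illustration, a driver typically supplies only the operations it
 * implements and lets compile_dev_ops() fill in the rest from
 * default_dev_ops.  A hypothetical template (names invented for this
 * sketch, not part of this file) might look like:
 *
 *	static struct dev_ops mydev_ops = {
 *		{ "mydev" },
 *		.d_open =	mydev_open,
 *		.d_close =	mydev_close,
 *		.d_read =	mydev_read,
 *		.d_write =	mydev_write,
 *		.d_ioctl =	mydev_ioctl
 *	};
 *
 * Any slot left NULL is later pointed at d_default (if set) or at the
 * corresponding no*() stub from default_dev_ops.
 */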

/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */

static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}

struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}

/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to a read-only device).
 * XXX may belong elsewhere.
 */
static int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

static int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

static int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

static int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

static int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

static int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

static int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

static int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

static int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

static int
nommap_single(struct dev_mmap_single_args *ap)
{
	return (ENODEV);
}

static int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

static int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

static int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}