1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert 4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer, 5 * All rights reserved. 6 * Copyright (c) 1982, 1986, 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 * 30 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $ 31 */ 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/sysctl.h> 36 #include <sys/systm.h> 37 #include <sys/module.h> 38 #include <sys/malloc.h> 39 #include <sys/conf.h> 40 #include <sys/bio.h> 41 #include <sys/buf.h> 42 #include <sys/vnode.h> 43 #include <sys/queue.h> 44 #include <sys/device.h> 45 #include <sys/tree.h> 46 #include <sys/syslink_rpc.h> 47 #include <sys/proc.h> 48 #include <sys/sysctl.h> 49 #include <machine/stdarg.h> 50 #include <sys/devfs.h> 51 #include <sys/dsched.h> 52 53 #include <sys/thread2.h> 54 #include <sys/mplock2.h> 55 56 static int mpsafe_writes; 57 static int mplock_writes; 58 static int mpsafe_reads; 59 static int mplock_reads; 60 static int mpsafe_strategies; 61 static int mplock_strategies; 62 63 SYSCTL_INT(_kern, OID_AUTO, mpsafe_writes, CTLFLAG_RD, &mpsafe_writes, 64 0, "mpsafe writes"); 65 SYSCTL_INT(_kern, OID_AUTO, mplock_writes, CTLFLAG_RD, &mplock_writes, 66 0, "non-mpsafe writes"); 67 SYSCTL_INT(_kern, OID_AUTO, mpsafe_reads, CTLFLAG_RD, &mpsafe_reads, 68 0, "mpsafe reads"); 69 SYSCTL_INT(_kern, OID_AUTO, mplock_reads, CTLFLAG_RD, &mplock_reads, 70 0, "non-mpsafe reads"); 71 SYSCTL_INT(_kern, OID_AUTO, mpsafe_strategies, CTLFLAG_RD, &mpsafe_strategies, 72 0, "mpsafe strategies"); 73 SYSCTL_INT(_kern, OID_AUTO, mplock_strategies, CTLFLAG_RD, &mplock_strategies, 74 0, "non-mpsafe strategies"); 75 76 /* 77 * system link descriptors identify the command in the 78 * arguments structure. 
 */

/* Expand to the descriptor identifier dev_<name>_desc. */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

/*
 * Define the syslink descriptor for device op <name>: the byte offset of
 * the d_<name> function pointer within struct dev_ops plus the op's name
 * string.  dev_doperate() dispatches through sd_offset.
 */
#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),	\
	    #name }

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);

/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

/*
 * Template of fallback operations.  compile_dev_ops() copies these entries
 * into any NULL slot of a driver's dev_ops (unless d_default is set).
 */
struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};

/*
 * Return non-zero when the device's ops are not flagged D_MPSAFE, i.e.
 * calls into them must be made while holding the MP lock.
 */
static __inline
int
dev_needmplock(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */

/*
 * Dispatch the device's d_open op, taking the MP lock around the call
 * when the driver is not MPSAFE.  Returns the driver's error code.
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

/*
 * Dispatch the device's d_close op (MP lock handling as above).
 */
int
dev_dclose(cdev_t dev, int fflag, int devtype)
{
	struct
dev_close_args ap; 161 int needmplock = dev_needmplock(dev); 162 int error; 163 164 ap.a_head.a_desc = &dev_close_desc; 165 ap.a_head.a_dev = dev; 166 ap.a_fflag = fflag; 167 ap.a_devtype = devtype; 168 169 if (needmplock) 170 get_mplock(); 171 error = dev->si_ops->d_close(&ap); 172 if (needmplock) 173 rel_mplock(); 174 return (error); 175 } 176 177 int 178 dev_dread(cdev_t dev, struct uio *uio, int ioflag) 179 { 180 struct dev_read_args ap; 181 int needmplock = dev_needmplock(dev); 182 int error; 183 184 ap.a_head.a_desc = &dev_read_desc; 185 ap.a_head.a_dev = dev; 186 ap.a_uio = uio; 187 ap.a_ioflag = ioflag; 188 189 if (needmplock) { 190 get_mplock(); 191 ++mplock_reads; 192 } else { 193 ++mpsafe_reads; 194 } 195 error = dev->si_ops->d_read(&ap); 196 if (needmplock) 197 rel_mplock(); 198 if (error == 0) 199 dev->si_lastread = time_second; 200 return (error); 201 } 202 203 int 204 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag) 205 { 206 struct dev_write_args ap; 207 int needmplock = dev_needmplock(dev); 208 int error; 209 210 dev->si_lastwrite = time_second; 211 ap.a_head.a_desc = &dev_write_desc; 212 ap.a_head.a_dev = dev; 213 ap.a_uio = uio; 214 ap.a_ioflag = ioflag; 215 216 if (needmplock) { 217 get_mplock(); 218 ++mplock_writes; 219 } else { 220 ++mpsafe_writes; 221 } 222 error = dev->si_ops->d_write(&ap); 223 if (needmplock) 224 rel_mplock(); 225 return (error); 226 } 227 228 int 229 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred, 230 struct sysmsg *msg) 231 { 232 struct dev_ioctl_args ap; 233 int needmplock = dev_needmplock(dev); 234 int error; 235 236 ap.a_head.a_desc = &dev_ioctl_desc; 237 ap.a_head.a_dev = dev; 238 ap.a_cmd = cmd; 239 ap.a_data = data; 240 ap.a_fflag = fflag; 241 ap.a_cred = cred; 242 ap.a_sysmsg = msg; 243 244 if (needmplock) 245 get_mplock(); 246 error = dev->si_ops->d_ioctl(&ap); 247 if (needmplock) 248 rel_mplock(); 249 return (error); 250 } 251 252 int 253 dev_dmmap(cdev_t dev, vm_offset_t 
offset, int nprot) 254 { 255 struct dev_mmap_args ap; 256 int needmplock = dev_needmplock(dev); 257 int error; 258 259 ap.a_head.a_desc = &dev_mmap_desc; 260 ap.a_head.a_dev = dev; 261 ap.a_offset = offset; 262 ap.a_nprot = nprot; 263 264 if (needmplock) 265 get_mplock(); 266 error = dev->si_ops->d_mmap(&ap); 267 if (needmplock) 268 rel_mplock(); 269 270 if (error == 0) 271 return(ap.a_result); 272 return(-1); 273 } 274 275 int 276 dev_dclone(cdev_t dev) 277 { 278 struct dev_clone_args ap; 279 int needmplock = dev_needmplock(dev); 280 int error; 281 282 ap.a_head.a_desc = &dev_clone_desc; 283 ap.a_head.a_dev = dev; 284 285 if (needmplock) 286 get_mplock(); 287 error = dev->si_ops->d_clone(&ap); 288 if (needmplock) 289 rel_mplock(); 290 return (error); 291 } 292 293 int 294 dev_drevoke(cdev_t dev) 295 { 296 struct dev_revoke_args ap; 297 int needmplock = dev_needmplock(dev); 298 int error; 299 300 ap.a_head.a_desc = &dev_revoke_desc; 301 ap.a_head.a_dev = dev; 302 303 if (needmplock) 304 get_mplock(); 305 error = dev->si_ops->d_revoke(&ap); 306 if (needmplock) 307 rel_mplock(); 308 309 return (error); 310 } 311 312 /* 313 * Core device strategy call, used to issue I/O on a device. There are 314 * two versions, a non-chained version and a chained version. The chained 315 * version reuses a BIO set up by vn_strategy(). The only difference is 316 * that, for now, we do not push a new tracking structure when chaining 317 * from vn_strategy. XXX this will ultimately have to change. 
 */

/*
 * Issue I/O on a device (non-chained form).  Attaches the bio to the
 * device's read or write bio_track (depending on b_cmd) before dispatch,
 * registers the buffer with the dsched I/O scheduler if it has not been
 * seen yet, and accounts the call in the strategy counters.  The bio
 * must not already be tracked or completed.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	/* caller must hand us an untracked, not-yet-completed bio */
	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	/* hold a track ref for the duration of the I/O */
	bio_track_ref(track);
	bio->bio_track = track;

	/* introduce the buffer to the I/O scheduler exactly once */
	if (dsched_is_clear_buf_priv(bio->bio_buf))
		dsched_new_buf(bio->bio_buf);

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock) {
		get_mplock();
		++mplock_strategies;
	} else {
		++mpsafe_strategies;
	}
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

/*
 * Chained strategy call: reuses the bio_track already set up by a prior
 * dev_dstrategy()/vn_strategy(), so no new tracking ref is pushed.
 */
void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	/* the bio must already be tracked and not yet completed */
	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
376 */ 377 int 378 dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset, 379 size_t length) 380 { 381 struct dev_dump_args ap; 382 int needmplock = dev_needmplock(dev); 383 int error; 384 385 ap.a_head.a_desc = &dev_dump_desc; 386 ap.a_head.a_dev = dev; 387 ap.a_count = 0; 388 ap.a_blkno = 0; 389 ap.a_secsize = 0; 390 ap.a_virtual = virtual; 391 ap.a_physical = physical; 392 ap.a_offset = offset; 393 ap.a_length = length; 394 395 if (needmplock) 396 get_mplock(); 397 error = dev->si_ops->d_dump(&ap); 398 if (needmplock) 399 rel_mplock(); 400 return (error); 401 } 402 403 int64_t 404 dev_dpsize(cdev_t dev) 405 { 406 struct dev_psize_args ap; 407 int needmplock = dev_needmplock(dev); 408 int error; 409 410 ap.a_head.a_desc = &dev_psize_desc; 411 ap.a_head.a_dev = dev; 412 413 if (needmplock) 414 get_mplock(); 415 error = dev->si_ops->d_psize(&ap); 416 if (needmplock) 417 rel_mplock(); 418 419 if (error == 0) 420 return (ap.a_result); 421 return(-1); 422 } 423 424 /* 425 * Pass-thru to the device kqfilter. 426 * 427 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions 428 * which return 0 do not have to bother setting a_result. 
429 */ 430 int 431 dev_dkqfilter(cdev_t dev, struct knote *kn) 432 { 433 struct dev_kqfilter_args ap; 434 int needmplock = dev_needmplock(dev); 435 int error; 436 437 ap.a_head.a_desc = &dev_kqfilter_desc; 438 ap.a_head.a_dev = dev; 439 ap.a_kn = kn; 440 ap.a_result = 0; 441 442 if (needmplock) 443 get_mplock(); 444 error = dev->si_ops->d_kqfilter(&ap); 445 if (needmplock) 446 rel_mplock(); 447 448 if (error == 0) 449 return(ap.a_result); 450 return(ENODEV); 451 } 452 453 /************************************************************************ 454 * DEVICE HELPER FUNCTIONS * 455 ************************************************************************/ 456 457 /* 458 * MPSAFE 459 */ 460 int 461 dev_drefs(cdev_t dev) 462 { 463 return(dev->si_sysref.refcnt); 464 } 465 466 /* 467 * MPSAFE 468 */ 469 const char * 470 dev_dname(cdev_t dev) 471 { 472 return(dev->si_ops->head.name); 473 } 474 475 /* 476 * MPSAFE 477 */ 478 int 479 dev_dflags(cdev_t dev) 480 { 481 return(dev->si_ops->head.flags); 482 } 483 484 /* 485 * MPSAFE 486 */ 487 int 488 dev_dmaj(cdev_t dev) 489 { 490 return(dev->si_ops->head.maj); 491 } 492 493 /* 494 * Used when forwarding a request through layers. The caller adjusts 495 * ap->a_head.a_dev and then calls this function. 496 */ 497 int 498 dev_doperate(struct dev_generic_args *ap) 499 { 500 int (*func)(struct dev_generic_args *); 501 int needmplock = dev_needmplock(ap->a_dev); 502 int error; 503 504 func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset); 505 506 if (needmplock) 507 get_mplock(); 508 error = func(ap); 509 if (needmplock) 510 rel_mplock(); 511 512 return (error); 513 } 514 515 /* 516 * Used by the console intercept code only. Issue an operation through 517 * a foreign ops structure allowing the ops structure associated 518 * with the device to remain intact. 
 */

/*
 * Like dev_doperate() but dispatches through an explicitly supplied ops
 * table rather than the device's own si_ops.  MP-lock necessity is taken
 * from the supplied ops' flags.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
	int error;

	/* locate the op at the descriptor's recorded byte offset */
	func = *(void **)((char *)ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 *
 * Sweeps every function-pointer slot between dev_ops_first_field and
 * dev_ops_last_field; NULL slots are filled with the driver's d_default
 * if set, otherwise with the corresponding default_dev_ops entry.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}

/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */

/* Red-black tree ordering: compare dev_ops_maj nodes by major number. */
static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

/*
 * Destroy all devfs device nodes registered with the given ops
 * (minor wildcard -1 matches every minor).
 */
int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

/*
 * Destroy the devfs device node(s) with the given ops and minor number.
 */
int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}

/*
 * Swap in an intercept ops table for the device (console intercept code).
 * The intercept ops inherit maj/data/flags from the current ops so
 * dispatch behavior (e.g. MPSAFE-ness) is preserved.  Returns the old
 * ops so the caller can dev_ops_restore() them later.
 */
struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

/*
 * Undo dev_ops_intercept(): reinstall the original ops and clear the
 * inherited fields from the intercept ops table.
 */
void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}

/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/


/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */

/* Default d_revoke: revocation always succeeds with no action. */
int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

/* Default d_clone: no action, but the clone is allowed. */
int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

/* Default d_open: device cannot be opened. */
int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

/* Default d_close: operation not supported. */
int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

/* Default d_read: operation not supported. */
int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

/* Default d_write: operation not supported. */
int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

/* Default d_ioctl: operation not supported. */
int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

/* Default d_kqfilter: operation not supported. */
int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

/* Default d_mmap: operation not supported. */
int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

/*
 * Default d_strategy: fail the bio with EOPNOTSUPP and complete it so
 * the issuer does not wait forever on the tracked I/O.
 */
int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

/* Default d_psize: report a size of 0 (success). */
int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

/* Default d_dump: crash dumps not supported. */
int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}
