1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert 4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer, 5 * All rights reserved. 6 * Copyright (c) 1982, 1986, 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $DragonFly: src/sys/kern/kern_device.c,v 1.22 2006/12/23 00:35:04 swildner Exp $ 31 */ 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/sysctl.h> 36 #include <sys/systm.h> 37 #include <sys/module.h> 38 #include <sys/malloc.h> 39 #include <sys/conf.h> 40 #include <sys/bio.h> 41 #include <sys/buf.h> 42 #include <sys/vnode.h> 43 #include <sys/queue.h> 44 #include <sys/device.h> 45 #include <sys/syslink.h> 46 #include <sys/proc.h> 47 #include <machine/stdarg.h> 48 #include <sys/thread2.h> 49 50 /* 51 * system link descriptors identify the command in the 52 * arguments structure. 53 */ 54 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc) 55 56 #define DEVOP_DESC_INIT(name) \ 57 struct syslink_desc DDESCNAME(name) = { \ 58 __offsetof(struct dev_ops, __CONCAT(d_, name)), \ 59 #name } 60 61 DEVOP_DESC_INIT(default); 62 DEVOP_DESC_INIT(open); 63 DEVOP_DESC_INIT(close); 64 DEVOP_DESC_INIT(read); 65 DEVOP_DESC_INIT(write); 66 DEVOP_DESC_INIT(ioctl); 67 DEVOP_DESC_INIT(dump); 68 DEVOP_DESC_INIT(psize); 69 DEVOP_DESC_INIT(poll); 70 DEVOP_DESC_INIT(mmap); 71 DEVOP_DESC_INIT(strategy); 72 DEVOP_DESC_INIT(kqfilter); 73 DEVOP_DESC_INIT(clone); 74 75 /* 76 * Misc default ops 77 */ 78 struct dev_ops dead_dev_ops; 79 80 struct dev_ops default_dev_ops = { 81 { "null" }, 82 .d_default = NULL, /* must be NULL */ 83 .d_open = noopen, 84 .d_close = noclose, 85 .d_read = noread, 86 .d_write = nowrite, 87 .d_ioctl = noioctl, 88 .d_poll = nopoll, 89 .d_mmap = nommap, 90 .d_strategy = nostrategy, 91 .d_dump = nodump, 92 .d_psize = nopsize, 93 .d_kqfilter = nokqfilter, 94 .d_clone = noclone 95 }; 96 97 /* 98 * This is used to look-up devices 99 */ 100 static struct dev_ops_link *dev_ops_array[NUMCDEVSW]; 101 102 /************************************************************************ 103 * GENERAL DEVICE API FUNCTIONS * 104 ************************************************************************/ 105 106 int 107 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred) 108 { 109 struct dev_open_args ap; 110 111 ap.a_head.a_desc = &dev_open_desc; 112 ap.a_head.a_dev = dev; 113 ap.a_oflags = oflags; 114 ap.a_devtype = devtype; 115 ap.a_cred = cred; 116 return(dev->si_ops->d_open(&ap)); 117 } 118 119 int 120 dev_dclose(cdev_t dev, int fflag, int devtype) 121 { 122 struct dev_close_args ap; 123 124 ap.a_head.a_desc = &dev_close_desc; 125 ap.a_head.a_dev = dev; 126 ap.a_fflag = fflag; 127 ap.a_devtype = devtype; 128 return(dev->si_ops->d_close(&ap)); 129 } 130 131 int 132 dev_dread(cdev_t dev, struct uio *uio, int ioflag) 133 { 134 struct dev_read_args ap; 135 int error; 136 137 ap.a_head.a_desc = &dev_read_desc; 138 ap.a_head.a_dev = dev; 139 ap.a_uio = uio; 140 ap.a_ioflag = ioflag; 141 error = dev->si_ops->d_read(&ap); 142 if (error == 0) 143 dev->si_lastread = time_second; 144 return (error); 145 } 146 147 int 148 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag) 149 { 150 struct dev_write_args ap; 151 int error; 152 153 dev->si_lastwrite = time_second; 154 ap.a_head.a_desc = &dev_write_desc; 155 ap.a_head.a_dev = dev; 156 ap.a_uio = uio; 157 ap.a_ioflag = ioflag; 158 error = dev->si_ops->d_write(&ap); 159 return (error); 160 } 161 162 int 163 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred) 164 { 165 struct dev_ioctl_args ap; 166 167 ap.a_head.a_desc = &dev_ioctl_desc; 168 ap.a_head.a_dev = dev; 169 ap.a_cmd = cmd; 170 ap.a_data = data; 171 ap.a_fflag = fflag; 172 ap.a_cred = cred; 173 return(dev->si_ops->d_ioctl(&ap)); 174 } 175 176 int 177 dev_dpoll(cdev_t dev, int events) 178 { 179 struct dev_poll_args ap; 180 int error; 181 182 ap.a_head.a_desc = &dev_poll_desc; 183 ap.a_head.a_dev = dev; 184 ap.a_events = events; 185 error = dev->si_ops->d_poll(&ap); 186 if (error == 0) 187 return(ap.a_events); 188 return (seltrue(dev, events)); 189 } 190 191 int 192 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot) 193 { 194 struct dev_mmap_args ap; 195 int error; 196 197 ap.a_head.a_desc = &dev_mmap_desc; 198 ap.a_head.a_dev = dev; 199 ap.a_offset = offset; 200 ap.a_nprot = nprot; 201 error = dev->si_ops->d_mmap(&ap); 202 if (error == 0) 203 return(ap.a_result); 204 return(-1); 205 } 206 207 int 208 dev_dclone(cdev_t dev) 209 { 210 struct dev_clone_args ap; 211 212 ap.a_head.a_desc = &dev_clone_desc; 213 ap.a_head.a_dev = dev; 214 return (dev->si_ops->d_clone(&ap)); 215 } 216 217 /* 218 * Core device strategy call, used to issue I/O on a device. There are 219 * two versions, a non-chained version and a chained version. The chained 220 * version reuses a BIO set up by vn_strategy(). The only difference is 221 * that, for now, we do not push a new tracking structure when chaining 222 * from vn_strategy. XXX this will ultimately have to change. 223 */ 224 void 225 dev_dstrategy(cdev_t dev, struct bio *bio) 226 { 227 struct dev_strategy_args ap; 228 struct bio_track *track; 229 230 ap.a_head.a_desc = &dev_strategy_desc; 231 ap.a_head.a_dev = dev; 232 ap.a_bio = bio; 233 234 KKASSERT(bio->bio_track == NULL); 235 KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE); 236 if (bio->bio_buf->b_cmd == BUF_CMD_READ) 237 track = &dev->si_track_read; 238 else 239 track = &dev->si_track_write; 240 atomic_add_int(&track->bk_active, 1); 241 bio->bio_track = track; 242 (void)dev->si_ops->d_strategy(&ap); 243 } 244 245 void 246 dev_dstrategy_chain(cdev_t dev, struct bio *bio) 247 { 248 struct dev_strategy_args ap; 249 250 KKASSERT(bio->bio_track != NULL); 251 ap.a_head.a_desc = &dev_strategy_desc; 252 ap.a_head.a_dev = dev; 253 ap.a_bio = bio; 254 (void)dev->si_ops->d_strategy(&ap); 255 } 256 257 /* 258 * note: the disk layer is expected to set count, blkno, and secsize before 259 * forwarding the message. 260 */ 261 int 262 dev_ddump(cdev_t dev) 263 { 264 struct dev_dump_args ap; 265 266 ap.a_head.a_desc = &dev_dump_desc; 267 ap.a_head.a_dev = dev; 268 ap.a_count = 0; 269 ap.a_blkno = 0; 270 ap.a_secsize = 0; 271 return(dev->si_ops->d_dump(&ap)); 272 } 273 274 int 275 dev_dpsize(cdev_t dev) 276 { 277 struct dev_psize_args ap; 278 int error; 279 280 ap.a_head.a_desc = &dev_psize_desc; 281 ap.a_head.a_dev = dev; 282 error = dev->si_ops->d_psize(&ap); 283 if (error == 0) 284 return (ap.a_result); 285 return(-1); 286 } 287 288 int 289 dev_dkqfilter(cdev_t dev, struct knote *kn) 290 { 291 struct dev_kqfilter_args ap; 292 int error; 293 294 ap.a_head.a_desc = &dev_kqfilter_desc; 295 ap.a_head.a_dev = dev; 296 ap.a_kn = kn; 297 error = dev->si_ops->d_kqfilter(&ap); 298 if (error == 0) 299 return(ap.a_result); 300 return(ENODEV); 301 } 302 303 /************************************************************************ 304 * DEVICE HELPER FUNCTIONS * 305 ************************************************************************/ 306 307 const char * 308 dev_dname(cdev_t dev) 309 { 310 return(dev->si_ops->head.name); 311 } 312 313 int 314 dev_dflags(cdev_t dev) 315 { 316 return(dev->si_ops->head.flags); 317 } 318 319 int 320 dev_dmaj(cdev_t dev) 321 { 322 return(dev->si_ops->head.maj); 323 } 324 325 /* 326 * Used when forwarding a request through layers. The caller adjusts 327 * ap->a_head.a_dev and then calls this function. 328 */ 329 int 330 dev_doperate(struct dev_generic_args *ap) 331 { 332 int (*func)(struct dev_generic_args *); 333 334 func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset); 335 return (func(ap)); 336 } 337 338 /* 339 * Used by the console intercept code only. Issue an operation through 340 * a foreign ops structure allowing the ops structure associated 341 * with the device to remain intact. 342 */ 343 int 344 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap) 345 { 346 int (*func)(struct dev_generic_args *); 347 348 func = *(void **)((char *)ops + ap->a_desc->sd_offset); 349 return (func(ap)); 350 } 351 352 /* 353 * Convert a template dev_ops into the real thing by filling in 354 * uninitialized fields. 355 */ 356 void 357 compile_dev_ops(struct dev_ops *ops) 358 { 359 int offset; 360 361 for (offset = offsetof(struct dev_ops, dev_ops_first_field); 362 offset <= offsetof(struct dev_ops, dev_ops_last_field); 363 offset += sizeof(void *) 364 ) { 365 void **func_p = (void **)((char *)ops + offset); 366 void **def_p = (void **)((char *)&default_dev_ops + offset); 367 if (*func_p == NULL) { 368 if (ops->d_default) 369 *func_p = ops->d_default; 370 else 371 *func_p = *def_p; 372 } 373 } 374 } 375 376 /************************************************************************ 377 * MAJOR/MINOR SPACE FUNCTION * 378 ************************************************************************/ 379 380 /* 381 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>). 382 * 383 * The kernel can overload a major number by making multiple dev_ops_add() 384 * calls, but only the most recent one (the first one in the dev_ops_array[] 385 * list matching the mask/match) will be visible to userland. make_dev() does 386 * not automatically call dev_ops_add() (nor do we want it to, since 387 * partition-managed disk devices are overloaded on top of the raw device). 388 * 389 * Disk devices typically register their major, e.g. 'ad0', and then call 390 * into the disk label management code which overloads its own onto e.g. 'ad0' 391 * to support all the various slice and partition combinations. 392 * 393 * The mask/match supplied in this call are a full 32 bits and the same 394 * mask and match must be specified in a later dev_ops_remove() call to 395 * match this add. However, the match value for the minor number should never 396 * have any bits set in the major number's bit range (8-15). The mask value 397 * may be conveniently specified as -1 without creating any major number 398 * interference. 399 */ 400 int 401 dev_ops_add(struct dev_ops *ops, u_int mask, u_int match) 402 { 403 int maj; 404 struct dev_ops_link *link; 405 406 compile_dev_ops(ops); 407 maj = ops->head.maj; 408 if (maj < 0 || maj >= NUMCDEVSW) { 409 kprintf("%s: ERROR: driver has bogus dev_ops->head.maj = %d\n", 410 ops->head.name, maj); 411 return (EINVAL); 412 } 413 for (link = dev_ops_array[maj]; link; link = link->next) { 414 /* 415 * If we get an exact match we usurp the target, but we only print 416 * a warning message if a different device switch is installed. 417 */ 418 if (link->mask == mask && link->match == match) { 419 if (link->ops != ops) { 420 kprintf("WARNING: \"%s\" (%p) is usurping \"%s\"'s" 421 " (%p) dev_ops_array[]\n", 422 ops->head.name, ops, 423 link->ops->head.name, link->ops); 424 link->ops = ops; 425 ++ops->head.refs; 426 } 427 return(0); 428 } 429 /* 430 * XXX add additional warnings for overlaps 431 */ 432 } 433 434 link = kmalloc(sizeof(struct dev_ops_link), M_DEVBUF, M_INTWAIT|M_ZERO); 435 link->mask = mask; 436 link->match = match; 437 link->ops = ops; 438 link->next = dev_ops_array[maj]; 439 dev_ops_array[maj] = link; 440 ++ops->head.refs; 441 return(0); 442 } 443 444 /* 445 * Should only be used by udev2dev(). 446 * 447 * If the minor number is -1, we match the first ops we find for this 448 * major. If the mask is not -1 then multiple minor numbers can match 449 * the same ops. 450 * 451 * Note that this function will return NULL if the minor number is not within 452 * the bounds of the installed mask(s). 453 * 454 * The specified minor number should NOT include any major bits. 455 */ 456 struct dev_ops * 457 dev_ops_get(int x, int y) 458 { 459 struct dev_ops_link *link; 460 461 if (x < 0 || x >= NUMCDEVSW) 462 return(NULL); 463 for (link = dev_ops_array[x]; link; link = link->next) { 464 if (y == -1 || (link->mask & y) == link->match) 465 return(link->ops); 466 } 467 return(NULL); 468 } 469 470 /* 471 * Take a cookie cutter to the major/minor device space for the passed 472 * device and generate a new dev_ops visible to userland which the caller 473 * can then modify. The original device is not modified but portions of 474 * its major/minor space will no longer be visible to userland. 475 */ 476 struct dev_ops * 477 dev_ops_add_override(cdev_t backing_dev, struct dev_ops *template, 478 u_int mask, u_int match) 479 { 480 struct dev_ops *ops; 481 struct dev_ops *backing_ops = backing_dev->si_ops; 482 483 ops = kmalloc(sizeof(struct dev_ops), M_DEVBUF, M_INTWAIT); 484 *ops = *template; 485 ops->head.name = backing_ops->head.name; 486 ops->head.maj = backing_ops->head.maj; 487 ops->head.flags = backing_ops->head.flags; 488 compile_dev_ops(ops); 489 dev_ops_add(ops, mask, match); 490 491 return(ops); 492 } 493 494 /* 495 * Remove all matching dev_ops entries from the dev_ops_array[] major 496 * array so no new user opens can be performed, and destroy all devices 497 * installed in the hash table that are associated with this dev_ops. (see 498 * destroy_all_devs()). 499 * 500 * The mask and match should match a previous call to dev_ops_add*(). 501 */ 502 int 503 dev_ops_remove(struct dev_ops *ops, u_int mask, u_int match) 504 { 505 int maj = ops->head.maj; 506 struct dev_ops_link *link; 507 struct dev_ops_link **plink; 508 509 if (maj < 0 || maj >= NUMCDEVSW) { 510 kprintf("%s: ERROR: driver has bogus ops->d_maj = %d\n", 511 ops->head.name, maj); 512 return EINVAL; 513 } 514 if (ops != &dead_dev_ops) 515 destroy_all_devs(ops, mask, match); 516 for (plink = &dev_ops_array[maj]; (link = *plink) != NULL; 517 plink = &link->next) { 518 if (link->mask == mask && link->match == match) { 519 if (link->ops == ops) 520 break; 521 kprintf("%s: ERROR: cannot remove from dev_ops_array[], " 522 "its major number %d was stolen by %s\n", 523 ops->head.name, maj, 524 link->ops->head.name 525 ); 526 } 527 } 528 if (link == NULL) { 529 kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed " 530 "multiple times!\n", 531 ops->head.name, maj, mask, match); 532 } else { 533 *plink = link->next; 534 --ops->head.refs; /* XXX ops_release() / record refs */ 535 kfree(link, M_DEVBUF); 536 } 537 if (dev_ops_array[maj] == NULL && ops->head.refs != 0) { 538 kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called " 539 "while %d device refs still exist!\n", 540 ops->head.name, maj, mask, match, ops->head.refs); 541 } else { 542 kprintf("%s: ops removed\n", ops->head.name); 543 } 544 return 0; 545 } 546 547 /* 548 * Release a ops entry. When the ref count reaches zero, recurse 549 * through the stack. 550 */ 551 void 552 dev_ops_release(struct dev_ops *ops) 553 { 554 --ops->head.refs; 555 if (ops->head.refs == 0) { 556 /* XXX */ 557 } 558 } 559 560 struct dev_ops * 561 dev_ops_intercept(cdev_t dev, struct dev_ops *iops) 562 { 563 struct dev_ops *oops = dev->si_ops; 564 565 compile_dev_ops(iops); 566 iops->head.maj = oops->head.maj; 567 iops->head.data = oops->head.data; 568 iops->head.flags = oops->head.flags; 569 dev->si_ops = iops; 570 dev->si_flags |= SI_INTERCEPTED; 571 572 return (oops); 573 } 574 575 void 576 dev_ops_restore(cdev_t dev, struct dev_ops *oops) 577 { 578 struct dev_ops *iops = dev->si_ops; 579 580 dev->si_ops = oops; 581 dev->si_flags &= ~SI_INTERCEPTED; 582 iops->head.maj = 0; 583 iops->head.data = NULL; 584 iops->head.flags = 0; 585 } 586 587 /************************************************************************ 588 * DEFAULT DEV OPS FUNCTIONS * 589 ************************************************************************/ 590 591 592 /* 593 * Unsupported devswitch functions (e.g. for writing to read-only device). 594 * XXX may belong elsewhere. 595 */ 596 597 int 598 noclone(struct dev_clone_args *ap) 599 { 600 /* take no action */ 601 return (0); /* allow the clone */ 602 } 603 604 int 605 noopen(struct dev_open_args *ap) 606 { 607 return (ENODEV); 608 } 609 610 int 611 noclose(struct dev_close_args *ap) 612 { 613 return (ENODEV); 614 } 615 616 int 617 noread(struct dev_read_args *ap) 618 { 619 return (ENODEV); 620 } 621 622 int 623 nowrite(struct dev_write_args *ap) 624 { 625 return (ENODEV); 626 } 627 628 int 629 noioctl(struct dev_ioctl_args *ap) 630 { 631 return (ENODEV); 632 } 633 634 int 635 nokqfilter(struct dev_kqfilter_args *ap) 636 { 637 return (ENODEV); 638 } 639 640 int 641 nommap(struct dev_mmap_args *ap) 642 { 643 return (ENODEV); 644 } 645 646 int 647 nopoll(struct dev_poll_args *ap) 648 { 649 ap->a_events = 0; 650 return(0); 651 } 652 653 int 654 nostrategy(struct dev_strategy_args *ap) 655 { 656 struct bio *bio = ap->a_bio; 657 658 bio->bio_buf->b_flags |= B_ERROR; 659 bio->bio_buf->b_error = EOPNOTSUPP; 660 biodone(bio); 661 return(0); 662 } 663 664 int 665 nopsize(struct dev_psize_args *ap) 666 { 667 ap->a_result = 0; 668 return(0); 669 } 670 671 int 672 nodump(struct dev_dump_args *ap) 673 { 674 return (ENODEV); 675 } 676 677 /* 678 * XXX this is probably bogus. Any device that uses it isn't checking the 679 * minor number. 680 */ 681 int 682 nullopen(struct dev_open_args *ap) 683 { 684 return (0); 685 } 686 687 int 688 nullclose(struct dev_close_args *ap) 689 { 690 return (0); 691 } 692 693