1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert 4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer, 5 * All rights reserved. 6 * Copyright (c) 1982, 1986, 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $ 31 */ 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/sysctl.h> 36 #include <sys/systm.h> 37 #include <sys/module.h> 38 #include <sys/malloc.h> 39 #include <sys/conf.h> 40 #include <sys/bio.h> 41 #include <sys/buf.h> 42 #include <sys/vnode.h> 43 #include <sys/queue.h> 44 #include <sys/device.h> 45 #include <sys/tree.h> 46 #include <sys/syslink_rpc.h> 47 #include <sys/proc.h> 48 #include <machine/stdarg.h> 49 #include <sys/thread2.h> 50 #include <sys/devfs.h> 51 52 /* 53 * system link descriptors identify the command in the 54 * arguments structure. 55 */ 56 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc) 57 58 #define DEVOP_DESC_INIT(name) \ 59 struct syslink_desc DDESCNAME(name) = { \ 60 __offsetof(struct dev_ops, __CONCAT(d_, name)), \ 61 #name } 62 63 DEVOP_DESC_INIT(default); 64 DEVOP_DESC_INIT(open); 65 DEVOP_DESC_INIT(close); 66 DEVOP_DESC_INIT(read); 67 DEVOP_DESC_INIT(write); 68 DEVOP_DESC_INIT(ioctl); 69 DEVOP_DESC_INIT(dump); 70 DEVOP_DESC_INIT(psize); 71 DEVOP_DESC_INIT(poll); 72 DEVOP_DESC_INIT(mmap); 73 DEVOP_DESC_INIT(strategy); 74 DEVOP_DESC_INIT(kqfilter); 75 DEVOP_DESC_INIT(revoke); 76 DEVOP_DESC_INIT(clone); 77 78 /* 79 * Misc default ops 80 */ 81 struct dev_ops dead_dev_ops; 82 83 struct dev_ops default_dev_ops = { 84 { "null" }, 85 .d_default = NULL, /* must be NULL */ 86 .d_open = noopen, 87 .d_close = noclose, 88 .d_read = noread, 89 .d_write = nowrite, 90 .d_ioctl = noioctl, 91 .d_poll = nopoll, 92 .d_mmap = nommap, 93 .d_strategy = nostrategy, 94 .d_dump = nodump, 95 .d_psize = nopsize, 96 .d_kqfilter = nokqfilter, 97 .d_revoke = norevoke, 98 .d_clone = noclone 99 }; 100 101 /************************************************************************ 102 * GENERAL DEVICE API FUNCTIONS * 103 ************************************************************************/ 104 105 int 106 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred) 107 { 108 struct dev_open_args ap; 109 110 ap.a_head.a_desc = &dev_open_desc; 111 ap.a_head.a_dev = dev; 112 ap.a_oflags = oflags; 113 ap.a_devtype = devtype; 114 ap.a_cred = cred; 115 return(dev->si_ops->d_open(&ap)); 116 } 117 118 int 119 dev_dclose(cdev_t dev, int fflag, int devtype) 120 { 121 struct dev_close_args ap; 122 123 ap.a_head.a_desc = &dev_close_desc; 124 ap.a_head.a_dev = dev; 125 ap.a_fflag = fflag; 126 ap.a_devtype = devtype; 127 return(dev->si_ops->d_close(&ap)); 128 } 129 130 int 131 dev_dread(cdev_t dev, struct uio *uio, int ioflag) 132 { 133 struct dev_read_args ap; 134 int error; 135 136 ap.a_head.a_desc = &dev_read_desc; 137 ap.a_head.a_dev = dev; 138 ap.a_uio = uio; 139 ap.a_ioflag = ioflag; 140 error = dev->si_ops->d_read(&ap); 141 if (error == 0) 142 dev->si_lastread = time_second; 143 return (error); 144 } 145 146 int 147 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag) 148 { 149 struct dev_write_args ap; 150 int error; 151 152 dev->si_lastwrite = time_second; 153 ap.a_head.a_desc = &dev_write_desc; 154 ap.a_head.a_dev = dev; 155 ap.a_uio = uio; 156 ap.a_ioflag = ioflag; 157 error = dev->si_ops->d_write(&ap); 158 return (error); 159 } 160 161 int 162 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred, 163 struct sysmsg *msg) 164 { 165 struct dev_ioctl_args ap; 166 167 ap.a_head.a_desc = &dev_ioctl_desc; 168 ap.a_head.a_dev = dev; 169 ap.a_cmd = cmd; 170 ap.a_data = data; 171 ap.a_fflag = fflag; 172 ap.a_cred = cred; 173 ap.a_sysmsg = msg; 174 return(dev->si_ops->d_ioctl(&ap)); 175 } 176 177 int 178 dev_dpoll(cdev_t dev, int events) 179 { 180 struct dev_poll_args ap; 181 int error; 182 183 ap.a_head.a_desc = &dev_poll_desc; 184 ap.a_head.a_dev = dev; 185 ap.a_events = events; 186 error = dev->si_ops->d_poll(&ap); 187 if (error == 0) 188 return(ap.a_events); 189 return (seltrue(dev, events)); 190 } 191 192 int 193 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot) 194 { 195 struct dev_mmap_args ap; 196 int error; 197 198 ap.a_head.a_desc = &dev_mmap_desc; 199 ap.a_head.a_dev = dev; 200 ap.a_offset = offset; 201 ap.a_nprot = nprot; 202 error = dev->si_ops->d_mmap(&ap); 203 if (error == 0) 204 return(ap.a_result); 205 return(-1); 206 } 207 208 int 209 dev_dclone(cdev_t dev) 210 { 211 struct dev_clone_args ap; 212 213 ap.a_head.a_desc = &dev_clone_desc; 214 ap.a_head.a_dev = dev; 215 return (dev->si_ops->d_clone(&ap)); 216 } 217 218 int 219 dev_drevoke(cdev_t dev) 220 { 221 struct dev_revoke_args ap; 222 223 ap.a_head.a_desc = &dev_revoke_desc; 224 ap.a_head.a_dev = dev; 225 return (dev->si_ops->d_revoke(&ap)); 226 } 227 228 /* 229 * Core device strategy call, used to issue I/O on a device. There are 230 * two versions, a non-chained version and a chained version. The chained 231 * version reuses a BIO set up by vn_strategy(). The only difference is 232 * that, for now, we do not push a new tracking structure when chaining 233 * from vn_strategy. XXX this will ultimately have to change. 234 */ 235 void 236 dev_dstrategy(cdev_t dev, struct bio *bio) 237 { 238 struct dev_strategy_args ap; 239 struct bio_track *track; 240 241 ap.a_head.a_desc = &dev_strategy_desc; 242 ap.a_head.a_dev = dev; 243 ap.a_bio = bio; 244 245 KKASSERT(bio->bio_track == NULL); 246 KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE); 247 if (bio->bio_buf->b_cmd == BUF_CMD_READ) 248 track = &dev->si_track_read; 249 else 250 track = &dev->si_track_write; 251 bio_track_ref(track); 252 bio->bio_track = track; 253 KKASSERT((bio->bio_flags & BIO_DONE) == 0); 254 (void)dev->si_ops->d_strategy(&ap); 255 } 256 257 void 258 dev_dstrategy_chain(cdev_t dev, struct bio *bio) 259 { 260 struct dev_strategy_args ap; 261 262 ap.a_head.a_desc = &dev_strategy_desc; 263 ap.a_head.a_dev = dev; 264 ap.a_bio = bio; 265 266 KKASSERT(bio->bio_track != NULL); 267 KKASSERT((bio->bio_flags & BIO_DONE) == 0); 268 (void)dev->si_ops->d_strategy(&ap); 269 } 270 271 /* 272 * note: the disk layer is expected to set count, blkno, and secsize before 273 * forwarding the message. 274 */ 275 int 276 dev_ddump(cdev_t dev) 277 { 278 struct dev_dump_args ap; 279 280 ap.a_head.a_desc = &dev_dump_desc; 281 ap.a_head.a_dev = dev; 282 ap.a_count = 0; 283 ap.a_blkno = 0; 284 ap.a_secsize = 0; 285 return(dev->si_ops->d_dump(&ap)); 286 } 287 288 int64_t 289 dev_dpsize(cdev_t dev) 290 { 291 struct dev_psize_args ap; 292 int error; 293 294 ap.a_head.a_desc = &dev_psize_desc; 295 ap.a_head.a_dev = dev; 296 error = dev->si_ops->d_psize(&ap); 297 if (error == 0) 298 return (ap.a_result); 299 return(-1); 300 } 301 302 int 303 dev_dkqfilter(cdev_t dev, struct knote *kn) 304 { 305 struct dev_kqfilter_args ap; 306 int error; 307 308 ap.a_head.a_desc = &dev_kqfilter_desc; 309 ap.a_head.a_dev = dev; 310 ap.a_kn = kn; 311 error = dev->si_ops->d_kqfilter(&ap); 312 if (error == 0) 313 return(ap.a_result); 314 return(ENODEV); 315 } 316 317 /************************************************************************ 318 * DEVICE HELPER FUNCTIONS * 319 ************************************************************************/ 320 321 /* 322 * MPSAFE 323 */ 324 int 325 dev_drefs(cdev_t dev) 326 { 327 return(dev->si_sysref.refcnt); 328 } 329 330 /* 331 * MPSAFE 332 */ 333 const char * 334 dev_dname(cdev_t dev) 335 { 336 return(dev->si_ops->head.name); 337 } 338 339 /* 340 * MPSAFE 341 */ 342 int 343 dev_dflags(cdev_t dev) 344 { 345 return(dev->si_ops->head.flags); 346 } 347 348 /* 349 * MPSAFE 350 */ 351 int 352 dev_dmaj(cdev_t dev) 353 { 354 return(dev->si_ops->head.maj); 355 } 356 357 /* 358 * Used when forwarding a request through layers. The caller adjusts 359 * ap->a_head.a_dev and then calls this function. 360 */ 361 int 362 dev_doperate(struct dev_generic_args *ap) 363 { 364 int (*func)(struct dev_generic_args *); 365 366 func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset); 367 return (func(ap)); 368 } 369 370 /* 371 * Used by the console intercept code only. Issue an operation through 372 * a foreign ops structure allowing the ops structure associated 373 * with the device to remain intact. 374 */ 375 int 376 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap) 377 { 378 int (*func)(struct dev_generic_args *); 379 380 func = *(void **)((char *)ops + ap->a_desc->sd_offset); 381 return (func(ap)); 382 } 383 384 /* 385 * Convert a template dev_ops into the real thing by filling in 386 * uninitialized fields. 387 */ 388 void 389 compile_dev_ops(struct dev_ops *ops) 390 { 391 int offset; 392 393 for (offset = offsetof(struct dev_ops, dev_ops_first_field); 394 offset <= offsetof(struct dev_ops, dev_ops_last_field); 395 offset += sizeof(void *) 396 ) { 397 void **func_p = (void **)((char *)ops + offset); 398 void **def_p = (void **)((char *)&default_dev_ops + offset); 399 if (*func_p == NULL) { 400 if (ops->d_default) 401 *func_p = ops->d_default; 402 else 403 *func_p = *def_p; 404 } 405 } 406 } 407 408 /************************************************************************ 409 * MAJOR/MINOR SPACE FUNCTION * 410 ************************************************************************/ 411 412 /* 413 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>). 414 * 415 * Disk devices typically register their major, e.g. 'ad0', and then call 416 * into the disk label management code which overloads its own onto e.g. 'ad0' 417 * to support all the various slice and partition combinations. 418 * 419 * The mask/match supplied in this call are a full 32 bits and the same 420 * mask and match must be specified in a later dev_ops_remove() call to 421 * match this add. However, the match value for the minor number should never 422 * have any bits set in the major number's bit range (8-15). The mask value 423 * may be conveniently specified as -1 without creating any major number 424 * interference. 425 */ 426 427 static 428 int 429 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b) 430 { 431 if (a->maj < b->maj) 432 return(-1); 433 else if (a->maj > b->maj) 434 return(1); 435 return(0); 436 } 437 438 RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj); 439 440 struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead); 441 442 int 443 dev_ops_remove_all(struct dev_ops *ops) 444 { 445 return devfs_destroy_dev_by_ops(ops, -1); 446 } 447 448 int 449 dev_ops_remove_minor(struct dev_ops *ops, int minor) 450 { 451 return devfs_destroy_dev_by_ops(ops, minor); 452 } 453 454 struct dev_ops * 455 dev_ops_intercept(cdev_t dev, struct dev_ops *iops) 456 { 457 struct dev_ops *oops = dev->si_ops; 458 459 compile_dev_ops(iops); 460 iops->head.maj = oops->head.maj; 461 iops->head.data = oops->head.data; 462 iops->head.flags = oops->head.flags; 463 dev->si_ops = iops; 464 dev->si_flags |= SI_INTERCEPTED; 465 466 return (oops); 467 } 468 469 void 470 dev_ops_restore(cdev_t dev, struct dev_ops *oops) 471 { 472 struct dev_ops *iops = dev->si_ops; 473 474 dev->si_ops = oops; 475 dev->si_flags &= ~SI_INTERCEPTED; 476 iops->head.maj = 0; 477 iops->head.data = NULL; 478 iops->head.flags = 0; 479 } 480 481 /************************************************************************ 482 * DEFAULT DEV OPS FUNCTIONS * 483 ************************************************************************/ 484 485 486 /* 487 * Unsupported devswitch functions (e.g. for writing to read-only device). 488 * XXX may belong elsewhere. 489 */ 490 int 491 norevoke(struct dev_revoke_args *ap) 492 { 493 /* take no action */ 494 return(0); 495 } 496 497 int 498 noclone(struct dev_clone_args *ap) 499 { 500 /* take no action */ 501 return (0); /* allow the clone */ 502 } 503 504 int 505 noopen(struct dev_open_args *ap) 506 { 507 return (ENODEV); 508 } 509 510 int 511 noclose(struct dev_close_args *ap) 512 { 513 return (ENODEV); 514 } 515 516 int 517 noread(struct dev_read_args *ap) 518 { 519 return (ENODEV); 520 } 521 522 int 523 nowrite(struct dev_write_args *ap) 524 { 525 return (ENODEV); 526 } 527 528 int 529 noioctl(struct dev_ioctl_args *ap) 530 { 531 return (ENODEV); 532 } 533 534 int 535 nokqfilter(struct dev_kqfilter_args *ap) 536 { 537 return (ENODEV); 538 } 539 540 int 541 nommap(struct dev_mmap_args *ap) 542 { 543 return (ENODEV); 544 } 545 546 int 547 nopoll(struct dev_poll_args *ap) 548 { 549 ap->a_events = 0; 550 return(0); 551 } 552 553 int 554 nostrategy(struct dev_strategy_args *ap) 555 { 556 struct bio *bio = ap->a_bio; 557 558 bio->bio_buf->b_flags |= B_ERROR; 559 bio->bio_buf->b_error = EOPNOTSUPP; 560 biodone(bio); 561 return(0); 562 } 563 564 int 565 nopsize(struct dev_psize_args *ap) 566 { 567 ap->a_result = 0; 568 return(0); 569 } 570 571 int 572 nodump(struct dev_dump_args *ap) 573 { 574 return (ENODEV); 575 } 576 577 /* 578 * XXX this is probably bogus. Any device that uses it isn't checking the 579 * minor number. 580 */ 581 int 582 nullopen(struct dev_open_args *ap) 583 { 584 return (0); 585 } 586 587 int 588 nullclose(struct dev_close_args *ap) 589 { 590 return (0); 591 } 592 593