1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert 4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 * 28 * $DragonFly: src/sys/kern/kern_device.c,v 1.17 2006/04/30 17:22:17 dillon Exp $ 29 */ 30 #include <sys/param.h> 31 #include <sys/kernel.h> 32 #include <sys/sysctl.h> 33 #include <sys/systm.h> 34 #include <sys/module.h> 35 #include <sys/malloc.h> 36 #include <sys/conf.h> 37 #include <sys/bio.h> 38 #include <sys/buf.h> 39 #include <sys/vnode.h> 40 #include <sys/queue.h> 41 #include <sys/msgport.h> 42 #include <sys/device.h> 43 #include <machine/stdarg.h> 44 #include <sys/proc.h> 45 #include <sys/thread2.h> 46 #include <sys/msgport2.h> 47 48 static struct cdevlink *cdevbase[NUMCDEVSW]; 49 50 static int cdevsw_putport(lwkt_port_t port, lwkt_msg_t msg); 51 52 struct cdevsw dead_cdevsw; 53 54 /* 55 * Initialize a message port to serve as the default message-handling port 56 * for device operations. This message port provides compatibility with 57 * traditional cdevsw dispatch functions by running them synchronously. 58 * 59 * YYY NOTE: ms_cmd can now hold a function pointer, should this code be 60 * converted from an integer op to a function pointer with a flag to 61 * indicate legacy operation? 62 */ 63 static void 64 init_default_cdevsw_port(lwkt_port_t port) 65 { 66 lwkt_initport(port, NULL); 67 port->mp_putport = cdevsw_putport; 68 } 69 70 static 71 int 72 cdevsw_putport(lwkt_port_t port, lwkt_msg_t lmsg) 73 { 74 cdevallmsg_t msg = (cdevallmsg_t)lmsg; 75 struct cdevsw *devsw = msg->am_msg.dev->si_devsw; 76 int error; 77 78 /* 79 * Run the device switch function synchronously in the context of the 80 * caller and return a synchronous error code (anything not EASYNC). 
81 */ 82 switch(msg->am_lmsg.ms_cmd.cm_op) { 83 case CDEV_CMD_OPEN: 84 error = devsw->old_open( 85 msg->am_open.msg.dev, 86 msg->am_open.oflags, 87 msg->am_open.devtype, 88 msg->am_open.td); 89 break; 90 case CDEV_CMD_CLOSE: 91 error = devsw->old_close( 92 msg->am_close.msg.dev, 93 msg->am_close.fflag, 94 msg->am_close.devtype, 95 msg->am_close.td); 96 break; 97 case CDEV_CMD_STRATEGY: 98 devsw->old_strategy(msg->am_strategy.msg.dev, msg->am_strategy.bio); 99 error = 0; 100 break; 101 case CDEV_CMD_IOCTL: 102 error = devsw->old_ioctl( 103 msg->am_ioctl.msg.dev, 104 msg->am_ioctl.cmd, 105 msg->am_ioctl.data, 106 msg->am_ioctl.fflag, 107 msg->am_ioctl.td); 108 break; 109 case CDEV_CMD_DUMP: 110 error = devsw->old_dump( 111 msg->am_dump.msg.dev, 112 msg->am_dump.count, 113 msg->am_dump.blkno, 114 msg->am_dump.secsize); 115 break; 116 case CDEV_CMD_PSIZE: 117 msg->am_psize.result = devsw->old_psize(msg->am_psize.msg.dev); 118 error = 0; /* XXX */ 119 break; 120 case CDEV_CMD_READ: 121 error = devsw->old_read( 122 msg->am_read.msg.dev, 123 msg->am_read.uio, 124 msg->am_read.ioflag); 125 break; 126 case CDEV_CMD_WRITE: 127 error = devsw->old_write( 128 msg->am_read.msg.dev, 129 msg->am_read.uio, 130 msg->am_read.ioflag); 131 break; 132 case CDEV_CMD_POLL: 133 msg->am_poll.events = devsw->old_poll( 134 msg->am_poll.msg.dev, 135 msg->am_poll.events, 136 msg->am_poll.td); 137 error = 0; 138 break; 139 case CDEV_CMD_KQFILTER: 140 msg->am_kqfilter.result = devsw->old_kqfilter( 141 msg->am_kqfilter.msg.dev, 142 msg->am_kqfilter.kn); 143 error = 0; 144 break; 145 case CDEV_CMD_MMAP: 146 msg->am_mmap.result = devsw->old_mmap( 147 msg->am_mmap.msg.dev, 148 msg->am_mmap.offset, 149 msg->am_mmap.nprot); 150 error = 0; /* XXX */ 151 break; 152 default: 153 error = ENOSYS; 154 break; 155 } 156 KKASSERT(error != EASYNC); 157 return(error); 158 } 159 160 static __inline 161 lwkt_port_t 162 _init_cdevmsg(dev_t dev, cdevmsg_t msg, int cmd) 163 { 164 lwkt_initmsg_simple(&msg->msg, cmd); 
165 msg->dev = dev; 166 return(dev->si_port); 167 } 168 169 int 170 dev_dopen(dev_t dev, int oflags, int devtype, thread_t td) 171 { 172 struct cdevmsg_open msg; 173 lwkt_port_t port; 174 175 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_OPEN); 176 if (port == NULL) 177 return(ENXIO); 178 msg.oflags = oflags; 179 msg.devtype = devtype; 180 msg.td = td; 181 return(lwkt_domsg(port, &msg.msg.msg)); 182 } 183 184 int 185 dev_dclose(dev_t dev, int fflag, int devtype, thread_t td) 186 { 187 struct cdevmsg_close msg; 188 lwkt_port_t port; 189 190 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_CLOSE); 191 if (port == NULL) 192 return(ENXIO); 193 msg.fflag = fflag; 194 msg.devtype = devtype; 195 msg.td = td; 196 return(lwkt_domsg(port, &msg.msg.msg)); 197 } 198 199 /* 200 * Core device strategy call, used to issue I/O on a device. There are 201 * two versions, a non-chained version and a chained version. The chained 202 * version reuses a BIO set up by vn_strategy(). The only difference is 203 * that, for now, we do not push a new tracking structure when chaining 204 * from vn_strategy. XXX this will ultimately have to change. 
 */
void
dev_dstrategy(dev_t dev, struct bio *bio)
{
	struct cdevmsg_strategy msg;
	struct bio_track *track;
	lwkt_port_t port;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	/*
	 * Account the I/O against the device's read or write track and
	 * attach the track to the bio before dispatching — the increment
	 * must happen before the driver can see (and complete) the bio.
	 */
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	atomic_add_int(&track->bk_active, 1);
	bio->bio_track = track;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_STRATEGY);
	KKASSERT(port);	/* 'nostrategy' function is NULL YYY */
	msg.bio = bio;
	lwkt_domsg(port, &msg.msg.msg);
}

/*
 * Chained strategy call: the bio already carries a tracking structure
 * set up by vn_strategy(), so no new track is pushed here (see the
 * comment above).
 */
void
dev_dstrategy_chain(dev_t dev, struct bio *bio)
{
	struct cdevmsg_strategy msg;
	lwkt_port_t port;

	KKASSERT(bio->bio_track != NULL);
	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_STRATEGY);
	KKASSERT(port);	/* 'nostrategy' function is NULL YYY */
	msg.bio = bio;
	lwkt_domsg(port, &msg.msg.msg);
}

/*
 * Issue an ioctl on a device.  Returns ENXIO if the device has no
 * message port, otherwise the synchronous result of the driver's
 * ioctl routine.
 */
int
dev_dioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, thread_t td)
{
	struct cdevmsg_ioctl msg;
	lwkt_port_t port;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_IOCTL);
	if (port == NULL)
		return(ENXIO);
	msg.cmd = cmd;
	msg.data = data;
	msg.fflag = fflag;
	msg.td = td;
	return(lwkt_domsg(port, &msg.msg.msg));
}

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(dev_t dev)
{
	struct cdevmsg_dump msg;
	lwkt_port_t port;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_DUMP);
	if (port == NULL)
		return(ENXIO);
	/* zeroed here; the disk layer fills these in (see note above) */
	msg.count = 0;
	msg.blkno = 0;
	msg.secsize = 0;
	return(lwkt_domsg(port, &msg.msg.msg));
}

/*
 * Return the size of a device partition, or -1 if the device has no
 * message port or the driver's psize routine failed.
 */
int
dev_dpsize(dev_t dev)
{
	struct cdevmsg_psize msg;
	lwkt_port_t port;
	int error;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_PSIZE);
	if (port == NULL)
		return(-1);
	error = lwkt_domsg(port, &msg.msg.msg);
	if (error == 0)
		return(msg.result);
	return(-1);
}

/*
 * Read from a device.  On success the device's last-read timestamp is
 * updated.  Returns ENXIO if the device has no message port.
 */
int
dev_dread(dev_t dev, struct uio *uio, int ioflag)
{
	struct cdevmsg_read msg;
	lwkt_port_t port;
	int error;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_READ);
	if (port == NULL)
		return(ENXIO);
	msg.uio = uio;
	msg.ioflag = ioflag;
	error = lwkt_domsg(port, &msg.msg.msg);
	if (error == 0)
		dev->si_lastread = time_second;
	return (error);
}

/*
 * Write to a device.  The last-write timestamp is updated before the
 * write is issued (unlike dev_dread, which stamps only on success).
 * Returns ENXIO if the device has no message port.
 */
int
dev_dwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct cdevmsg_write msg;
	lwkt_port_t port;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_WRITE);
	if (port == NULL)
		return(ENXIO);
	dev->si_lastwrite = time_second;
	msg.uio = uio;
	msg.ioflag = ioflag;
	return(lwkt_domsg(port, &msg.msg.msg));
}

/*
 * Poll a device.  On success returns the driver-reported event mask;
 * on failure falls back to seltrue().  Returns ENXIO if the device has
 * no message port.
 */
int
dev_dpoll(dev_t dev, int events, thread_t td)
{
	struct cdevmsg_poll msg;
	lwkt_port_t port;
	int error;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_POLL);
	if (port == NULL)
		return(ENXIO);
	msg.events = events;
	msg.td = td;
	error = lwkt_domsg(port, &msg.msg.msg);
	if (error == 0)
		return(msg.events);
	return(seltrue(dev, msg.events, td));
}

/*
 * Attach a kqueue filter to a device.  Returns the driver's filter
 * result on success, ENODEV on failure, or ENXIO if the device has no
 * message port.
 */
int
dev_dkqfilter(dev_t dev, struct knote *kn)
{
	struct cdevmsg_kqfilter msg;
	lwkt_port_t port;
	int error;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_KQFILTER);
	if (port == NULL)
		return(ENXIO);
	msg.kn = kn;
	error = lwkt_domsg(port, &msg.msg.msg);
	if (error == 0)
		return(msg.result);
	return(ENODEV);
}

/*
 * Map device memory.  Returns the driver's mmap result, or -1 if the
 * device has no message port or the operation failed.
 */
int
dev_dmmap(dev_t dev, vm_offset_t offset, int nprot)
{
	struct cdevmsg_mmap msg;
	lwkt_port_t port;
	int error;

	port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_MMAP);
	if (port == NULL)
		return(-1);
	msg.offset = offset;
	msg.nprot = nprot;
	error = lwkt_domsg(port, &msg.msg.msg);
	if (error == 0)
		return(msg.result);
	return(-1);
}

/* Return the device switch's registered name */
const char *
dev_dname(dev_t dev)
{
	return(dev->si_devsw->d_name);
}

/* Return the device switch's flags */
int
dev_dflags(dev_t dev)
{
	return(dev->si_devsw->d_flags);
}

/* Return the device switch's major number */
int
dev_dmaj(dev_t dev)
{
	return(dev->si_devsw->d_maj);
}

/* Return the device's message port */
lwkt_port_t
dev_dport(dev_t dev)
{
	return(dev->si_port);
}

/*
 * Convert a cdevsw template into the real thing, filling in fields the
 * device left empty with appropriate defaults.
 */
void
compile_devsw(struct cdevsw *devsw)
{
	/*
	 * Shared synchronous-dispatch port for all legacy cdevsw entries;
	 * lazily initialized on the first call.
	 */
	static lwkt_port devsw_compat_port;

	if (devsw_compat_port.mp_putport == NULL)
		init_default_cdevsw_port(&devsw_compat_port);

	/* Substitute no-op defaults for any entry points the driver omitted */
	if (devsw->old_open == NULL)
		devsw->old_open = noopen;
	if (devsw->old_close == NULL)
		devsw->old_close = noclose;
	if (devsw->old_read == NULL)
		devsw->old_read = noread;
	if (devsw->old_write == NULL)
		devsw->old_write = nowrite;
	if (devsw->old_ioctl == NULL)
		devsw->old_ioctl = noioctl;
	if (devsw->old_poll == NULL)
		devsw->old_poll = nopoll;
	if (devsw->old_mmap == NULL)
		devsw->old_mmap = nommap;
	if (devsw->old_strategy == NULL)
		devsw->old_strategy = nostrategy;
	if (devsw->old_dump == NULL)
		devsw->old_dump = nodump;
	if (devsw->old_psize == NULL)
		devsw->old_psize = nopsize;
	if (devsw->old_kqfilter == NULL)
		devsw->old_kqfilter = nokqfilter;

	if (devsw->d_port == NULL)
		devsw->d_port = &devsw_compat_port;
	if (devsw->d_clone == NULL)
		devsw->d_clone = noclone;
}

/*
 * This makes a cdevsw entry visible to userland (e.g /dev/<blah>).
 *
 * The kernel can overload a major number by making multiple cdevsw_add()
 * calls, but only the most recent one (the first one in the cdevbase[] list
 * matching the mask/match) will be visible to userland.  make_dev() does
 * not automatically call cdevsw_add() (nor do we want it to, since
 * partition-managed disk devices are overloaded on top of the raw device).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later cdevsw_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */
int
cdevsw_add(struct cdevsw *devsw, u_int mask, u_int match)
{
	int maj;
	struct cdevlink *link;

	compile_devsw(devsw);
	maj = devsw->d_maj;
	if (maj < 0 || maj >= NUMCDEVSW) {
		printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n",
		    devsw->d_name, maj);
		return (EINVAL);
	}
	for (link = cdevbase[maj]; link; link = link->next) {
		/*
		 * If we get an exact match we usurp the target, but we only print
		 * a warning message if a different device switch is installed.
		 */
		if (link->mask == mask && link->match == match) {
			if (link->devsw != devsw) {
				printf("WARNING: \"%s\" (%p) is usurping \"%s\"'s (%p)"
				    " cdevsw[]\n",
				    devsw->d_name, devsw,
				    link->devsw->d_name, link->devsw);
				link->devsw = devsw;
				++devsw->d_refs;
			}
			return(0);
		}
		/*
		 * XXX add additional warnings for overlaps
		 */
	}

	/* No existing entry matched; link a new one at the head of the list */
	link = malloc(sizeof(struct cdevlink), M_DEVBUF, M_INTWAIT|M_ZERO);
	link->mask = mask;
	link->match = match;
	link->devsw = devsw;
	link->next = cdevbase[maj];
	cdevbase[maj] = link;
	++devsw->d_refs;
	return(0);
}

/*
 * Should only be used by udev2dev().
 *
 * If the minor number is -1, we match the first cdevsw we find for this
 * major.   If the mask is not -1 then multiple minor numbers can match
 * the same devsw.
 *
 * Note that this function will return NULL if the minor number is not within
 * the bounds of the installed mask(s).
 *
 * The specified minor number should NOT include any major bits.
 */
struct cdevsw *
cdevsw_get(int x, int y)
{
	struct cdevlink *link;

	/* x is the major number, y the minor; bounds-check the major */
	if (x < 0 || x >= NUMCDEVSW)
		return(NULL);
	for (link = cdevbase[x]; link; link = link->next) {
		if (y == -1 || (link->mask & y) == link->match)
			return(link->devsw);
	}
	return(NULL);
}

/*
 * Use the passed cdevsw as a template to create our intercept cdevsw,
 * and install and return ours.
 */
struct cdevsw *
cdevsw_add_override(dev_t backing_dev, u_int mask, u_int match)
{
	struct cdevsw *devsw;
	struct cdevsw *bsw = backing_dev->si_devsw;

	/* Copy only the identifying fields; entry points default via compile_devsw */
	devsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_INTWAIT|M_ZERO);
	devsw->d_name = bsw->d_name;
	devsw->d_maj = bsw->d_maj;
	devsw->d_flags = bsw->d_flags;
	compile_devsw(devsw);
	cdevsw_add(devsw, mask, match);

	return(devsw);
}

/*
 * Override a device's port, returning the previously installed port.  This
 * is XXX very dangerous.
 */
lwkt_port_t
cdevsw_dev_override(dev_t dev, lwkt_port_t port)
{
	lwkt_port_t oport;

	oport = dev->si_port;
	dev->si_port = port;
	return(oport);
}

/*
 * Remove a cdevsw entry from the cdevbase[] major array so no new user opens
 * can be performed, and destroy all devices installed in the hash table
 * which are associated with this cdevsw.  (see destroy_all_dev()).
 */
int
cdevsw_remove(struct cdevsw *devsw, u_int mask, u_int match)
{
	int maj = devsw->d_maj;
	struct cdevlink *link;
	struct cdevlink **plink;

	if (maj < 0 || maj >= NUMCDEVSW) {
		printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n",
		    devsw->d_name, maj);
		return EINVAL;
	}
	if (devsw != &dead_cdevsw)
		destroy_all_dev(devsw, mask, match);
	/*
	 * Walk the list keeping a pointer-to-pointer so the matching link
	 * can be unlinked in place.  An exact mask/match hit that belongs
	 * to a different devsw means our registration was usurped.
	 */
	for (plink = &cdevbase[maj]; (link = *plink) != NULL; plink = &link->next) {
		if (link->mask == mask && link->match == match) {
			if (link->devsw == devsw)
				break;
			printf("%s: ERROR: cannot remove from cdevsw[], its major"
			    " number %d was stolen by %s\n",
			    devsw->d_name, maj,
			    link->devsw->d_name
			);
		}
	}
	if (link == NULL) {
		printf("%s(%d)[%08x/%08x]: WARNING: cdevsw removed multiple times!\n",
		    devsw->d_name, maj, mask, match);
	} else {
		*plink = link->next;
		--devsw->d_refs; /* XXX cdevsw_release() / record refs */
		free(link, M_DEVBUF);
	}
	/* Warn if devices still reference this switch after the last unlink */
	if (cdevbase[maj] == NULL && devsw->d_refs != 0) {
		printf("%s(%d)[%08x/%08x]: Warning: cdevsw_remove() called while "
		    "%d device refs still exist!\n",
		    devsw->d_name, maj, mask, match, devsw->d_refs);
	} else {
		printf("%s: cdevsw removed\n", devsw->d_name);
	}
	return 0;
}

/*
 * Release a cdevsw entry.  When the ref count reaches zero, recurse
 * through the stack.
 */
void
cdevsw_release(struct cdevsw *devsw)
{
	--devsw->d_refs;
	if (devsw->d_refs == 0) {
		/* XXX teardown not implemented yet */
	}
}