1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert 4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $DragonFly: src/sys/kern/kern_device.c,v 1.16 2006/02/17 19:18:06 dillon Exp $ 29 */ 30 #include <sys/param.h> 31 #include <sys/kernel.h> 32 #include <sys/sysctl.h> 33 #include <sys/systm.h> 34 #include <sys/module.h> 35 #include <sys/malloc.h> 36 #include <sys/conf.h> 37 #include <sys/bio.h> 38 #include <sys/buf.h> 39 #include <sys/vnode.h> 40 #include <sys/queue.h> 41 #include <sys/msgport.h> 42 #include <sys/device.h> 43 #include <machine/stdarg.h> 44 #include <sys/proc.h> 45 #include <sys/thread2.h> 46 #include <sys/msgport2.h> 47 48 static struct cdevlink *cdevbase[NUMCDEVSW]; 49 50 static int cdevsw_putport(lwkt_port_t port, lwkt_msg_t msg); 51 52 struct cdevsw dead_cdevsw; 53 54 /* 55 * Initialize a message port to serve as the default message-handling port 56 * for device operations. This message port provides compatibility with 57 * traditional cdevsw dispatch functions by running them synchronously. 58 * 59 * YYY NOTE: ms_cmd can now hold a function pointer, should this code be 60 * converted from an integer op to a function pointer with a flag to 61 * indicate legacy operation? 62 */ 63 static void 64 init_default_cdevsw_port(lwkt_port_t port) 65 { 66 lwkt_initport(port, NULL); 67 port->mp_putport = cdevsw_putport; 68 } 69 70 static 71 int 72 cdevsw_putport(lwkt_port_t port, lwkt_msg_t lmsg) 73 { 74 cdevallmsg_t msg = (cdevallmsg_t)lmsg; 75 struct cdevsw *devsw = msg->am_msg.dev->si_devsw; 76 int error; 77 78 /* 79 * Run the device switch function synchronously in the context of the 80 * caller and return a synchronous error code (anything not EASYNC). 81 */ 82 switch(msg->am_lmsg.ms_cmd.cm_op) { 83 case CDEV_CMD_OPEN: 84 error = devsw->old_open( 85 msg->am_open.msg.dev, 86 msg->am_open.oflags, 87 msg->am_open.devtype, 88 msg->am_open.td); 89 break; 90 case CDEV_CMD_CLOSE: 91 error = devsw->old_close( 92 msg->am_close.msg.dev, 93 msg->am_close.fflag, 94 msg->am_close.devtype, 95 msg->am_close.td); 96 break; 97 case CDEV_CMD_STRATEGY: 98 devsw->old_strategy(msg->am_strategy.msg.dev, msg->am_strategy.bio); 99 error = 0; 100 break; 101 case CDEV_CMD_IOCTL: 102 error = devsw->old_ioctl( 103 msg->am_ioctl.msg.dev, 104 msg->am_ioctl.cmd, 105 msg->am_ioctl.data, 106 msg->am_ioctl.fflag, 107 msg->am_ioctl.td); 108 break; 109 case CDEV_CMD_DUMP: 110 error = devsw->old_dump( 111 msg->am_dump.msg.dev, 112 msg->am_dump.count, 113 msg->am_dump.blkno, 114 msg->am_dump.secsize); 115 break; 116 case CDEV_CMD_PSIZE: 117 msg->am_psize.result = devsw->old_psize(msg->am_psize.msg.dev); 118 error = 0; /* XXX */ 119 break; 120 case CDEV_CMD_READ: 121 error = devsw->old_read( 122 msg->am_read.msg.dev, 123 msg->am_read.uio, 124 msg->am_read.ioflag); 125 break; 126 case CDEV_CMD_WRITE: 127 error = devsw->old_write( 128 msg->am_read.msg.dev, 129 msg->am_read.uio, 130 msg->am_read.ioflag); 131 break; 132 case CDEV_CMD_POLL: 133 msg->am_poll.events = devsw->old_poll( 134 msg->am_poll.msg.dev, 135 msg->am_poll.events, 136 msg->am_poll.td); 137 error = 0; 138 break; 139 case CDEV_CMD_KQFILTER: 140 msg->am_kqfilter.result = devsw->old_kqfilter( 141 msg->am_kqfilter.msg.dev, 142 msg->am_kqfilter.kn); 143 error = 0; 144 break; 145 case CDEV_CMD_MMAP: 146 msg->am_mmap.result = devsw->old_mmap( 147 msg->am_mmap.msg.dev, 148 msg->am_mmap.offset, 149 msg->am_mmap.nprot); 150 error = 0; /* XXX */ 151 break; 152 default: 153 error = ENOSYS; 154 break; 155 } 156 KKASSERT(error != EASYNC); 157 return(error); 158 } 159 160 static __inline 161 lwkt_port_t 162 _init_cdevmsg(dev_t dev, cdevmsg_t msg, int cmd) 163 { 164 lwkt_initmsg_simple(&msg->msg, cmd); 165 msg->dev = dev; 166 return(dev->si_port); 167 } 168 169 int 170 dev_dopen(dev_t dev, int oflags, int devtype, thread_t td) 171 { 172 struct cdevmsg_open msg; 173 lwkt_port_t port; 174 175 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_OPEN); 176 if (port == NULL) 177 return(ENXIO); 178 msg.oflags = oflags; 179 msg.devtype = devtype; 180 msg.td = td; 181 return(lwkt_domsg(port, &msg.msg.msg)); 182 } 183 184 int 185 dev_dclose(dev_t dev, int fflag, int devtype, thread_t td) 186 { 187 struct cdevmsg_close msg; 188 lwkt_port_t port; 189 190 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_CLOSE); 191 if (port == NULL) 192 return(ENXIO); 193 msg.fflag = fflag; 194 msg.devtype = devtype; 195 msg.td = td; 196 return(lwkt_domsg(port, &msg.msg.msg)); 197 } 198 199 /* 200 * Core device strategy call, used to issue I/O on a device. There are 201 * two versions, a non-chained version and a chained version. The chained 202 * version reuses a BIO set up by vn_strategy(). The only difference is 203 * that, for now, we do not push a new tracking structure when chaining 204 * from vn_strategy. XXX this will ultimately have to change. 205 */ 206 void 207 dev_dstrategy(dev_t dev, struct bio *bio) 208 { 209 struct cdevmsg_strategy msg; 210 struct bio_track *track; 211 lwkt_port_t port; 212 213 KKASSERT(bio->bio_track == NULL); 214 if (bio->bio_buf->b_flags & B_READ) 215 track = &dev->si_track_read; 216 else 217 track = &dev->si_track_write; 218 atomic_add_int(&track->bk_active, 1); 219 bio->bio_track = track; 220 221 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_STRATEGY); 222 KKASSERT(port); /* 'nostrategy' function is NULL YYY */ 223 msg.bio = bio; 224 lwkt_domsg(port, &msg.msg.msg); 225 } 226 227 void 228 dev_dstrategy_chain(dev_t dev, struct bio *bio) 229 { 230 struct cdevmsg_strategy msg; 231 lwkt_port_t port; 232 233 KKASSERT(bio->bio_track != NULL); 234 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_STRATEGY); 235 KKASSERT(port); /* 'nostrategy' function is NULL YYY */ 236 msg.bio = bio; 237 lwkt_domsg(port, &msg.msg.msg); 238 } 239 240 int 241 dev_dioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, thread_t td) 242 { 243 struct cdevmsg_ioctl msg; 244 lwkt_port_t port; 245 246 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_IOCTL); 247 if (port == NULL) 248 return(ENXIO); 249 msg.cmd = cmd; 250 msg.data = data; 251 msg.fflag = fflag; 252 msg.td = td; 253 return(lwkt_domsg(port, &msg.msg.msg)); 254 } 255 256 /* 257 * note: the disk layer is expected to set count, blkno, and secsize before 258 * forwarding the message. 259 */ 260 int 261 dev_ddump(dev_t dev) 262 { 263 struct cdevmsg_dump msg; 264 lwkt_port_t port; 265 266 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_DUMP); 267 if (port == NULL) 268 return(ENXIO); 269 msg.count = 0; 270 msg.blkno = 0; 271 msg.secsize = 0; 272 return(lwkt_domsg(port, &msg.msg.msg)); 273 } 274 275 int 276 dev_dpsize(dev_t dev) 277 { 278 struct cdevmsg_psize msg; 279 lwkt_port_t port; 280 int error; 281 282 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_PSIZE); 283 if (port == NULL) 284 return(-1); 285 error = lwkt_domsg(port, &msg.msg.msg); 286 if (error == 0) 287 return(msg.result); 288 return(-1); 289 } 290 291 int 292 dev_dread(dev_t dev, struct uio *uio, int ioflag) 293 { 294 struct cdevmsg_read msg; 295 lwkt_port_t port; 296 int error; 297 298 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_READ); 299 if (port == NULL) 300 return(ENXIO); 301 msg.uio = uio; 302 msg.ioflag = ioflag; 303 error = lwkt_domsg(port, &msg.msg.msg); 304 if (error == 0) 305 dev->si_lastread = time_second; 306 return (error); 307 } 308 309 int 310 dev_dwrite(dev_t dev, struct uio *uio, int ioflag) 311 { 312 struct cdevmsg_write msg; 313 lwkt_port_t port; 314 315 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_WRITE); 316 if (port == NULL) 317 return(ENXIO); 318 dev->si_lastwrite = time_second; 319 msg.uio = uio; 320 msg.ioflag = ioflag; 321 return(lwkt_domsg(port, &msg.msg.msg)); 322 } 323 324 int 325 dev_dpoll(dev_t dev, int events, thread_t td) 326 { 327 struct cdevmsg_poll msg; 328 lwkt_port_t port; 329 int error; 330 331 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_POLL); 332 if (port == NULL) 333 return(ENXIO); 334 msg.events = events; 335 msg.td = td; 336 error = lwkt_domsg(port, &msg.msg.msg); 337 if (error == 0) 338 return(msg.events); 339 return(seltrue(dev, msg.events, td)); 340 } 341 342 int 343 dev_dkqfilter(dev_t dev, struct knote *kn) 344 { 345 struct cdevmsg_kqfilter msg; 346 lwkt_port_t port; 347 int error; 348 349 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_KQFILTER); 350 if (port == NULL) 351 return(ENXIO); 352 msg.kn = kn; 353 error = lwkt_domsg(port, &msg.msg.msg); 354 if (error == 0) 355 return(msg.result); 356 return(ENODEV); 357 } 358 359 int 360 dev_dmmap(dev_t dev, vm_offset_t offset, int nprot) 361 { 362 struct cdevmsg_mmap msg; 363 lwkt_port_t port; 364 int error; 365 366 port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_MMAP); 367 if (port == NULL) 368 return(-1); 369 msg.offset = offset; 370 msg.nprot = nprot; 371 error = lwkt_domsg(port, &msg.msg.msg); 372 if (error == 0) 373 return(msg.result); 374 return(-1); 375 } 376 377 const char * 378 dev_dname(dev_t dev) 379 { 380 return(dev->si_devsw->d_name); 381 } 382 383 int 384 dev_dflags(dev_t dev) 385 { 386 return(dev->si_devsw->d_flags); 387 } 388 389 int 390 dev_dmaj(dev_t dev) 391 { 392 return(dev->si_devsw->d_maj); 393 } 394 395 lwkt_port_t 396 dev_dport(dev_t dev) 397 { 398 return(dev->si_port); 399 } 400 401 /* 402 * Convert a cdevsw template into the real thing, filling in fields the 403 * device left empty with appropriate defaults. 404 */ 405 void 406 compile_devsw(struct cdevsw *devsw) 407 { 408 static lwkt_port devsw_compat_port; 409 410 if (devsw_compat_port.mp_putport == NULL) 411 init_default_cdevsw_port(&devsw_compat_port); 412 413 if (devsw->old_open == NULL) 414 devsw->old_open = noopen; 415 if (devsw->old_close == NULL) 416 devsw->old_close = noclose; 417 if (devsw->old_read == NULL) 418 devsw->old_read = noread; 419 if (devsw->old_write == NULL) 420 devsw->old_write = nowrite; 421 if (devsw->old_ioctl == NULL) 422 devsw->old_ioctl = noioctl; 423 if (devsw->old_poll == NULL) 424 devsw->old_poll = nopoll; 425 if (devsw->old_mmap == NULL) 426 devsw->old_mmap = nommap; 427 if (devsw->old_strategy == NULL) 428 devsw->old_strategy = nostrategy; 429 if (devsw->old_dump == NULL) 430 devsw->old_dump = nodump; 431 if (devsw->old_psize == NULL) 432 devsw->old_psize = nopsize; 433 if (devsw->old_kqfilter == NULL) 434 devsw->old_kqfilter = nokqfilter; 435 436 if (devsw->d_port == NULL) 437 devsw->d_port = &devsw_compat_port; 438 if (devsw->d_clone == NULL) 439 devsw->d_clone = noclone; 440 } 441 442 /* 443 * This makes a cdevsw entry visible to userland (e.g /dev/<blah>). 444 * 445 * The kernel can overload a major number by making multiple cdevsw_add() 446 * calls, but only the most recent one (the first one in the cdevbase[] list 447 * matching the mask/match) will be visible to userland. make_dev() does 448 * not automatically call cdevsw_add() (nor do we want it to, since 449 * partition-managed disk devices are overloaded on top of the raw device). 450 * 451 * Disk devices typically register their major, e.g. 'ad0', and then call 452 * into the disk label management code which overloads its own onto e.g. 'ad0' 453 * to support all the various slice and partition combinations. 454 * 455 * The mask/match supplied in this call are a full 32 bits and the same 456 * mask and match must be specified in a later cdevsw_remove() call to 457 * match this add. However, the match value for the minor number should never 458 * have any bits set in the major number's bit range (8-15). The mask value 459 * may be conveniently specified as -1 without creating any major number 460 * interference. 461 */ 462 int 463 cdevsw_add(struct cdevsw *devsw, u_int mask, u_int match) 464 { 465 int maj; 466 struct cdevlink *link; 467 468 compile_devsw(devsw); 469 maj = devsw->d_maj; 470 if (maj < 0 || maj >= NUMCDEVSW) { 471 printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n", 472 devsw->d_name, maj); 473 return (EINVAL); 474 } 475 for (link = cdevbase[maj]; link; link = link->next) { 476 /* 477 * If we get an exact match we usurp the target, but we only print 478 * a warning message if a different device switch is installed. 479 */ 480 if (link->mask == mask && link->match == match) { 481 if (link->devsw != devsw) { 482 printf("WARNING: \"%s\" (%p) is usurping \"%s\"'s (%p)" 483 " cdevsw[]\n", 484 devsw->d_name, devsw, 485 link->devsw->d_name, link->devsw); 486 link->devsw = devsw; 487 ++devsw->d_refs; 488 } 489 return(0); 490 } 491 /* 492 * XXX add additional warnings for overlaps 493 */ 494 } 495 496 link = malloc(sizeof(struct cdevlink), M_DEVBUF, M_INTWAIT|M_ZERO); 497 link->mask = mask; 498 link->match = match; 499 link->devsw = devsw; 500 link->next = cdevbase[maj]; 501 cdevbase[maj] = link; 502 ++devsw->d_refs; 503 return(0); 504 } 505 506 /* 507 * Should only be used by udev2dev(). 508 * 509 * If the minor number is -1, we match the first cdevsw we find for this 510 * major. If the mask is not -1 then multiple minor numbers can match 511 * the same devsw. 512 * 513 * Note that this function will return NULL if the minor number is not within 514 * the bounds of the installed mask(s). 515 * 516 * The specified minor number should NOT include any major bits. 517 */ 518 struct cdevsw * 519 cdevsw_get(int x, int y) 520 { 521 struct cdevlink *link; 522 523 if (x < 0 || x >= NUMCDEVSW) 524 return(NULL); 525 for (link = cdevbase[x]; link; link = link->next) { 526 if (y == -1 || (link->mask & y) == link->match) 527 return(link->devsw); 528 } 529 return(NULL); 530 } 531 532 /* 533 * Use the passed cdevsw as a template to create our intercept cdevsw, 534 * and install and return ours. 535 */ 536 struct cdevsw * 537 cdevsw_add_override(dev_t backing_dev, u_int mask, u_int match) 538 { 539 struct cdevsw *devsw; 540 struct cdevsw *bsw = backing_dev->si_devsw; 541 542 devsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_INTWAIT|M_ZERO); 543 devsw->d_name = bsw->d_name; 544 devsw->d_maj = bsw->d_maj; 545 devsw->d_flags = bsw->d_flags; 546 compile_devsw(devsw); 547 cdevsw_add(devsw, mask, match); 548 549 return(devsw); 550 } 551 552 /* 553 * Override a device's port, returning the previously installed port. This 554 * is XXX very dangerous. 555 */ 556 lwkt_port_t 557 cdevsw_dev_override(dev_t dev, lwkt_port_t port) 558 { 559 lwkt_port_t oport; 560 561 oport = dev->si_port; 562 dev->si_port = port; 563 return(oport); 564 } 565 566 /* 567 * Remove a cdevsw entry from the cdevbase[] major array so no new user opens 568 * can be performed, and destroy all devices installed in the hash table 569 * which are associated with this cdevsw. (see destroy_all_dev()). 570 */ 571 int 572 cdevsw_remove(struct cdevsw *devsw, u_int mask, u_int match) 573 { 574 int maj = devsw->d_maj; 575 struct cdevlink *link; 576 struct cdevlink **plink; 577 578 if (maj < 0 || maj >= NUMCDEVSW) { 579 printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n", 580 devsw->d_name, maj); 581 return EINVAL; 582 } 583 if (devsw != &dead_cdevsw) 584 destroy_all_dev(devsw, mask, match); 585 for (plink = &cdevbase[maj]; (link = *plink) != NULL; plink = &link->next) { 586 if (link->mask == mask && link->match == match) { 587 if (link->devsw == devsw) 588 break; 589 printf("%s: ERROR: cannot remove from cdevsw[], its major" 590 " number %d was stolen by %s\n", 591 devsw->d_name, maj, 592 link->devsw->d_name 593 ); 594 } 595 } 596 if (link == NULL) { 597 printf("%s(%d)[%08x/%08x]: WARNING: cdevsw removed multiple times!\n", 598 devsw->d_name, maj, mask, match); 599 } else { 600 *plink = link->next; 601 --devsw->d_refs; /* XXX cdevsw_release() / record refs */ 602 free(link, M_DEVBUF); 603 } 604 if (cdevbase[maj] == NULL && devsw->d_refs != 0) { 605 printf("%s(%d)[%08x/%08x]: Warning: cdevsw_remove() called while " 606 "%d device refs still exist!\n", 607 devsw->d_name, maj, mask, match, devsw->d_refs); 608 } else { 609 printf("%s: cdevsw removed\n", devsw->d_name); 610 } 611 return 0; 612 } 613 614 /* 615 * Release a cdevsw entry. When the ref count reaches zero, recurse 616 * through the stack. 617 */ 618 void 619 cdevsw_release(struct cdevsw *devsw) 620 { 621 --devsw->d_refs; 622 if (devsw->d_refs == 0) { 623 /* XXX */ 624 } 625 } 626 627