1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.12 (Berkeley) 02/22/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_lease_desc, spec_lease_check }, /* lease */ 49 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 50 { &vop_select_desc, spec_select }, /* select */ 51 { &vop_revoke_desc, spec_revoke }, /* revoke */ 52 { &vop_mmap_desc, spec_mmap }, /* mmap */ 53 { &vop_fsync_desc, spec_fsync }, /* fsync */ 54 { &vop_seek_desc, spec_seek }, /* seek */ 55 { &vop_remove_desc, spec_remove }, /* remove */ 56 { &vop_link_desc, spec_link }, /* link */ 57 { &vop_rename_desc, spec_rename }, /* rename */ 58 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 59 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 60 { &vop_symlink_desc, spec_symlink }, /* symlink */ 61 { &vop_readdir_desc, spec_readdir }, /* readdir */ 62 { &vop_readlink_desc, spec_readlink }, /* readlink */ 63 { &vop_abortop_desc, spec_abortop }, /* abortop */ 64 { &vop_inactive_desc, spec_inactive }, /* inactive */ 65 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 66 { &vop_lock_desc, spec_lock }, /* lock */ 67 { &vop_unlock_desc, spec_unlock }, /* unlock */ 68 { &vop_bmap_desc, spec_bmap }, /* bmap */ 69 { &vop_strategy_desc, spec_strategy }, /* strategy */ 70 { &vop_print_desc, spec_print }, /* print */ 71 { &vop_islocked_desc, spec_islocked }, /* islocked */ 72 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 73 { &vop_advlock_desc, spec_advlock }, /* advlock */ 74 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 75 { &vop_valloc_desc, spec_valloc }, /* valloc */ 76 { &vop_vfree_desc, spec_vfree }, /* vfree */ 77 { &vop_truncate_desc, spec_truncate }, /* truncate */ 78 { &vop_update_desc, spec_update }, /* update */ 79 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 80 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 81 }; 82 struct vnodeopv_desc spec_vnodeop_opv_desc = 83 { &spec_vnodeop_p, spec_vnodeop_entries }; 84 85 /* 86 * Trivial lookup routine that always fails. 87 */ 88 int 89 spec_lookup(ap) 90 struct vop_lookup_args /* { 91 struct vnode *a_dvp; 92 struct vnode **a_vpp; 93 struct componentname *a_cnp; 94 } */ *ap; 95 { 96 97 *ap->a_vpp = NULL; 98 return (ENOTDIR); 99 } 100 101 /* 102 * Open a special file. 103 */ 104 /* ARGSUSED */ 105 spec_open(ap) 106 struct vop_open_args /* { 107 struct vnode *a_vp; 108 int a_mode; 109 struct ucred *a_cred; 110 struct proc *a_p; 111 } */ *ap; 112 { 113 struct vnode *bvp, *vp = ap->a_vp; 114 dev_t bdev, dev = (dev_t)vp->v_rdev; 115 register int maj = major(dev); 116 int error; 117 118 /* 119 * Don't allow open if fs is mounted -nodev. 120 */ 121 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 122 return (ENXIO); 123 124 switch (vp->v_type) { 125 126 case VCHR: 127 if ((u_int)maj >= nchrdev) 128 return (ENXIO); 129 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 130 /* 131 * When running in very secure mode, do not allow 132 * opens for writing of any disk character devices. 133 */ 134 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK) 135 return (EPERM); 136 /* 137 * When running in secure mode, do not allow opens 138 * for writing of /dev/mem, /dev/kmem, or character 139 * devices whose corresponding block devices are 140 * currently mounted. 141 */ 142 if (securelevel >= 1) { 143 if ((bdev = chrtoblk(dev)) != NODEV && 144 vfinddev(bdev, VBLK, &bvp) && 145 bvp->v_usecount > 0 && 146 (error = vfs_mountedon(bvp))) 147 return (error); 148 if (iskmemdev(dev)) 149 return (EPERM); 150 } 151 } 152 if (cdevsw[maj].d_type == D_TTY) 153 vp->v_flag |= VISTTY; 154 VOP_UNLOCK(vp); 155 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); 156 VOP_LOCK(vp); 157 return (error); 158 159 case VBLK: 160 if ((u_int)maj >= nblkdev) 161 return (ENXIO); 162 /* 163 * When running in very secure mode, do not allow 164 * opens for writing of any disk block devices. 165 */ 166 if (securelevel >= 2 && ap->a_cred != FSCRED && 167 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) 168 return (EPERM); 169 /* 170 * Do not allow opens of block devices that are 171 * currently mounted. 172 */ 173 if (error = vfs_mountedon(vp)) 174 return (error); 175 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); 176 } 177 return (0); 178 } 179 180 /* 181 * Vnode op for read 182 */ 183 /* ARGSUSED */ 184 spec_read(ap) 185 struct vop_read_args /* { 186 struct vnode *a_vp; 187 struct uio *a_uio; 188 int a_ioflag; 189 struct ucred *a_cred; 190 } */ *ap; 191 { 192 register struct vnode *vp = ap->a_vp; 193 register struct uio *uio = ap->a_uio; 194 struct proc *p = uio->uio_procp; 195 struct buf *bp; 196 daddr_t bn, nextbn; 197 long bsize, bscale; 198 struct partinfo dpart; 199 int n, on, majordev, (*ioctl)(); 200 int error = 0; 201 dev_t dev; 202 203 #ifdef DIAGNOSTIC 204 if (uio->uio_rw != UIO_READ) 205 panic("spec_read mode"); 206 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 207 panic("spec_read proc"); 208 #endif 209 if (uio->uio_resid == 0) 210 return (0); 211 212 switch (vp->v_type) { 213 214 case VCHR: 215 VOP_UNLOCK(vp); 216 error = (*cdevsw[major(vp->v_rdev)].d_read) 217 (vp->v_rdev, uio, ap->a_ioflag); 218 VOP_LOCK(vp); 219 return (error); 220 221 case VBLK: 222 if (uio->uio_offset < 0) 223 return (EINVAL); 224 bsize = BLKDEV_IOSIZE; 225 dev = vp->v_rdev; 226 if ((majordev = major(dev)) < nblkdev && 227 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 228 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && 229 dpart.part->p_fstype == FS_BSDFFS && 230 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 231 bsize = dpart.part->p_frag * dpart.part->p_fsize; 232 bscale = bsize / DEV_BSIZE; 233 do { 234 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 235 on = uio->uio_offset % bsize; 236 n = min((unsigned)(bsize - on), uio->uio_resid); 237 if (vp->v_lastr + bscale == bn) { 238 nextbn = bn + bscale; 239 error = breadn(vp, bn, (int)bsize, &nextbn, 240 (int *)&bsize, 1, NOCRED, &bp); 241 } else 242 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 243 vp->v_lastr = bn; 244 n = min(n, bsize - bp->b_resid); 245 if (error) { 246 brelse(bp); 247 return (error); 248 } 249 error = uiomove((char *)bp->b_data + on, n, uio); 250 if (n + on == bsize) 251 bp->b_flags |= B_AGE; 252 brelse(bp); 253 } while (error == 0 && uio->uio_resid > 0 && n != 0); 254 return (error); 255 256 default: 257 panic("spec_read type"); 258 } 259 /* NOTREACHED */ 260 } 261 262 /* 263 * Vnode op for write 264 */ 265 /* ARGSUSED */ 266 spec_write(ap) 267 struct vop_write_args /* { 268 struct vnode *a_vp; 269 struct uio *a_uio; 270 int a_ioflag; 271 struct ucred *a_cred; 272 } */ *ap; 273 { 274 register struct vnode *vp = ap->a_vp; 275 register struct uio *uio = ap->a_uio; 276 struct proc *p = uio->uio_procp; 277 struct buf *bp; 278 daddr_t bn; 279 int bsize, blkmask; 280 struct partinfo dpart; 281 register int n, on; 282 int error = 0; 283 284 #ifdef DIAGNOSTIC 285 if (uio->uio_rw != UIO_WRITE) 286 panic("spec_write mode"); 287 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 288 panic("spec_write proc"); 289 #endif 290 291 switch (vp->v_type) { 292 293 case VCHR: 294 VOP_UNLOCK(vp); 295 error = (*cdevsw[major(vp->v_rdev)].d_write) 296 (vp->v_rdev, uio, ap->a_ioflag); 297 VOP_LOCK(vp); 298 return (error); 299 300 case VBLK: 301 if (uio->uio_resid == 0) 302 return (0); 303 if (uio->uio_offset < 0) 304 return (EINVAL); 305 bsize = BLKDEV_IOSIZE; 306 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 307 (caddr_t)&dpart, FREAD, p) == 0) { 308 if (dpart.part->p_fstype == FS_BSDFFS && 309 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 310 bsize = dpart.part->p_frag * 311 dpart.part->p_fsize; 312 } 313 blkmask = (bsize / DEV_BSIZE) - 1; 314 do { 315 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 316 on = uio->uio_offset % bsize; 317 n = min((unsigned)(bsize - on), uio->uio_resid); 318 if (n == bsize) 319 bp = getblk(vp, bn, bsize, 0, 0); 320 else 321 error = bread(vp, bn, bsize, NOCRED, &bp); 322 n = min(n, bsize - bp->b_resid); 323 if (error) { 324 brelse(bp); 325 return (error); 326 } 327 error = uiomove((char *)bp->b_data + on, n, uio); 328 if (n + on == bsize) { 329 bp->b_flags |= B_AGE; 330 bawrite(bp); 331 } else 332 bdwrite(bp); 333 } while (error == 0 && uio->uio_resid > 0 && n != 0); 334 return (error); 335 336 default: 337 panic("spec_write type"); 338 } 339 /* NOTREACHED */ 340 } 341 342 /* 343 * Device ioctl operation. 344 */ 345 /* ARGSUSED */ 346 spec_ioctl(ap) 347 struct vop_ioctl_args /* { 348 struct vnode *a_vp; 349 int a_command; 350 caddr_t a_data; 351 int a_fflag; 352 struct ucred *a_cred; 353 struct proc *a_p; 354 } */ *ap; 355 { 356 dev_t dev = ap->a_vp->v_rdev; 357 358 switch (ap->a_vp->v_type) { 359 360 case VCHR: 361 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 362 ap->a_fflag, ap->a_p)); 363 364 case VBLK: 365 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 366 if (bdevsw[major(dev)].d_type == D_TAPE) 367 return (0); 368 else 369 return (1); 370 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 371 ap->a_fflag, ap->a_p)); 372 373 default: 374 panic("spec_ioctl"); 375 /* NOTREACHED */ 376 } 377 } 378 379 /* ARGSUSED */ 380 spec_select(ap) 381 struct vop_select_args /* { 382 struct vnode *a_vp; 383 int a_which; 384 int a_fflags; 385 struct ucred *a_cred; 386 struct proc *a_p; 387 } */ *ap; 388 { 389 register dev_t dev; 390 391 switch (ap->a_vp->v_type) { 392 393 default: 394 return (1); /* XXX */ 395 396 case VCHR: 397 dev = ap->a_vp->v_rdev; 398 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 399 } 400 } 401 /* 402 * Synch buffers associated with a block device 403 */ 404 /* ARGSUSED */ 405 int 406 spec_fsync(ap) 407 struct vop_fsync_args /* { 408 struct vnode *a_vp; 409 struct ucred *a_cred; 410 int a_waitfor; 411 struct proc *a_p; 412 } */ *ap; 413 { 414 register struct vnode *vp = ap->a_vp; 415 register struct buf *bp; 416 struct buf *nbp; 417 int s; 418 419 if (vp->v_type == VCHR) 420 return (0); 421 /* 422 * Flush all dirty buffers associated with a block device. 423 */ 424 loop: 425 s = splbio(); 426 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 427 nbp = bp->b_vnbufs.le_next; 428 if ((bp->b_flags & B_BUSY)) 429 continue; 430 if ((bp->b_flags & B_DELWRI) == 0) 431 panic("spec_fsync: not dirty"); 432 bremfree(bp); 433 bp->b_flags |= B_BUSY; 434 splx(s); 435 bawrite(bp); 436 goto loop; 437 } 438 if (ap->a_waitfor == MNT_WAIT) { 439 while (vp->v_numoutput) { 440 vp->v_flag |= VBWAIT; 441 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 442 } 443 #ifdef DIAGNOSTIC 444 if (vp->v_dirtyblkhd.lh_first) { 445 vprint("spec_fsync: dirty", vp); 446 goto loop; 447 } 448 #endif 449 } 450 splx(s); 451 return (0); 452 } 453 454 /* 455 * Just call the device strategy routine 456 */ 457 spec_strategy(ap) 458 struct vop_strategy_args /* { 459 struct buf *a_bp; 460 } */ *ap; 461 { 462 463 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 464 return (0); 465 } 466 467 /* 468 * This is a noop, simply returning what one has been given. 469 */ 470 spec_bmap(ap) 471 struct vop_bmap_args /* { 472 struct vnode *a_vp; 473 daddr_t a_bn; 474 struct vnode **a_vpp; 475 daddr_t *a_bnp; 476 int *a_runp; 477 } */ *ap; 478 { 479 480 if (ap->a_vpp != NULL) 481 *ap->a_vpp = ap->a_vp; 482 if (ap->a_bnp != NULL) 483 *ap->a_bnp = ap->a_bn; 484 if (ap->a_runp != NULL) 485 *ap->a_runp = 0; 486 return (0); 487 } 488 489 /* 490 * At the moment we do not do any locking. 491 */ 492 /* ARGSUSED */ 493 spec_lock(ap) 494 struct vop_lock_args /* { 495 struct vnode *a_vp; 496 } */ *ap; 497 { 498 499 return (0); 500 } 501 502 /* ARGSUSED */ 503 spec_unlock(ap) 504 struct vop_unlock_args /* { 505 struct vnode *a_vp; 506 } */ *ap; 507 { 508 509 return (0); 510 } 511 512 /* 513 * Device close routine 514 */ 515 /* ARGSUSED */ 516 spec_close(ap) 517 struct vop_close_args /* { 518 struct vnode *a_vp; 519 int a_fflag; 520 struct ucred *a_cred; 521 struct proc *a_p; 522 } */ *ap; 523 { 524 register struct vnode *vp = ap->a_vp; 525 dev_t dev = vp->v_rdev; 526 int (*devclose) __P((dev_t, int, int, struct proc *)); 527 int mode, error; 528 529 switch (vp->v_type) { 530 531 case VCHR: 532 /* 533 * Hack: a tty device that is a controlling terminal 534 * has a reference from the session structure. 535 * We cannot easily tell that a character device is 536 * a controlling terminal, unless it is the closing 537 * process' controlling terminal. In that case, 538 * if the reference count is 2 (this last descriptor 539 * plus the session), release the reference from the session. 540 */ 541 if (vcount(vp) == 2 && ap->a_p && 542 vp == ap->a_p->p_session->s_ttyvp) { 543 vrele(vp); 544 ap->a_p->p_session->s_ttyvp = NULL; 545 } 546 /* 547 * If the vnode is locked, then we are in the midst 548 * of forcably closing the device, otherwise we only 549 * close on last reference. 550 */ 551 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 552 return (0); 553 devclose = cdevsw[major(dev)].d_close; 554 mode = S_IFCHR; 555 break; 556 557 case VBLK: 558 /* 559 * On last close of a block device (that isn't mounted) 560 * we must invalidate any in core blocks, so that 561 * we can, for instance, change floppy disks. 562 */ 563 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 564 return (error); 565 /* 566 * We do not want to really close the device if it 567 * is still in use unless we are trying to close it 568 * forcibly. Since every use (buffer, vnode, swap, cmap) 569 * holds a reference to the vnode, and because we mark 570 * any other vnodes that alias this device, when the 571 * sum of the reference counts on all the aliased 572 * vnodes descends to one, we are on last close. 573 */ 574 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 575 return (0); 576 devclose = bdevsw[major(dev)].d_close; 577 mode = S_IFBLK; 578 break; 579 580 default: 581 panic("spec_close: not special"); 582 } 583 584 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 585 } 586 587 /* 588 * Print out the contents of a special device vnode. 589 */ 590 spec_print(ap) 591 struct vop_print_args /* { 592 struct vnode *a_vp; 593 } */ *ap; 594 { 595 596 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 597 minor(ap->a_vp->v_rdev)); 598 } 599 600 /* 601 * Return POSIX pathconf information applicable to special devices. 602 */ 603 spec_pathconf(ap) 604 struct vop_pathconf_args /* { 605 struct vnode *a_vp; 606 int a_name; 607 int *a_retval; 608 } */ *ap; 609 { 610 611 switch (ap->a_name) { 612 case _PC_LINK_MAX: 613 *ap->a_retval = LINK_MAX; 614 return (0); 615 case _PC_MAX_CANON: 616 *ap->a_retval = MAX_CANON; 617 return (0); 618 case _PC_MAX_INPUT: 619 *ap->a_retval = MAX_INPUT; 620 return (0); 621 case _PC_PIPE_BUF: 622 *ap->a_retval = PIPE_BUF; 623 return (0); 624 case _PC_CHOWN_RESTRICTED: 625 *ap->a_retval = 1; 626 return (0); 627 case _PC_VDISABLE: 628 *ap->a_retval = _POSIX_VDISABLE; 629 return (0); 630 default: 631 return (EINVAL); 632 } 633 /* NOTREACHED */ 634 } 635 636 /* 637 * Special device advisory byte-level locks. 638 */ 639 /* ARGSUSED */ 640 spec_advlock(ap) 641 struct vop_advlock_args /* { 642 struct vnode *a_vp; 643 caddr_t a_id; 644 int a_op; 645 struct flock *a_fl; 646 int a_flags; 647 } */ *ap; 648 { 649 650 return (EOPNOTSUPP); 651 } 652 653 /* 654 * Special device failed operation 655 */ 656 spec_ebadf() 657 { 658 659 return (EBADF); 660 } 661 662 /* 663 * Special device bad operation 664 */ 665 spec_badop() 666 { 667 668 panic("spec_badop called"); 669 /* NOTREACHED */ 670 } 671