1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.11 (Berkeley) 02/06/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_lease_desc, spec_lease_check }, /* lease */ 49 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 50 { &vop_select_desc, spec_select }, /* select */ 51 { &vop_mmap_desc, spec_mmap }, /* mmap */ 52 { &vop_fsync_desc, spec_fsync }, /* fsync */ 53 { &vop_seek_desc, spec_seek }, /* seek */ 54 { &vop_remove_desc, spec_remove }, /* remove */ 55 { &vop_link_desc, spec_link }, /* link */ 56 { &vop_rename_desc, spec_rename }, /* rename */ 57 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 58 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 59 { &vop_symlink_desc, spec_symlink }, /* symlink */ 60 { &vop_readdir_desc, spec_readdir }, /* readdir */ 61 { &vop_readlink_desc, spec_readlink }, /* readlink */ 62 { &vop_abortop_desc, spec_abortop }, /* abortop */ 63 { &vop_inactive_desc, spec_inactive }, /* inactive */ 64 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 65 { &vop_lock_desc, spec_lock }, /* lock */ 66 { &vop_unlock_desc, spec_unlock }, /* unlock */ 67 { &vop_bmap_desc, spec_bmap }, /* bmap */ 68 { &vop_strategy_desc, spec_strategy }, /* strategy */ 69 { &vop_print_desc, spec_print }, /* print */ 70 { &vop_islocked_desc, spec_islocked }, /* islocked */ 71 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 72 { &vop_advlock_desc, spec_advlock }, /* advlock */ 73 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 74 { &vop_valloc_desc, spec_valloc }, /* valloc */ 75 { &vop_vfree_desc, spec_vfree }, /* vfree */ 76 { &vop_truncate_desc, spec_truncate }, /* truncate */ 77 { &vop_update_desc, spec_update }, /* update */ 78 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 79 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 80 }; 81 struct vnodeopv_desc spec_vnodeop_opv_desc = 82 { &spec_vnodeop_p, spec_vnodeop_entries }; 83 84 /* 85 * Trivial lookup routine that always fails. 86 */ 87 int 88 spec_lookup(ap) 89 struct vop_lookup_args /* { 90 struct vnode *a_dvp; 91 struct vnode **a_vpp; 92 struct componentname *a_cnp; 93 } */ *ap; 94 { 95 96 *ap->a_vpp = NULL; 97 return (ENOTDIR); 98 } 99 100 /* 101 * Open a special file. 102 */ 103 /* ARGSUSED */ 104 spec_open(ap) 105 struct vop_open_args /* { 106 struct vnode *a_vp; 107 int a_mode; 108 struct ucred *a_cred; 109 struct proc *a_p; 110 } */ *ap; 111 { 112 struct vnode *bvp, *vp = ap->a_vp; 113 dev_t bdev, dev = (dev_t)vp->v_rdev; 114 register int maj = major(dev); 115 int error; 116 117 /* 118 * Don't allow open if fs is mounted -nodev. 119 */ 120 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 121 return (ENXIO); 122 123 switch (vp->v_type) { 124 125 case VCHR: 126 if ((u_int)maj >= nchrdev) 127 return (ENXIO); 128 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 129 /* 130 * When running in very secure mode, do not allow 131 * opens for writing of any disk character devices. 132 */ 133 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK) 134 return (EPERM); 135 /* 136 * When running in secure mode, do not allow opens 137 * for writing of /dev/mem, /dev/kmem, or character 138 * devices whose corresponding block devices are 139 * currently mounted. 140 */ 141 if (securelevel >= 1) { 142 if ((bdev = chrtoblk(dev)) != NODEV && 143 vfinddev(bdev, VBLK, &bvp) && 144 bvp->v_usecount > 0 && 145 (error = vfs_mountedon(bvp))) 146 return (error); 147 if (iskmemdev(dev)) 148 return (EPERM); 149 } 150 } 151 if (cdevsw[maj].d_type == D_TTY) 152 vp->v_flag |= VISTTY; 153 VOP_UNLOCK(vp); 154 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); 155 VOP_LOCK(vp); 156 return (error); 157 158 case VBLK: 159 if ((u_int)maj >= nblkdev) 160 return (ENXIO); 161 /* 162 * When running in very secure mode, do not allow 163 * opens for writing of any disk block devices. 164 */ 165 if (securelevel >= 2 && ap->a_cred != FSCRED && 166 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) 167 return (EPERM); 168 /* 169 * Do not allow opens of block devices that are 170 * currently mounted. 171 */ 172 if (error = vfs_mountedon(vp)) 173 return (error); 174 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); 175 } 176 return (0); 177 } 178 179 /* 180 * Vnode op for read 181 */ 182 /* ARGSUSED */ 183 spec_read(ap) 184 struct vop_read_args /* { 185 struct vnode *a_vp; 186 struct uio *a_uio; 187 int a_ioflag; 188 struct ucred *a_cred; 189 } */ *ap; 190 { 191 register struct vnode *vp = ap->a_vp; 192 register struct uio *uio = ap->a_uio; 193 struct proc *p = uio->uio_procp; 194 struct buf *bp; 195 daddr_t bn, nextbn; 196 long bsize, bscale; 197 struct partinfo dpart; 198 int n, on, majordev, (*ioctl)(); 199 int error = 0; 200 dev_t dev; 201 202 #ifdef DIAGNOSTIC 203 if (uio->uio_rw != UIO_READ) 204 panic("spec_read mode"); 205 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 206 panic("spec_read proc"); 207 #endif 208 if (uio->uio_resid == 0) 209 return (0); 210 211 switch (vp->v_type) { 212 213 case VCHR: 214 VOP_UNLOCK(vp); 215 error = (*cdevsw[major(vp->v_rdev)].d_read) 216 (vp->v_rdev, uio, ap->a_ioflag); 217 VOP_LOCK(vp); 218 return (error); 219 220 case VBLK: 221 if (uio->uio_offset < 0) 222 return (EINVAL); 223 bsize = BLKDEV_IOSIZE; 224 dev = vp->v_rdev; 225 if ((majordev = major(dev)) < nblkdev && 226 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 227 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && 228 dpart.part->p_fstype == FS_BSDFFS && 229 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 230 bsize = dpart.part->p_frag * dpart.part->p_fsize; 231 bscale = bsize / DEV_BSIZE; 232 do { 233 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 234 on = uio->uio_offset % bsize; 235 n = min((unsigned)(bsize - on), uio->uio_resid); 236 if (vp->v_lastr + bscale == bn) { 237 nextbn = bn + bscale; 238 error = breadn(vp, bn, (int)bsize, &nextbn, 239 (int *)&bsize, 1, NOCRED, &bp); 240 } else 241 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 242 vp->v_lastr = bn; 243 n = min(n, bsize - bp->b_resid); 244 if (error) { 245 brelse(bp); 246 return (error); 247 } 248 error = uiomove((char *)bp->b_data + on, n, uio); 249 if (n + on == bsize) 250 bp->b_flags |= B_AGE; 251 brelse(bp); 252 } while (error == 0 && uio->uio_resid > 0 && n != 0); 253 return (error); 254 255 default: 256 panic("spec_read type"); 257 } 258 /* NOTREACHED */ 259 } 260 261 /* 262 * Vnode op for write 263 */ 264 /* ARGSUSED */ 265 spec_write(ap) 266 struct vop_write_args /* { 267 struct vnode *a_vp; 268 struct uio *a_uio; 269 int a_ioflag; 270 struct ucred *a_cred; 271 } */ *ap; 272 { 273 register struct vnode *vp = ap->a_vp; 274 register struct uio *uio = ap->a_uio; 275 struct proc *p = uio->uio_procp; 276 struct buf *bp; 277 daddr_t bn; 278 int bsize, blkmask; 279 struct partinfo dpart; 280 register int n, on; 281 int error = 0; 282 283 #ifdef DIAGNOSTIC 284 if (uio->uio_rw != UIO_WRITE) 285 panic("spec_write mode"); 286 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 287 panic("spec_write proc"); 288 #endif 289 290 switch (vp->v_type) { 291 292 case VCHR: 293 VOP_UNLOCK(vp); 294 error = (*cdevsw[major(vp->v_rdev)].d_write) 295 (vp->v_rdev, uio, ap->a_ioflag); 296 VOP_LOCK(vp); 297 return (error); 298 299 case VBLK: 300 if (uio->uio_resid == 0) 301 return (0); 302 if (uio->uio_offset < 0) 303 return (EINVAL); 304 bsize = BLKDEV_IOSIZE; 305 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 306 (caddr_t)&dpart, FREAD, p) == 0) { 307 if (dpart.part->p_fstype == FS_BSDFFS && 308 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 309 bsize = dpart.part->p_frag * 310 dpart.part->p_fsize; 311 } 312 blkmask = (bsize / DEV_BSIZE) - 1; 313 do { 314 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 315 on = uio->uio_offset % bsize; 316 n = min((unsigned)(bsize - on), uio->uio_resid); 317 if (n == bsize) 318 bp = getblk(vp, bn, bsize, 0, 0); 319 else 320 error = bread(vp, bn, bsize, NOCRED, &bp); 321 n = min(n, bsize - bp->b_resid); 322 if (error) { 323 brelse(bp); 324 return (error); 325 } 326 error = uiomove((char *)bp->b_data + on, n, uio); 327 if (n + on == bsize) { 328 bp->b_flags |= B_AGE; 329 bawrite(bp); 330 } else 331 bdwrite(bp); 332 } while (error == 0 && uio->uio_resid > 0 && n != 0); 333 return (error); 334 335 default: 336 panic("spec_write type"); 337 } 338 /* NOTREACHED */ 339 } 340 341 /* 342 * Device ioctl operation. 343 */ 344 /* ARGSUSED */ 345 spec_ioctl(ap) 346 struct vop_ioctl_args /* { 347 struct vnode *a_vp; 348 int a_command; 349 caddr_t a_data; 350 int a_fflag; 351 struct ucred *a_cred; 352 struct proc *a_p; 353 } */ *ap; 354 { 355 dev_t dev = ap->a_vp->v_rdev; 356 357 switch (ap->a_vp->v_type) { 358 359 case VCHR: 360 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 361 ap->a_fflag, ap->a_p)); 362 363 case VBLK: 364 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 365 if (bdevsw[major(dev)].d_type == D_TAPE) 366 return (0); 367 else 368 return (1); 369 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 370 ap->a_fflag, ap->a_p)); 371 372 default: 373 panic("spec_ioctl"); 374 /* NOTREACHED */ 375 } 376 } 377 378 /* ARGSUSED */ 379 spec_select(ap) 380 struct vop_select_args /* { 381 struct vnode *a_vp; 382 int a_which; 383 int a_fflags; 384 struct ucred *a_cred; 385 struct proc *a_p; 386 } */ *ap; 387 { 388 register dev_t dev; 389 390 switch (ap->a_vp->v_type) { 391 392 default: 393 return (1); /* XXX */ 394 395 case VCHR: 396 dev = ap->a_vp->v_rdev; 397 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 398 } 399 } 400 /* 401 * Synch buffers associated with a block device 402 */ 403 /* ARGSUSED */ 404 int 405 spec_fsync(ap) 406 struct vop_fsync_args /* { 407 struct vnode *a_vp; 408 struct ucred *a_cred; 409 int a_waitfor; 410 struct proc *a_p; 411 } */ *ap; 412 { 413 register struct vnode *vp = ap->a_vp; 414 register struct buf *bp; 415 struct buf *nbp; 416 int s; 417 418 if (vp->v_type == VCHR) 419 return (0); 420 /* 421 * Flush all dirty buffers associated with a block device. 422 */ 423 loop: 424 s = splbio(); 425 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 426 nbp = bp->b_vnbufs.le_next; 427 if ((bp->b_flags & B_BUSY)) 428 continue; 429 if ((bp->b_flags & B_DELWRI) == 0) 430 panic("spec_fsync: not dirty"); 431 bremfree(bp); 432 bp->b_flags |= B_BUSY; 433 splx(s); 434 bawrite(bp); 435 goto loop; 436 } 437 if (ap->a_waitfor == MNT_WAIT) { 438 while (vp->v_numoutput) { 439 vp->v_flag |= VBWAIT; 440 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 441 } 442 #ifdef DIAGNOSTIC 443 if (vp->v_dirtyblkhd.lh_first) { 444 vprint("spec_fsync: dirty", vp); 445 goto loop; 446 } 447 #endif 448 } 449 splx(s); 450 return (0); 451 } 452 453 /* 454 * Just call the device strategy routine 455 */ 456 spec_strategy(ap) 457 struct vop_strategy_args /* { 458 struct buf *a_bp; 459 } */ *ap; 460 { 461 462 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 463 return (0); 464 } 465 466 /* 467 * This is a noop, simply returning what one has been given. 468 */ 469 spec_bmap(ap) 470 struct vop_bmap_args /* { 471 struct vnode *a_vp; 472 daddr_t a_bn; 473 struct vnode **a_vpp; 474 daddr_t *a_bnp; 475 int *a_runp; 476 } */ *ap; 477 { 478 479 if (ap->a_vpp != NULL) 480 *ap->a_vpp = ap->a_vp; 481 if (ap->a_bnp != NULL) 482 *ap->a_bnp = ap->a_bn; 483 if (ap->a_runp != NULL) 484 *ap->a_runp = 0; 485 return (0); 486 } 487 488 /* 489 * At the moment we do not do any locking. 490 */ 491 /* ARGSUSED */ 492 spec_lock(ap) 493 struct vop_lock_args /* { 494 struct vnode *a_vp; 495 } */ *ap; 496 { 497 498 return (0); 499 } 500 501 /* ARGSUSED */ 502 spec_unlock(ap) 503 struct vop_unlock_args /* { 504 struct vnode *a_vp; 505 } */ *ap; 506 { 507 508 return (0); 509 } 510 511 /* 512 * Device close routine 513 */ 514 /* ARGSUSED */ 515 spec_close(ap) 516 struct vop_close_args /* { 517 struct vnode *a_vp; 518 int a_fflag; 519 struct ucred *a_cred; 520 struct proc *a_p; 521 } */ *ap; 522 { 523 register struct vnode *vp = ap->a_vp; 524 dev_t dev = vp->v_rdev; 525 int (*devclose) __P((dev_t, int, int, struct proc *)); 526 int mode, error; 527 528 switch (vp->v_type) { 529 530 case VCHR: 531 /* 532 * Hack: a tty device that is a controlling terminal 533 * has a reference from the session structure. 534 * We cannot easily tell that a character device is 535 * a controlling terminal, unless it is the closing 536 * process' controlling terminal. In that case, 537 * if the reference count is 2 (this last descriptor 538 * plus the session), release the reference from the session. 539 */ 540 if (vcount(vp) == 2 && ap->a_p && 541 vp == ap->a_p->p_session->s_ttyvp) { 542 vrele(vp); 543 ap->a_p->p_session->s_ttyvp = NULL; 544 } 545 /* 546 * If the vnode is locked, then we are in the midst 547 * of forcably closing the device, otherwise we only 548 * close on last reference. 549 */ 550 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 551 return (0); 552 devclose = cdevsw[major(dev)].d_close; 553 mode = S_IFCHR; 554 break; 555 556 case VBLK: 557 /* 558 * On last close of a block device (that isn't mounted) 559 * we must invalidate any in core blocks, so that 560 * we can, for instance, change floppy disks. 561 */ 562 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 563 return (error); 564 /* 565 * We do not want to really close the device if it 566 * is still in use unless we are trying to close it 567 * forcibly. Since every use (buffer, vnode, swap, cmap) 568 * holds a reference to the vnode, and because we mark 569 * any other vnodes that alias this device, when the 570 * sum of the reference counts on all the aliased 571 * vnodes descends to one, we are on last close. 572 */ 573 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 574 return (0); 575 devclose = bdevsw[major(dev)].d_close; 576 mode = S_IFBLK; 577 break; 578 579 default: 580 panic("spec_close: not special"); 581 } 582 583 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 584 } 585 586 /* 587 * Print out the contents of a special device vnode. 588 */ 589 spec_print(ap) 590 struct vop_print_args /* { 591 struct vnode *a_vp; 592 } */ *ap; 593 { 594 595 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 596 minor(ap->a_vp->v_rdev)); 597 } 598 599 /* 600 * Return POSIX pathconf information applicable to special devices. 601 */ 602 spec_pathconf(ap) 603 struct vop_pathconf_args /* { 604 struct vnode *a_vp; 605 int a_name; 606 int *a_retval; 607 } */ *ap; 608 { 609 610 switch (ap->a_name) { 611 case _PC_LINK_MAX: 612 *ap->a_retval = LINK_MAX; 613 return (0); 614 case _PC_MAX_CANON: 615 *ap->a_retval = MAX_CANON; 616 return (0); 617 case _PC_MAX_INPUT: 618 *ap->a_retval = MAX_INPUT; 619 return (0); 620 case _PC_PIPE_BUF: 621 *ap->a_retval = PIPE_BUF; 622 return (0); 623 case _PC_CHOWN_RESTRICTED: 624 *ap->a_retval = 1; 625 return (0); 626 case _PC_VDISABLE: 627 *ap->a_retval = _POSIX_VDISABLE; 628 return (0); 629 default: 630 return (EINVAL); 631 } 632 /* NOTREACHED */ 633 } 634 635 /* 636 * Special device advisory byte-level locks. 637 */ 638 /* ARGSUSED */ 639 spec_advlock(ap) 640 struct vop_advlock_args /* { 641 struct vnode *a_vp; 642 caddr_t a_id; 643 int a_op; 644 struct flock *a_fl; 645 int a_flags; 646 } */ *ap; 647 { 648 649 return (EOPNOTSUPP); 650 } 651 652 /* 653 * Special device failed operation 654 */ 655 spec_ebadf() 656 { 657 658 return (EBADF); 659 } 660 661 /* 662 * Special device bad operation 663 */ 664 spec_badop() 665 { 666 667 panic("spec_badop called"); 668 /* NOTREACHED */ 669 } 670