1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.14 (Berkeley) 05/21/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_lease_desc, spec_lease_check }, /* lease */ 49 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 50 { &vop_select_desc, spec_select }, /* select */ 51 { &vop_revoke_desc, spec_revoke }, /* revoke */ 52 { &vop_mmap_desc, spec_mmap }, /* mmap */ 53 { &vop_fsync_desc, spec_fsync }, /* fsync */ 54 { &vop_seek_desc, spec_seek }, /* seek */ 55 { &vop_remove_desc, spec_remove }, /* remove */ 56 { &vop_link_desc, spec_link }, /* link */ 57 { &vop_rename_desc, spec_rename }, /* rename */ 58 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 59 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 60 { &vop_symlink_desc, spec_symlink }, /* symlink */ 61 { &vop_readdir_desc, spec_readdir }, /* readdir */ 62 { &vop_readlink_desc, spec_readlink }, /* readlink */ 63 { &vop_abortop_desc, spec_abortop }, /* abortop */ 64 { &vop_inactive_desc, spec_inactive }, /* inactive */ 65 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 66 { &vop_lock_desc, spec_lock }, /* lock */ 67 { &vop_unlock_desc, spec_unlock }, /* unlock */ 68 { &vop_bmap_desc, spec_bmap }, /* bmap */ 69 { &vop_strategy_desc, spec_strategy }, /* strategy */ 70 { &vop_print_desc, spec_print }, /* print */ 71 { &vop_islocked_desc, spec_islocked }, /* islocked */ 72 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 73 { &vop_advlock_desc, spec_advlock }, /* advlock */ 74 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 75 { &vop_valloc_desc, spec_valloc }, /* valloc */ 76 { &vop_vfree_desc, spec_vfree }, /* vfree */ 77 { &vop_truncate_desc, spec_truncate }, /* truncate */ 78 { &vop_update_desc, spec_update }, /* update */ 79 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 80 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 81 }; 82 struct vnodeopv_desc spec_vnodeop_opv_desc = 83 { &spec_vnodeop_p, spec_vnodeop_entries }; 84 85 /* 86 * Trivial lookup routine that always fails. 87 */ 88 int 89 spec_lookup(ap) 90 struct vop_lookup_args /* { 91 struct vnode *a_dvp; 92 struct vnode **a_vpp; 93 struct componentname *a_cnp; 94 } */ *ap; 95 { 96 97 *ap->a_vpp = NULL; 98 return (ENOTDIR); 99 } 100 101 /* 102 * Open a special file. 103 */ 104 /* ARGSUSED */ 105 spec_open(ap) 106 struct vop_open_args /* { 107 struct vnode *a_vp; 108 int a_mode; 109 struct ucred *a_cred; 110 struct proc *a_p; 111 } */ *ap; 112 { 113 struct proc *p = ap->a_p; 114 struct vnode *bvp, *vp = ap->a_vp; 115 dev_t bdev, dev = (dev_t)vp->v_rdev; 116 int maj = major(dev); 117 int error; 118 119 /* 120 * Don't allow open if fs is mounted -nodev. 121 */ 122 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 123 return (ENXIO); 124 125 switch (vp->v_type) { 126 127 case VCHR: 128 if ((u_int)maj >= nchrdev) 129 return (ENXIO); 130 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 131 /* 132 * When running in very secure mode, do not allow 133 * opens for writing of any disk character devices. 134 */ 135 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK) 136 return (EPERM); 137 /* 138 * When running in secure mode, do not allow opens 139 * for writing of /dev/mem, /dev/kmem, or character 140 * devices whose corresponding block devices are 141 * currently mounted. 142 */ 143 if (securelevel >= 1) { 144 if ((bdev = chrtoblk(dev)) != NODEV && 145 vfinddev(bdev, VBLK, &bvp) && 146 bvp->v_usecount > 0 && 147 (error = vfs_mountedon(bvp))) 148 return (error); 149 if (iskmemdev(dev)) 150 return (EPERM); 151 } 152 } 153 if (cdevsw[maj].d_type == D_TTY) 154 vp->v_flag |= VISTTY; 155 VOP_UNLOCK(vp, 0, p); 156 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p); 157 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 158 return (error); 159 160 case VBLK: 161 if ((u_int)maj >= nblkdev) 162 return (ENXIO); 163 /* 164 * When running in very secure mode, do not allow 165 * opens for writing of any disk block devices. 166 */ 167 if (securelevel >= 2 && ap->a_cred != FSCRED && 168 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) 169 return (EPERM); 170 /* 171 * Do not allow opens of block devices that are 172 * currently mounted. 173 */ 174 if (error = vfs_mountedon(vp)) 175 return (error); 176 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p)); 177 } 178 return (0); 179 } 180 181 /* 182 * Vnode op for read 183 */ 184 /* ARGSUSED */ 185 spec_read(ap) 186 struct vop_read_args /* { 187 struct vnode *a_vp; 188 struct uio *a_uio; 189 int a_ioflag; 190 struct ucred *a_cred; 191 } */ *ap; 192 { 193 register struct vnode *vp = ap->a_vp; 194 register struct uio *uio = ap->a_uio; 195 struct proc *p = uio->uio_procp; 196 struct buf *bp; 197 daddr_t bn, nextbn; 198 long bsize, bscale; 199 struct partinfo dpart; 200 int n, on, majordev, (*ioctl)(); 201 int error = 0; 202 dev_t dev; 203 204 #ifdef DIAGNOSTIC 205 if (uio->uio_rw != UIO_READ) 206 panic("spec_read mode"); 207 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 208 panic("spec_read proc"); 209 #endif 210 if (uio->uio_resid == 0) 211 return (0); 212 213 switch (vp->v_type) { 214 215 case VCHR: 216 VOP_UNLOCK(vp, 0, p); 217 error = (*cdevsw[major(vp->v_rdev)].d_read) 218 (vp->v_rdev, uio, ap->a_ioflag); 219 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 220 return (error); 221 222 case VBLK: 223 if (uio->uio_offset < 0) 224 return (EINVAL); 225 bsize = BLKDEV_IOSIZE; 226 dev = vp->v_rdev; 227 if ((majordev = major(dev)) < nblkdev && 228 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 229 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && 230 dpart.part->p_fstype == FS_BSDFFS && 231 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 232 bsize = dpart.part->p_frag * dpart.part->p_fsize; 233 bscale = bsize / DEV_BSIZE; 234 do { 235 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 236 on = uio->uio_offset % bsize; 237 n = min((unsigned)(bsize - on), uio->uio_resid); 238 if (vp->v_lastr + bscale == bn) { 239 nextbn = bn + bscale; 240 error = breadn(vp, bn, (int)bsize, &nextbn, 241 (int *)&bsize, 1, NOCRED, &bp); 242 } else 243 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 244 vp->v_lastr = bn; 245 n = min(n, bsize - bp->b_resid); 246 if (error) { 247 brelse(bp); 248 return (error); 249 } 250 error = uiomove((char *)bp->b_data + on, n, uio); 251 if (n + on == bsize) 252 bp->b_flags |= B_AGE; 253 brelse(bp); 254 } while (error == 0 && uio->uio_resid > 0 && n != 0); 255 return (error); 256 257 default: 258 panic("spec_read type"); 259 } 260 /* NOTREACHED */ 261 } 262 263 /* 264 * Vnode op for write 265 */ 266 /* ARGSUSED */ 267 spec_write(ap) 268 struct vop_write_args /* { 269 struct vnode *a_vp; 270 struct uio *a_uio; 271 int a_ioflag; 272 struct ucred *a_cred; 273 } */ *ap; 274 { 275 register struct vnode *vp = ap->a_vp; 276 register struct uio *uio = ap->a_uio; 277 struct proc *p = uio->uio_procp; 278 struct buf *bp; 279 daddr_t bn; 280 int bsize, blkmask; 281 struct partinfo dpart; 282 register int n, on; 283 int error = 0; 284 285 #ifdef DIAGNOSTIC 286 if (uio->uio_rw != UIO_WRITE) 287 panic("spec_write mode"); 288 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 289 panic("spec_write proc"); 290 #endif 291 292 switch (vp->v_type) { 293 294 case VCHR: 295 VOP_UNLOCK(vp, 0, p); 296 error = (*cdevsw[major(vp->v_rdev)].d_write) 297 (vp->v_rdev, uio, ap->a_ioflag); 298 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 299 return (error); 300 301 case VBLK: 302 if (uio->uio_resid == 0) 303 return (0); 304 if (uio->uio_offset < 0) 305 return (EINVAL); 306 bsize = BLKDEV_IOSIZE; 307 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 308 (caddr_t)&dpart, FREAD, p) == 0) { 309 if (dpart.part->p_fstype == FS_BSDFFS && 310 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 311 bsize = dpart.part->p_frag * 312 dpart.part->p_fsize; 313 } 314 blkmask = (bsize / DEV_BSIZE) - 1; 315 do { 316 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 317 on = uio->uio_offset % bsize; 318 n = min((unsigned)(bsize - on), uio->uio_resid); 319 if (n == bsize) 320 bp = getblk(vp, bn, bsize, 0, 0); 321 else 322 error = bread(vp, bn, bsize, NOCRED, &bp); 323 n = min(n, bsize - bp->b_resid); 324 if (error) { 325 brelse(bp); 326 return (error); 327 } 328 error = uiomove((char *)bp->b_data + on, n, uio); 329 if (n + on == bsize) { 330 bp->b_flags |= B_AGE; 331 bawrite(bp); 332 } else 333 bdwrite(bp); 334 } while (error == 0 && uio->uio_resid > 0 && n != 0); 335 return (error); 336 337 default: 338 panic("spec_write type"); 339 } 340 /* NOTREACHED */ 341 } 342 343 /* 344 * Device ioctl operation. 345 */ 346 /* ARGSUSED */ 347 spec_ioctl(ap) 348 struct vop_ioctl_args /* { 349 struct vnode *a_vp; 350 int a_command; 351 caddr_t a_data; 352 int a_fflag; 353 struct ucred *a_cred; 354 struct proc *a_p; 355 } */ *ap; 356 { 357 dev_t dev = ap->a_vp->v_rdev; 358 359 switch (ap->a_vp->v_type) { 360 361 case VCHR: 362 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 363 ap->a_fflag, ap->a_p)); 364 365 case VBLK: 366 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 367 if (bdevsw[major(dev)].d_type == D_TAPE) 368 return (0); 369 else 370 return (1); 371 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 372 ap->a_fflag, ap->a_p)); 373 374 default: 375 panic("spec_ioctl"); 376 /* NOTREACHED */ 377 } 378 } 379 380 /* ARGSUSED */ 381 spec_select(ap) 382 struct vop_select_args /* { 383 struct vnode *a_vp; 384 int a_which; 385 int a_fflags; 386 struct ucred *a_cred; 387 struct proc *a_p; 388 } */ *ap; 389 { 390 register dev_t dev; 391 392 switch (ap->a_vp->v_type) { 393 394 default: 395 return (1); /* XXX */ 396 397 case VCHR: 398 dev = ap->a_vp->v_rdev; 399 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 400 } 401 } 402 /* 403 * Synch buffers associated with a block device 404 */ 405 /* ARGSUSED */ 406 int 407 spec_fsync(ap) 408 struct vop_fsync_args /* { 409 struct vnode *a_vp; 410 struct ucred *a_cred; 411 int a_waitfor; 412 struct proc *a_p; 413 } */ *ap; 414 { 415 register struct vnode *vp = ap->a_vp; 416 register struct buf *bp; 417 struct buf *nbp; 418 int s; 419 420 if (vp->v_type == VCHR) 421 return (0); 422 /* 423 * Flush all dirty buffers associated with a block device. 424 */ 425 loop: 426 s = splbio(); 427 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 428 nbp = bp->b_vnbufs.le_next; 429 if ((bp->b_flags & B_BUSY)) 430 continue; 431 if ((bp->b_flags & B_DELWRI) == 0) 432 panic("spec_fsync: not dirty"); 433 bremfree(bp); 434 bp->b_flags |= B_BUSY; 435 splx(s); 436 bawrite(bp); 437 goto loop; 438 } 439 if (ap->a_waitfor == MNT_WAIT) { 440 while (vp->v_numoutput) { 441 vp->v_flag |= VBWAIT; 442 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 443 } 444 #ifdef DIAGNOSTIC 445 if (vp->v_dirtyblkhd.lh_first) { 446 vprint("spec_fsync: dirty", vp); 447 goto loop; 448 } 449 #endif 450 } 451 splx(s); 452 return (0); 453 } 454 455 int 456 spec_inactive(ap) 457 struct vop_inactive_args /* { 458 struct vnode *a_vp; 459 struct proc *a_p; 460 } */ *ap; 461 { 462 463 VOP_UNLOCK(ap->a_vp, 0, ap->a_p); 464 return (0); 465 } 466 467 /* 468 * Just call the device strategy routine 469 */ 470 spec_strategy(ap) 471 struct vop_strategy_args /* { 472 struct buf *a_bp; 473 } */ *ap; 474 { 475 476 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 477 return (0); 478 } 479 480 /* 481 * This is a noop, simply returning what one has been given. 482 */ 483 spec_bmap(ap) 484 struct vop_bmap_args /* { 485 struct vnode *a_vp; 486 daddr_t a_bn; 487 struct vnode **a_vpp; 488 daddr_t *a_bnp; 489 int *a_runp; 490 } */ *ap; 491 { 492 493 if (ap->a_vpp != NULL) 494 *ap->a_vpp = ap->a_vp; 495 if (ap->a_bnp != NULL) 496 *ap->a_bnp = ap->a_bn; 497 if (ap->a_runp != NULL) 498 *ap->a_runp = 0; 499 return (0); 500 } 501 502 /* 503 * Device close routine 504 */ 505 /* ARGSUSED */ 506 spec_close(ap) 507 struct vop_close_args /* { 508 struct vnode *a_vp; 509 int a_fflag; 510 struct ucred *a_cred; 511 struct proc *a_p; 512 } */ *ap; 513 { 514 register struct vnode *vp = ap->a_vp; 515 dev_t dev = vp->v_rdev; 516 int (*devclose) __P((dev_t, int, int, struct proc *)); 517 int mode, error; 518 519 switch (vp->v_type) { 520 521 case VCHR: 522 /* 523 * Hack: a tty device that is a controlling terminal 524 * has a reference from the session structure. 525 * We cannot easily tell that a character device is 526 * a controlling terminal, unless it is the closing 527 * process' controlling terminal. In that case, 528 * if the reference count is 2 (this last descriptor 529 * plus the session), release the reference from the session. 530 */ 531 if (vcount(vp) == 2 && ap->a_p && 532 vp == ap->a_p->p_session->s_ttyvp) { 533 vrele(vp); 534 ap->a_p->p_session->s_ttyvp = NULL; 535 } 536 /* 537 * If the vnode is locked, then we are in the midst 538 * of forcably closing the device, otherwise we only 539 * close on last reference. 540 */ 541 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 542 return (0); 543 devclose = cdevsw[major(dev)].d_close; 544 mode = S_IFCHR; 545 break; 546 547 case VBLK: 548 /* 549 * On last close of a block device (that isn't mounted) 550 * we must invalidate any in core blocks, so that 551 * we can, for instance, change floppy disks. 552 */ 553 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 554 return (error); 555 /* 556 * We do not want to really close the device if it 557 * is still in use unless we are trying to close it 558 * forcibly. Since every use (buffer, vnode, swap, cmap) 559 * holds a reference to the vnode, and because we mark 560 * any other vnodes that alias this device, when the 561 * sum of the reference counts on all the aliased 562 * vnodes descends to one, we are on last close. 563 */ 564 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 565 return (0); 566 devclose = bdevsw[major(dev)].d_close; 567 mode = S_IFBLK; 568 break; 569 570 default: 571 panic("spec_close: not special"); 572 } 573 574 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 575 } 576 577 /* 578 * Print out the contents of a special device vnode. 579 */ 580 spec_print(ap) 581 struct vop_print_args /* { 582 struct vnode *a_vp; 583 } */ *ap; 584 { 585 586 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 587 minor(ap->a_vp->v_rdev)); 588 } 589 590 /* 591 * Return POSIX pathconf information applicable to special devices. 592 */ 593 spec_pathconf(ap) 594 struct vop_pathconf_args /* { 595 struct vnode *a_vp; 596 int a_name; 597 int *a_retval; 598 } */ *ap; 599 { 600 601 switch (ap->a_name) { 602 case _PC_LINK_MAX: 603 *ap->a_retval = LINK_MAX; 604 return (0); 605 case _PC_MAX_CANON: 606 *ap->a_retval = MAX_CANON; 607 return (0); 608 case _PC_MAX_INPUT: 609 *ap->a_retval = MAX_INPUT; 610 return (0); 611 case _PC_PIPE_BUF: 612 *ap->a_retval = PIPE_BUF; 613 return (0); 614 case _PC_CHOWN_RESTRICTED: 615 *ap->a_retval = 1; 616 return (0); 617 case _PC_VDISABLE: 618 *ap->a_retval = _POSIX_VDISABLE; 619 return (0); 620 default: 621 return (EINVAL); 622 } 623 /* NOTREACHED */ 624 } 625 626 /* 627 * Special device advisory byte-level locks. 628 */ 629 /* ARGSUSED */ 630 spec_advlock(ap) 631 struct vop_advlock_args /* { 632 struct vnode *a_vp; 633 caddr_t a_id; 634 int a_op; 635 struct flock *a_fl; 636 int a_flags; 637 } */ *ap; 638 { 639 640 return (EOPNOTSUPP); 641 } 642 643 /* 644 * Special device failed operation 645 */ 646 spec_ebadf() 647 { 648 649 return (EBADF); 650 } 651 652 /* 653 * Special device bad operation 654 */ 655 spec_badop() 656 { 657 658 panic("spec_badop called"); 659 /* NOTREACHED */ 660 } 661