1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.9 (Berkeley) 01/03/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_lease_desc, spec_lease_check }, /* lease */ 49 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 50 { &vop_select_desc, spec_select }, /* select */ 51 { &vop_mmap_desc, spec_mmap }, /* mmap */ 52 { &vop_fsync_desc, spec_fsync }, /* fsync */ 53 { &vop_seek_desc, spec_seek }, /* seek */ 54 { &vop_remove_desc, spec_remove }, /* remove */ 55 { &vop_link_desc, spec_link }, /* link */ 56 { &vop_rename_desc, spec_rename }, /* rename */ 57 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 58 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 59 { &vop_symlink_desc, spec_symlink }, /* symlink */ 60 { &vop_readdir_desc, spec_readdir }, /* readdir */ 61 { &vop_readlink_desc, spec_readlink }, /* readlink */ 62 { &vop_abortop_desc, spec_abortop }, /* abortop */ 63 { &vop_inactive_desc, spec_inactive }, /* inactive */ 64 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 65 { &vop_lock_desc, spec_lock }, /* lock */ 66 { &vop_unlock_desc, spec_unlock }, /* unlock */ 67 { &vop_bmap_desc, spec_bmap }, /* bmap */ 68 { &vop_strategy_desc, spec_strategy }, /* strategy */ 69 { &vop_print_desc, spec_print }, /* print */ 70 { &vop_islocked_desc, spec_islocked }, /* islocked */ 71 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 72 { &vop_advlock_desc, spec_advlock }, /* advlock */ 73 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 74 { &vop_valloc_desc, spec_valloc }, /* valloc */ 75 { &vop_vfree_desc, spec_vfree }, /* vfree */ 76 { &vop_truncate_desc, spec_truncate }, /* truncate */ 77 { &vop_update_desc, spec_update }, /* update */ 78 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 79 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 80 }; 81 struct vnodeopv_desc spec_vnodeop_opv_desc = 82 { &spec_vnodeop_p, spec_vnodeop_entries }; 83 84 /* 85 * Trivial lookup routine that always fails. 86 */ 87 int 88 spec_lookup(ap) 89 struct vop_lookup_args /* { 90 struct vnode *a_dvp; 91 struct vnode **a_vpp; 92 struct componentname *a_cnp; 93 } */ *ap; 94 { 95 96 *ap->a_vpp = NULL; 97 return (ENOTDIR); 98 } 99 100 /* 101 * Open a special file. 102 */ 103 /* ARGSUSED */ 104 spec_open(ap) 105 struct vop_open_args /* { 106 struct vnode *a_vp; 107 int a_mode; 108 struct ucred *a_cred; 109 struct proc *a_p; 110 } */ *ap; 111 { 112 struct vnode *bvp, *vp = ap->a_vp; 113 dev_t bdev, dev = (dev_t)vp->v_rdev; 114 register int maj = major(dev); 115 int error; 116 117 /* 118 * Don't allow open if fs is mounted -nodev. 119 */ 120 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 121 return (ENXIO); 122 123 switch (vp->v_type) { 124 125 case VCHR: 126 if ((u_int)maj >= nchrdev) 127 return (ENXIO); 128 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 129 /* 130 * When running in very secure mode, do not allow 131 * opens for writing of any disk character devices. 132 */ 133 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK) 134 return (EPERM); 135 /* 136 * When running in secure mode, do not allow opens 137 * for writing of /dev/mem, /dev/kmem, or character 138 * devices whose corresponding block devices are 139 * currently mounted. 140 */ 141 if (securelevel >= 1) { 142 if ((bdev = chrtoblk(dev)) != NODEV && 143 vfinddev(bdev, VBLK, &bvp) && 144 bvp->v_usecount > 0 && 145 (error = vfs_mountedon(bvp))) 146 return (error); 147 if (iskmemdev(dev)) 148 return (EPERM); 149 } 150 } 151 if (cdevsw[maj].d_type == D_TTY) 152 vp->v_flag |= VISTTY; 153 VOP_UNLOCK(vp); 154 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); 155 VOP_LOCK(vp); 156 return (error); 157 158 case VBLK: 159 if ((u_int)maj >= nblkdev) 160 return (ENXIO); 161 /* 162 * When running in very secure mode, do not allow 163 * opens for writing of any disk block devices. 164 */ 165 if (securelevel >= 2 && ap->a_cred != FSCRED && 166 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) 167 return (EPERM); 168 /* 169 * Do not allow opens of block devices that are 170 * currently mounted. 171 */ 172 if (error = vfs_mountedon(vp)) 173 return (error); 174 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); 175 } 176 return (0); 177 } 178 179 /* 180 * Vnode op for read 181 */ 182 /* ARGSUSED */ 183 spec_read(ap) 184 struct vop_read_args /* { 185 struct vnode *a_vp; 186 struct uio *a_uio; 187 int a_ioflag; 188 struct ucred *a_cred; 189 } */ *ap; 190 { 191 register struct vnode *vp = ap->a_vp; 192 register struct uio *uio = ap->a_uio; 193 struct proc *p = uio->uio_procp; 194 struct buf *bp; 195 daddr_t bn, nextbn; 196 long bsize, bscale; 197 struct partinfo dpart; 198 int n, on, majordev, (*ioctl)(); 199 int error = 0; 200 dev_t dev; 201 202 #ifdef DIAGNOSTIC 203 if (uio->uio_rw != UIO_READ) 204 panic("spec_read mode"); 205 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 206 panic("spec_read proc"); 207 #endif 208 if (uio->uio_resid == 0) 209 return (0); 210 211 switch (vp->v_type) { 212 213 case VCHR: 214 VOP_UNLOCK(vp); 215 error = (*cdevsw[major(vp->v_rdev)].d_read) 216 (vp->v_rdev, uio, ap->a_ioflag); 217 VOP_LOCK(vp); 218 return (error); 219 220 case VBLK: 221 if (uio->uio_offset < 0) 222 return (EINVAL); 223 bsize = BLKDEV_IOSIZE; 224 dev = vp->v_rdev; 225 if ((majordev = major(dev)) < nblkdev && 226 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 227 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && 228 dpart.part->p_fstype == FS_BSDFFS && 229 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 230 bsize = dpart.part->p_frag * dpart.part->p_fsize; 231 bscale = bsize / DEV_BSIZE; 232 do { 233 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 234 on = uio->uio_offset % bsize; 235 n = min((unsigned)(bsize - on), uio->uio_resid); 236 if (vp->v_lastr + bscale == bn) { 237 nextbn = bn + bscale; 238 error = breadn(vp, bn, (int)bsize, &nextbn, 239 (int *)&bsize, 1, NOCRED, &bp); 240 } else 241 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 242 vp->v_lastr = bn; 243 n = min(n, bsize - bp->b_resid); 244 if (error) { 245 brelse(bp); 246 return (error); 247 } 248 error = uiomove((char *)bp->b_data + on, n, uio); 249 if (n + on == bsize) 250 bp->b_flags |= B_AGE; 251 brelse(bp); 252 } while (error == 0 && uio->uio_resid > 0 && n != 0); 253 return (error); 254 255 default: 256 panic("spec_read type"); 257 } 258 /* NOTREACHED */ 259 } 260 261 /* 262 * Vnode op for write 263 */ 264 /* ARGSUSED */ 265 spec_write(ap) 266 struct vop_write_args /* { 267 struct vnode *a_vp; 268 struct uio *a_uio; 269 int a_ioflag; 270 struct ucred *a_cred; 271 } */ *ap; 272 { 273 register struct vnode *vp = ap->a_vp; 274 register struct uio *uio = ap->a_uio; 275 struct proc *p = uio->uio_procp; 276 struct buf *bp; 277 daddr_t bn; 278 int bsize, blkmask; 279 struct partinfo dpart; 280 register int n, on; 281 int error = 0; 282 283 #ifdef DIAGNOSTIC 284 if (uio->uio_rw != UIO_WRITE) 285 panic("spec_write mode"); 286 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 287 panic("spec_write proc"); 288 #endif 289 290 switch (vp->v_type) { 291 292 case VCHR: 293 VOP_UNLOCK(vp); 294 error = (*cdevsw[major(vp->v_rdev)].d_write) 295 (vp->v_rdev, uio, ap->a_ioflag); 296 VOP_LOCK(vp); 297 return (error); 298 299 case VBLK: 300 if (uio->uio_resid == 0) 301 return (0); 302 if (uio->uio_offset < 0) 303 return (EINVAL); 304 bsize = BLKDEV_IOSIZE; 305 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 306 (caddr_t)&dpart, FREAD, p) == 0) { 307 if (dpart.part->p_fstype == FS_BSDFFS && 308 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 309 bsize = dpart.part->p_frag * 310 dpart.part->p_fsize; 311 } 312 blkmask = (bsize / DEV_BSIZE) - 1; 313 do { 314 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 315 on = uio->uio_offset % bsize; 316 n = min((unsigned)(bsize - on), uio->uio_resid); 317 if (n == bsize) 318 bp = getblk(vp, bn, bsize, 0, 0); 319 else 320 error = bread(vp, bn, bsize, NOCRED, &bp); 321 n = min(n, bsize - bp->b_resid); 322 if (error) { 323 brelse(bp); 324 return (error); 325 } 326 error = uiomove((char *)bp->b_data + on, n, uio); 327 if (n + on == bsize) { 328 bp->b_flags |= B_AGE; 329 bawrite(bp); 330 } else 331 bdwrite(bp); 332 } while (error == 0 && uio->uio_resid > 0 && n != 0); 333 return (error); 334 335 default: 336 panic("spec_write type"); 337 } 338 /* NOTREACHED */ 339 } 340 341 /* 342 * Device ioctl operation. 343 */ 344 /* ARGSUSED */ 345 spec_ioctl(ap) 346 struct vop_ioctl_args /* { 347 struct vnode *a_vp; 348 int a_command; 349 caddr_t a_data; 350 int a_fflag; 351 struct ucred *a_cred; 352 struct proc *a_p; 353 } */ *ap; 354 { 355 dev_t dev = ap->a_vp->v_rdev; 356 357 switch (ap->a_vp->v_type) { 358 359 case VCHR: 360 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 361 ap->a_fflag, ap->a_p)); 362 363 case VBLK: 364 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 365 if (bdevsw[major(dev)].d_type == D_TAPE) 366 return (0); 367 else 368 return (1); 369 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 370 ap->a_fflag, ap->a_p)); 371 372 default: 373 panic("spec_ioctl"); 374 /* NOTREACHED */ 375 } 376 } 377 378 /* ARGSUSED */ 379 spec_select(ap) 380 struct vop_select_args /* { 381 struct vnode *a_vp; 382 int a_which; 383 int a_fflags; 384 struct ucred *a_cred; 385 struct proc *a_p; 386 } */ *ap; 387 { 388 register dev_t dev; 389 390 switch (ap->a_vp->v_type) { 391 392 default: 393 return (1); /* XXX */ 394 395 case VCHR: 396 dev = ap->a_vp->v_rdev; 397 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 398 } 399 } 400 /* 401 * Synch buffers associated with a block device 402 */ 403 /* ARGSUSED */ 404 int 405 spec_fsync(ap) 406 struct vop_fsync_args /* { 407 struct vnode *a_vp; 408 struct ucred *a_cred; 409 int a_waitfor; 410 struct proc *a_p; 411 } */ *ap; 412 { 413 register struct vnode *vp = ap->a_vp; 414 register struct buf *bp; 415 struct buf *nbp; 416 int s; 417 418 if (vp->v_type == VCHR) 419 return (0); 420 /* 421 * Flush all dirty buffers associated with a block device. 422 */ 423 loop: 424 s = splbio(); 425 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 426 nbp = bp->b_vnbufs.le_next; 427 if ((bp->b_flags & B_BUSY)) 428 continue; 429 if ((bp->b_flags & B_DELWRI) == 0) 430 panic("spec_fsync: not dirty"); 431 bremfree(bp); 432 bp->b_flags |= B_BUSY; 433 splx(s); 434 bawrite(bp); 435 goto loop; 436 } 437 if (ap->a_waitfor == MNT_WAIT) { 438 while (vp->v_numoutput) { 439 vp->v_flag |= VBWAIT; 440 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 441 } 442 #ifdef DIAGNOSTIC 443 if (vp->v_dirtyblkhd.lh_first) { 444 vprint("spec_fsync: dirty", vp); 445 goto loop; 446 } 447 #endif 448 } 449 splx(s); 450 return (0); 451 } 452 453 /* 454 * Just call the device strategy routine 455 */ 456 spec_strategy(ap) 457 struct vop_strategy_args /* { 458 struct buf *a_bp; 459 } */ *ap; 460 { 461 462 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 463 return (0); 464 } 465 466 /* 467 * This is a noop, simply returning what one has been given. 468 */ 469 spec_bmap(ap) 470 struct vop_bmap_args /* { 471 struct vnode *a_vp; 472 daddr_t a_bn; 473 struct vnode **a_vpp; 474 daddr_t *a_bnp; 475 } */ *ap; 476 { 477 478 if (ap->a_vpp != NULL) 479 *ap->a_vpp = ap->a_vp; 480 if (ap->a_bnp != NULL) 481 *ap->a_bnp = ap->a_bn; 482 return (0); 483 } 484 485 /* 486 * At the moment we do not do any locking. 487 */ 488 /* ARGSUSED */ 489 spec_lock(ap) 490 struct vop_lock_args /* { 491 struct vnode *a_vp; 492 } */ *ap; 493 { 494 495 return (0); 496 } 497 498 /* ARGSUSED */ 499 spec_unlock(ap) 500 struct vop_unlock_args /* { 501 struct vnode *a_vp; 502 } */ *ap; 503 { 504 505 return (0); 506 } 507 508 /* 509 * Device close routine 510 */ 511 /* ARGSUSED */ 512 spec_close(ap) 513 struct vop_close_args /* { 514 struct vnode *a_vp; 515 int a_fflag; 516 struct ucred *a_cred; 517 struct proc *a_p; 518 } */ *ap; 519 { 520 register struct vnode *vp = ap->a_vp; 521 dev_t dev = vp->v_rdev; 522 int (*devclose) __P((dev_t, int, int, struct proc *)); 523 int mode, error; 524 525 switch (vp->v_type) { 526 527 case VCHR: 528 /* 529 * Hack: a tty device that is a controlling terminal 530 * has a reference from the session structure. 531 * We cannot easily tell that a character device is 532 * a controlling terminal, unless it is the closing 533 * process' controlling terminal. In that case, 534 * if the reference count is 2 (this last descriptor 535 * plus the session), release the reference from the session. 536 */ 537 if (vcount(vp) == 2 && ap->a_p && 538 vp == ap->a_p->p_session->s_ttyvp) { 539 vrele(vp); 540 ap->a_p->p_session->s_ttyvp = NULL; 541 } 542 /* 543 * If the vnode is locked, then we are in the midst 544 * of forcably closing the device, otherwise we only 545 * close on last reference. 546 */ 547 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 548 return (0); 549 devclose = cdevsw[major(dev)].d_close; 550 mode = S_IFCHR; 551 break; 552 553 case VBLK: 554 /* 555 * On last close of a block device (that isn't mounted) 556 * we must invalidate any in core blocks, so that 557 * we can, for instance, change floppy disks. 558 */ 559 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 560 return (error); 561 /* 562 * We do not want to really close the device if it 563 * is still in use unless we are trying to close it 564 * forcibly. Since every use (buffer, vnode, swap, cmap) 565 * holds a reference to the vnode, and because we mark 566 * any other vnodes that alias this device, when the 567 * sum of the reference counts on all the aliased 568 * vnodes descends to one, we are on last close. 569 */ 570 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 571 return (0); 572 devclose = bdevsw[major(dev)].d_close; 573 mode = S_IFBLK; 574 break; 575 576 default: 577 panic("spec_close: not special"); 578 } 579 580 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 581 } 582 583 /* 584 * Print out the contents of a special device vnode. 585 */ 586 spec_print(ap) 587 struct vop_print_args /* { 588 struct vnode *a_vp; 589 } */ *ap; 590 { 591 592 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 593 minor(ap->a_vp->v_rdev)); 594 } 595 596 /* 597 * Return POSIX pathconf information applicable to special devices. 598 */ 599 spec_pathconf(ap) 600 struct vop_pathconf_args /* { 601 struct vnode *a_vp; 602 int a_name; 603 int *a_retval; 604 } */ *ap; 605 { 606 607 switch (ap->a_name) { 608 case _PC_LINK_MAX: 609 *ap->a_retval = LINK_MAX; 610 return (0); 611 case _PC_MAX_CANON: 612 *ap->a_retval = MAX_CANON; 613 return (0); 614 case _PC_MAX_INPUT: 615 *ap->a_retval = MAX_INPUT; 616 return (0); 617 case _PC_PIPE_BUF: 618 *ap->a_retval = PIPE_BUF; 619 return (0); 620 case _PC_CHOWN_RESTRICTED: 621 *ap->a_retval = 1; 622 return (0); 623 case _PC_VDISABLE: 624 *ap->a_retval = _POSIX_VDISABLE; 625 return (0); 626 default: 627 return (EINVAL); 628 } 629 /* NOTREACHED */ 630 } 631 632 /* 633 * Special device advisory byte-level locks. 634 */ 635 /* ARGSUSED */ 636 spec_advlock(ap) 637 struct vop_advlock_args /* { 638 struct vnode *a_vp; 639 caddr_t a_id; 640 int a_op; 641 struct flock *a_fl; 642 int a_flags; 643 } */ *ap; 644 { 645 646 return (EOPNOTSUPP); 647 } 648 649 /* 650 * Special device failed operation 651 */ 652 spec_ebadf() 653 { 654 655 return (EBADF); 656 } 657 658 /* 659 * Special device bad operation 660 */ 661 spec_badop() 662 { 663 664 panic("spec_badop called"); 665 /* NOTREACHED */ 666 } 667