1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.7 (Berkeley) 08/10/94 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_lease_desc, spec_lease_check }, /* lease */ 49 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 50 { &vop_select_desc, spec_select }, /* select */ 51 { &vop_mmap_desc, spec_mmap }, /* mmap */ 52 { &vop_fsync_desc, spec_fsync }, /* fsync */ 53 { &vop_seek_desc, spec_seek }, /* seek */ 54 { &vop_remove_desc, spec_remove }, /* remove */ 55 { &vop_link_desc, spec_link }, /* link */ 56 { &vop_rename_desc, spec_rename }, /* rename */ 57 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 58 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 59 { &vop_symlink_desc, spec_symlink }, /* symlink */ 60 { &vop_readdir_desc, spec_readdir }, /* readdir */ 61 { &vop_readlink_desc, spec_readlink }, /* readlink */ 62 { &vop_abortop_desc, spec_abortop }, /* abortop */ 63 { &vop_inactive_desc, spec_inactive }, /* inactive */ 64 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 65 { &vop_lock_desc, spec_lock }, /* lock */ 66 { &vop_unlock_desc, spec_unlock }, /* unlock */ 67 { &vop_bmap_desc, spec_bmap }, /* bmap */ 68 { &vop_strategy_desc, spec_strategy }, /* strategy */ 69 { &vop_print_desc, spec_print }, /* print */ 70 { &vop_islocked_desc, spec_islocked }, /* islocked */ 71 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 72 { &vop_advlock_desc, spec_advlock }, /* advlock */ 73 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 74 { &vop_valloc_desc, spec_valloc }, /* valloc */ 75 { &vop_vfree_desc, spec_vfree }, /* vfree */ 76 { &vop_truncate_desc, spec_truncate }, /* truncate */ 77 { &vop_update_desc, spec_update }, /* update */ 78 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 79 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 80 }; 81 struct vnodeopv_desc spec_vnodeop_opv_desc = 82 { &spec_vnodeop_p, spec_vnodeop_entries }; 83 84 /* 85 * Trivial lookup routine that always fails. 86 */ 87 int 88 spec_lookup(ap) 89 struct vop_lookup_args /* { 90 struct vnode *a_dvp; 91 struct vnode **a_vpp; 92 struct componentname *a_cnp; 93 } */ *ap; 94 { 95 96 *ap->a_vpp = NULL; 97 return (ENOTDIR); 98 } 99 100 /* 101 * Open a special file. 102 */ 103 /* ARGSUSED */ 104 spec_open(ap) 105 struct vop_open_args /* { 106 struct vnode *a_vp; 107 int a_mode; 108 struct ucred *a_cred; 109 struct proc *a_p; 110 } */ *ap; 111 { 112 struct vnode *bvp, *vp = ap->a_vp; 113 dev_t bdev, dev = (dev_t)vp->v_rdev; 114 register int maj = major(dev); 115 int error; 116 117 /* 118 * Don't allow open if fs is mounted -nodev. 119 */ 120 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 121 return (ENXIO); 122 123 switch (vp->v_type) { 124 125 case VCHR: 126 if ((u_int)maj >= nchrdev) 127 return (ENXIO); 128 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 129 /* 130 * When running in very secure mode, do not allow 131 * opens for writing of any disk character devices. 132 */ 133 if (securelevel >= 2 && isdisk(dev, VCHR)) 134 return (EPERM); 135 /* 136 * When running in secure mode, do not allow opens 137 * for writing of /dev/mem, /dev/kmem, or character 138 * devices whose corresponding block devices are 139 * currently mounted. 140 */ 141 if (securelevel >= 1) { 142 if ((bdev = chrtoblk(dev)) != NODEV && 143 vfinddev(bdev, VBLK, &bvp) && 144 bvp->v_usecount > 0 && 145 (error = vfs_mountedon(bvp))) 146 return (error); 147 if (iskmemdev(dev)) 148 return (EPERM); 149 } 150 } 151 VOP_UNLOCK(vp); 152 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); 153 VOP_LOCK(vp); 154 return (error); 155 156 case VBLK: 157 if ((u_int)maj >= nblkdev) 158 return (ENXIO); 159 /* 160 * When running in very secure mode, do not allow 161 * opens for writing of any disk block devices. 162 */ 163 if (securelevel >= 2 && ap->a_cred != FSCRED && 164 (ap->a_mode & FWRITE) && isdisk(dev, VBLK)) 165 return (EPERM); 166 /* 167 * Do not allow opens of block devices that are 168 * currently mounted. 169 */ 170 if (error = vfs_mountedon(vp)) 171 return (error); 172 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); 173 } 174 return (0); 175 } 176 177 /* 178 * Vnode op for read 179 */ 180 /* ARGSUSED */ 181 spec_read(ap) 182 struct vop_read_args /* { 183 struct vnode *a_vp; 184 struct uio *a_uio; 185 int a_ioflag; 186 struct ucred *a_cred; 187 } */ *ap; 188 { 189 register struct vnode *vp = ap->a_vp; 190 register struct uio *uio = ap->a_uio; 191 struct proc *p = uio->uio_procp; 192 struct buf *bp; 193 daddr_t bn, nextbn; 194 long bsize, bscale; 195 struct partinfo dpart; 196 int n, on, majordev, (*ioctl)(); 197 int error = 0; 198 dev_t dev; 199 200 #ifdef DIAGNOSTIC 201 if (uio->uio_rw != UIO_READ) 202 panic("spec_read mode"); 203 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 204 panic("spec_read proc"); 205 #endif 206 if (uio->uio_resid == 0) 207 return (0); 208 209 switch (vp->v_type) { 210 211 case VCHR: 212 VOP_UNLOCK(vp); 213 error = (*cdevsw[major(vp->v_rdev)].d_read) 214 (vp->v_rdev, uio, ap->a_ioflag); 215 VOP_LOCK(vp); 216 return (error); 217 218 case VBLK: 219 if (uio->uio_offset < 0) 220 return (EINVAL); 221 bsize = BLKDEV_IOSIZE; 222 dev = vp->v_rdev; 223 if ((majordev = major(dev)) < nblkdev && 224 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 225 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && 226 dpart.part->p_fstype == FS_BSDFFS && 227 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 228 bsize = dpart.part->p_frag * dpart.part->p_fsize; 229 bscale = bsize / DEV_BSIZE; 230 do { 231 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 232 on = uio->uio_offset % bsize; 233 n = min((unsigned)(bsize - on), uio->uio_resid); 234 if (vp->v_lastr + bscale == bn) { 235 nextbn = bn + bscale; 236 error = breadn(vp, bn, (int)bsize, &nextbn, 237 (int *)&bsize, 1, NOCRED, &bp); 238 } else 239 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 240 vp->v_lastr = bn; 241 n = min(n, bsize - bp->b_resid); 242 if (error) { 243 brelse(bp); 244 return (error); 245 } 246 error = uiomove((char *)bp->b_data + on, n, uio); 247 if (n + on == bsize) 248 bp->b_flags |= B_AGE; 249 brelse(bp); 250 } while (error == 0 && uio->uio_resid > 0 && n != 0); 251 return (error); 252 253 default: 254 panic("spec_read type"); 255 } 256 /* NOTREACHED */ 257 } 258 259 /* 260 * Vnode op for write 261 */ 262 /* ARGSUSED */ 263 spec_write(ap) 264 struct vop_write_args /* { 265 struct vnode *a_vp; 266 struct uio *a_uio; 267 int a_ioflag; 268 struct ucred *a_cred; 269 } */ *ap; 270 { 271 register struct vnode *vp = ap->a_vp; 272 register struct uio *uio = ap->a_uio; 273 struct proc *p = uio->uio_procp; 274 struct buf *bp; 275 daddr_t bn; 276 int bsize, blkmask; 277 struct partinfo dpart; 278 register int n, on; 279 int error = 0; 280 281 #ifdef DIAGNOSTIC 282 if (uio->uio_rw != UIO_WRITE) 283 panic("spec_write mode"); 284 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 285 panic("spec_write proc"); 286 #endif 287 288 switch (vp->v_type) { 289 290 case VCHR: 291 VOP_UNLOCK(vp); 292 error = (*cdevsw[major(vp->v_rdev)].d_write) 293 (vp->v_rdev, uio, ap->a_ioflag); 294 VOP_LOCK(vp); 295 return (error); 296 297 case VBLK: 298 if (uio->uio_resid == 0) 299 return (0); 300 if (uio->uio_offset < 0) 301 return (EINVAL); 302 bsize = BLKDEV_IOSIZE; 303 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 304 (caddr_t)&dpart, FREAD, p) == 0) { 305 if (dpart.part->p_fstype == FS_BSDFFS && 306 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 307 bsize = dpart.part->p_frag * 308 dpart.part->p_fsize; 309 } 310 blkmask = (bsize / DEV_BSIZE) - 1; 311 do { 312 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 313 on = uio->uio_offset % bsize; 314 n = min((unsigned)(bsize - on), uio->uio_resid); 315 if (n == bsize) 316 bp = getblk(vp, bn, bsize, 0, 0); 317 else 318 error = bread(vp, bn, bsize, NOCRED, &bp); 319 n = min(n, bsize - bp->b_resid); 320 if (error) { 321 brelse(bp); 322 return (error); 323 } 324 error = uiomove((char *)bp->b_data + on, n, uio); 325 if (n + on == bsize) { 326 bp->b_flags |= B_AGE; 327 bawrite(bp); 328 } else 329 bdwrite(bp); 330 } while (error == 0 && uio->uio_resid > 0 && n != 0); 331 return (error); 332 333 default: 334 panic("spec_write type"); 335 } 336 /* NOTREACHED */ 337 } 338 339 /* 340 * Device ioctl operation. 341 */ 342 /* ARGSUSED */ 343 spec_ioctl(ap) 344 struct vop_ioctl_args /* { 345 struct vnode *a_vp; 346 int a_command; 347 caddr_t a_data; 348 int a_fflag; 349 struct ucred *a_cred; 350 struct proc *a_p; 351 } */ *ap; 352 { 353 dev_t dev = ap->a_vp->v_rdev; 354 355 switch (ap->a_vp->v_type) { 356 357 case VCHR: 358 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 359 ap->a_fflag, ap->a_p)); 360 361 case VBLK: 362 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 363 if (bdevsw[major(dev)].d_flags & B_TAPE) 364 return (0); 365 else 366 return (1); 367 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 368 ap->a_fflag, ap->a_p)); 369 370 default: 371 panic("spec_ioctl"); 372 /* NOTREACHED */ 373 } 374 } 375 376 /* ARGSUSED */ 377 spec_select(ap) 378 struct vop_select_args /* { 379 struct vnode *a_vp; 380 int a_which; 381 int a_fflags; 382 struct ucred *a_cred; 383 struct proc *a_p; 384 } */ *ap; 385 { 386 register dev_t dev; 387 388 switch (ap->a_vp->v_type) { 389 390 default: 391 return (1); /* XXX */ 392 393 case VCHR: 394 dev = ap->a_vp->v_rdev; 395 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 396 } 397 } 398 /* 399 * Synch buffers associated with a block device 400 */ 401 /* ARGSUSED */ 402 int 403 spec_fsync(ap) 404 struct vop_fsync_args /* { 405 struct vnode *a_vp; 406 struct ucred *a_cred; 407 int a_waitfor; 408 struct proc *a_p; 409 } */ *ap; 410 { 411 register struct vnode *vp = ap->a_vp; 412 register struct buf *bp; 413 struct buf *nbp; 414 int s; 415 416 if (vp->v_type == VCHR) 417 return (0); 418 /* 419 * Flush all dirty buffers associated with a block device. 420 */ 421 loop: 422 s = splbio(); 423 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 424 nbp = bp->b_vnbufs.le_next; 425 if ((bp->b_flags & B_BUSY)) 426 continue; 427 if ((bp->b_flags & B_DELWRI) == 0) 428 panic("spec_fsync: not dirty"); 429 bremfree(bp); 430 bp->b_flags |= B_BUSY; 431 splx(s); 432 bawrite(bp); 433 goto loop; 434 } 435 if (ap->a_waitfor == MNT_WAIT) { 436 while (vp->v_numoutput) { 437 vp->v_flag |= VBWAIT; 438 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 439 } 440 #ifdef DIAGNOSTIC 441 if (vp->v_dirtyblkhd.lh_first) { 442 vprint("spec_fsync: dirty", vp); 443 goto loop; 444 } 445 #endif 446 } 447 splx(s); 448 return (0); 449 } 450 451 /* 452 * Just call the device strategy routine 453 */ 454 spec_strategy(ap) 455 struct vop_strategy_args /* { 456 struct buf *a_bp; 457 } */ *ap; 458 { 459 460 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 461 return (0); 462 } 463 464 /* 465 * This is a noop, simply returning what one has been given. 466 */ 467 spec_bmap(ap) 468 struct vop_bmap_args /* { 469 struct vnode *a_vp; 470 daddr_t a_bn; 471 struct vnode **a_vpp; 472 daddr_t *a_bnp; 473 } */ *ap; 474 { 475 476 if (ap->a_vpp != NULL) 477 *ap->a_vpp = ap->a_vp; 478 if (ap->a_bnp != NULL) 479 *ap->a_bnp = ap->a_bn; 480 return (0); 481 } 482 483 /* 484 * At the moment we do not do any locking. 485 */ 486 /* ARGSUSED */ 487 spec_lock(ap) 488 struct vop_lock_args /* { 489 struct vnode *a_vp; 490 } */ *ap; 491 { 492 493 return (0); 494 } 495 496 /* ARGSUSED */ 497 spec_unlock(ap) 498 struct vop_unlock_args /* { 499 struct vnode *a_vp; 500 } */ *ap; 501 { 502 503 return (0); 504 } 505 506 /* 507 * Device close routine 508 */ 509 /* ARGSUSED */ 510 spec_close(ap) 511 struct vop_close_args /* { 512 struct vnode *a_vp; 513 int a_fflag; 514 struct ucred *a_cred; 515 struct proc *a_p; 516 } */ *ap; 517 { 518 register struct vnode *vp = ap->a_vp; 519 dev_t dev = vp->v_rdev; 520 int (*devclose) __P((dev_t, int, int, struct proc *)); 521 int mode, error; 522 523 switch (vp->v_type) { 524 525 case VCHR: 526 /* 527 * Hack: a tty device that is a controlling terminal 528 * has a reference from the session structure. 529 * We cannot easily tell that a character device is 530 * a controlling terminal, unless it is the closing 531 * process' controlling terminal. In that case, 532 * if the reference count is 2 (this last descriptor 533 * plus the session), release the reference from the session. 534 */ 535 if (vcount(vp) == 2 && ap->a_p && 536 vp == ap->a_p->p_session->s_ttyvp) { 537 vrele(vp); 538 ap->a_p->p_session->s_ttyvp = NULL; 539 } 540 /* 541 * If the vnode is locked, then we are in the midst 542 * of forcably closing the device, otherwise we only 543 * close on last reference. 544 */ 545 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 546 return (0); 547 devclose = cdevsw[major(dev)].d_close; 548 mode = S_IFCHR; 549 break; 550 551 case VBLK: 552 /* 553 * On last close of a block device (that isn't mounted) 554 * we must invalidate any in core blocks, so that 555 * we can, for instance, change floppy disks. 556 */ 557 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 558 return (error); 559 /* 560 * We do not want to really close the device if it 561 * is still in use unless we are trying to close it 562 * forcibly. Since every use (buffer, vnode, swap, cmap) 563 * holds a reference to the vnode, and because we mark 564 * any other vnodes that alias this device, when the 565 * sum of the reference counts on all the aliased 566 * vnodes descends to one, we are on last close. 567 */ 568 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 569 return (0); 570 devclose = bdevsw[major(dev)].d_close; 571 mode = S_IFBLK; 572 break; 573 574 default: 575 panic("spec_close: not special"); 576 } 577 578 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 579 } 580 581 /* 582 * Print out the contents of a special device vnode. 583 */ 584 spec_print(ap) 585 struct vop_print_args /* { 586 struct vnode *a_vp; 587 } */ *ap; 588 { 589 590 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 591 minor(ap->a_vp->v_rdev)); 592 } 593 594 /* 595 * Return POSIX pathconf information applicable to special devices. 596 */ 597 spec_pathconf(ap) 598 struct vop_pathconf_args /* { 599 struct vnode *a_vp; 600 int a_name; 601 int *a_retval; 602 } */ *ap; 603 { 604 605 switch (ap->a_name) { 606 case _PC_LINK_MAX: 607 *ap->a_retval = LINK_MAX; 608 return (0); 609 case _PC_MAX_CANON: 610 *ap->a_retval = MAX_CANON; 611 return (0); 612 case _PC_MAX_INPUT: 613 *ap->a_retval = MAX_INPUT; 614 return (0); 615 case _PC_PIPE_BUF: 616 *ap->a_retval = PIPE_BUF; 617 return (0); 618 case _PC_CHOWN_RESTRICTED: 619 *ap->a_retval = 1; 620 return (0); 621 case _PC_VDISABLE: 622 *ap->a_retval = _POSIX_VDISABLE; 623 return (0); 624 default: 625 return (EINVAL); 626 } 627 /* NOTREACHED */ 628 } 629 630 /* 631 * Special device advisory byte-level locks. 632 */ 633 /* ARGSUSED */ 634 spec_advlock(ap) 635 struct vop_advlock_args /* { 636 struct vnode *a_vp; 637 caddr_t a_id; 638 int a_op; 639 struct flock *a_fl; 640 int a_flags; 641 } */ *ap; 642 { 643 644 return (EOPNOTSUPP); 645 } 646 647 /* 648 * Special device failed operation 649 */ 650 spec_ebadf() 651 { 652 653 return (EBADF); 654 } 655 656 /* 657 * Special device bad operation 658 */ 659 spec_badop() 660 { 661 662 panic("spec_badop called"); 663 /* NOTREACHED */ 664 } 665