1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.6 (Berkeley) 04/09/94 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 49 { &vop_select_desc, spec_select }, /* select */ 50 { &vop_mmap_desc, spec_mmap }, /* mmap */ 51 { &vop_fsync_desc, spec_fsync }, /* fsync */ 52 { &vop_seek_desc, spec_seek }, /* seek */ 53 { &vop_remove_desc, spec_remove }, /* remove */ 54 { &vop_link_desc, spec_link }, /* link */ 55 { &vop_rename_desc, spec_rename }, /* rename */ 56 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 57 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 58 { &vop_symlink_desc, spec_symlink }, /* symlink */ 59 { &vop_readdir_desc, spec_readdir }, /* readdir */ 60 { &vop_readlink_desc, spec_readlink }, /* readlink */ 61 { &vop_abortop_desc, spec_abortop }, /* abortop */ 62 { &vop_inactive_desc, spec_inactive }, /* inactive */ 63 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 64 { &vop_lock_desc, spec_lock }, /* lock */ 65 { &vop_unlock_desc, spec_unlock }, /* unlock */ 66 { &vop_bmap_desc, spec_bmap }, /* bmap */ 67 { &vop_strategy_desc, spec_strategy }, /* strategy */ 68 { &vop_print_desc, spec_print }, /* print */ 69 { &vop_islocked_desc, spec_islocked }, /* islocked */ 70 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 71 { &vop_advlock_desc, spec_advlock }, /* advlock */ 72 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 73 { &vop_valloc_desc, spec_valloc }, /* valloc */ 74 { &vop_vfree_desc, spec_vfree }, /* vfree */ 75 { &vop_truncate_desc, spec_truncate }, /* truncate */ 76 { &vop_update_desc, spec_update }, /* update */ 77 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 78 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 79 }; 80 struct vnodeopv_desc spec_vnodeop_opv_desc = 81 { &spec_vnodeop_p, spec_vnodeop_entries }; 82 83 /* 84 * Trivial lookup routine that always fails. 85 */ 86 int 87 spec_lookup(ap) 88 struct vop_lookup_args /* { 89 struct vnode *a_dvp; 90 struct vnode **a_vpp; 91 struct componentname *a_cnp; 92 } */ *ap; 93 { 94 95 *ap->a_vpp = NULL; 96 return (ENOTDIR); 97 } 98 99 /* 100 * Open a special file. 101 */ 102 /* ARGSUSED */ 103 spec_open(ap) 104 struct vop_open_args /* { 105 struct vnode *a_vp; 106 int a_mode; 107 struct ucred *a_cred; 108 struct proc *a_p; 109 } */ *ap; 110 { 111 struct vnode *bvp, *vp = ap->a_vp; 112 dev_t bdev, dev = (dev_t)vp->v_rdev; 113 register int maj = major(dev); 114 int error; 115 116 /* 117 * Don't allow open if fs is mounted -nodev. 118 */ 119 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 120 return (ENXIO); 121 122 switch (vp->v_type) { 123 124 case VCHR: 125 if ((u_int)maj >= nchrdev) 126 return (ENXIO); 127 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 128 /* 129 * When running in very secure mode, do not allow 130 * opens for writing of any disk character devices. 131 */ 132 if (securelevel >= 2 && isdisk(dev, VCHR)) 133 return (EPERM); 134 /* 135 * When running in secure mode, do not allow opens 136 * for writing of /dev/mem, /dev/kmem, or character 137 * devices whose corresponding block devices are 138 * currently mounted. 139 */ 140 if (securelevel >= 1) { 141 if ((bdev = chrtoblk(dev)) != NODEV && 142 vfinddev(bdev, VBLK, &bvp) && 143 bvp->v_usecount > 0 && 144 (error = vfs_mountedon(bvp))) 145 return (error); 146 if (iskmemdev(dev)) 147 return (EPERM); 148 } 149 } 150 VOP_UNLOCK(vp); 151 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); 152 VOP_LOCK(vp); 153 return (error); 154 155 case VBLK: 156 if ((u_int)maj >= nblkdev) 157 return (ENXIO); 158 /* 159 * When running in very secure mode, do not allow 160 * opens for writing of any disk block devices. 161 */ 162 if (securelevel >= 2 && ap->a_cred != FSCRED && 163 (ap->a_mode & FWRITE) && isdisk(dev, VBLK)) 164 return (EPERM); 165 /* 166 * Do not allow opens of block devices that are 167 * currently mounted. 168 */ 169 if (error = vfs_mountedon(vp)) 170 return (error); 171 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); 172 } 173 return (0); 174 } 175 176 /* 177 * Vnode op for read 178 */ 179 /* ARGSUSED */ 180 spec_read(ap) 181 struct vop_read_args /* { 182 struct vnode *a_vp; 183 struct uio *a_uio; 184 int a_ioflag; 185 struct ucred *a_cred; 186 } */ *ap; 187 { 188 register struct vnode *vp = ap->a_vp; 189 register struct uio *uio = ap->a_uio; 190 struct proc *p = uio->uio_procp; 191 struct buf *bp; 192 daddr_t bn, nextbn; 193 long bsize, bscale; 194 struct partinfo dpart; 195 int n, on, majordev, (*ioctl)(); 196 int error = 0; 197 dev_t dev; 198 199 #ifdef DIAGNOSTIC 200 if (uio->uio_rw != UIO_READ) 201 panic("spec_read mode"); 202 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 203 panic("spec_read proc"); 204 #endif 205 if (uio->uio_resid == 0) 206 return (0); 207 208 switch (vp->v_type) { 209 210 case VCHR: 211 VOP_UNLOCK(vp); 212 error = (*cdevsw[major(vp->v_rdev)].d_read) 213 (vp->v_rdev, uio, ap->a_ioflag); 214 VOP_LOCK(vp); 215 return (error); 216 217 case VBLK: 218 if (uio->uio_offset < 0) 219 return (EINVAL); 220 bsize = BLKDEV_IOSIZE; 221 dev = vp->v_rdev; 222 if ((majordev = major(dev)) < nblkdev && 223 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 224 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && 225 dpart.part->p_fstype == FS_BSDFFS && 226 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 227 bsize = dpart.part->p_frag * dpart.part->p_fsize; 228 bscale = bsize / DEV_BSIZE; 229 do { 230 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 231 on = uio->uio_offset % bsize; 232 n = min((unsigned)(bsize - on), uio->uio_resid); 233 if (vp->v_lastr + bscale == bn) { 234 nextbn = bn + bscale; 235 error = breadn(vp, bn, (int)bsize, &nextbn, 236 (int *)&bsize, 1, NOCRED, &bp); 237 } else 238 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 239 vp->v_lastr = bn; 240 n = min(n, bsize - bp->b_resid); 241 if (error) { 242 brelse(bp); 243 return (error); 244 } 245 error = uiomove((char *)bp->b_data + on, n, uio); 246 if (n + on == bsize) 247 bp->b_flags |= B_AGE; 248 brelse(bp); 249 } while (error == 0 && uio->uio_resid > 0 && n != 0); 250 return (error); 251 252 default: 253 panic("spec_read type"); 254 } 255 /* NOTREACHED */ 256 } 257 258 /* 259 * Vnode op for write 260 */ 261 /* ARGSUSED */ 262 spec_write(ap) 263 struct vop_write_args /* { 264 struct vnode *a_vp; 265 struct uio *a_uio; 266 int a_ioflag; 267 struct ucred *a_cred; 268 } */ *ap; 269 { 270 register struct vnode *vp = ap->a_vp; 271 register struct uio *uio = ap->a_uio; 272 struct proc *p = uio->uio_procp; 273 struct buf *bp; 274 daddr_t bn; 275 int bsize, blkmask; 276 struct partinfo dpart; 277 register int n, on; 278 int error = 0; 279 280 #ifdef DIAGNOSTIC 281 if (uio->uio_rw != UIO_WRITE) 282 panic("spec_write mode"); 283 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 284 panic("spec_write proc"); 285 #endif 286 287 switch (vp->v_type) { 288 289 case VCHR: 290 VOP_UNLOCK(vp); 291 error = (*cdevsw[major(vp->v_rdev)].d_write) 292 (vp->v_rdev, uio, ap->a_ioflag); 293 VOP_LOCK(vp); 294 return (error); 295 296 case VBLK: 297 if (uio->uio_resid == 0) 298 return (0); 299 if (uio->uio_offset < 0) 300 return (EINVAL); 301 bsize = BLKDEV_IOSIZE; 302 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 303 (caddr_t)&dpart, FREAD, p) == 0) { 304 if (dpart.part->p_fstype == FS_BSDFFS && 305 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 306 bsize = dpart.part->p_frag * 307 dpart.part->p_fsize; 308 } 309 blkmask = (bsize / DEV_BSIZE) - 1; 310 do { 311 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 312 on = uio->uio_offset % bsize; 313 n = min((unsigned)(bsize - on), uio->uio_resid); 314 if (n == bsize) 315 bp = getblk(vp, bn, bsize, 0, 0); 316 else 317 error = bread(vp, bn, bsize, NOCRED, &bp); 318 n = min(n, bsize - bp->b_resid); 319 if (error) { 320 brelse(bp); 321 return (error); 322 } 323 error = uiomove((char *)bp->b_data + on, n, uio); 324 if (n + on == bsize) { 325 bp->b_flags |= B_AGE; 326 bawrite(bp); 327 } else 328 bdwrite(bp); 329 } while (error == 0 && uio->uio_resid > 0 && n != 0); 330 return (error); 331 332 default: 333 panic("spec_write type"); 334 } 335 /* NOTREACHED */ 336 } 337 338 /* 339 * Device ioctl operation. 340 */ 341 /* ARGSUSED */ 342 spec_ioctl(ap) 343 struct vop_ioctl_args /* { 344 struct vnode *a_vp; 345 int a_command; 346 caddr_t a_data; 347 int a_fflag; 348 struct ucred *a_cred; 349 struct proc *a_p; 350 } */ *ap; 351 { 352 dev_t dev = ap->a_vp->v_rdev; 353 354 switch (ap->a_vp->v_type) { 355 356 case VCHR: 357 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 358 ap->a_fflag, ap->a_p)); 359 360 case VBLK: 361 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 362 if (bdevsw[major(dev)].d_flags & B_TAPE) 363 return (0); 364 else 365 return (1); 366 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 367 ap->a_fflag, ap->a_p)); 368 369 default: 370 panic("spec_ioctl"); 371 /* NOTREACHED */ 372 } 373 } 374 375 /* ARGSUSED */ 376 spec_select(ap) 377 struct vop_select_args /* { 378 struct vnode *a_vp; 379 int a_which; 380 int a_fflags; 381 struct ucred *a_cred; 382 struct proc *a_p; 383 } */ *ap; 384 { 385 register dev_t dev; 386 387 switch (ap->a_vp->v_type) { 388 389 default: 390 return (1); /* XXX */ 391 392 case VCHR: 393 dev = ap->a_vp->v_rdev; 394 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 395 } 396 } 397 /* 398 * Synch buffers associated with a block device 399 */ 400 /* ARGSUSED */ 401 int 402 spec_fsync(ap) 403 struct vop_fsync_args /* { 404 struct vnode *a_vp; 405 struct ucred *a_cred; 406 int a_waitfor; 407 struct proc *a_p; 408 } */ *ap; 409 { 410 register struct vnode *vp = ap->a_vp; 411 register struct buf *bp; 412 struct buf *nbp; 413 int s; 414 415 if (vp->v_type == VCHR) 416 return (0); 417 /* 418 * Flush all dirty buffers associated with a block device. 419 */ 420 loop: 421 s = splbio(); 422 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 423 nbp = bp->b_vnbufs.le_next; 424 if ((bp->b_flags & B_BUSY)) 425 continue; 426 if ((bp->b_flags & B_DELWRI) == 0) 427 panic("spec_fsync: not dirty"); 428 bremfree(bp); 429 bp->b_flags |= B_BUSY; 430 splx(s); 431 bawrite(bp); 432 goto loop; 433 } 434 if (ap->a_waitfor == MNT_WAIT) { 435 while (vp->v_numoutput) { 436 vp->v_flag |= VBWAIT; 437 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 438 } 439 #ifdef DIAGNOSTIC 440 if (vp->v_dirtyblkhd.lh_first) { 441 vprint("spec_fsync: dirty", vp); 442 goto loop; 443 } 444 #endif 445 } 446 splx(s); 447 return (0); 448 } 449 450 /* 451 * Just call the device strategy routine 452 */ 453 spec_strategy(ap) 454 struct vop_strategy_args /* { 455 struct buf *a_bp; 456 } */ *ap; 457 { 458 459 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 460 return (0); 461 } 462 463 /* 464 * This is a noop, simply returning what one has been given. 465 */ 466 spec_bmap(ap) 467 struct vop_bmap_args /* { 468 struct vnode *a_vp; 469 daddr_t a_bn; 470 struct vnode **a_vpp; 471 daddr_t *a_bnp; 472 } */ *ap; 473 { 474 475 if (ap->a_vpp != NULL) 476 *ap->a_vpp = ap->a_vp; 477 if (ap->a_bnp != NULL) 478 *ap->a_bnp = ap->a_bn; 479 return (0); 480 } 481 482 /* 483 * At the moment we do not do any locking. 484 */ 485 /* ARGSUSED */ 486 spec_lock(ap) 487 struct vop_lock_args /* { 488 struct vnode *a_vp; 489 } */ *ap; 490 { 491 492 return (0); 493 } 494 495 /* ARGSUSED */ 496 spec_unlock(ap) 497 struct vop_unlock_args /* { 498 struct vnode *a_vp; 499 } */ *ap; 500 { 501 502 return (0); 503 } 504 505 /* 506 * Device close routine 507 */ 508 /* ARGSUSED */ 509 spec_close(ap) 510 struct vop_close_args /* { 511 struct vnode *a_vp; 512 int a_fflag; 513 struct ucred *a_cred; 514 struct proc *a_p; 515 } */ *ap; 516 { 517 register struct vnode *vp = ap->a_vp; 518 dev_t dev = vp->v_rdev; 519 int (*devclose) __P((dev_t, int, int, struct proc *)); 520 int mode, error; 521 522 switch (vp->v_type) { 523 524 case VCHR: 525 /* 526 * Hack: a tty device that is a controlling terminal 527 * has a reference from the session structure. 528 * We cannot easily tell that a character device is 529 * a controlling terminal, unless it is the closing 530 * process' controlling terminal. In that case, 531 * if the reference count is 2 (this last descriptor 532 * plus the session), release the reference from the session. 533 */ 534 if (vcount(vp) == 2 && ap->a_p && 535 vp == ap->a_p->p_session->s_ttyvp) { 536 vrele(vp); 537 ap->a_p->p_session->s_ttyvp = NULL; 538 } 539 /* 540 * If the vnode is locked, then we are in the midst 541 * of forcably closing the device, otherwise we only 542 * close on last reference. 543 */ 544 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 545 return (0); 546 devclose = cdevsw[major(dev)].d_close; 547 mode = S_IFCHR; 548 break; 549 550 case VBLK: 551 /* 552 * On last close of a block device (that isn't mounted) 553 * we must invalidate any in core blocks, so that 554 * we can, for instance, change floppy disks. 555 */ 556 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 557 return (error); 558 /* 559 * We do not want to really close the device if it 560 * is still in use unless we are trying to close it 561 * forcibly. Since every use (buffer, vnode, swap, cmap) 562 * holds a reference to the vnode, and because we mark 563 * any other vnodes that alias this device, when the 564 * sum of the reference counts on all the aliased 565 * vnodes descends to one, we are on last close. 566 */ 567 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 568 return (0); 569 devclose = bdevsw[major(dev)].d_close; 570 mode = S_IFBLK; 571 break; 572 573 default: 574 panic("spec_close: not special"); 575 } 576 577 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 578 } 579 580 /* 581 * Print out the contents of a special device vnode. 582 */ 583 spec_print(ap) 584 struct vop_print_args /* { 585 struct vnode *a_vp; 586 } */ *ap; 587 { 588 589 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 590 minor(ap->a_vp->v_rdev)); 591 } 592 593 /* 594 * Return POSIX pathconf information applicable to special devices. 595 */ 596 spec_pathconf(ap) 597 struct vop_pathconf_args /* { 598 struct vnode *a_vp; 599 int a_name; 600 int *a_retval; 601 } */ *ap; 602 { 603 604 switch (ap->a_name) { 605 case _PC_LINK_MAX: 606 *ap->a_retval = LINK_MAX; 607 return (0); 608 case _PC_MAX_CANON: 609 *ap->a_retval = MAX_CANON; 610 return (0); 611 case _PC_MAX_INPUT: 612 *ap->a_retval = MAX_INPUT; 613 return (0); 614 case _PC_PIPE_BUF: 615 *ap->a_retval = PIPE_BUF; 616 return (0); 617 case _PC_CHOWN_RESTRICTED: 618 *ap->a_retval = 1; 619 return (0); 620 case _PC_VDISABLE: 621 *ap->a_retval = _POSIX_VDISABLE; 622 return (0); 623 default: 624 return (EINVAL); 625 } 626 /* NOTREACHED */ 627 } 628 629 /* 630 * Special device advisory byte-level locks. 631 */ 632 /* ARGSUSED */ 633 spec_advlock(ap) 634 struct vop_advlock_args /* { 635 struct vnode *a_vp; 636 caddr_t a_id; 637 int a_op; 638 struct flock *a_fl; 639 int a_flags; 640 } */ *ap; 641 { 642 643 return (EOPNOTSUPP); 644 } 645 646 /* 647 * Special device failed operation 648 */ 649 spec_ebadf() 650 { 651 652 return (EBADF); 653 } 654 655 /* 656 * Special device bad operation 657 */ 658 spec_badop() 659 { 660 661 panic("spec_badop called"); 662 /* NOTREACHED */ 663 } 664