1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.1 (Berkeley) 06/10/93 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 49 { &vop_select_desc, spec_select }, /* select */ 50 { &vop_mmap_desc, spec_mmap }, /* mmap */ 51 { &vop_fsync_desc, spec_fsync }, /* fsync */ 52 { &vop_seek_desc, spec_seek }, /* seek */ 53 { &vop_remove_desc, spec_remove }, /* remove */ 54 { &vop_link_desc, spec_link }, /* link */ 55 { &vop_rename_desc, spec_rename }, /* rename */ 56 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 57 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 58 { &vop_symlink_desc, spec_symlink }, /* symlink */ 59 { &vop_readdir_desc, spec_readdir }, /* readdir */ 60 { &vop_readlink_desc, spec_readlink }, /* readlink */ 61 { &vop_abortop_desc, spec_abortop }, /* abortop */ 62 { &vop_inactive_desc, spec_inactive }, /* inactive */ 63 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 64 { &vop_lock_desc, spec_lock }, /* lock */ 65 { &vop_unlock_desc, spec_unlock }, /* unlock */ 66 { &vop_bmap_desc, spec_bmap }, /* bmap */ 67 { &vop_strategy_desc, spec_strategy }, /* strategy */ 68 { &vop_print_desc, spec_print }, /* print */ 69 { &vop_islocked_desc, spec_islocked }, /* islocked */ 70 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 71 { &vop_advlock_desc, spec_advlock }, /* advlock */ 72 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 73 { &vop_valloc_desc, spec_valloc }, /* valloc */ 74 { &vop_vfree_desc, spec_vfree }, /* vfree */ 75 { &vop_truncate_desc, spec_truncate }, /* truncate */ 76 { &vop_update_desc, spec_update }, /* update */ 77 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 78 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 79 }; 80 struct vnodeopv_desc spec_vnodeop_opv_desc = 81 { &spec_vnodeop_p, spec_vnodeop_entries }; 82 83 /* 84 * Trivial lookup routine that always fails. 85 */ 86 int 87 spec_lookup(ap) 88 struct vop_lookup_args /* { 89 struct vnode *a_dvp; 90 struct vnode **a_vpp; 91 struct componentname *a_cnp; 92 } */ *ap; 93 { 94 95 *ap->a_vpp = NULL; 96 return (ENOTDIR); 97 } 98 99 /* 100 * Open a special file. 101 */ 102 /* ARGSUSED */ 103 spec_open(ap) 104 struct vop_open_args /* { 105 struct vnode *a_vp; 106 int a_mode; 107 struct ucred *a_cred; 108 struct proc *a_p; 109 } */ *ap; 110 { 111 struct vnode *bvp, *vp = ap->a_vp; 112 dev_t bdev, dev = (dev_t)vp->v_rdev; 113 register int maj = major(dev); 114 int error; 115 116 /* 117 * Don't allow open if fs is mounted -nodev. 118 */ 119 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 120 return (ENXIO); 121 122 switch (vp->v_type) { 123 124 case VCHR: 125 if ((u_int)maj >= nchrdev) 126 return (ENXIO); 127 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 128 /* 129 * When running in very secure mode, do not allow 130 * opens for writing of any disk character devices. 131 */ 132 if (securelevel >= 2 && isdisk(dev, VCHR)) 133 return (EPERM); 134 /* 135 * When running in secure mode, do not allow opens 136 * for writing of /dev/mem, /dev/kmem, or character 137 * devices whose corresponding block devices are 138 * currently mounted. 139 */ 140 if (securelevel >= 1) { 141 if ((bdev = chrtoblk(dev)) != NODEV && 142 vfinddev(bdev, VBLK, &bvp) && 143 bvp->v_usecount > 0 && 144 (error = ufs_mountedon(bvp))) 145 return (error); 146 if (iskmemdev(dev)) 147 return (EPERM); 148 } 149 } 150 VOP_UNLOCK(vp); 151 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); 152 VOP_LOCK(vp); 153 return (error); 154 155 case VBLK: 156 if ((u_int)maj >= nblkdev) 157 return (ENXIO); 158 /* 159 * When running in very secure mode, do not allow 160 * opens for writing of any disk block devices. 161 */ 162 if (securelevel >= 2 && ap->a_cred != FSCRED && 163 (ap->a_mode & FWRITE) && isdisk(dev, VBLK)) 164 return (EPERM); 165 /* 166 * Do not allow opens of block devices that are 167 * currently mounted. 168 */ 169 if (error = ufs_mountedon(vp)) 170 return (error); 171 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); 172 } 173 return (0); 174 } 175 176 /* 177 * Vnode op for read 178 */ 179 /* ARGSUSED */ 180 spec_read(ap) 181 struct vop_read_args /* { 182 struct vnode *a_vp; 183 struct uio *a_uio; 184 int a_ioflag; 185 struct ucred *a_cred; 186 } */ *ap; 187 { 188 register struct vnode *vp = ap->a_vp; 189 register struct uio *uio = ap->a_uio; 190 struct proc *p = uio->uio_procp; 191 struct buf *bp; 192 daddr_t bn, nextbn; 193 long bsize, bscale; 194 struct partinfo dpart; 195 register int n, on; 196 int error = 0; 197 198 #ifdef DIAGNOSTIC 199 if (uio->uio_rw != UIO_READ) 200 panic("spec_read mode"); 201 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 202 panic("spec_read proc"); 203 #endif 204 if (uio->uio_resid == 0) 205 return (0); 206 207 switch (vp->v_type) { 208 209 case VCHR: 210 VOP_UNLOCK(vp); 211 error = (*cdevsw[major(vp->v_rdev)].d_read) 212 (vp->v_rdev, uio, ap->a_ioflag); 213 VOP_LOCK(vp); 214 return (error); 215 216 case VBLK: 217 if (uio->uio_offset < 0) 218 return (EINVAL); 219 bsize = BLKDEV_IOSIZE; 220 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 221 (caddr_t)&dpart, FREAD, p) == 0) { 222 if (dpart.part->p_fstype == FS_BSDFFS && 223 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 224 bsize = dpart.part->p_frag * 225 dpart.part->p_fsize; 226 } 227 bscale = bsize / DEV_BSIZE; 228 do { 229 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 230 on = uio->uio_offset % bsize; 231 n = min((unsigned)(bsize - on), uio->uio_resid); 232 if (vp->v_lastr + bscale == bn) { 233 nextbn = bn + bscale; 234 error = breadn(vp, bn, (int)bsize, &nextbn, 235 (int *)&bsize, 1, NOCRED, &bp); 236 } else 237 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 238 vp->v_lastr = bn; 239 n = min(n, bsize - bp->b_resid); 240 if (error) { 241 brelse(bp); 242 return (error); 243 } 244 error = uiomove(bp->b_un.b_addr + on, n, uio); 245 if (n + on == bsize) 246 bp->b_flags |= B_AGE; 247 brelse(bp); 248 } while (error == 0 && uio->uio_resid > 0 && n != 0); 249 return (error); 250 251 default: 252 panic("spec_read type"); 253 } 254 /* NOTREACHED */ 255 } 256 257 /* 258 * Vnode op for write 259 */ 260 /* ARGSUSED */ 261 spec_write(ap) 262 struct vop_write_args /* { 263 struct vnode *a_vp; 264 struct uio *a_uio; 265 int a_ioflag; 266 struct ucred *a_cred; 267 } */ *ap; 268 { 269 register struct vnode *vp = ap->a_vp; 270 register struct uio *uio = ap->a_uio; 271 struct proc *p = uio->uio_procp; 272 struct buf *bp; 273 daddr_t bn; 274 int bsize, blkmask; 275 struct partinfo dpart; 276 register int n, on; 277 int error = 0; 278 279 #ifdef DIAGNOSTIC 280 if (uio->uio_rw != UIO_WRITE) 281 panic("spec_write mode"); 282 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 283 panic("spec_write proc"); 284 #endif 285 286 switch (vp->v_type) { 287 288 case VCHR: 289 VOP_UNLOCK(vp); 290 error = (*cdevsw[major(vp->v_rdev)].d_write) 291 (vp->v_rdev, uio, ap->a_ioflag); 292 VOP_LOCK(vp); 293 return (error); 294 295 case VBLK: 296 if (uio->uio_resid == 0) 297 return (0); 298 if (uio->uio_offset < 0) 299 return (EINVAL); 300 bsize = BLKDEV_IOSIZE; 301 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 302 (caddr_t)&dpart, FREAD, p) == 0) { 303 if (dpart.part->p_fstype == FS_BSDFFS && 304 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 305 bsize = dpart.part->p_frag * 306 dpart.part->p_fsize; 307 } 308 blkmask = (bsize / DEV_BSIZE) - 1; 309 do { 310 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 311 on = uio->uio_offset % bsize; 312 n = min((unsigned)(bsize - on), uio->uio_resid); 313 if (n == bsize) 314 bp = getblk(vp, bn, bsize, 0, 0); 315 else 316 error = bread(vp, bn, bsize, NOCRED, &bp); 317 n = min(n, bsize - bp->b_resid); 318 if (error) { 319 brelse(bp); 320 return (error); 321 } 322 error = uiomove(bp->b_un.b_addr + on, n, uio); 323 if (n + on == bsize) { 324 bp->b_flags |= B_AGE; 325 bawrite(bp); 326 } else 327 bdwrite(bp); 328 } while (error == 0 && uio->uio_resid > 0 && n != 0); 329 return (error); 330 331 default: 332 panic("spec_write type"); 333 } 334 /* NOTREACHED */ 335 } 336 337 /* 338 * Device ioctl operation. 339 */ 340 /* ARGSUSED */ 341 spec_ioctl(ap) 342 struct vop_ioctl_args /* { 343 struct vnode *a_vp; 344 int a_command; 345 caddr_t a_data; 346 int a_fflag; 347 struct ucred *a_cred; 348 struct proc *a_p; 349 } */ *ap; 350 { 351 dev_t dev = ap->a_vp->v_rdev; 352 353 switch (ap->a_vp->v_type) { 354 355 case VCHR: 356 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 357 ap->a_fflag, ap->a_p)); 358 359 case VBLK: 360 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 361 if (bdevsw[major(dev)].d_flags & B_TAPE) 362 return (0); 363 else 364 return (1); 365 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 366 ap->a_fflag, ap->a_p)); 367 368 default: 369 panic("spec_ioctl"); 370 /* NOTREACHED */ 371 } 372 } 373 374 /* ARGSUSED */ 375 spec_select(ap) 376 struct vop_select_args /* { 377 struct vnode *a_vp; 378 int a_which; 379 int a_fflags; 380 struct ucred *a_cred; 381 struct proc *a_p; 382 } */ *ap; 383 { 384 register dev_t dev; 385 386 switch (ap->a_vp->v_type) { 387 388 default: 389 return (1); /* XXX */ 390 391 case VCHR: 392 dev = ap->a_vp->v_rdev; 393 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 394 } 395 } 396 /* 397 * Synch buffers associated with a block device 398 */ 399 /* ARGSUSED */ 400 int 401 spec_fsync(ap) 402 struct vop_fsync_args /* { 403 struct vnode *a_vp; 404 struct ucred *a_cred; 405 int a_waitfor; 406 struct proc *a_p; 407 } */ *ap; 408 { 409 register struct vnode *vp = ap->a_vp; 410 register struct buf *bp; 411 struct buf *nbp; 412 int s; 413 414 if (vp->v_type == VCHR) 415 return (0); 416 /* 417 * Flush all dirty buffers associated with a block device. 418 */ 419 loop: 420 s = splbio(); 421 for (bp = vp->v_dirtyblkhd.le_next; bp; bp = nbp) { 422 nbp = bp->b_vnbufs.qe_next; 423 if ((bp->b_flags & B_BUSY)) 424 continue; 425 if ((bp->b_flags & B_DELWRI) == 0) 426 panic("spec_fsync: not dirty"); 427 bremfree(bp); 428 bp->b_flags |= B_BUSY; 429 splx(s); 430 bawrite(bp); 431 goto loop; 432 } 433 if (ap->a_waitfor == MNT_WAIT) { 434 while (vp->v_numoutput) { 435 vp->v_flag |= VBWAIT; 436 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 437 } 438 #ifdef DIAGNOSTIC 439 if (vp->v_dirtyblkhd.le_next) { 440 vprint("spec_fsync: dirty", vp); 441 goto loop; 442 } 443 #endif 444 } 445 splx(s); 446 return (0); 447 } 448 449 /* 450 * Just call the device strategy routine 451 */ 452 spec_strategy(ap) 453 struct vop_strategy_args /* { 454 struct buf *a_bp; 455 } */ *ap; 456 { 457 458 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 459 return (0); 460 } 461 462 /* 463 * This is a noop, simply returning what one has been given. 464 */ 465 spec_bmap(ap) 466 struct vop_bmap_args /* { 467 struct vnode *a_vp; 468 daddr_t a_bn; 469 struct vnode **a_vpp; 470 daddr_t *a_bnp; 471 } */ *ap; 472 { 473 474 if (ap->a_vpp != NULL) 475 *ap->a_vpp = ap->a_vp; 476 if (ap->a_bnp != NULL) 477 *ap->a_bnp = ap->a_bn; 478 return (0); 479 } 480 481 /* 482 * At the moment we do not do any locking. 483 */ 484 /* ARGSUSED */ 485 spec_lock(ap) 486 struct vop_lock_args /* { 487 struct vnode *a_vp; 488 } */ *ap; 489 { 490 491 return (0); 492 } 493 494 /* ARGSUSED */ 495 spec_unlock(ap) 496 struct vop_unlock_args /* { 497 struct vnode *a_vp; 498 } */ *ap; 499 { 500 501 return (0); 502 } 503 504 /* 505 * Device close routine 506 */ 507 /* ARGSUSED */ 508 spec_close(ap) 509 struct vop_close_args /* { 510 struct vnode *a_vp; 511 int a_fflag; 512 struct ucred *a_cred; 513 struct proc *a_p; 514 } */ *ap; 515 { 516 register struct vnode *vp = ap->a_vp; 517 dev_t dev = vp->v_rdev; 518 int (*devclose) __P((dev_t, int, int, struct proc *)); 519 int mode, error; 520 521 switch (vp->v_type) { 522 523 case VCHR: 524 /* 525 * Hack: a tty device that is a controlling terminal 526 * has a reference from the session structure. 527 * We cannot easily tell that a character device is 528 * a controlling terminal, unless it is the closing 529 * process' controlling terminal. In that case, 530 * if the reference count is 2 (this last descriptor 531 * plus the session), release the reference from the session. 532 */ 533 if (vcount(vp) == 2 && ap->a_p && 534 vp == ap->a_p->p_session->s_ttyvp) { 535 vrele(vp); 536 ap->a_p->p_session->s_ttyvp = NULL; 537 } 538 /* 539 * If the vnode is locked, then we are in the midst 540 * of forcably closing the device, otherwise we only 541 * close on last reference. 542 */ 543 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 544 return (0); 545 devclose = cdevsw[major(dev)].d_close; 546 mode = S_IFCHR; 547 break; 548 549 case VBLK: 550 /* 551 * On last close of a block device (that isn't mounted) 552 * we must invalidate any in core blocks, so that 553 * we can, for instance, change floppy disks. 554 */ 555 if (error = vinvalbuf(vp, 1, ap->a_cred, ap->a_p, 0, 0)) 556 return (error); 557 /* 558 * We do not want to really close the device if it 559 * is still in use unless we are trying to close it 560 * forcibly. Since every use (buffer, vnode, swap, cmap) 561 * holds a reference to the vnode, and because we mark 562 * any other vnodes that alias this device, when the 563 * sum of the reference counts on all the aliased 564 * vnodes descends to one, we are on last close. 565 */ 566 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 567 return (0); 568 devclose = bdevsw[major(dev)].d_close; 569 mode = S_IFBLK; 570 break; 571 572 default: 573 panic("spec_close: not special"); 574 } 575 576 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 577 } 578 579 /* 580 * Print out the contents of a special device vnode. 581 */ 582 spec_print(ap) 583 struct vop_print_args /* { 584 struct vnode *a_vp; 585 } */ *ap; 586 { 587 588 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 589 minor(ap->a_vp->v_rdev)); 590 } 591 592 /* 593 * Return POSIX pathconf information applicable to special devices. 594 */ 595 spec_pathconf(ap) 596 struct vop_pathconf_args /* { 597 struct vnode *a_vp; 598 int a_name; 599 int *a_retval; 600 } */ *ap; 601 { 602 603 switch (ap->a_name) { 604 case _PC_LINK_MAX: 605 *ap->a_retval = LINK_MAX; 606 return (0); 607 case _PC_MAX_CANON: 608 *ap->a_retval = MAX_CANON; 609 return (0); 610 case _PC_MAX_INPUT: 611 *ap->a_retval = MAX_INPUT; 612 return (0); 613 case _PC_PIPE_BUF: 614 *ap->a_retval = PIPE_BUF; 615 return (0); 616 case _PC_CHOWN_RESTRICTED: 617 *ap->a_retval = 1; 618 return (0); 619 case _PC_VDISABLE: 620 *ap->a_retval = _POSIX_VDISABLE; 621 return (0); 622 default: 623 return (EINVAL); 624 } 625 /* NOTREACHED */ 626 } 627 628 /* 629 * Special device advisory byte-level locks. 630 */ 631 /* ARGSUSED */ 632 spec_advlock(ap) 633 struct vop_advlock_args /* { 634 struct vnode *a_vp; 635 caddr_t a_id; 636 int a_op; 637 struct flock *a_fl; 638 int a_flags; 639 } */ *ap; 640 { 641 642 return (EOPNOTSUPP); 643 } 644 645 /* 646 * Special device failed operation 647 */ 648 spec_ebadf() 649 { 650 651 return (EBADF); 652 } 653 654 /* 655 * Special device bad operation 656 */ 657 spec_badop() 658 { 659 660 panic("spec_badop called"); 661 /* NOTREACHED */ 662 } 663