1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)spec_vnops.c 8.13 (Berkeley) 05/14/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/systm.h> 13 #include <sys/kernel.h> 14 #include <sys/conf.h> 15 #include <sys/buf.h> 16 #include <sys/mount.h> 17 #include <sys/namei.h> 18 #include <sys/vnode.h> 19 #include <sys/stat.h> 20 #include <sys/errno.h> 21 #include <sys/ioctl.h> 22 #include <sys/file.h> 23 #include <sys/disklabel.h> 24 #include <miscfs/specfs/specdev.h> 25 26 /* symbolic sleep message strings for devices */ 27 char devopn[] = "devopn"; 28 char devio[] = "devio"; 29 char devwait[] = "devwait"; 30 char devin[] = "devin"; 31 char devout[] = "devout"; 32 char devioc[] = "devioc"; 33 char devcls[] = "devcls"; 34 35 int (**spec_vnodeop_p)(); 36 struct vnodeopv_entry_desc spec_vnodeop_entries[] = { 37 { &vop_default_desc, vn_default_error }, 38 { &vop_lookup_desc, spec_lookup }, /* lookup */ 39 { &vop_create_desc, spec_create }, /* create */ 40 { &vop_mknod_desc, spec_mknod }, /* mknod */ 41 { &vop_open_desc, spec_open }, /* open */ 42 { &vop_close_desc, spec_close }, /* close */ 43 { &vop_access_desc, spec_access }, /* access */ 44 { &vop_getattr_desc, spec_getattr }, /* getattr */ 45 { &vop_setattr_desc, spec_setattr }, /* setattr */ 46 { &vop_read_desc, spec_read }, /* read */ 47 { &vop_write_desc, spec_write }, /* write */ 48 { &vop_lease_desc, spec_lease_check }, /* lease */ 49 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 50 { &vop_select_desc, spec_select }, /* select */ 51 { &vop_revoke_desc, spec_revoke }, /* revoke */ 52 { &vop_mmap_desc, spec_mmap }, /* mmap */ 53 { &vop_fsync_desc, spec_fsync }, /* fsync */ 54 { &vop_seek_desc, spec_seek }, /* seek */ 55 { &vop_remove_desc, spec_remove }, /* remove */ 56 { &vop_link_desc, spec_link }, /* link */ 57 { &vop_rename_desc, spec_rename }, /* rename */ 58 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 59 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 60 { &vop_symlink_desc, spec_symlink }, /* symlink */ 61 { &vop_readdir_desc, spec_readdir }, /* readdir */ 62 { &vop_readlink_desc, spec_readlink }, /* readlink */ 63 { &vop_abortop_desc, spec_abortop }, /* abortop */ 64 { &vop_inactive_desc, spec_inactive }, /* inactive */ 65 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */ 66 { &vop_lock_desc, spec_lock }, /* lock */ 67 { &vop_unlock_desc, spec_unlock }, /* unlock */ 68 { &vop_bmap_desc, spec_bmap }, /* bmap */ 69 { &vop_strategy_desc, spec_strategy }, /* strategy */ 70 { &vop_print_desc, spec_print }, /* print */ 71 { &vop_islocked_desc, spec_islocked }, /* islocked */ 72 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 73 { &vop_advlock_desc, spec_advlock }, /* advlock */ 74 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 75 { &vop_valloc_desc, spec_valloc }, /* valloc */ 76 { &vop_vfree_desc, spec_vfree }, /* vfree */ 77 { &vop_truncate_desc, spec_truncate }, /* truncate */ 78 { &vop_update_desc, spec_update }, /* update */ 79 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ 80 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 81 }; 82 struct vnodeopv_desc spec_vnodeop_opv_desc = 83 { &spec_vnodeop_p, spec_vnodeop_entries }; 84 85 /* 86 * Trivial lookup routine that always fails. 87 */ 88 int 89 spec_lookup(ap) 90 struct vop_lookup_args /* { 91 struct vnode *a_dvp; 92 struct vnode **a_vpp; 93 struct componentname *a_cnp; 94 } */ *ap; 95 { 96 97 *ap->a_vpp = NULL; 98 return (ENOTDIR); 99 } 100 101 /* 102 * Open a special file. 103 */ 104 /* ARGSUSED */ 105 spec_open(ap) 106 struct vop_open_args /* { 107 struct vnode *a_vp; 108 int a_mode; 109 struct ucred *a_cred; 110 struct proc *a_p; 111 } */ *ap; 112 { 113 struct proc *p = ap->a_p; 114 struct vnode *bvp, *vp = ap->a_vp; 115 dev_t bdev, dev = (dev_t)vp->v_rdev; 116 int maj = major(dev); 117 int error; 118 119 /* 120 * Don't allow open if fs is mounted -nodev. 121 */ 122 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) 123 return (ENXIO); 124 125 switch (vp->v_type) { 126 127 case VCHR: 128 if ((u_int)maj >= nchrdev) 129 return (ENXIO); 130 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 131 /* 132 * When running in very secure mode, do not allow 133 * opens for writing of any disk character devices. 134 */ 135 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK) 136 return (EPERM); 137 /* 138 * When running in secure mode, do not allow opens 139 * for writing of /dev/mem, /dev/kmem, or character 140 * devices whose corresponding block devices are 141 * currently mounted. 142 */ 143 if (securelevel >= 1) { 144 if ((bdev = chrtoblk(dev)) != NODEV && 145 vfinddev(bdev, VBLK, &bvp) && 146 bvp->v_usecount > 0 && 147 (error = vfs_mountedon(bvp))) 148 return (error); 149 if (iskmemdev(dev)) 150 return (EPERM); 151 } 152 } 153 if (cdevsw[maj].d_type == D_TTY) 154 vp->v_flag |= VISTTY; 155 VOP_UNLOCK(vp, 0, p); 156 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p); 157 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 158 return (error); 159 160 case VBLK: 161 if ((u_int)maj >= nblkdev) 162 return (ENXIO); 163 /* 164 * When running in very secure mode, do not allow 165 * opens for writing of any disk block devices. 166 */ 167 if (securelevel >= 2 && ap->a_cred != FSCRED && 168 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) 169 return (EPERM); 170 /* 171 * Do not allow opens of block devices that are 172 * currently mounted. 173 */ 174 if (error = vfs_mountedon(vp)) 175 return (error); 176 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p)); 177 } 178 return (0); 179 } 180 181 /* 182 * Vnode op for read 183 */ 184 /* ARGSUSED */ 185 spec_read(ap) 186 struct vop_read_args /* { 187 struct vnode *a_vp; 188 struct uio *a_uio; 189 int a_ioflag; 190 struct ucred *a_cred; 191 } */ *ap; 192 { 193 register struct vnode *vp = ap->a_vp; 194 register struct uio *uio = ap->a_uio; 195 struct proc *p = uio->uio_procp; 196 struct buf *bp; 197 daddr_t bn, nextbn; 198 long bsize, bscale; 199 struct partinfo dpart; 200 int n, on, majordev, (*ioctl)(); 201 int error = 0; 202 dev_t dev; 203 204 #ifdef DIAGNOSTIC 205 if (uio->uio_rw != UIO_READ) 206 panic("spec_read mode"); 207 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 208 panic("spec_read proc"); 209 #endif 210 if (uio->uio_resid == 0) 211 return (0); 212 213 switch (vp->v_type) { 214 215 case VCHR: 216 VOP_UNLOCK(vp, 0, p); 217 error = (*cdevsw[major(vp->v_rdev)].d_read) 218 (vp->v_rdev, uio, ap->a_ioflag); 219 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 220 return (error); 221 222 case VBLK: 223 if (uio->uio_offset < 0) 224 return (EINVAL); 225 bsize = BLKDEV_IOSIZE; 226 dev = vp->v_rdev; 227 if ((majordev = major(dev)) < nblkdev && 228 (ioctl = bdevsw[majordev].d_ioctl) != NULL && 229 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && 230 dpart.part->p_fstype == FS_BSDFFS && 231 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 232 bsize = dpart.part->p_frag * dpart.part->p_fsize; 233 bscale = bsize / DEV_BSIZE; 234 do { 235 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); 236 on = uio->uio_offset % bsize; 237 n = min((unsigned)(bsize - on), uio->uio_resid); 238 if (vp->v_lastr + bscale == bn) { 239 nextbn = bn + bscale; 240 error = breadn(vp, bn, (int)bsize, &nextbn, 241 (int *)&bsize, 1, NOCRED, &bp); 242 } else 243 error = bread(vp, bn, (int)bsize, NOCRED, &bp); 244 vp->v_lastr = bn; 245 n = min(n, bsize - bp->b_resid); 246 if (error) { 247 brelse(bp); 248 return (error); 249 } 250 error = uiomove((char *)bp->b_data + on, n, uio); 251 if (n + on == bsize) 252 bp->b_flags |= B_AGE; 253 brelse(bp); 254 } while (error == 0 && uio->uio_resid > 0 && n != 0); 255 return (error); 256 257 default: 258 panic("spec_read type"); 259 } 260 /* NOTREACHED */ 261 } 262 263 /* 264 * Vnode op for write 265 */ 266 /* ARGSUSED */ 267 spec_write(ap) 268 struct vop_write_args /* { 269 struct vnode *a_vp; 270 struct uio *a_uio; 271 int a_ioflag; 272 struct ucred *a_cred; 273 } */ *ap; 274 { 275 register struct vnode *vp = ap->a_vp; 276 register struct uio *uio = ap->a_uio; 277 struct proc *p = uio->uio_procp; 278 struct buf *bp; 279 daddr_t bn; 280 int bsize, blkmask; 281 struct partinfo dpart; 282 register int n, on; 283 int error = 0; 284 285 #ifdef DIAGNOSTIC 286 if (uio->uio_rw != UIO_WRITE) 287 panic("spec_write mode"); 288 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 289 panic("spec_write proc"); 290 #endif 291 292 switch (vp->v_type) { 293 294 case VCHR: 295 VOP_UNLOCK(vp, 0, p); 296 error = (*cdevsw[major(vp->v_rdev)].d_write) 297 (vp->v_rdev, uio, ap->a_ioflag); 298 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 299 return (error); 300 301 case VBLK: 302 if (uio->uio_resid == 0) 303 return (0); 304 if (uio->uio_offset < 0) 305 return (EINVAL); 306 bsize = BLKDEV_IOSIZE; 307 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, 308 (caddr_t)&dpart, FREAD, p) == 0) { 309 if (dpart.part->p_fstype == FS_BSDFFS && 310 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) 311 bsize = dpart.part->p_frag * 312 dpart.part->p_fsize; 313 } 314 blkmask = (bsize / DEV_BSIZE) - 1; 315 do { 316 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; 317 on = uio->uio_offset % bsize; 318 n = min((unsigned)(bsize - on), uio->uio_resid); 319 if (n == bsize) 320 bp = getblk(vp, bn, bsize, 0, 0); 321 else 322 error = bread(vp, bn, bsize, NOCRED, &bp); 323 n = min(n, bsize - bp->b_resid); 324 if (error) { 325 brelse(bp); 326 return (error); 327 } 328 error = uiomove((char *)bp->b_data + on, n, uio); 329 if (n + on == bsize) { 330 bp->b_flags |= B_AGE; 331 bawrite(bp); 332 } else 333 bdwrite(bp); 334 } while (error == 0 && uio->uio_resid > 0 && n != 0); 335 return (error); 336 337 default: 338 panic("spec_write type"); 339 } 340 /* NOTREACHED */ 341 } 342 343 /* 344 * Device ioctl operation. 345 */ 346 /* ARGSUSED */ 347 spec_ioctl(ap) 348 struct vop_ioctl_args /* { 349 struct vnode *a_vp; 350 int a_command; 351 caddr_t a_data; 352 int a_fflag; 353 struct ucred *a_cred; 354 struct proc *a_p; 355 } */ *ap; 356 { 357 dev_t dev = ap->a_vp->v_rdev; 358 359 switch (ap->a_vp->v_type) { 360 361 case VCHR: 362 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 363 ap->a_fflag, ap->a_p)); 364 365 case VBLK: 366 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) 367 if (bdevsw[major(dev)].d_type == D_TAPE) 368 return (0); 369 else 370 return (1); 371 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, 372 ap->a_fflag, ap->a_p)); 373 374 default: 375 panic("spec_ioctl"); 376 /* NOTREACHED */ 377 } 378 } 379 380 /* ARGSUSED */ 381 spec_select(ap) 382 struct vop_select_args /* { 383 struct vnode *a_vp; 384 int a_which; 385 int a_fflags; 386 struct ucred *a_cred; 387 struct proc *a_p; 388 } */ *ap; 389 { 390 register dev_t dev; 391 392 switch (ap->a_vp->v_type) { 393 394 default: 395 return (1); /* XXX */ 396 397 case VCHR: 398 dev = ap->a_vp->v_rdev; 399 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p); 400 } 401 } 402 /* 403 * Synch buffers associated with a block device 404 */ 405 /* ARGSUSED */ 406 int 407 spec_fsync(ap) 408 struct vop_fsync_args /* { 409 struct vnode *a_vp; 410 struct ucred *a_cred; 411 int a_waitfor; 412 struct proc *a_p; 413 } */ *ap; 414 { 415 register struct vnode *vp = ap->a_vp; 416 register struct buf *bp; 417 struct buf *nbp; 418 int s; 419 420 if (vp->v_type == VCHR) 421 return (0); 422 /* 423 * Flush all dirty buffers associated with a block device. 424 */ 425 loop: 426 s = splbio(); 427 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 428 nbp = bp->b_vnbufs.le_next; 429 if ((bp->b_flags & B_BUSY)) 430 continue; 431 if ((bp->b_flags & B_DELWRI) == 0) 432 panic("spec_fsync: not dirty"); 433 bremfree(bp); 434 bp->b_flags |= B_BUSY; 435 splx(s); 436 bawrite(bp); 437 goto loop; 438 } 439 if (ap->a_waitfor == MNT_WAIT) { 440 while (vp->v_numoutput) { 441 vp->v_flag |= VBWAIT; 442 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 443 } 444 #ifdef DIAGNOSTIC 445 if (vp->v_dirtyblkhd.lh_first) { 446 vprint("spec_fsync: dirty", vp); 447 goto loop; 448 } 449 #endif 450 } 451 splx(s); 452 return (0); 453 } 454 455 /* 456 * Just call the device strategy routine 457 */ 458 spec_strategy(ap) 459 struct vop_strategy_args /* { 460 struct buf *a_bp; 461 } */ *ap; 462 { 463 464 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); 465 return (0); 466 } 467 468 /* 469 * This is a noop, simply returning what one has been given. 470 */ 471 spec_bmap(ap) 472 struct vop_bmap_args /* { 473 struct vnode *a_vp; 474 daddr_t a_bn; 475 struct vnode **a_vpp; 476 daddr_t *a_bnp; 477 int *a_runp; 478 } */ *ap; 479 { 480 481 if (ap->a_vpp != NULL) 482 *ap->a_vpp = ap->a_vp; 483 if (ap->a_bnp != NULL) 484 *ap->a_bnp = ap->a_bn; 485 if (ap->a_runp != NULL) 486 *ap->a_runp = 0; 487 return (0); 488 } 489 490 /* 491 * Device close routine 492 */ 493 /* ARGSUSED */ 494 spec_close(ap) 495 struct vop_close_args /* { 496 struct vnode *a_vp; 497 int a_fflag; 498 struct ucred *a_cred; 499 struct proc *a_p; 500 } */ *ap; 501 { 502 register struct vnode *vp = ap->a_vp; 503 dev_t dev = vp->v_rdev; 504 int (*devclose) __P((dev_t, int, int, struct proc *)); 505 int mode, error; 506 507 switch (vp->v_type) { 508 509 case VCHR: 510 /* 511 * Hack: a tty device that is a controlling terminal 512 * has a reference from the session structure. 513 * We cannot easily tell that a character device is 514 * a controlling terminal, unless it is the closing 515 * process' controlling terminal. In that case, 516 * if the reference count is 2 (this last descriptor 517 * plus the session), release the reference from the session. 518 */ 519 if (vcount(vp) == 2 && ap->a_p && 520 vp == ap->a_p->p_session->s_ttyvp) { 521 vrele(vp); 522 ap->a_p->p_session->s_ttyvp = NULL; 523 } 524 /* 525 * If the vnode is locked, then we are in the midst 526 * of forcably closing the device, otherwise we only 527 * close on last reference. 528 */ 529 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 530 return (0); 531 devclose = cdevsw[major(dev)].d_close; 532 mode = S_IFCHR; 533 break; 534 535 case VBLK: 536 /* 537 * On last close of a block device (that isn't mounted) 538 * we must invalidate any in core blocks, so that 539 * we can, for instance, change floppy disks. 540 */ 541 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) 542 return (error); 543 /* 544 * We do not want to really close the device if it 545 * is still in use unless we are trying to close it 546 * forcibly. Since every use (buffer, vnode, swap, cmap) 547 * holds a reference to the vnode, and because we mark 548 * any other vnodes that alias this device, when the 549 * sum of the reference counts on all the aliased 550 * vnodes descends to one, we are on last close. 551 */ 552 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) 553 return (0); 554 devclose = bdevsw[major(dev)].d_close; 555 mode = S_IFBLK; 556 break; 557 558 default: 559 panic("spec_close: not special"); 560 } 561 562 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); 563 } 564 565 /* 566 * Print out the contents of a special device vnode. 567 */ 568 spec_print(ap) 569 struct vop_print_args /* { 570 struct vnode *a_vp; 571 } */ *ap; 572 { 573 574 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), 575 minor(ap->a_vp->v_rdev)); 576 } 577 578 /* 579 * Return POSIX pathconf information applicable to special devices. 580 */ 581 spec_pathconf(ap) 582 struct vop_pathconf_args /* { 583 struct vnode *a_vp; 584 int a_name; 585 int *a_retval; 586 } */ *ap; 587 { 588 589 switch (ap->a_name) { 590 case _PC_LINK_MAX: 591 *ap->a_retval = LINK_MAX; 592 return (0); 593 case _PC_MAX_CANON: 594 *ap->a_retval = MAX_CANON; 595 return (0); 596 case _PC_MAX_INPUT: 597 *ap->a_retval = MAX_INPUT; 598 return (0); 599 case _PC_PIPE_BUF: 600 *ap->a_retval = PIPE_BUF; 601 return (0); 602 case _PC_CHOWN_RESTRICTED: 603 *ap->a_retval = 1; 604 return (0); 605 case _PC_VDISABLE: 606 *ap->a_retval = _POSIX_VDISABLE; 607 return (0); 608 default: 609 return (EINVAL); 610 } 611 /* NOTREACHED */ 612 } 613 614 /* 615 * Special device advisory byte-level locks. 616 */ 617 /* ARGSUSED */ 618 spec_advlock(ap) 619 struct vop_advlock_args /* { 620 struct vnode *a_vp; 621 caddr_t a_id; 622 int a_op; 623 struct flock *a_fl; 624 int a_flags; 625 } */ *ap; 626 { 627 628 return (EOPNOTSUPP); 629 } 630 631 /* 632 * Special device failed operation 633 */ 634 spec_ebadf() 635 { 636 637 return (EBADF); 638 } 639 640 /* 641 * Special device bad operation 642 */ 643 spec_badop() 644 { 645 646 panic("spec_badop called"); 647 /* NOTREACHED */ 648 } 649