1 /*
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
7 * @(#)spec_vnops.c 8.14 (Berkeley) 05/21/95
8 */
9
10 #include <sys/param.h>
11 #include <sys/proc.h>
12 #include <sys/systm.h>
13 #include <sys/kernel.h>
14 #include <sys/conf.h>
15 #include <sys/buf.h>
16 #include <sys/mount.h>
17 #include <sys/namei.h>
18 #include <sys/vnode.h>
19 #include <sys/stat.h>
20 #include <sys/errno.h>
21 #include <sys/ioctl.h>
22 #include <sys/file.h>
23 #include <sys/disklabel.h>
24 #include <miscfs/specfs/specdev.h>
25
/*
 * Symbolic sleep message strings for devices.
 * NOTE(review): no users are visible in this file; presumably these
 * are passed as wait-channel messages by device drivers elsewhere.
 */
char	devopn[] = "devopn";
char	devio[] = "devio";
char	devwait[] = "devwait";
char	devin[] = "devin";
char	devout[] = "devout";
char	devioc[] = "devioc";
char	devcls[] = "devcls";
34
/*
 * Vnode operations vector for special (device) files.
 * spec_vnodeop_p is the operations vector pointer paired with the
 * entry table below by spec_vnodeop_opv_desc; any operation not
 * listed falls through to vn_default_error.
 */
int (**spec_vnodeop_p)();
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_lease_desc, spec_lease_check },		/* lease */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_select_desc, spec_select },		/* select */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_blkatoff_desc, spec_blkatoff },		/* blkatoff */
	{ &vop_valloc_desc, spec_valloc },		/* valloc */
	{ &vop_vfree_desc, spec_vfree },		/* vfree */
	{ &vop_truncate_desc, spec_truncate },		/* truncate */
	{ &vop_update_desc, spec_update },		/* update */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };
84
85 /*
86 * Trivial lookup routine that always fails.
87 */
88 int
spec_lookup(ap)89 spec_lookup(ap)
90 struct vop_lookup_args /* {
91 struct vnode *a_dvp;
92 struct vnode **a_vpp;
93 struct componentname *a_cnp;
94 } */ *ap;
95 {
96
97 *ap->a_vpp = NULL;
98 return (ENOTDIR);
99 }
100
101 /*
102 * Open a special file.
103 */
/* ARGSUSED */
spec_open(ap)
	struct vop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct proc *p = ap->a_p;
	struct vnode *bvp, *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		/* Fail if there is no configured character driver for this major. */
		if ((u_int)maj >= nchrdev)
			return (ENXIO);
		/* Securelevel write checks do not apply to kernel (FSCRED) opens. */
		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK)
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV &&
				    vfinddev(bdev, VBLK, &bvp) &&
				    bvp->v_usecount > 0 &&
				    (error = vfs_mountedon(bvp)))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if (cdevsw[maj].d_type == D_TTY)
			vp->v_flag |= VISTTY;
		/*
		 * Drop the vnode lock across the driver open (which may
		 * block, e.g. a tty waiting for carrier) and reacquire
		 * it before returning.
		 */
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		/* Fail if there is no configured block driver for this major. */
		if ((u_int)maj >= nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && ap->a_cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if (error = vfs_mountedon(vp))
			return (error);
		return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p));
	}
	/* Other vnode types (e.g. aliased opens) succeed trivially. */
	return (0);
}
180
181 /*
182 * Vnode op for read
183 */
/* ARGSUSED */
spec_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
 	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn;
	long bsize, bscale;
	struct partinfo dpart;
	int n, on, majordev, (*ioctl)();
	int error = 0;
	dev_t dev;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Raw device: hand the transfer to the character driver,
		 * dropping the vnode lock across the (possibly blocking)
		 * driver call.
		 */
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[major(vp->v_rdev)].d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Buffered read through the block cache.  Default to
		 * BLKDEV_IOSIZE transfers, but if the driver reports an
		 * FFS partition via DIOCGPART, use that filesystem's
		 * block size (frag count * frag size) instead.
		 */
		bsize = BLKDEV_IOSIZE;
		dev = vp->v_rdev;
		if ((majordev = major(dev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
		    dpart.part->p_fstype == FS_BSDFFS &&
		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
			bsize = dpart.part->p_frag * dpart.part->p_fsize;
		bscale = bsize / DEV_BSIZE;
		do {
			/* Block-aligned start and byte offset within the block. */
			bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			/*
			 * If this block follows the previously read block
			 * (v_lastr) sequentially, start a one-block
			 * read-ahead.
			 */
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				error = breadn(vp, bn, (int)bsize, &nextbn,
					(int *)&bsize, 1, NOCRED, &bp);
			} else
				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
			vp->v_lastr = bn;
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			/*
			 * A fully consumed block is unlikely to be needed
			 * again soon; age it for early reuse.
			 */
			if (n + on == bsize)
				bp->b_flags |= B_AGE;
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}
262
263 /*
264 * Vnode op for write
265 */
/* ARGSUSED */
spec_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn;
	int bsize, blkmask;
	struct partinfo dpart;
	register int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Raw device: hand the transfer to the character driver,
		 * dropping the vnode lock across the (possibly blocking)
		 * driver call.
		 */
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[major(vp->v_rdev)].d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Buffered write through the block cache.  As in
		 * spec_read(), prefer the FFS block size reported by
		 * DIOCGPART over the BLKDEV_IOSIZE default.
		 */
		bsize = BLKDEV_IOSIZE;
		if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
		    (caddr_t)&dpart, FREAD, p) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		blkmask = (bsize / DEV_BSIZE) - 1;
		do {
			/* Block-aligned start and byte offset within the block. */
			bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			/*
			 * When overwriting a whole block there is no need
			 * to read the old contents first; otherwise read
			 * the block so the unwritten portion is preserved.
			 */
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED, &bp);
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			/*
			 * Completely filled blocks are written
			 * asynchronously and aged; partial blocks are
			 * delayed-written so later writes may coalesce.
			 */
			if (n + on == bsize) {
				bp->b_flags |= B_AGE;
				bawrite(bp);
			} else
				bdwrite(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}
342
343 /*
344 * Device ioctl operation.
345 */
346 /* ARGSUSED */
347 spec_ioctl(ap)
348 struct vop_ioctl_args /* {
349 struct vnode *a_vp;
350 int a_command;
351 caddr_t a_data;
352 int a_fflag;
353 struct ucred *a_cred;
354 struct proc *a_p;
355 } */ *ap;
356 {
357 dev_t dev = ap->a_vp->v_rdev;
358
359 switch (ap->a_vp->v_type) {
360
361 case VCHR:
362 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
363 ap->a_fflag, ap->a_p));
364
365 case VBLK:
366 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE)
367 if (bdevsw[major(dev)].d_type == D_TAPE)
368 return (0);
369 else
370 return (1);
371 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
372 ap->a_fflag, ap->a_p));
373
374 default:
375 panic("spec_ioctl");
376 /* NOTREACHED */
377 }
378 }
379
380 /* ARGSUSED */
381 spec_select(ap)
382 struct vop_select_args /* {
383 struct vnode *a_vp;
384 int a_which;
385 int a_fflags;
386 struct ucred *a_cred;
387 struct proc *a_p;
388 } */ *ap;
389 {
390 register dev_t dev;
391
392 switch (ap->a_vp->v_type) {
393
394 default:
395 return (1); /* XXX */
396
397 case VCHR:
398 dev = ap->a_vp->v_rdev;
399 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p);
400 }
401 }
402 /*
403 * Synch buffers associated with a block device
404 */
/* ARGSUSED */
int
spec_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int  a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct buf *bp;
	struct buf *nbp;
	int s;

	/* Character devices are unbuffered; nothing to flush. */
	if (vp->v_type == VCHR)
		return (0);
	/*
	 * Flush all dirty buffers associated with a block device.
	 * The dirty list is walked at splbio; each bawrite() below can
	 * change the list, so the scan restarts from the top after
	 * every write it issues.
	 */
loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		/* Skip buffers already claimed by another writer. */
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("spec_fsync: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		bawrite(bp);
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		/* Wait for every write started above to complete. */
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
		}
#ifdef DIAGNOSTIC
		/* New dirty buffers appearing here means a concurrent writer; retry. */
		if (vp->v_dirtyblkhd.lh_first) {
			vprint("spec_fsync: dirty", vp);
			goto loop;
		}
#endif
	}
	splx(s);
	return (0);
}
454
455 int
spec_inactive(ap)456 spec_inactive(ap)
457 struct vop_inactive_args /* {
458 struct vnode *a_vp;
459 struct proc *a_p;
460 } */ *ap;
461 {
462
463 VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
464 return (0);
465 }
466
467 /*
468 * Just call the device strategy routine
469 */
470 spec_strategy(ap)
471 struct vop_strategy_args /* {
472 struct buf *a_bp;
473 } */ *ap;
474 {
475
476 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
477 return (0);
478 }
479
480 /*
481 * This is a noop, simply returning what one has been given.
482 */
483 spec_bmap(ap)
484 struct vop_bmap_args /* {
485 struct vnode *a_vp;
486 daddr_t a_bn;
487 struct vnode **a_vpp;
488 daddr_t *a_bnp;
489 int *a_runp;
490 } */ *ap;
491 {
492
493 if (ap->a_vpp != NULL)
494 *ap->a_vpp = ap->a_vp;
495 if (ap->a_bnp != NULL)
496 *ap->a_bnp = ap->a_bn;
497 if (ap->a_runp != NULL)
498 *ap->a_runp = 0;
499 return (0);
500 }
501
502 /*
503 * Device close routine
504 */
/* ARGSUSED */
spec_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose) __P((dev_t, int, int, struct proc *));
	int mode, error;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal. In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && ap->a_p &&
		    vp == ap->a_p->p_session->s_ttyvp) {
			vrele(vp);
			ap->a_p->p_session->s_ttyvp = NULL;
		}
		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcably closing the device, otherwise we only
		 * close on last reference.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	/* Dispatch the close to the driver routine selected above. */
	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
}
576
577 /*
578 * Print out the contents of a special device vnode.
579 */
580 spec_print(ap)
581 struct vop_print_args /* {
582 struct vnode *a_vp;
583 } */ *ap;
584 {
585
586 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
587 minor(ap->a_vp->v_rdev));
588 }
589
590 /*
591 * Return POSIX pathconf information applicable to special devices.
592 */
593 spec_pathconf(ap)
594 struct vop_pathconf_args /* {
595 struct vnode *a_vp;
596 int a_name;
597 int *a_retval;
598 } */ *ap;
599 {
600
601 switch (ap->a_name) {
602 case _PC_LINK_MAX:
603 *ap->a_retval = LINK_MAX;
604 return (0);
605 case _PC_MAX_CANON:
606 *ap->a_retval = MAX_CANON;
607 return (0);
608 case _PC_MAX_INPUT:
609 *ap->a_retval = MAX_INPUT;
610 return (0);
611 case _PC_PIPE_BUF:
612 *ap->a_retval = PIPE_BUF;
613 return (0);
614 case _PC_CHOWN_RESTRICTED:
615 *ap->a_retval = 1;
616 return (0);
617 case _PC_VDISABLE:
618 *ap->a_retval = _POSIX_VDISABLE;
619 return (0);
620 default:
621 return (EINVAL);
622 }
623 /* NOTREACHED */
624 }
625
626 /*
627 * Special device advisory byte-level locks.
628 */
/* ARGSUSED */
spec_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{

	/* Advisory byte-range locking is not supported on device special files. */
	return (EOPNOTSUPP);
}
642
643 /*
644 * Special device failed operation
645 */
spec_ebadf()
{

	/* Generic stub: any vnode operation routed here fails with EBADF. */
	return (EBADF);
}
651
652 /*
653 * Special device bad operation
654 */
spec_badop()
{

	/* Reaching this stub indicates a vnode op that must never be called
	 * on a special file; treat it as a fatal kernel error. */
	panic("spec_badop called");
	/* NOTREACHED */
}
661