xref: /original-bsd/sys/dev/vn.c (revision 6d5a9f9c)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: vn.c 1.13 94/04/02$
13  *
14  *	@(#)vn.c	8.9 (Berkeley) 05/14/95
15  */
16 
17 /*
18  * Vnode disk driver.
19  *
20  * Block/character interface to a vnode.  Allows one to treat a file
21  * as a disk (e.g. build a filesystem in it, mount it, etc.).
22  *
23  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
24  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
25  * local buffer cache.
26  *
27  * NOTE 2: There is a security issue involved with this driver.
28  * Once mounted all access to the contents of the "mapped" file via
29  * the special file is controlled by the permissions on the special
30  * file, the protection of the mapped file is ignored (effectively,
31  * by using root credentials in all transactions).
32  *
33  * NOTE 3: Doesn't interact with leases, should it?
34  */
35 #include "vn.h"
36 #if NVN > 0
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/namei.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/dkstat.h>
44 #include <sys/buf.h>
45 #include <sys/malloc.h>
46 #include <sys/ioctl.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/file.h>
50 #include <sys/uio.h>
51 
52 #include <miscfs/specfs/specdev.h>
53 
54 #include <dev/vnioctl.h>
55 
#ifdef DEBUG
int dovncluster = 1;		/* allow BMAP read-ahead clustering of pieces */
int vndebug = 0x00;		/* debug output mask; see VDB_* below */
#define VDB_FOLLOW	0x01	/* trace entry into driver routines */
#define VDB_INIT	0x02	/* trace configuration (ioctl SET/CLR) */
#define VDB_IO		0x04	/* trace individual I/O operations */
#endif

/* overload b_resid as the disksort sort key while a buf is queued */
#define b_cylin	b_resid

#define	vnunit(x)	((minor(x) >> 3) & 0x7)	/* for consistency */

/* allocate/free a struct buf used for one component transfer */
#define	getvnbuf()	\
	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))
#define putvnbuf(bp)	\
	free((caddr_t)(bp), M_DEVBUF)

/* per-unit driver state */
struct vn_softc {
	int		 sc_flags;	/* flags */
	size_t		 sc_size;	/* size of vn (DEV_BSIZE blocks) */
	struct vnode	*sc_vp;		/* vnode */
	struct ucred	*sc_cred;	/* credentials */
	int		 sc_maxactive;	/* max # of active requests */
	struct buf	 sc_tab;	/* transfer queue */
};

/* sc_flags */
#define	VNF_ALIVE	0x01	/* unit exists */
#define VNF_INITED	0x02	/* unit configured via VNIOCSET */

#if 0	/* if you need static allocation */
struct vn_softc vn_softc[NVN];
int numvnd = NVN;
#else
struct vn_softc *vn_softc;	/* softc array, allocated by vnattach() */
int numvnd;			/* number of configured units */
#endif
93 
94 void
95 vnattach(num)
96 	int num;
97 {
98 	char *mem;
99 	register u_long size;
100 
101 	if (num <= 0)
102 		return;
103 	size = num * sizeof(struct vn_softc);
104 	mem = malloc(size, M_DEVBUF, M_NOWAIT);
105 	if (mem == NULL) {
106 		printf("WARNING: no memory for vnode disks\n");
107 		return;
108 	}
109 	bzero(mem, size);
110 	vn_softc = (struct vn_softc *)mem;
111 	numvnd = num;
112 }
113 
114 int
115 vnopen(dev, flags, mode, p)
116 	dev_t dev;
117 	int flags, mode;
118 	struct proc *p;
119 {
120 	int unit = vnunit(dev);
121 
122 #ifdef DEBUG
123 	if (vndebug & VDB_FOLLOW)
124 		printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p);
125 #endif
126 	if (unit >= numvnd)
127 		return(ENXIO);
128 	return(0);
129 }
130 
/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 * Note that this driver can only be used for swapping over NFS on the hp
 * since nfs_strategy on the vax cannot handle u-areas and page tables.
 */
void
vnstrategy(bp)
	register struct buf *bp;
{
	int unit = vnunit(bp->b_dev);
	register struct vn_softc *vn = &vn_softc[unit];
	register struct buf *nbp;
	register int bn, bsize, resid;
	register caddr_t addr;
	int sz, flags, error;
	extern void vniodone();

#ifdef DEBUG
	if (vndebug & VDB_FOLLOW)
		printf("vnstrategy(%x): unit %d\n", bp, unit);
#endif
	/* Reject I/O to units that have not been configured. */
	if ((vn->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		biodone(bp);
		return;
	}
	/* Bounds-check the transfer against the size of the mapped file. */
	bn = bp->b_blkno;
	sz = howmany(bp->b_bcount, DEV_BSIZE);
	bp->b_resid = bp->b_bcount;
	if (bn < 0 || bn + sz > vn->sc_size) {
		/* a transfer starting exactly at EOF completes with no error */
		if (bn != vn->sc_size) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
		}
		biodone(bp);
		return;
	}
	bn = dbtob(bn);		/* bn is now a byte offset into the file */
	bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = bp->b_flags | B_CALL;
	/*
	 * Split the request at filesystem block boundaries, issuing
	 * one component buf per contiguous device extent.
	 */
	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vnode *vp;
		daddr_t nbn;
		int off, s, nra;

		nra = 0;
		/* translate file block to device block; nra = contiguous read-ahead */
		error = VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn, &nra);
		if (error == 0 && (long)nbn == -1)
			error = EIO;	/* hole in the file */
#ifdef DEBUG
		if (!dovncluster)
			nra = 0;
#endif

		/*
		 * Size this piece: to the end of the current fs block if we
		 * start mid-block, otherwise as many contiguous blocks as
		 * BMAP reported; in either case capped by what remains.
		 */
		if (off = bn % bsize)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vndebug & VDB_IO)
			printf("vnstrategy: vp %x/%x bn %x/%x sz %x\n",
			       vn->sc_vp, vp, bn, nbn, sz);
#endif

		/* Build the component buffer aimed at the underlying device. */
		nbp = getvnbuf();
		nbp->b_flags = flags;
		nbp->b_bcount = sz;
		nbp->b_bufsize = bp->b_bufsize;
		nbp->b_error = 0;
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			nbp->b_dev = vp->v_rdev;
		else
			nbp->b_dev = NODEV;
		nbp->b_data = addr;
		nbp->b_blkno = nbn + btodb(off);
		nbp->b_proc = bp->b_proc;
		nbp->b_iodone = vniodone;
		nbp->b_vp = vp;
		nbp->b_pfcent = (int) bp;	/* XXX parent buf pointer, read back in vniodone */
		nbp->b_rcred = vn->sc_cred;	/* XXX crdup? */
		nbp->b_wcred = vn->sc_cred;	/* XXX crdup? */
		nbp->b_dirtyoff = bp->b_dirtyoff;
		nbp->b_dirtyend = bp->b_dirtyend;
		nbp->b_validoff = bp->b_validoff;
		nbp->b_validend = bp->b_validend;
		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we deal with this after the nbp allocation.
		 * This ensures that we properly clean up any operations
		 * that we have already fired off.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			nbp->b_error = error;
			nbp->b_flags |= B_ERROR;
			bp->b_resid -= (resid - sz);
			biodone(nbp);
			return;
		}
		/*
		 * Just sort by block number
		 */
		nbp->b_cylin = nbp->b_blkno;
		s = splbio();
		disksort(&vn->sc_tab, nbp);
		/* feed the queue, respecting the per-unit concurrency cap */
		if (vn->sc_tab.b_active < vn->sc_maxactive) {
			vn->sc_tab.b_active++;
			vnstart(vn);
		}
		splx(s);
		bn += sz;
		addr += sz;
	}
}
251 
252 /*
253  * Feed requests sequentially.
254  * We do it this way to keep from flooding NFS servers if we are connected
255  * to an NFS file.  This places the burden on the client rather than the
256  * server.
257  */
258 vnstart(vn)
259 	register struct vn_softc *vn;
260 {
261 	register struct buf *bp;
262 
263 	/*
264 	 * Dequeue now since lower level strategy routine might
265 	 * queue using same links
266 	 */
267 	bp = vn->sc_tab.b_actf;
268 	vn->sc_tab.b_actf = bp->b_actf;
269 #ifdef DEBUG
270 	if (vndebug & VDB_IO)
271 		printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
272 		       vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
273 		       bp->b_bcount);
274 #endif
275 	if ((bp->b_flags & B_READ) == 0)
276 		bp->b_vp->v_numoutput++;
277 	VOP_STRATEGY(bp);
278 }
279 
/*
 * Completion routine for a component buffer (installed as b_iodone by
 * vnstrategy).  Accounts the finished piece against the parent buf,
 * completes the parent when all pieces are done, and keeps the
 * per-unit queue moving.
 */
void
vniodone(bp)
	register struct buf *bp;
{
	/* b_pfcent carries the parent buf pointer, stashed by vnstrategy */
	register struct buf *pbp = (struct buf *)bp->b_pfcent;	/* XXX */
	register struct vn_softc *vn = &vn_softc[vnunit(pbp->b_dev)];
	int s;

	s = splbio();
#ifdef DEBUG
	if (vndebug & VDB_IO)
		printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
		       vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
		       bp->b_bcount);
#endif
	if (bp->b_error) {
#ifdef DEBUG
		if (vndebug & VDB_IO)
			printf("vniodone: bp %x error %d\n", bp, bp->b_error);
#endif
		pbp->b_flags |= B_ERROR;
		/*
		 * NOTE(review): biowait() on the already-completed child
		 * appears to be used only to extract its error code —
		 * confirm against the buffer cache implementation.
		 */
		pbp->b_error = biowait(bp);
	}
	/* credit this piece; parent is finished when b_resid reaches 0 */
	pbp->b_resid -= bp->b_bcount;
	putvnbuf(bp);
	if (pbp->b_resid == 0) {
#ifdef DEBUG
		if (vndebug & VDB_IO)
			printf("vniodone: pbp %x iodone\n", pbp);
#endif
		biodone(pbp);
	}
	/* start the next queued request, or retire this active slot */
	if (vn->sc_tab.b_actf)
		vnstart(vn);
	else
		vn->sc_tab.b_active--;
	splx(s);
}
318 
319 vnread(dev, uio, flags, p)
320 	dev_t dev;
321 	struct uio *uio;
322 	int flags;
323 	struct proc *p;
324 {
325 
326 #ifdef DEBUG
327 	if (vndebug & VDB_FOLLOW)
328 		printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p);
329 #endif
330 	return(physio(vnstrategy, NULL, dev, B_READ, minphys, uio));
331 }
332 
333 vnwrite(dev, uio, flags, p)
334 	dev_t dev;
335 	struct uio *uio;
336 	int flags;
337 	struct proc *p;
338 {
339 
340 #ifdef DEBUG
341 	if (vndebug & VDB_FOLLOW)
342 		printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p);
343 #endif
344 	return(physio(vnstrategy, NULL, dev, B_WRITE, minphys, uio));
345 }
346 
/*
 * Ioctl: configure (VNIOCSET) or deconfigure (VNIOCCLR) a unit.
 * Restricted to the superuser.
 */
/* ARGSUSED */
vnioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = vnunit(dev);
	register struct vn_softc *vn;
	struct vn_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error;

#ifdef DEBUG
	if (vndebug & VDB_FOLLOW)
		printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n",
		       dev, cmd, data, flag, p, unit);
#endif
	/* all vn configuration requires superuser privilege */
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vn = &vn_softc[unit];
	vio = (struct vn_ioctl *)data;
	switch (cmd) {

	case VNIOCSET:
		if (vn->sc_flags & VNF_INITED)
			return(EBUSY);
		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
		if (error = vn_open(&nd, FREAD|FWRITE, 0))
			return(error);
		if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) {
			VOP_UNLOCK(nd.ni_vp, 0, p);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			return(error);
		}
		VOP_UNLOCK(nd.ni_vp, 0, p);
		vn->sc_vp = nd.ni_vp;
		vn->sc_size = btodb(vattr.va_size);	/* note truncation */
		if (error = vnsetcred(vn, p->p_ucred)) {
			/*
			 * NOTE(review): sc_vp/sc_size are left stale on
			 * this path; appears harmless since VNF_INITED was
			 * never set and a later SET overwrites them —
			 * confirm.
			 */
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			return(error);
		}
		vnthrottle(vn, vn->sc_vp);
		/* report the usable size in bytes back to the caller */
		vio->vn_size = dbtob(vn->sc_size);
		vn->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vndebug & VDB_INIT)
			printf("vnioctl: SET vp %x size %x\n",
			       vn->sc_vp, vn->sc_size);
#endif
		break;

	case VNIOCCLR:
		if ((vn->sc_flags & VNF_INITED) == 0)
			return(ENXIO);
		vnclear(vn);
#ifdef DEBUG
		if (vndebug & VDB_INIT)
			printf("vnioctl: CLRed\n");
#endif
		break;

	default:
		return(ENOTTY);
	}
	return(0);
}
426 
427 /*
428  * Duplicate the current processes' credentials.  Since we are called only
429  * as the result of a SET ioctl and only root can do that, any future access
430  * to this "disk" is essentially as root.  Note that credentials may change
431  * if some other uid can write directly to the mapped file (NFS).
432  */
433 vnsetcred(vn, cred)
434 	register struct vn_softc *vn;
435 	struct ucred *cred;
436 {
437 	struct uio auio;
438 	struct iovec aiov;
439 	char *tmpbuf;
440 	int error;
441 
442 	vn->sc_cred = crdup(cred);
443 	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
444 
445 	/* XXX: Horrible kludge to establish credentials for NFS */
446 	aiov.iov_base = tmpbuf;
447 	aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size));
448 	auio.uio_iov = &aiov;
449 	auio.uio_iovcnt = 1;
450 	auio.uio_offset = 0;
451 	auio.uio_rw = UIO_READ;
452 	auio.uio_segflg = UIO_SYSSPACE;
453 	auio.uio_resid = aiov.iov_len;
454 	error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
455 
456 	free(tmpbuf, M_TEMP);
457 	return (error);
458 }
459 
460 /*
461  * Set maxactive based on FS type
462  */
463 vnthrottle(vn, vp)
464 	register struct vn_softc *vn;
465 	struct vnode *vp;
466 {
467 	extern int (**nfsv2_vnodeop_p)();
468 
469 	if (vp->v_op == nfsv2_vnodeop_p)
470 		vn->sc_maxactive = 2;
471 	else
472 		vn->sc_maxactive = 8;
473 
474 	if (vn->sc_maxactive < 1)
475 		vn->sc_maxactive = 1;
476 }
477 
478 vnshutdown()
479 {
480 	register struct vn_softc *vn;
481 
482 	for (vn = &vn_softc[0]; vn < &vn_softc[numvnd]; vn++)
483 		if (vn->sc_flags & VNF_INITED)
484 			vnclear(vn);
485 }
486 
487 vnclear(vn)
488 	register struct vn_softc *vn;
489 {
490 	register struct vnode *vp = vn->sc_vp;
491 	struct proc *p = curproc;		/* XXX */
492 
493 #ifdef DEBUG
494 	if (vndebug & VDB_FOLLOW)
495 		printf("vnclear(%x): vp %x\n", vp);
496 #endif
497 	vn->sc_flags &= ~VNF_INITED;
498 	if (vp == (struct vnode *)0)
499 		panic("vnioctl: null vp");
500 	(void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p);
501 	crfree(vn->sc_cred);
502 	vn->sc_vp = (struct vnode *)0;
503 	vn->sc_cred = (struct ucred *)0;
504 	vn->sc_size = 0;
505 }
506 
507 vnsize(dev)
508 	dev_t dev;
509 {
510 	int unit = vnunit(dev);
511 	register struct vn_softc *vn = &vn_softc[unit];
512 
513 	if (unit >= numvnd || (vn->sc_flags & VNF_INITED) == 0)
514 		return(-1);
515 	return(vn->sc_size);
516 }
517 
518 vndump(dev)
519 {
520 	return(ENXIO);
521 }
522 #endif
523