xref: /original-bsd/sys/dev/vn.c (revision e58c8952)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: vn.c 1.13 94/04/02$
13  *
14  *	@(#)vn.c	8.6 (Berkeley) 04/01/94
15  */
16 
17 /*
18  * Vnode disk driver.
19  *
20  * Block/character interface to a vnode.  Allows one to treat a file
21  * as a disk (e.g. build a filesystem in it, mount it, etc.).
22  *
23  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
24  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
25  * local buffer cache.
26  *
27  * NOTE 2: There is a security issue involved with this driver.
28  * Once mounted all access to the contents of the "mapped" file via
29  * the special file is controlled by the permissions on the special
30  * file, the protection of the mapped file is ignored (effectively,
31  * by using root credentials in all transactions).
32  *
33  * NOTE 3: Doesn't interact with leases, should it?
34  */
35 #include "vn.h"
36 #if NVN > 0
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/namei.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/dkstat.h>
44 #include <sys/buf.h>
45 #include <sys/malloc.h>
46 #include <sys/ioctl.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/file.h>
50 #include <sys/uio.h>
51 
52 #include <miscfs/specfs/specdev.h>
53 
54 #include <dev/vnioctl.h>
55 
#ifdef DEBUG
int dovncluster = 1;		/* permit clustered (multi-block) chunks */
int vndebug = 0x00;		/* debugging output mask; VDB_* bits below */
#define VDB_FOLLOW	0x01	/* trace routine entry */
#define VDB_INIT	0x02	/* trace configuration (VNIOCSET/VNIOCCLR) */
#define VDB_IO		0x04	/* trace individual transfers */
#endif

/* No real geometry; sort the queue on block number (stored in b_resid). */
#define b_cylin	b_resid

#define	vnunit(x)	((minor(x) >> 3) & 0x7)	/* for consistency */

/* Allocate/free the per-chunk component buffers used by vnstrategy(). */
#define	getvnbuf()	\
	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))
#define putvnbuf(bp)	\
	free((caddr_t)(bp), M_DEVBUF)

/* Per-unit software state. */
struct vn_softc {
	int		 sc_flags;	/* flags (VNF_* below) */
	size_t		 sc_size;	/* size of vn, in DEV_BSIZE blocks */
	struct vnode	*sc_vp;		/* vnode of the mapped file */
	struct ucred	*sc_cred;	/* credentials used for all transfers */
	int		 sc_maxactive;	/* max # of active requests */
	struct buf	 sc_tab;	/* transfer queue */
};

/* sc_flags */
#define	VNF_ALIVE	0x01
#define VNF_INITED	0x02	/* unit has been configured via VNIOCSET */

#if 0	/* if you need static allocation */
struct vn_softc vn_softc[NVN];
int numvnd = NVN;
#else
struct vn_softc *vn_softc;	/* allocated by vnattach() */
int numvnd;			/* number of configured-in units */
#endif
93 
94 void
95 vnattach(num)
96 	int num;
97 {
98 	char *mem;
99 	register u_long size;
100 
101 	if (num <= 0)
102 		return;
103 	size = num * sizeof(struct vn_softc);
104 	mem = malloc(size, M_DEVBUF, M_NOWAIT);
105 	if (mem == NULL) {
106 		printf("WARNING: no memory for vnode disks\n");
107 		return;
108 	}
109 	bzero(mem, size);
110 	vn_softc = (struct vn_softc *)mem;
111 	numvnd = num;
112 }
113 
114 int
115 vnopen(dev, flags, mode, p)
116 	dev_t dev;
117 	int flags, mode;
118 	struct proc *p;
119 {
120 	int unit = vnunit(dev);
121 
122 #ifdef DEBUG
123 	if (vndebug & VDB_FOLLOW)
124 		printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p);
125 #endif
126 	if (unit >= numvnd)
127 		return(ENXIO);
128 	return(0);
129 }
130 
/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 * Note that this driver can only be used for swapping over NFS on the hp
 * since nfs_strategy on the vax cannot handle u-areas and page tables.
 */
vnstrategy(bp)
	register struct buf *bp;
{
	int unit = vnunit(bp->b_dev);
	register struct vn_softc *vn = &vn_softc[unit];
	register struct buf *nbp;
	register int bn, bsize, resid;
	register caddr_t addr;
	int sz, flags, error;
	extern void vniodone();

#ifdef DEBUG
	if (vndebug & VDB_FOLLOW)
		printf("vnstrategy(%x): unit %d\n", bp, unit);
#endif
	/* Reject transfers to a unit that has not been VNIOCSET. */
	if ((vn->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		biodone(bp);
		return;
	}
	bn = bp->b_blkno;
	sz = howmany(bp->b_bcount, DEV_BSIZE);
	bp->b_resid = bp->b_bcount;
	/*
	 * Bounds check.  A transfer starting exactly at sc_size is
	 * treated as EOF (completes with full residual, no error);
	 * anything else out of range fails with EINVAL.
	 */
	if (bn < 0 || bn + sz > vn->sc_size) {
		if (bn != vn->sc_size) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
		}
		biodone(bp);
		return;
	}
	bn = dbtob(bn);		/* bn is a byte offset from here on */
	bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = bp->b_flags | B_CALL;	/* component bufs complete via vniodone */
	/*
	 * Carve the request into chunks.  Each chunk is translated by
	 * VOP_BMAP onto the file system's underlying device (or left
	 * against the file's vnode when there is none) and queued for
	 * throttled submission by vnstart().
	 */
	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vnode *vp;
		daddr_t nbn;
		int off, s, nra;

		nra = 0;
		error = VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn, &nra);
		if (error == 0 && (long)nbn == -1)
			error = EIO;	/* hole in the file */
#ifdef DEBUG
		if (!dovncluster)
			nra = 0;
#endif

		/*
		 * Chunk size: the remainder of the current file system
		 * block if we are mid-block, otherwise as many contiguous
		 * blocks as BMAP reported (1 + nra); always capped by the
		 * bytes still to be transferred.
		 */
		if (off = bn % bsize)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vndebug & VDB_IO)
			printf("vnstrategy: vp %x/%x bn %x/%x sz %x\n",
			       vn->sc_vp, vp, bn, nbn, sz);
#endif

		nbp = getvnbuf();
		nbp->b_flags = flags;
		nbp->b_bcount = sz;
		nbp->b_bufsize = bp->b_bufsize;
		nbp->b_error = 0;
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			nbp->b_dev = vp->v_rdev;
		else
			nbp->b_dev = NODEV;
		nbp->b_data = addr;
		nbp->b_blkno = nbn + btodb(off);
		nbp->b_proc = bp->b_proc;
		nbp->b_iodone = vniodone;
		nbp->b_vp = vp;
		nbp->b_pfcent = (int) bp;	/* XXX back pointer to original bp */
		nbp->b_rcred = vn->sc_cred;	/* XXX crdup? */
		nbp->b_wcred = vn->sc_cred;	/* XXX crdup? */
		nbp->b_dirtyoff = bp->b_dirtyoff;
		nbp->b_dirtyend = bp->b_dirtyend;
		nbp->b_validoff = bp->b_validoff;
		nbp->b_validend = bp->b_validend;
		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we deal with this after the nbp allocation.
		 * This ensures that we properly clean up any operations
		 * that we have already fired off.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			nbp->b_error = error;
			nbp->b_flags |= B_ERROR;
			bp->b_resid -= (resid - sz);
			/*
			 * NOTE(review): this nbp was never queued nor
			 * counted in sc_tab.b_active, yet biodone() runs
			 * vniodone() for it, which does vnstart/b_active
			 * bookkeeping -- confirm the active count stays
			 * balanced on this path.
			 */
			biodone(nbp);
			return;
		}
		/*
		 * Just sort by block number
		 */
		nbp->b_cylin = nbp->b_blkno;
		s = splbio();
		disksort(&vn->sc_tab, nbp);
		/* Fire another transfer only while under the unit's limit. */
		if (vn->sc_tab.b_active < vn->sc_maxactive) {
			vn->sc_tab.b_active++;
			vnstart(vn);
		}
		splx(s);
		bn += sz;
		addr += sz;
	}
}
250 
/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 *
 * Callers (vnstrategy, vniodone) invoke this at splbio with a non-empty
 * sc_tab queue; the b_active count is maintained by those callers, not
 * here.
 */
vnstart(vn)
	register struct vn_softc *vn;
{
	register struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */
	bp = vn->sc_tab.b_actf;
	vn->sc_tab.b_actf = bp->b_actf;
#ifdef DEBUG
	if (vndebug & VDB_IO)
		printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
		       vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
		       bp->b_bcount);
#endif
	/* Writes must be reflected in the target vnode's output count. */
	if ((bp->b_flags & B_READ) == 0)
		bp->b_vp->v_numoutput++;
	VOP_STRATEGY(bp);
}
278 
/*
 * Completion handler (b_iodone) for the component buffers built by
 * vnstrategy().  Propagates errors and residual accounting to the
 * original request, completes it when all chunks are in, and feeds
 * the next queued chunk (or drops the active count).
 */
void
vniodone(bp)
	register struct buf *bp;
{
	/* Recover the original request from the back pointer. */
	register struct buf *pbp = (struct buf *)bp->b_pfcent;	/* XXX */
	register struct vn_softc *vn = &vn_softc[vnunit(pbp->b_dev)];
	int s;

	s = splbio();
#ifdef DEBUG
	if (vndebug & VDB_IO)
		printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
		       vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
		       bp->b_bcount);
#endif
	/* Any chunk error marks the whole original request as failed. */
	if (bp->b_error) {
#ifdef DEBUG
		if (vndebug & VDB_IO)
			printf("vniodone: bp %x error %d\n", bp, bp->b_error);
#endif
		pbp->b_flags |= B_ERROR;
		pbp->b_error = biowait(bp);
	}
	/* Account for this chunk; the original completes at resid == 0. */
	pbp->b_resid -= bp->b_bcount;
	putvnbuf(bp);
	if (pbp->b_resid == 0) {
#ifdef DEBUG
		if (vndebug & VDB_IO)
			printf("vniodone: pbp %x iodone\n", pbp);
#endif
		biodone(pbp);
	}
	/* Keep the pipe full: start the next chunk or retire this slot. */
	if (vn->sc_tab.b_actf)
		vnstart(vn);
	else
		vn->sc_tab.b_active--;
	splx(s);
}
317 
318 vnread(dev, uio, flags, p)
319 	dev_t dev;
320 	struct uio *uio;
321 	int flags;
322 	struct proc *p;
323 {
324 
325 #ifdef DEBUG
326 	if (vndebug & VDB_FOLLOW)
327 		printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p);
328 #endif
329 	return(physio(vnstrategy, NULL, dev, B_READ, minphys, uio));
330 }
331 
332 vnwrite(dev, uio, flags, p)
333 	dev_t dev;
334 	struct uio *uio;
335 	int flags;
336 	struct proc *p;
337 {
338 
339 #ifdef DEBUG
340 	if (vndebug & VDB_FOLLOW)
341 		printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p);
342 #endif
343 	return(physio(vnstrategy, NULL, dev, B_WRITE, minphys, uio));
344 }
345 
346 /* ARGSUSED */
347 vnioctl(dev, cmd, data, flag, p)
348 	dev_t dev;
349 	u_long cmd;
350 	caddr_t data;
351 	int flag;
352 	struct proc *p;
353 {
354 	int unit = vnunit(dev);
355 	register struct vn_softc *vn;
356 	struct vn_ioctl *vio;
357 	struct vattr vattr;
358 	struct nameidata nd;
359 	int error;
360 
361 #ifdef DEBUG
362 	if (vndebug & VDB_FOLLOW)
363 		printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n",
364 		       dev, cmd, data, flag, p, unit);
365 #endif
366 	error = suser(p->p_ucred, &p->p_acflag);
367 	if (error)
368 		return (error);
369 	if (unit >= numvnd)
370 		return (ENXIO);
371 
372 	vn = &vn_softc[unit];
373 	vio = (struct vn_ioctl *)data;
374 	switch (cmd) {
375 
376 	case VNIOCSET:
377 		if (vn->sc_flags & VNF_INITED)
378 			return(EBUSY);
379 		/*
380 		 * Always open for read and write.
381 		 * This is probably bogus, but it lets vn_open()
382 		 * weed out directories, sockets, etc. so we don't
383 		 * have to worry about them.
384 		 */
385 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
386 		if (error = vn_open(&nd, FREAD|FWRITE, 0))
387 			return(error);
388 		if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) {
389 			VOP_UNLOCK(nd.ni_vp);
390 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
391 			return(error);
392 		}
393 		VOP_UNLOCK(nd.ni_vp);
394 		vn->sc_vp = nd.ni_vp;
395 		vn->sc_size = btodb(vattr.va_size);	/* note truncation */
396 		if (error = vnsetcred(vn, p->p_ucred)) {
397 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
398 			return(error);
399 		}
400 		vnthrottle(vn, vn->sc_vp);
401 		vio->vn_size = dbtob(vn->sc_size);
402 		vn->sc_flags |= VNF_INITED;
403 #ifdef DEBUG
404 		if (vndebug & VDB_INIT)
405 			printf("vnioctl: SET vp %x size %x\n",
406 			       vn->sc_vp, vn->sc_size);
407 #endif
408 		break;
409 
410 	case VNIOCCLR:
411 		if ((vn->sc_flags & VNF_INITED) == 0)
412 			return(ENXIO);
413 		vnclear(vn);
414 #ifdef DEBUG
415 		if (vndebug & VDB_INIT)
416 			printf("vnioctl: CLRed\n");
417 #endif
418 		break;
419 
420 	default:
421 		return(ENXIO);
422 	}
423 	return(0);
424 }
425 
426 /*
427  * Duplicate the current processes' credentials.  Since we are called only
428  * as the result of a SET ioctl and only root can do that, any future access
429  * to this "disk" is essentially as root.  Note that credentials may change
430  * if some other uid can write directly to the mapped file (NFS).
431  */
432 vnsetcred(vn, cred)
433 	register struct vn_softc *vn;
434 	struct ucred *cred;
435 {
436 	struct uio auio;
437 	struct iovec aiov;
438 	char *tmpbuf;
439 	int error;
440 
441 	vn->sc_cred = crdup(cred);
442 	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
443 
444 	/* XXX: Horrible kludge to establish credentials for NFS */
445 	aiov.iov_base = tmpbuf;
446 	aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size));
447 	auio.uio_iov = &aiov;
448 	auio.uio_iovcnt = 1;
449 	auio.uio_offset = 0;
450 	auio.uio_rw = UIO_READ;
451 	auio.uio_segflg = UIO_SYSSPACE;
452 	auio.uio_resid = aiov.iov_len;
453 	error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
454 
455 	free(tmpbuf, M_TEMP);
456 	return (error);
457 }
458 
459 /*
460  * Set maxactive based on FS type
461  */
462 vnthrottle(vn, vp)
463 	register struct vn_softc *vn;
464 	struct vnode *vp;
465 {
466 	extern int (**nfsv2_vnodeop_p)();
467 
468 	if (vp->v_op == nfsv2_vnodeop_p)
469 		vn->sc_maxactive = 2;
470 	else
471 		vn->sc_maxactive = 8;
472 
473 	if (vn->sc_maxactive < 1)
474 		vn->sc_maxactive = 1;
475 }
476 
477 vnshutdown()
478 {
479 	register struct vn_softc *vn;
480 
481 	for (vn = &vn_softc[0]; vn < &vn_softc[numvnd]; vn++)
482 		if (vn->sc_flags & VNF_INITED)
483 			vnclear(vn);
484 }
485 
486 vnclear(vn)
487 	register struct vn_softc *vn;
488 {
489 	register struct vnode *vp = vn->sc_vp;
490 	struct proc *p = curproc;		/* XXX */
491 
492 #ifdef DEBUG
493 	if (vndebug & VDB_FOLLOW)
494 		printf("vnclear(%x): vp %x\n", vp);
495 #endif
496 	vn->sc_flags &= ~VNF_INITED;
497 	if (vp == (struct vnode *)0)
498 		panic("vnioctl: null vp");
499 	(void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p);
500 	crfree(vn->sc_cred);
501 	vn->sc_vp = (struct vnode *)0;
502 	vn->sc_cred = (struct ucred *)0;
503 	vn->sc_size = 0;
504 }
505 
506 vnsize(dev)
507 	dev_t dev;
508 {
509 	int unit = vnunit(dev);
510 	register struct vn_softc *vn = &vn_softc[unit];
511 
512 	if (unit >= numvnd || (vn->sc_flags & VNF_INITED) == 0)
513 		return(-1);
514 	return(vn->sc_size);
515 }
516 
517 vndump(dev)
518 {
519 	return(ENXIO);
520 }
521 #endif
522