xref: /dragonfly/sys/dev/disk/vn/vn.c (revision 277350a0)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * from: Utah Hdr: vn.c 1.13 94/04/02
35  *
36  *	from: @(#)vn.c	8.6 (Berkeley) 4/1/94
37  * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
38  */
39 
40 /*
41  * Vnode disk driver.
42  *
43  * Block/character interface to a vnode.  Allows one to treat a file
44  * as a disk (e.g. build a filesystem in it, mount it, etc.).
45  *
46  * NOTE 1: There is a security issue involved with this driver.
47  * Once mounted all access to the contents of the "mapped" file via
48  * the special file is controlled by the permissions on the special
49  * file, the protection of the mapped file is ignored (effectively,
50  * by using root credentials in all transactions).
51  *
52  * NOTE 2: Doesn't interact with leases, should it?
53  */
54 
55 #include "use_vn.h"
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/kernel.h>
59 #include <sys/proc.h>
60 #include <sys/priv.h>
61 #include <sys/nlookup.h>
62 #include <sys/buf.h>
63 #include <sys/malloc.h>
64 #include <sys/mount.h>
65 #include <sys/vnode.h>
66 #include <sys/fcntl.h>
67 #include <sys/conf.h>
68 #include <sys/diskslice.h>
69 #include <sys/disk.h>
70 #include <sys/stat.h>
71 #include <sys/module.h>
72 #include <sys/vnioctl.h>
73 
74 #include <vm/vm.h>
75 #include <vm/vm_object.h>
76 #include <vm/vm_page.h>
77 #include <vm/vm_pager.h>
78 #include <vm/vm_pageout.h>
79 #include <vm/swap_pager.h>
80 #include <vm/vm_extern.h>
81 #include <vm/vm_zone.h>
82 #include <sys/devfs.h>
83 
/*
 * Forward declarations of the device entry points implemented in this
 * file.  All but vnclone are referenced from the vn_ops table; vnclone
 * is handed to make_autoclone_dev() at module load.
 */
static	d_ioctl_t	vnioctl;
static	d_open_t	vnopen;
static	d_close_t	vnclose;
static	d_psize_t	vnsize;
static	d_strategy_t	vnstrategy;
static	d_clone_t	vnclone;

/* Malloc type used for struct vn_softc allocations. */
MALLOC_DEFINE(M_VN, "vn_softc", "vn driver structures");
/* Unit-number bitmap shared by preallocated and cloned vn units. */
DEVFS_DEFINE_CLONE_BITMAP(vn);

/*
 * Number of units created unconditionally at module load.  Units at or
 * above this number are flagged for destruction when they are detached.
 * NVN is presumably supplied by "use_vn.h" -- confirm.
 */
#if NVN <= 1
#define VN_PREALLOCATED_UNITS	4
#else
#define VN_PREALLOCATED_UNITS	NVN
#endif

/* Lower bound applied to si_bsize_best in vnopen(). */
#define VN_BSIZE_BEST	8192
101 
/*
 * dev_ops
 *	D_DISK		we want to look like a disk
 *	D_CANFREE	We support BUF_CMD_FREEBLKS
 *
 * Entry-point table for the vn device.  Reads and writes are routed
 * through physread/physwrite; the remaining operations are the vn*
 * handlers defined in this file.
 */

static struct dev_ops vn_ops = {
	{ "vn", 0, D_DISK | D_CANFREE },
	.d_open =	vnopen,
	.d_close =	vnclose,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_ioctl =	vnioctl,
	.d_strategy =	vnstrategy,
	.d_psize =	vnsize
};
118 
/*
 * Per-unit driver state.  One instance is allocated by vncreatevn() for
 * every vn unit and linked onto vn_list by vninitvn().  A configured
 * unit is backed either by a vnode (sc_vp) or by a VM object
 * (sc_object).
 */
struct vn_softc {
	int		sc_unit;	/* unit number, from dkunit()	*/
	int		sc_flags;	/* VNF_* flags 			*/
	u_int64_t	sc_size;	/* size of vn, sc_secsize scale	*/
	int		sc_secsize;	/* sector size			*/
	struct disk	sc_disk;	/* disk-layer state for the unit */
	struct vnode	*sc_vp;		/* vnode if not NULL		*/
	vm_object_t	sc_object;	/* backing object if not NULL	*/
	struct ucred	*sc_cred;	/* credentials 			*/
	int		 sc_maxactive;	/* max # of active requests 	*/
					/* NOTE(review): not referenced	*/
					/* in the code visible here	*/
	struct buf	 sc_tab;	/* transfer queue 		*/
					/* NOTE(review): not referenced	*/
					/* in the code visible here	*/
	u_long		 sc_options;	/* per-unit options, OR'd with	*/
					/* vn_options by IFOPT/TESTOPT	*/
	cdev_t		 sc_dev;	/* devices that refer to this unit */
	SLIST_ENTRY(vn_softc) sc_list;	/* linkage on vn_list		*/
};
134 
/* Every vn unit currently known to the driver. */
static SLIST_HEAD(, vn_softc) vn_list;

/*
 * sc_flags
 *	VNF_INITED	unit has backing store attached; set by the attach
 *			routines and cleared by vnclear()
 *	VNF_READONLY	backing file could only be opened for reading
 *	VNF_OPENED	set by vnopen(), cleared by vnclose()
 *	VNF_DESTROY	set on detach of a unit numbered at or above
 *			VN_PREALLOCATED_UNITS; vnclose() then destroys it
 */
#define VNF_INITED	0x01
#define	VNF_READONLY	0x02
#define VNF_OPENED	0x10
#define	VNF_DESTROY	0x20

/* Driver-wide option bits, changed via VNIOCGSET / VNIOCGCLEAR. */
static u_long	vn_options;

/*
 * Option tests; an option is considered set if it is present either in
 * the unit's sc_options or in the global vn_options.
 *
 * IFOPT expands to a bare "if (...)" and must be followed by a
 * statement; beware that an "else" written after it would bind to the
 * macro's hidden if.  TESTOPT is the plain expression form.
 */
#define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
#define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))

/* Prototypes for the internal helpers defined later in this file. */
static int	vnsetcred (struct vn_softc *vn, struct ucred *cred);
static void	vnclear (struct vn_softc *vn);
static int	vnget (cdev_t dev, struct vn_softc *vn , struct vn_user *vnu);
static int	vn_modevent (module_t, int, void *);
static int 	vniocattach_file (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred);
static int 	vniocattach_swap (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred);
static cdev_t	vn_create(int unit, struct devfs_bitmap *bitmap, int clone);
155 
156 static int
157 vnclone(struct dev_clone_args *ap)
158 {
159 	int unit;
160 
161 	unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(vn), 0);
162 	ap->a_dev = vn_create(unit, &DEVFS_CLONE_BITMAP(vn), 1);
163 
164 	return 0;
165 }
166 
167 static	int
168 vnclose(struct dev_close_args *ap)
169 {
170 	cdev_t dev = ap->a_head.a_dev;
171 	struct vn_softc *vn;
172 
173 	vn = dev->si_drv1;
174 	KKASSERT(vn != NULL);
175 
176 	vn->sc_flags &= ~VNF_OPENED;
177 
178 	/* The disk has been detached and can now be safely destroyed */
179 	if (vn->sc_flags & VNF_DESTROY) {
180 		KKASSERT(disk_getopencount(&vn->sc_disk) == 0);
181 		disk_destroy(&vn->sc_disk);
182 		devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(vn), dkunit(dev));
183 		SLIST_REMOVE(&vn_list, vn, vn_softc, sc_list);
184 		kfree(vn, M_VN);
185 	}
186 	return (0);
187 }
188 
189 static struct vn_softc *
190 vncreatevn(void)
191 {
192 	struct vn_softc *vn;
193 
194 	vn = kmalloc(sizeof *vn, M_VN, M_WAITOK | M_ZERO);
195 	return vn;
196 }
197 
198 static void
199 vninitvn(struct vn_softc *vn, cdev_t dev)
200 {
201 	int unit;
202 
203 	KKASSERT(vn != NULL);
204 	KKASSERT(dev != NULL);
205 	unit = dkunit(dev);
206 
207 	vn->sc_unit = unit;
208 	dev->si_drv1 = vn;
209 	vn->sc_dev = dev;
210 
211 	SLIST_INSERT_HEAD(&vn_list, vn, sc_list);
212 }
213 
214 static	int
215 vnopen(struct dev_open_args *ap)
216 {
217 	cdev_t dev = ap->a_head.a_dev;
218 	struct vn_softc *vn;
219 
220 	/*
221 	 * Locate preexisting device
222 	 */
223 
224 	vn = dev->si_drv1;
225 	KKASSERT(vn != NULL);
226 
227 	/*
228 	 * Update si_bsize fields for device.  This data will be overriden by
229 	 * the slice/parition code for vn accesses through partitions, and
230 	 * used directly if you open the 'whole disk' device.
231 	 *
232 	 * si_bsize_best must be reinitialized in case VN has been
233 	 * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency.
234 	 */
235 	dev->si_bsize_phys = vn->sc_secsize;
236 	dev->si_bsize_best = vn->sc_secsize;
237 	if (dev->si_bsize_best < VN_BSIZE_BEST)
238 		dev->si_bsize_best = VN_BSIZE_BEST;
239 
240 	if ((ap->a_oflags & FWRITE) && (vn->sc_flags & VNF_READONLY))
241 		return (EACCES);
242 
243 	IFOPT(vn, VN_FOLLOW)
244 		kprintf("vnopen(%s, 0x%x, 0x%x)\n",
245 		    devtoname(dev), ap->a_oflags, ap->a_devtype);
246 
247 	vn->sc_flags |= VNF_OPENED;
248 	return(0);
249 }
250 
/*
 *	vnstrategy:
 *
 *	Run strategy routine for VN device.  We use VOP_READ/VOP_WRITE calls
 *	for vnode-backed vn's, and the swap_pager_strategy() call for
 *	vm_object-backed vn's.
 *
 *	The function itself always returns 0; failures are reported to
 *	the issuer through bp->b_error and B_ERROR before biodone() is
 *	called.  In the swap-backed path the bio is handed to
 *	swap_pager_strategy() and biodone() is not called here
 *	(presumably the pager completes it -- confirm).
 */
static int
vnstrategy(struct dev_strategy_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct bio *bio = ap->a_bio;
	struct buf *bp;
	struct bio *nbio;
	int unit;
	struct vn_softc *vn;
	int error;

	unit = dkunit(dev);	/* only used by the debug trace below */
	vn = dev->si_drv1;
	KKASSERT(vn != NULL);

	bp = bio->bio_buf;

	IFOPT(vn, VN_DEBUG)
		kprintf("vnstrategy(%p): unit %d\n", bp, unit);

	/* No backing store attached: fail the request with ENXIO. */
	if ((vn->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		biodone(bio);
		return(0);
	}

	/* Start out with nothing transferred. */
	bp->b_resid = bp->b_bcount;

	/*
	 * The vnode device is using disk/slice label support.
	 *
	 * NOTE(review): this comment used to describe a dscheck() call
	 * that validated slices and translated the block number.  No
	 * such call is present here; nbio is simply the incoming bio.
	 * Presumably any slice translation has already happened before
	 * this routine is reached -- confirm.
	 */

	nbio = bio;

	/*
	 * Use nbio from this point on
	 */
	if (vn->sc_vp && bp->b_cmd == BUF_CMD_FREEBLKS) {
		/*
		 * Freeblks is not handled for vnode-backed elements yet.
		 * Report it as fully completed without doing anything.
		 */
		bp->b_resid = 0;
		/* operation complete */
	} else if (vn->sc_vp) {
		/*
		 * VNODE I/O
		 *
		 * If an error occurs, we set B_ERROR but we do not set
		 * B_INVAL because (for a write anyway), the buffer is
		 * still valid.
		 */
		struct uio auio;
		struct iovec aiov;

		bzero(&auio, sizeof(auio));

		/* Describe the buffer as a single kernel-space segment. */
		aiov.iov_base = bp->b_data;
		aiov.iov_len = bp->b_bcount;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = nbio->bio_offset;
		auio.uio_segflg = UIO_SYSSPACE;
		/* Anything that is not a read is set up as a write. */
		if (bp->b_cmd == BUF_CMD_READ)
			auio.uio_rw = UIO_READ;
		else
			auio.uio_rw = UIO_WRITE;
		auio.uio_resid = bp->b_bcount;
		auio.uio_td = curthread;

		/*
		 * Don't use IO_DIRECT here, it really gets in the way
		 * due to typical blocksize differences between the
		 * fs backing the VN device and whatever is running on
		 * the VN device.
		 */
		switch (bp->b_cmd) {
		case (BUF_CMD_READ):
			vn_lock(vn->sc_vp, LK_SHARED | LK_RETRY);
			error = VOP_READ(vn->sc_vp, &auio, IO_RECURSE,
					 vn->sc_cred);
			break;

		case (BUF_CMD_WRITE):
			vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_WRITE(vn->sc_vp, &auio, IO_RECURSE,
					  vn->sc_cred);
			break;

		case (BUF_CMD_FLUSH):
			/* A flush moves no data; sync the backing vnode. */
			auio.uio_resid = 0;
			vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_FSYNC(vn->sc_vp, MNT_WAIT, 0);
			break;
		default:
			/*
			 * Any other command is completed successfully
			 * without touching the vnode.  No lock was taken,
			 * so skip the unlock below.
			 */
			auio.uio_resid = 0;
			error = 0;
			goto breakunlocked;
		}
		vn_unlock(vn->sc_vp);
breakunlocked:
		/* Whatever the uio did not consume is the residual. */
		bp->b_resid = auio.uio_resid;
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
		}
		/* operation complete */
	} else if (vn->sc_object) {
		/*
		 * OBJT_SWAP I/O (handles read, write, freebuf)
		 *
		 * We have nothing to do if freeing blocks on a reserved
		 * swap area, otherwise execute the op.
		 */
		if (bp->b_cmd == BUF_CMD_FREEBLKS && TESTOPT(vn, VN_RESERVE)) {
			bp->b_resid = 0;
			/* operation complete */
		} else {
			swap_pager_strategy(vn->sc_object, nbio);
			return(0);
			/* NOT REACHED */
		}
	} else {
		/* Marked inited but no backing store of either kind. */
		bp->b_resid = bp->b_bcount;
		bp->b_flags |= B_ERROR | B_INVAL;
		bp->b_error = EINVAL;
		/* operation complete */
	}
	biodone(nbio);
	return(0);
}
395 
396 /* ARGSUSED */
397 static	int
398 vnioctl(struct dev_ioctl_args *ap)
399 {
400 	cdev_t dev = ap->a_head.a_dev;
401 	struct vn_softc *vn;
402 	struct vn_ioctl *vio;
403 	int error;
404 	u_long *f;
405 
406 	vn = dev->si_drv1;
407 	IFOPT(vn,VN_FOLLOW) {
408 		kprintf("vnioctl(%s, 0x%lx, %p, 0x%x): unit %d\n",
409 		    devtoname(dev), ap->a_cmd, ap->a_data, ap->a_fflag,
410 		    dkunit(dev));
411 	}
412 
413 	switch (ap->a_cmd) {
414 	case VNIOCATTACH:
415 	case VNIOCDETACH:
416 	case VNIOCGSET:
417 	case VNIOCGCLEAR:
418 	case VNIOCGET:
419 	case VNIOCUSET:
420 	case VNIOCUCLEAR:
421 		goto vn_specific;
422 	}
423 
424 #if 0
425 	if (dkslice(dev) != WHOLE_DISK_SLICE ||
426 		dkpart(dev) != WHOLE_SLICE_PART)
427 		return (ENOTTY);
428 #endif
429 
430     vn_specific:
431 
432 	error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0);
433 	if (error)
434 		return (error);
435 
436 	vio = (struct vn_ioctl *)ap->a_data;
437 	f = (u_long*)ap->a_data;
438 
439 	switch (ap->a_cmd) {
440 	case VNIOCATTACH:
441 		if (vn->sc_flags & VNF_INITED)
442 			return(EBUSY);
443 
444 		if (vn->sc_flags & VNF_DESTROY)
445 			return(ENXIO);
446 
447 		if (vio->vn_file == NULL)
448 			error = vniocattach_swap(vn, vio, dev, ap->a_fflag, ap->a_cred);
449 		else
450 			error = vniocattach_file(vn, vio, dev, ap->a_fflag, ap->a_cred);
451 		break;
452 
453 	case VNIOCDETACH:
454 		if ((vn->sc_flags & VNF_INITED) == 0)
455 			return(ENXIO);
456 		/*
457 		 * XXX handle i/o in progress.  Return EBUSY, or wait, or
458 		 * flush the i/o.
459 		 * XXX handle multiple opens of the device.  Return EBUSY,
460 		 * or revoke the fd's.
461 		 * How are these problems handled for removable and failing
462 		 * hardware devices? (Hint: They are not)
463 		 */
464 		if ((disk_getopencount(&vn->sc_disk)) > 1)
465 			return (EBUSY);
466 
467 		vnclear(vn);
468 		IFOPT(vn, VN_FOLLOW)
469 			kprintf("vnioctl: CLRed\n");
470 
471 		if (dkunit(dev) >= VN_PREALLOCATED_UNITS) {
472 			vn->sc_flags |= VNF_DESTROY;
473 		}
474 
475 		break;
476 
477 	case VNIOCGET:
478 		error = vnget(dev, vn, (struct vn_user *) ap->a_data);
479 		break;
480 
481 	case VNIOCGSET:
482 		vn_options |= *f;
483 		*f = vn_options;
484 		break;
485 
486 	case VNIOCGCLEAR:
487 		vn_options &= ~(*f);
488 		*f = vn_options;
489 		break;
490 
491 	case VNIOCUSET:
492 		vn->sc_options |= *f;
493 		*f = vn->sc_options;
494 		break;
495 
496 	case VNIOCUCLEAR:
497 		vn->sc_options &= ~(*f);
498 		*f = vn->sc_options;
499 		break;
500 
501 	default:
502 		error = ENOTTY;
503 		break;
504 	}
505 	return(error);
506 }
507 
508 /*
509  *	vniocattach_file:
510  *
511  *	Attach a file to a VN partition.  Return the size in the vn_size
512  *	field.
513  */
514 
515 static int
516 vniocattach_file(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev,
517 		 int flag, struct ucred *cred)
518 {
519 	struct vattr vattr;
520 	struct nlookupdata nd;
521 	int error, flags;
522 	struct vnode *vp;
523 	struct disk_info info;
524 
525 	flags = FREAD|FWRITE;
526 	error = nlookup_init(&nd, vio->vn_file,
527 				UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
528 	if (error)
529 		return (error);
530 	if ((error = vn_open(&nd, NULL, flags, 0)) != 0) {
531 		if (error != EACCES && error != EPERM && error != EROFS)
532 			goto done;
533 		flags &= ~FWRITE;
534 		nlookup_done(&nd);
535 		error = nlookup_init(&nd, vio->vn_file, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
536 		if (error)
537 			return (error);
538 		if ((error = vn_open(&nd, NULL, flags, 0)) != 0)
539 			goto done;
540 	}
541 	vp = nd.nl_open_vp;
542 	if (vp->v_type != VREG ||
543 	    (error = VOP_GETATTR(vp, &vattr))) {
544 		if (error == 0)
545 			error = EINVAL;
546 		goto done;
547 	}
548 	vn_unlock(vp);
549 	vn->sc_secsize = DEV_BSIZE;
550 	vn->sc_vp = vp;
551 	nd.nl_open_vp = NULL;
552 
553 	/*
554 	 * If the size is specified, override the file attributes.  Note that
555 	 * the vn_size argument is in PAGE_SIZE sized blocks.
556 	 */
557 	if (vio->vn_size)
558 		vn->sc_size = vio->vn_size * PAGE_SIZE / vn->sc_secsize;
559 	else
560 		vn->sc_size = vattr.va_size / vn->sc_secsize;
561 	error = vnsetcred(vn, cred);
562 	if (error) {
563 		vn->sc_vp = NULL;
564 		vn_close(vp, flags, NULL);
565 		goto done;
566 	}
567 	vn->sc_flags |= VNF_INITED;
568 	if (flags == FREAD)
569 		vn->sc_flags |= VNF_READONLY;
570 
571 	/*
572 	 * Set the disk info so that probing is triggered
573 	 */
574 	bzero(&info, sizeof(struct disk_info));
575 	info.d_media_blksize = vn->sc_secsize;
576 	info.d_media_blocks = vn->sc_size;
577 	/*
578 	 * reserve mbr sector for backwards compatibility
579 	 * when no slices exist.
580 	 */
581 	info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE;
582 	info.d_secpertrack = 32;
583 	info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE);
584 	info.d_secpercyl = info.d_secpertrack * info.d_nheads;
585 	info.d_ncylinders = vn->sc_size / info.d_secpercyl;
586 	disk_setdiskinfo_sync(&vn->sc_disk, &info);
587 
588 	error = dev_dopen(dev, flag, S_IFCHR, cred, NULL);
589 	if (error)
590 		vnclear(vn);
591 
592 	IFOPT(vn, VN_FOLLOW)
593 		kprintf("vnioctl: SET vp %p size %llx blks\n",
594 		       vn->sc_vp, (long long)vn->sc_size);
595 done:
596 	nlookup_done(&nd);
597 	return(error);
598 }
599 
600 /*
601  *	vniocattach_swap:
602  *
603  *	Attach swap backing store to a VN partition of the size specified
604  *	in vn_size.
605  */
606 
607 static int
608 vniocattach_swap(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev,
609 		 int flag, struct ucred *cred)
610 {
611 	int error;
612 	struct disk_info info;
613 
614 	/*
615 	 * Range check.  Disallow negative sizes or any size less then the
616 	 * size of a page.  Then round to a page.
617 	 */
618 
619 	if (vio->vn_size <= 0)
620 		return(EDOM);
621 
622 	/*
623 	 * Allocate an OBJT_SWAP object.
624 	 *
625 	 * sc_secsize is PAGE_SIZE'd
626 	 *
627 	 * vio->vn_size is in PAGE_SIZE'd chunks.
628 	 * sc_size must be in PAGE_SIZE'd chunks.
629 	 * Note the truncation.
630 	 */
631 
632 	vn->sc_secsize = PAGE_SIZE;
633 	vn->sc_size = vio->vn_size;
634 	vn->sc_object = swap_pager_alloc(NULL,
635 					 vn->sc_secsize * (off_t)vio->vn_size,
636 					 VM_PROT_DEFAULT, 0);
637 	IFOPT(vn, VN_RESERVE) {
638 		if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) {
639 			vm_pager_deallocate(vn->sc_object);
640 			vn->sc_object = NULL;
641 			return(EDOM);
642 		}
643 	}
644 	vn->sc_flags |= VNF_INITED;
645 
646 	error = vnsetcred(vn, cred);
647 	if (error == 0) {
648 		/*
649 		 * Set the disk info so that probing is triggered
650 		 */
651 		bzero(&info, sizeof(struct disk_info));
652 		info.d_media_blksize = vn->sc_secsize;
653 		info.d_media_blocks = vn->sc_size;
654 		/*
655 		 * reserve mbr sector for backwards compatibility
656 		 * when no slices exist.
657 		 */
658 		info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE;
659 		info.d_secpertrack = 32;
660 		info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE);
661 		info.d_secpercyl = info.d_secpertrack * info.d_nheads;
662 		info.d_ncylinders = vn->sc_size / info.d_secpercyl;
663 		disk_setdiskinfo_sync(&vn->sc_disk, &info);
664 
665 		error = dev_dopen(dev, flag, S_IFCHR, cred, NULL);
666 	}
667 	if (error == 0) {
668 		IFOPT(vn, VN_FOLLOW) {
669 			kprintf("vnioctl: SET vp %p size %llx\n",
670 			       vn->sc_vp, (long long)vn->sc_size);
671 		}
672 	}
673 	if (error)
674 		vnclear(vn);
675 	return(error);
676 }
677 
678 /*
679  * Duplicate the current processes' credentials.  Since we are called only
680  * as the result of a SET ioctl and only root can do that, any future access
681  * to this "disk" is essentially as root.  Note that credentials may change
682  * if some other uid can write directly to the mapped file (NFS).
683  */
684 int
685 vnsetcred(struct vn_softc *vn, struct ucred *cred)
686 {
687 	char *tmpbuf;
688 	int error = 0;
689 
690 	/*
691 	 * Set credits in our softc
692 	 */
693 
694 	if (vn->sc_cred)
695 		crfree(vn->sc_cred);
696 	vn->sc_cred = crdup(cred);
697 
698 	/*
699 	 * Horrible kludge to establish credentials for NFS  XXX.
700 	 */
701 
702 	if (vn->sc_vp) {
703 		struct uio auio;
704 		struct iovec aiov;
705 
706 		tmpbuf = kmalloc(vn->sc_secsize, M_TEMP, M_WAITOK);
707 		bzero(&auio, sizeof(auio));
708 
709 		aiov.iov_base = tmpbuf;
710 		aiov.iov_len = vn->sc_secsize;
711 		auio.uio_iov = &aiov;
712 		auio.uio_iovcnt = 1;
713 		auio.uio_offset = 0;
714 		auio.uio_rw = UIO_READ;
715 		auio.uio_segflg = UIO_SYSSPACE;
716 		auio.uio_resid = aiov.iov_len;
717 		vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY);
718 		error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
719 		vn_unlock(vn->sc_vp);
720 		kfree(tmpbuf, M_TEMP);
721 	}
722 	return (error);
723 }
724 
725 void
726 vnclear(struct vn_softc *vn)
727 {
728 	IFOPT(vn, VN_FOLLOW)
729 		kprintf("vnclear(%p): vp=%p\n", vn, vn->sc_vp);
730 	vn->sc_flags &= ~VNF_INITED;
731 	if (vn->sc_vp != NULL) {
732 		vn_close(vn->sc_vp,
733 		    (vn->sc_flags & VNF_READONLY) ? FREAD : (FREAD|FWRITE),
734 		    NULL);
735 		vn->sc_vp = NULL;
736 	}
737 	vn->sc_flags &= ~VNF_READONLY;
738 	if (vn->sc_cred) {
739 		crfree(vn->sc_cred);
740 		vn->sc_cred = NULL;
741 	}
742 	if (vn->sc_object != NULL) {
743 		vm_pager_deallocate(vn->sc_object);
744 		vn->sc_object = NULL;
745 	}
746 
747 	disk_unprobe(&vn->sc_disk);
748 
749 	vn->sc_size = 0;
750 }
751 
752 /*
753  * 	vnget:
754  *
755  *	populate a struct vn_user for the VNIOCGET ioctl.
756  *	interface conventions defined in sys/sys/vnioctl.h.
757  */
758 
759 static int
760 vnget(cdev_t dev, struct vn_softc *vn, struct vn_user *vnu)
761 {
762 	int error, found = 0;
763 	char *freepath, *fullpath;
764 	struct vattr vattr;
765 
766 	if (vnu->vnu_unit == -1) {
767 		vnu->vnu_unit = dkunit(dev);
768 	}
769 	else if (vnu->vnu_unit < 0)
770 		return (EINVAL);
771 
772 	SLIST_FOREACH(vn, &vn_list, sc_list) {
773 
774 		if(vn->sc_unit != vnu->vnu_unit)
775 			continue;
776 
777 		found = 1;
778 
779 		if (vn->sc_flags & VNF_INITED && vn->sc_vp != NULL) {
780 
781 			/* note: u_cred checked in vnioctl above */
782 			error = VOP_GETATTR(vn->sc_vp, &vattr);
783 			if (error) {
784 				kprintf("vnget: VOP_GETATTR for %p failed\n",
785 					vn->sc_vp);
786 				return (error);
787 			}
788 
789 			error = vn_fullpath(curproc, vn->sc_vp,
790 						&fullpath, &freepath, 0);
791 
792 			if (error) {
793 				kprintf("vnget: unable to resolve vp %p\n",
794 					vn->sc_vp);
795 				return(error);
796 			}
797 
798 			strlcpy(vnu->vnu_file, fullpath,
799 				sizeof(vnu->vnu_file));
800 			kfree(freepath, M_TEMP);
801 			vnu->vnu_dev = vattr.va_fsid;
802 			vnu->vnu_ino = vattr.va_fileid;
803 
804 		}
805 		else if (vn->sc_flags & VNF_INITED && vn->sc_object != NULL){
806 
807 			strlcpy(vnu->vnu_file, _VN_USER_SWAP,
808 				sizeof(vnu->vnu_file));
809 			vnu->vnu_size = vn->sc_size;
810 			vnu->vnu_secsize = vn->sc_secsize;
811 
812 		} else {
813 
814 			bzero(vnu->vnu_file, sizeof(vnu->vnu_file));
815 			vnu->vnu_dev = 0;
816 			vnu->vnu_ino = 0;
817 
818 		}
819 		break;
820 	}
821 
822 	if (!found)
823 		return(ENXIO);
824 
825 	return(0);
826 }
827 
828 static int
829 vnsize(struct dev_psize_args *ap)
830 {
831 	cdev_t dev = ap->a_head.a_dev;
832 	struct vn_softc *vn;
833 
834 	vn = dev->si_drv1;
835 	if (!vn)
836 		return(ENXIO);
837 	if ((vn->sc_flags & VNF_INITED) == 0)
838 		return(ENXIO);
839 	ap->a_result = (int64_t)vn->sc_size;
840 	return(0);
841 }
842 
843 static cdev_t
844 vn_create(int unit, struct devfs_bitmap *bitmap, int clone)
845 {
846 	struct vn_softc *vn;
847 	struct disk_info info;
848 	cdev_t dev, ret_dev;
849 
850 	vn = vncreatevn();
851 	if (clone) {
852 		/*
853 		 * For clone devices we need to return the top-level cdev,
854 		 * not the raw dev we'd normally work with.
855 		 */
856 		dev = disk_create_clone(unit, &vn->sc_disk, &vn_ops);
857 		ret_dev = vn->sc_disk.d_cdev;
858 	} else {
859 		ret_dev = dev = disk_create(unit, &vn->sc_disk, &vn_ops);
860 	}
861 	vninitvn(vn, dev);
862 
863 	bzero(&info, sizeof(struct disk_info));
864 	info.d_media_blksize = 512;
865 	info.d_media_blocks = 0;
866 	info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE;
867 	info.d_secpertrack = 32;
868 	info.d_nheads = 64;
869 	info.d_secpercyl = info.d_secpertrack * info.d_nheads;
870 	info.d_ncylinders = 0;
871 	disk_setdiskinfo_sync(&vn->sc_disk, &info);
872 
873 	if (bitmap != NULL)
874 		devfs_clone_bitmap_set(bitmap, unit);
875 
876 	return ret_dev;
877 }
878 
879 static int
880 vn_modevent(module_t mod, int type, void *data)
881 {
882 	struct vn_softc *vn;
883 	static cdev_t dev = NULL;
884 	int i;
885 
886 	switch (type) {
887 	case MOD_LOAD:
888 		dev = make_autoclone_dev(&vn_ops, &DEVFS_CLONE_BITMAP(vn), vnclone, UID_ROOT,
889 		    GID_OPERATOR, 0640, "vn");
890 
891 		for (i = 0; i < VN_PREALLOCATED_UNITS; i++) {
892 			vn_create(i, &DEVFS_CLONE_BITMAP(vn), 0);
893 		}
894 		break;
895 
896 	case MOD_UNLOAD:
897 	case MOD_SHUTDOWN:
898 		while ((vn = SLIST_FIRST(&vn_list)) != NULL) {
899 			/*
900 			 * XXX: no idea if we can return EBUSY even in the
901 			 *	shutdown case, so err on the side of caution
902 			 *	and just rip stuff out on shutdown.
903 			 */
904 			if (type != MOD_SHUTDOWN) {
905 				if (vn->sc_flags & VNF_OPENED)
906 					return (EBUSY);
907 			}
908 
909 			disk_destroy(&vn->sc_disk);
910 
911 			SLIST_REMOVE_HEAD(&vn_list, sc_list);
912 
913 			if (vn->sc_flags & VNF_INITED)
914 				vnclear(vn);
915 
916 			kfree(vn, M_VN);
917 		}
918 		destroy_autoclone_dev(dev, &DEVFS_CLONE_BITMAP(vn));
919 		dev_ops_remove_all(&vn_ops);
920 		break;
921 	default:
922 		break;
923 	}
924 	return 0;
925 }
926 
/* Declare the "vn" module with vn_modevent as its event handler. */
DEV_MODULE(vn, vn_modevent, 0);
928