1 /* 2 * Copyright (c) 1988 University of Utah. 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * from: Utah Hdr: vn.c 1.13 94/04/02 35 * 36 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 37 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $ 38 */ 39 40 /* 41 * Vnode disk driver. 42 * 43 * Block/character interface to a vnode. Allows one to treat a file 44 * as a disk (e.g. build a filesystem in it, mount it, etc.). 45 * 46 * NOTE 1: There is a security issue involved with this driver. 47 * Once mounted all access to the contents of the "mapped" file via 48 * the special file is controlled by the permissions on the special 49 * file, the protection of the mapped file is ignored (effectively, 50 * by using root credentials in all transactions). 51 * 52 * NOTE 2: Doesn't interact with leases, should it? 53 */ 54 55 #include "use_vn.h" 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/uio.h> 59 #include <sys/kernel.h> 60 #include <sys/proc.h> 61 #include <sys/priv.h> 62 #include <sys/nlookup.h> 63 #include <sys/buf.h> 64 #include <sys/malloc.h> 65 #include <sys/mount.h> 66 #include <sys/vnode.h> 67 #include <sys/fcntl.h> 68 #include <sys/conf.h> 69 #include <sys/diskslice.h> 70 #include <sys/disk.h> 71 #include <sys/stat.h> 72 #include <sys/module.h> 73 #include <sys/vnioctl.h> 74 75 #include <vm/vm.h> 76 #include <vm/vm_object.h> 77 #include <vm/vm_page.h> 78 #include <vm/vm_pager.h> 79 #include <vm/vm_pageout.h> 80 #include <vm/swap_pager.h> 81 #include <vm/vm_extern.h> 82 #include <vm/vm_zone.h> 83 #include <sys/devfs.h> 84 85 static d_ioctl_t vnioctl; 86 static d_open_t vnopen; 87 static d_close_t vnclose; 88 static d_psize_t vnsize; 89 static d_strategy_t vnstrategy; 90 static d_clone_t vnclone; 91 92 MALLOC_DEFINE(M_VN, "vn_softc", "vn driver structures"); 93 DEVFS_DEFINE_CLONE_BITMAP(vn); 94 95 #if NVN <= 1 96 #define VN_PREALLOCATED_UNITS 4 97 #else 98 #define VN_PREALLOCATED_UNITS NVN 99 #endif 100 101 #define VN_BSIZE_BEST 8192 102 103 /* 104 * dev_ops 105 * D_DISK We want to look like a disk 106 * D_CANFREE We support BUF_CMD_FREEBLKS 107 * D_NOEMERGPGR Too complex for emergency pager 108 */ 109 110 static struct dev_ops vn_ops = { 111 { "vn", 0, D_DISK | D_CANFREE | D_NOEMERGPGR }, 112 .d_open = vnopen, 113 .d_close = vnclose, 114 .d_read = physread, 115 .d_write = physwrite, 116 .d_ioctl = vnioctl, 117 .d_strategy = vnstrategy, 118 .d_psize = vnsize 119 }; 120 121 struct vn_softc { 122 int sc_unit; 123 int sc_flags; /* flags */ 124 u_int64_t sc_size; /* size of vn, sc_secsize scale */ 125 int sc_secsize; /* sector size */ 126 struct disk sc_disk; 127 struct vnode *sc_vp; /* vnode if not NULL */ 128 vm_object_t sc_object; /* backing object if not NULL */ 129 struct ucred *sc_cred; /* credentials */ 130 int sc_maxactive; /* max # of active requests */ 131 struct buf sc_tab; /* transfer queue */ 132 u_long sc_options; /* options */ 133 cdev_t sc_dev; /* devices that refer to this unit */ 134 SLIST_ENTRY(vn_softc) sc_list; 135 }; 136 137 static SLIST_HEAD(, vn_softc) vn_list; 138 139 /* sc_flags */ 140 #define VNF_INITED 0x01 141 #define VNF_READONLY 0x02 142 #define VNF_OPENED 0x10 143 #define VNF_DESTROY 0x20 144 145 static u_long vn_options; 146 147 #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) 148 #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) 149 150 static int vnsetcred (struct vn_softc *vn, struct ucred *cred); 151 static void vnclear (struct vn_softc *vn); 152 static int vnget (cdev_t dev, struct vn_softc *vn , struct vn_user *vnu); 153 static int vn_modevent (module_t, int, void *); 154 static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); 155 static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); 156 static cdev_t vn_create(int unit, int clone); 157 158 static int 159 vnclone(struct dev_clone_args *ap) 160 { 161 int unit; 162 163 unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(vn), 0); 164 ap->a_dev = vn_create(unit, 1); 165 166 return 0; 167 } 168 169 static int 170 vnclose(struct dev_close_args *ap) 171 { 172 cdev_t dev = ap->a_head.a_dev; 173 struct vn_softc *vn; 174 175 vn = dev->si_drv1; 176 KKASSERT(vn != NULL); 177 178 vn->sc_flags &= ~VNF_OPENED; 179 180 /* The disk has been detached and can now be safely destroyed */ 181 if (vn->sc_flags & VNF_DESTROY) { 182 KKASSERT(disk_getopencount(&vn->sc_disk) == 0); 183 disk_destroy(&vn->sc_disk); 184 devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(vn), dkunit(dev)); 185 SLIST_REMOVE(&vn_list, vn, vn_softc, sc_list); 186 kfree(vn, M_VN); 187 } 188 return (0); 189 } 190 191 static struct vn_softc * 192 vncreatevn(void) 193 { 194 struct vn_softc *vn; 195 196 vn = kmalloc(sizeof *vn, M_VN, M_WAITOK | M_ZERO); 197 return vn; 198 } 199 200 static void 201 vninitvn(struct vn_softc *vn, cdev_t dev) 202 { 203 int unit; 204 205 KKASSERT(vn != NULL); 206 KKASSERT(dev != NULL); 207 unit = dkunit(dev); 208 209 vn->sc_unit = unit; 210 dev->si_drv1 = vn; 211 vn->sc_dev = dev; 212 213 SLIST_INSERT_HEAD(&vn_list, vn, sc_list); 214 } 215 216 static int 217 vnopen(struct dev_open_args *ap) 218 { 219 cdev_t dev = ap->a_head.a_dev; 220 struct vn_softc *vn; 221 222 /* 223 * Locate preexisting device 224 */ 225 226 vn = dev->si_drv1; 227 KKASSERT(vn != NULL); 228 229 /* 230 * Update si_bsize fields for device. This data will be overridden by 231 * the slice/parition code for vn accesses through partitions, and 232 * used directly if you open the 'whole disk' device. 233 * 234 * si_bsize_best must be reinitialized in case VN has been 235 * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency. 236 */ 237 dev->si_bsize_phys = vn->sc_secsize; 238 dev->si_bsize_best = vn->sc_secsize; 239 if (dev->si_bsize_best < VN_BSIZE_BEST) 240 dev->si_bsize_best = VN_BSIZE_BEST; 241 242 if ((ap->a_oflags & FWRITE) && (vn->sc_flags & VNF_READONLY)) 243 return (EACCES); 244 245 IFOPT(vn, VN_FOLLOW) 246 kprintf("vnopen(%s, 0x%x, 0x%x)\n", 247 devtoname(dev), ap->a_oflags, ap->a_devtype); 248 249 vn->sc_flags |= VNF_OPENED; 250 return(0); 251 } 252 253 /* 254 * vnstrategy: 255 * 256 * Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls 257 * for vnode-backed vn's, and the swap_pager_strategy() call for 258 * vm_object-backed vn's. 259 */ 260 static int 261 vnstrategy(struct dev_strategy_args *ap) 262 { 263 cdev_t dev = ap->a_head.a_dev; 264 struct bio *bio = ap->a_bio; 265 struct buf *bp; 266 struct bio *nbio; 267 int unit; 268 struct vn_softc *vn; 269 int error; 270 271 unit = dkunit(dev); 272 vn = dev->si_drv1; 273 KKASSERT(vn != NULL); 274 275 bp = bio->bio_buf; 276 277 IFOPT(vn, VN_DEBUG) 278 kprintf("vnstrategy(%p): unit %d\n", bp, unit); 279 280 if ((vn->sc_flags & VNF_INITED) == 0) { 281 bp->b_error = ENXIO; 282 bp->b_flags |= B_ERROR; 283 biodone(bio); 284 return(0); 285 } 286 287 bp->b_resid = bp->b_bcount; 288 289 /* 290 * The vnode device is using disk/slice label support. 291 * 292 * The dscheck() function is called for validating the 293 * slices that exist ON the vnode device itself, and 294 * translate the "slice-relative" block number, again. 295 * dscheck() will call biodone() and return NULL if 296 * we are at EOF or beyond the device size. 297 */ 298 299 nbio = bio; 300 301 /* 302 * Use the translated nbio from this point on 303 */ 304 if (vn->sc_vp && bp->b_cmd == BUF_CMD_FREEBLKS) { 305 /* 306 * Freeblks is not handled for vnode-backed elements yet. 307 */ 308 bp->b_resid = 0; 309 /* operation complete */ 310 } else if (vn->sc_vp) { 311 /* 312 * VNODE I/O 313 * 314 * If an error occurs, we set B_ERROR but we do not set 315 * B_INVAL because (for a write anyway), the buffer is 316 * still valid. 317 */ 318 struct uio auio; 319 struct iovec aiov; 320 321 bzero(&auio, sizeof(auio)); 322 323 aiov.iov_base = bp->b_data; 324 aiov.iov_len = bp->b_bcount; 325 auio.uio_iov = &aiov; 326 auio.uio_iovcnt = 1; 327 auio.uio_offset = nbio->bio_offset; 328 auio.uio_segflg = UIO_SYSSPACE; 329 if (bp->b_cmd == BUF_CMD_READ) 330 auio.uio_rw = UIO_READ; 331 else 332 auio.uio_rw = UIO_WRITE; 333 auio.uio_resid = bp->b_bcount; 334 auio.uio_td = curthread; 335 336 /* 337 * Don't use IO_DIRECT here, it really gets in the way 338 * due to typical blocksize differences between the 339 * fs backing the VN device and whatever is running on 340 * the VN device. 341 */ 342 switch (bp->b_cmd) { 343 case (BUF_CMD_READ): 344 vn_lock(vn->sc_vp, LK_SHARED | LK_RETRY); 345 error = VOP_READ(vn->sc_vp, &auio, IO_RECURSE, 346 vn->sc_cred); 347 break; 348 349 case (BUF_CMD_WRITE): 350 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); 351 error = VOP_WRITE(vn->sc_vp, &auio, IO_RECURSE, 352 vn->sc_cred); 353 break; 354 355 case (BUF_CMD_FLUSH): 356 auio.uio_resid = 0; 357 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); 358 error = VOP_FSYNC(vn->sc_vp, MNT_WAIT, 0); 359 break; 360 default: 361 auio.uio_resid = 0; 362 error = 0; 363 goto breakunlocked; 364 } 365 vn_unlock(vn->sc_vp); 366 breakunlocked: 367 bp->b_resid = auio.uio_resid; 368 if (error) { 369 bp->b_error = error; 370 bp->b_flags |= B_ERROR; 371 } 372 /* operation complete */ 373 } else if (vn->sc_object) { 374 /* 375 * OBJT_SWAP I/O (handles read, write, freebuf) 376 * 377 * We have nothing to do if freeing blocks on a reserved 378 * swap area, othrewise execute the op. 379 */ 380 if (bp->b_cmd == BUF_CMD_FREEBLKS && TESTOPT(vn, VN_RESERVE)) { 381 bp->b_resid = 0; 382 /* operation complete */ 383 } else { 384 swap_pager_strategy(vn->sc_object, nbio); 385 return(0); 386 /* NOT REACHED */ 387 } 388 } else { 389 bp->b_resid = bp->b_bcount; 390 bp->b_flags |= B_ERROR | B_INVAL; 391 bp->b_error = EINVAL; 392 /* operation complete */ 393 } 394 biodone(nbio); 395 return(0); 396 } 397 398 /* ARGSUSED */ 399 static int 400 vnioctl(struct dev_ioctl_args *ap) 401 { 402 cdev_t dev = ap->a_head.a_dev; 403 struct vn_softc *vn; 404 struct vn_ioctl *vio; 405 int error; 406 u_long *f; 407 408 vn = dev->si_drv1; 409 IFOPT(vn,VN_FOLLOW) { 410 kprintf("vnioctl(%s, 0x%lx, %p, 0x%x): unit %d\n", 411 devtoname(dev), ap->a_cmd, ap->a_data, ap->a_fflag, 412 dkunit(dev)); 413 } 414 415 switch (ap->a_cmd) { 416 case VNIOCATTACH: 417 case VNIOCDETACH: 418 case VNIOCGSET: 419 case VNIOCGCLEAR: 420 case VNIOCGET: 421 case VNIOCUSET: 422 case VNIOCUCLEAR: 423 goto vn_specific; 424 } 425 426 #if 0 427 if (dkslice(dev) != WHOLE_DISK_SLICE || 428 dkpart(dev) != WHOLE_SLICE_PART) 429 return (ENOTTY); 430 #endif 431 432 vn_specific: 433 434 error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0); 435 if (error) 436 return (error); 437 438 vio = (struct vn_ioctl *)ap->a_data; 439 f = (u_long*)ap->a_data; 440 441 switch (ap->a_cmd) { 442 case VNIOCATTACH: 443 if (vn->sc_flags & VNF_INITED) 444 return(EBUSY); 445 446 if (vn->sc_flags & VNF_DESTROY) 447 return(ENXIO); 448 449 if (vio->vn_file == NULL) 450 error = vniocattach_swap(vn, vio, dev, ap->a_fflag, ap->a_cred); 451 else 452 error = vniocattach_file(vn, vio, dev, ap->a_fflag, ap->a_cred); 453 break; 454 455 case VNIOCDETACH: 456 if ((vn->sc_flags & VNF_INITED) == 0) 457 return(ENXIO); 458 /* 459 * XXX handle i/o in progress. Return EBUSY, or wait, or 460 * flush the i/o. 461 * XXX handle multiple opens of the device. Return EBUSY, 462 * or revoke the fd's. 463 * How are these problems handled for removable and failing 464 * hardware devices? (Hint: They are not) 465 */ 466 if ((disk_getopencount(&vn->sc_disk)) > 1) 467 return (EBUSY); 468 469 vnclear(vn); 470 IFOPT(vn, VN_FOLLOW) 471 kprintf("vnioctl: CLRed\n"); 472 473 if (dkunit(dev) >= VN_PREALLOCATED_UNITS) { 474 vn->sc_flags |= VNF_DESTROY; 475 } 476 477 break; 478 479 case VNIOCGET: 480 error = vnget(dev, vn, (struct vn_user *) ap->a_data); 481 break; 482 483 case VNIOCGSET: 484 vn_options |= *f; 485 *f = vn_options; 486 break; 487 488 case VNIOCGCLEAR: 489 vn_options &= ~(*f); 490 *f = vn_options; 491 break; 492 493 case VNIOCUSET: 494 vn->sc_options |= *f; 495 *f = vn->sc_options; 496 break; 497 498 case VNIOCUCLEAR: 499 vn->sc_options &= ~(*f); 500 *f = vn->sc_options; 501 break; 502 503 default: 504 error = ENOTTY; 505 break; 506 } 507 return(error); 508 } 509 510 /* 511 * vniocattach_file: 512 * 513 * Attach a file to a VN partition. Return the size in the vn_size 514 * field. 515 */ 516 517 static int 518 vniocattach_file(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, 519 int flag, struct ucred *cred) 520 { 521 struct vattr vattr; 522 struct nlookupdata nd; 523 int error, flags; 524 struct vnode *vp; 525 struct disk_info info; 526 527 flags = FREAD|FWRITE; 528 error = nlookup_init(&nd, vio->vn_file, 529 UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 530 if (error) 531 return (error); 532 if ((error = vn_open(&nd, NULL, flags, 0)) != 0) { 533 if (error != EACCES && error != EPERM && error != EROFS) 534 goto done; 535 flags &= ~FWRITE; 536 nlookup_done(&nd); 537 error = nlookup_init(&nd, vio->vn_file, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 538 if (error) 539 return (error); 540 if ((error = vn_open(&nd, NULL, flags, 0)) != 0) 541 goto done; 542 } 543 vp = nd.nl_open_vp; 544 if (vp->v_type != VREG || 545 (error = VOP_GETATTR(vp, &vattr))) { 546 if (error == 0) 547 error = EINVAL; 548 goto done; 549 } 550 vn_unlock(vp); 551 vn->sc_secsize = DEV_BSIZE; 552 vn->sc_vp = vp; 553 nd.nl_open_vp = NULL; 554 555 /* 556 * If the size is specified, override the file attributes. Note that 557 * the vn_size argument is in PAGE_SIZE sized blocks. 558 */ 559 if (vio->vn_size) 560 vn->sc_size = vio->vn_size * PAGE_SIZE / vn->sc_secsize; 561 else 562 vn->sc_size = vattr.va_size / vn->sc_secsize; 563 error = vnsetcred(vn, cred); 564 if (error) { 565 vn->sc_vp = NULL; 566 vn_close(vp, flags, NULL); 567 goto done; 568 } 569 vn->sc_flags |= VNF_INITED; 570 if (flags == FREAD) 571 vn->sc_flags |= VNF_READONLY; 572 573 /* 574 * Set the disk info so that probing is triggered 575 */ 576 bzero(&info, sizeof(struct disk_info)); 577 info.d_media_blksize = vn->sc_secsize; 578 info.d_media_blocks = vn->sc_size; 579 /* 580 * reserve mbr sector for backwards compatibility 581 * when no slices exist. 582 */ 583 info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE; 584 info.d_secpertrack = 32; 585 info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); 586 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 587 info.d_ncylinders = vn->sc_size / info.d_secpercyl; 588 disk_setdiskinfo_sync(&vn->sc_disk, &info); 589 590 error = dev_dopen(dev, flag, S_IFCHR, cred, NULL, NULL); 591 if (error) 592 vnclear(vn); 593 594 IFOPT(vn, VN_FOLLOW) 595 kprintf("vnioctl: SET vp %p size %llx blks\n", 596 vn->sc_vp, (long long)vn->sc_size); 597 done: 598 nlookup_done(&nd); 599 return(error); 600 } 601 602 /* 603 * vniocattach_swap: 604 * 605 * Attach swap backing store to a VN partition of the size specified 606 * in vn_size. 607 */ 608 609 static int 610 vniocattach_swap(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, 611 int flag, struct ucred *cred) 612 { 613 int error; 614 struct disk_info info; 615 616 /* 617 * Range check. Disallow negative sizes or any size less then the 618 * size of a page. Then round to a page. 619 */ 620 621 if (vio->vn_size <= 0) 622 return(EDOM); 623 624 /* 625 * Allocate an OBJT_SWAP object. 626 * 627 * sc_secsize is PAGE_SIZE'd 628 * 629 * vio->vn_size is in PAGE_SIZE'd chunks. 630 * sc_size must be in PAGE_SIZE'd chunks. 631 * Note the truncation. 632 */ 633 634 vn->sc_secsize = PAGE_SIZE; 635 vn->sc_size = vio->vn_size; 636 vn->sc_object = swap_pager_alloc(NULL, 637 vn->sc_secsize * (off_t)vio->vn_size, 638 VM_PROT_DEFAULT, 0); 639 vm_object_set_flag(vn->sc_object, OBJ_NOPAGEIN); 640 IFOPT(vn, VN_RESERVE) { 641 if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) { 642 vm_pager_deallocate(vn->sc_object); 643 vn->sc_object = NULL; 644 return(EDOM); 645 } 646 } 647 vn->sc_flags |= VNF_INITED; 648 649 error = vnsetcred(vn, cred); 650 if (error == 0) { 651 /* 652 * Set the disk info so that probing is triggered 653 */ 654 bzero(&info, sizeof(struct disk_info)); 655 info.d_media_blksize = vn->sc_secsize; 656 info.d_media_blocks = vn->sc_size; 657 /* 658 * reserve mbr sector for backwards compatibility 659 * when no slices exist. 660 */ 661 info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE; 662 info.d_secpertrack = 32; 663 info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); 664 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 665 info.d_ncylinders = vn->sc_size / info.d_secpercyl; 666 disk_setdiskinfo_sync(&vn->sc_disk, &info); 667 668 error = dev_dopen(dev, flag, S_IFCHR, cred, NULL, NULL); 669 } 670 if (error == 0) { 671 IFOPT(vn, VN_FOLLOW) { 672 kprintf("vnioctl: SET vp %p size %llx\n", 673 vn->sc_vp, (long long)vn->sc_size); 674 } 675 } 676 if (error) 677 vnclear(vn); 678 return(error); 679 } 680 681 /* 682 * Duplicate the current processes' credentials. Since we are called only 683 * as the result of a SET ioctl and only root can do that, any future access 684 * to this "disk" is essentially as root. Note that credentials may change 685 * if some other uid can write directly to the mapped file (NFS). 686 */ 687 static int 688 vnsetcred(struct vn_softc *vn, struct ucred *cred) 689 { 690 char *tmpbuf; 691 int error = 0; 692 693 /* 694 * Set credits in our softc 695 */ 696 697 if (vn->sc_cred) 698 crfree(vn->sc_cred); 699 vn->sc_cred = crdup(cred); 700 701 /* 702 * Horrible kludge to establish credentials for NFS XXX. 703 */ 704 705 if (vn->sc_vp) { 706 struct uio auio; 707 struct iovec aiov; 708 709 tmpbuf = kmalloc(vn->sc_secsize, M_TEMP, M_WAITOK); 710 bzero(&auio, sizeof(auio)); 711 712 aiov.iov_base = tmpbuf; 713 aiov.iov_len = vn->sc_secsize; 714 auio.uio_iov = &aiov; 715 auio.uio_iovcnt = 1; 716 auio.uio_offset = 0; 717 auio.uio_rw = UIO_READ; 718 auio.uio_segflg = UIO_SYSSPACE; 719 auio.uio_resid = aiov.iov_len; 720 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); 721 error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); 722 vn_unlock(vn->sc_vp); 723 kfree(tmpbuf, M_TEMP); 724 } 725 return (error); 726 } 727 728 static void 729 vnclear(struct vn_softc *vn) 730 { 731 IFOPT(vn, VN_FOLLOW) 732 kprintf("vnclear(%p): vp=%p\n", vn, vn->sc_vp); 733 vn->sc_flags &= ~VNF_INITED; 734 if (vn->sc_vp != NULL) { 735 vn_close(vn->sc_vp, 736 (vn->sc_flags & VNF_READONLY) ? FREAD : (FREAD|FWRITE), 737 NULL); 738 vn->sc_vp = NULL; 739 } 740 vn->sc_flags &= ~VNF_READONLY; 741 if (vn->sc_cred) { 742 crfree(vn->sc_cred); 743 vn->sc_cred = NULL; 744 } 745 if (vn->sc_object != NULL) { 746 vm_pager_deallocate(vn->sc_object); 747 vn->sc_object = NULL; 748 } 749 750 disk_unprobe(&vn->sc_disk); 751 752 vn->sc_size = 0; 753 } 754 755 /* 756 * vnget: 757 * 758 * populate a struct vn_user for the VNIOCGET ioctl. 759 * interface conventions defined in sys/sys/vnioctl.h. 760 */ 761 762 static int 763 vnget(cdev_t dev, struct vn_softc *vn, struct vn_user *vnu) 764 { 765 int error, found = 0; 766 char *freepath, *fullpath; 767 struct vattr vattr; 768 769 if (vnu->vnu_unit == -1) { 770 vnu->vnu_unit = dkunit(dev); 771 } 772 else if (vnu->vnu_unit < 0) 773 return (EINVAL); 774 775 SLIST_FOREACH(vn, &vn_list, sc_list) { 776 777 if(vn->sc_unit != vnu->vnu_unit) 778 continue; 779 780 found = 1; 781 782 if (vn->sc_flags & VNF_INITED && vn->sc_vp != NULL) { 783 784 /* note: u_cred checked in vnioctl above */ 785 error = VOP_GETATTR(vn->sc_vp, &vattr); 786 if (error) { 787 kprintf("vnget: VOP_GETATTR for %p failed\n", 788 vn->sc_vp); 789 return (error); 790 } 791 792 error = vn_fullpath(curproc, vn->sc_vp, 793 &fullpath, &freepath, 0); 794 795 if (error) { 796 kprintf("vnget: unable to resolve vp %p\n", 797 vn->sc_vp); 798 return(error); 799 } 800 801 strlcpy(vnu->vnu_file, fullpath, 802 sizeof(vnu->vnu_file)); 803 kfree(freepath, M_TEMP); 804 vnu->vnu_dev = vattr.va_fsid; 805 vnu->vnu_ino = vattr.va_fileid; 806 807 } 808 else if (vn->sc_flags & VNF_INITED && vn->sc_object != NULL){ 809 810 strlcpy(vnu->vnu_file, _VN_USER_SWAP, 811 sizeof(vnu->vnu_file)); 812 vnu->vnu_size = vn->sc_size; 813 vnu->vnu_secsize = vn->sc_secsize; 814 815 } else { 816 817 bzero(vnu->vnu_file, sizeof(vnu->vnu_file)); 818 vnu->vnu_dev = 0; 819 vnu->vnu_ino = 0; 820 821 } 822 break; 823 } 824 825 if (!found) 826 return(ENXIO); 827 828 return(0); 829 } 830 831 static int 832 vnsize(struct dev_psize_args *ap) 833 { 834 cdev_t dev = ap->a_head.a_dev; 835 struct vn_softc *vn; 836 837 vn = dev->si_drv1; 838 if (!vn) 839 return(ENXIO); 840 if ((vn->sc_flags & VNF_INITED) == 0) 841 return(ENXIO); 842 ap->a_result = (int64_t)vn->sc_size; 843 return(0); 844 } 845 846 /* 847 * Returns NULL only if the specified unit cannot be allocated. 848 */ 849 static cdev_t 850 vn_create(int unit, int clone) 851 { 852 struct vn_softc *vn; 853 struct disk_info info; 854 cdev_t dev, ret_dev; 855 856 vn = vncreatevn(); 857 if (clone) { 858 /* 859 * For clone devices we need to return the top-level cdev, 860 * not the raw dev we'd normally work with. 861 */ 862 dev = disk_create_clone(unit, &vn->sc_disk, &vn_ops); 863 ret_dev = vn->sc_disk.d_cdev; 864 } else { 865 ret_dev = dev = disk_create(unit, &vn->sc_disk, &vn_ops); 866 } 867 vninitvn(vn, dev); 868 869 bzero(&info, sizeof(struct disk_info)); 870 info.d_media_blksize = 512; 871 info.d_media_blocks = 0; 872 info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE; 873 info.d_secpertrack = 32; 874 info.d_nheads = 64; 875 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 876 info.d_ncylinders = 0; 877 disk_setdiskinfo_sync(&vn->sc_disk, &info); 878 879 return ret_dev; 880 } 881 882 static int 883 vn_modevent(module_t mod, int type, void *data) 884 { 885 struct vn_softc *vn; 886 static cdev_t dev = NULL; 887 int i; 888 889 switch (type) { 890 case MOD_LOAD: 891 dev = make_autoclone_dev(&vn_ops, &DEVFS_CLONE_BITMAP(vn), 892 vnclone, UID_ROOT, GID_OPERATOR, 893 0640, "vn"); 894 for (i = 0; i < VN_PREALLOCATED_UNITS; i++) { 895 devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(vn), i); 896 vn_create(i, 0); 897 } 898 break; 899 900 case MOD_UNLOAD: 901 case MOD_SHUTDOWN: 902 while ((vn = SLIST_FIRST(&vn_list)) != NULL) { 903 /* 904 * XXX: no idea if we can return EBUSY even in the 905 * shutdown case, so err on the side of caution 906 * and just rip stuff out on shutdown. 907 */ 908 if (type != MOD_SHUTDOWN) { 909 if (vn->sc_flags & VNF_OPENED) 910 return (EBUSY); 911 } 912 913 disk_destroy(&vn->sc_disk); 914 915 SLIST_REMOVE_HEAD(&vn_list, sc_list); 916 917 if (vn->sc_flags & VNF_INITED) 918 vnclear(vn); 919 920 kfree(vn, M_VN); 921 } 922 destroy_autoclone_dev(dev, &DEVFS_CLONE_BITMAP(vn)); 923 dev_ops_remove_all(&vn_ops); 924 break; 925 default: 926 break; 927 } 928 return 0; 929 } 930 931 DEV_MODULE(vn, vn_modevent, 0); 932