1 /* 2 * Copyright (c) 1988 University of Utah. 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * from: Utah Hdr: vn.c 1.13 94/04/02 35 * 36 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 37 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $ 38 */ 39 40 /* 41 * Vnode disk driver. 42 * 43 * Block/character interface to a vnode. Allows one to treat a file 44 * as a disk (e.g. build a filesystem in it, mount it, etc.). 45 * 46 * NOTE 1: There is a security issue involved with this driver. 47 * Once mounted all access to the contents of the "mapped" file via 48 * the special file is controlled by the permissions on the special 49 * file, the protection of the mapped file is ignored (effectively, 50 * by using root credentials in all transactions). 51 * 52 * NOTE 2: Doesn't interact with leases, should it? 53 */ 54 55 #include "use_vn.h" 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/kernel.h> 59 #include <sys/proc.h> 60 #include <sys/priv.h> 61 #include <sys/nlookup.h> 62 #include <sys/buf.h> 63 #include <sys/malloc.h> 64 #include <sys/mount.h> 65 #include <sys/vnode.h> 66 #include <sys/fcntl.h> 67 #include <sys/conf.h> 68 #include <sys/diskslice.h> 69 #include <sys/disk.h> 70 #include <sys/stat.h> 71 #include <sys/module.h> 72 #include <sys/vnioctl.h> 73 74 #include <vm/vm.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 #include <vm/vm_pager.h> 78 #include <vm/vm_pageout.h> 79 #include <vm/swap_pager.h> 80 #include <vm/vm_extern.h> 81 #include <vm/vm_zone.h> 82 #include <sys/devfs.h> 83 84 static d_ioctl_t vnioctl; 85 static d_open_t vnopen; 86 static d_close_t vnclose; 87 static d_psize_t vnsize; 88 static d_strategy_t vnstrategy; 89 static d_clone_t vnclone; 90 91 MALLOC_DEFINE(M_VN, "vn_softc", "vn driver structures"); 92 DEVFS_DEFINE_CLONE_BITMAP(vn); 93 94 #if NVN <= 1 95 #define VN_PREALLOCATED_UNITS 4 96 #else 97 #define VN_PREALLOCATED_UNITS NVN 98 #endif 99 100 #define VN_BSIZE_BEST 8192 101 102 /* 103 * dev_ops 104 * D_DISK We want to look like a disk 105 * D_CANFREE We support BUF_CMD_FREEBLKS 106 * D_NOEMERGPGR Too complex for emergency pager 107 */ 108 109 static struct dev_ops vn_ops = { 110 { "vn", 0, D_DISK | D_CANFREE | D_NOEMERGPGR }, 111 .d_open = vnopen, 112 .d_close = vnclose, 113 .d_read = physread, 114 .d_write = physwrite, 115 .d_ioctl = vnioctl, 116 .d_strategy = vnstrategy, 117 .d_psize = vnsize 118 }; 119 120 struct vn_softc { 121 int sc_unit; 122 int sc_flags; /* flags */ 123 u_int64_t sc_size; /* size of vn, sc_secsize scale */ 124 int sc_secsize; /* sector size */ 125 struct disk sc_disk; 126 struct vnode *sc_vp; /* vnode if not NULL */ 127 vm_object_t sc_object; /* backing object if not NULL */ 128 struct ucred *sc_cred; /* credentials */ 129 int sc_maxactive; /* max # of active requests */ 130 struct buf sc_tab; /* transfer queue */ 131 u_long sc_options; /* options */ 132 cdev_t sc_dev; /* devices that refer to this unit */ 133 SLIST_ENTRY(vn_softc) sc_list; 134 }; 135 136 static SLIST_HEAD(, vn_softc) vn_list; 137 138 /* sc_flags */ 139 #define VNF_INITED 0x01 140 #define VNF_READONLY 0x02 141 #define VNF_OPENED 0x10 142 #define VNF_DESTROY 0x20 143 144 static u_long vn_options; 145 146 #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) 147 #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) 148 149 static int vnsetcred (struct vn_softc *vn, struct ucred *cred); 150 static void vnclear (struct vn_softc *vn); 151 static int vnget (cdev_t dev, struct vn_softc *vn , struct vn_user *vnu); 152 static int vn_modevent (module_t, int, void *); 153 static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); 154 static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); 155 static cdev_t vn_create(int unit, struct devfs_bitmap *bitmap, int clone); 156 157 static int 158 vnclone(struct dev_clone_args *ap) 159 { 160 int unit; 161 162 unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(vn), 0); 163 ap->a_dev = vn_create(unit, &DEVFS_CLONE_BITMAP(vn), 1); 164 165 return 0; 166 } 167 168 static int 169 vnclose(struct dev_close_args *ap) 170 { 171 cdev_t dev = ap->a_head.a_dev; 172 struct vn_softc *vn; 173 174 vn = dev->si_drv1; 175 KKASSERT(vn != NULL); 176 177 vn->sc_flags &= ~VNF_OPENED; 178 179 /* The disk has been detached and can now be safely destroyed */ 180 if (vn->sc_flags & VNF_DESTROY) { 181 KKASSERT(disk_getopencount(&vn->sc_disk) == 0); 182 disk_destroy(&vn->sc_disk); 183 devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(vn), dkunit(dev)); 184 SLIST_REMOVE(&vn_list, vn, vn_softc, sc_list); 185 kfree(vn, M_VN); 186 } 187 return (0); 188 } 189 190 static struct vn_softc * 191 vncreatevn(void) 192 { 193 struct vn_softc *vn; 194 195 vn = kmalloc(sizeof *vn, M_VN, M_WAITOK | M_ZERO); 196 return vn; 197 } 198 199 static void 200 vninitvn(struct vn_softc *vn, cdev_t dev) 201 { 202 int unit; 203 204 KKASSERT(vn != NULL); 205 KKASSERT(dev != NULL); 206 unit = dkunit(dev); 207 208 vn->sc_unit = unit; 209 dev->si_drv1 = vn; 210 vn->sc_dev = dev; 211 212 SLIST_INSERT_HEAD(&vn_list, vn, sc_list); 213 } 214 215 static int 216 vnopen(struct dev_open_args *ap) 217 { 218 cdev_t dev = ap->a_head.a_dev; 219 struct vn_softc *vn; 220 221 /* 222 * Locate preexisting device 223 */ 224 225 vn = dev->si_drv1; 226 KKASSERT(vn != NULL); 227 228 /* 229 * Update si_bsize fields for device. This data will be overriden by 230 * the slice/parition code for vn accesses through partitions, and 231 * used directly if you open the 'whole disk' device. 232 * 233 * si_bsize_best must be reinitialized in case VN has been 234 * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency. 235 */ 236 dev->si_bsize_phys = vn->sc_secsize; 237 dev->si_bsize_best = vn->sc_secsize; 238 if (dev->si_bsize_best < VN_BSIZE_BEST) 239 dev->si_bsize_best = VN_BSIZE_BEST; 240 241 if ((ap->a_oflags & FWRITE) && (vn->sc_flags & VNF_READONLY)) 242 return (EACCES); 243 244 IFOPT(vn, VN_FOLLOW) 245 kprintf("vnopen(%s, 0x%x, 0x%x)\n", 246 devtoname(dev), ap->a_oflags, ap->a_devtype); 247 248 vn->sc_flags |= VNF_OPENED; 249 return(0); 250 } 251 252 /* 253 * vnstrategy: 254 * 255 * Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls 256 * for vnode-backed vn's, and the swap_pager_strategy() call for 257 * vm_object-backed vn's. 258 */ 259 static int 260 vnstrategy(struct dev_strategy_args *ap) 261 { 262 cdev_t dev = ap->a_head.a_dev; 263 struct bio *bio = ap->a_bio; 264 struct buf *bp; 265 struct bio *nbio; 266 int unit; 267 struct vn_softc *vn; 268 int error; 269 270 unit = dkunit(dev); 271 vn = dev->si_drv1; 272 KKASSERT(vn != NULL); 273 274 bp = bio->bio_buf; 275 276 IFOPT(vn, VN_DEBUG) 277 kprintf("vnstrategy(%p): unit %d\n", bp, unit); 278 279 if ((vn->sc_flags & VNF_INITED) == 0) { 280 bp->b_error = ENXIO; 281 bp->b_flags |= B_ERROR; 282 biodone(bio); 283 return(0); 284 } 285 286 bp->b_resid = bp->b_bcount; 287 288 /* 289 * The vnode device is using disk/slice label support. 290 * 291 * The dscheck() function is called for validating the 292 * slices that exist ON the vnode device itself, and 293 * translate the "slice-relative" block number, again. 294 * dscheck() will call biodone() and return NULL if 295 * we are at EOF or beyond the device size. 296 */ 297 298 nbio = bio; 299 300 /* 301 * Use the translated nbio from this point on 302 */ 303 if (vn->sc_vp && bp->b_cmd == BUF_CMD_FREEBLKS) { 304 /* 305 * Freeblks is not handled for vnode-backed elements yet. 306 */ 307 bp->b_resid = 0; 308 /* operation complete */ 309 } else if (vn->sc_vp) { 310 /* 311 * VNODE I/O 312 * 313 * If an error occurs, we set B_ERROR but we do not set 314 * B_INVAL because (for a write anyway), the buffer is 315 * still valid. 316 */ 317 struct uio auio; 318 struct iovec aiov; 319 320 bzero(&auio, sizeof(auio)); 321 322 aiov.iov_base = bp->b_data; 323 aiov.iov_len = bp->b_bcount; 324 auio.uio_iov = &aiov; 325 auio.uio_iovcnt = 1; 326 auio.uio_offset = nbio->bio_offset; 327 auio.uio_segflg = UIO_SYSSPACE; 328 if (bp->b_cmd == BUF_CMD_READ) 329 auio.uio_rw = UIO_READ; 330 else 331 auio.uio_rw = UIO_WRITE; 332 auio.uio_resid = bp->b_bcount; 333 auio.uio_td = curthread; 334 335 /* 336 * Don't use IO_DIRECT here, it really gets in the way 337 * due to typical blocksize differences between the 338 * fs backing the VN device and whatever is running on 339 * the VN device. 340 */ 341 switch (bp->b_cmd) { 342 case (BUF_CMD_READ): 343 vn_lock(vn->sc_vp, LK_SHARED | LK_RETRY); 344 error = VOP_READ(vn->sc_vp, &auio, IO_RECURSE, 345 vn->sc_cred); 346 break; 347 348 case (BUF_CMD_WRITE): 349 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); 350 error = VOP_WRITE(vn->sc_vp, &auio, IO_RECURSE, 351 vn->sc_cred); 352 break; 353 354 case (BUF_CMD_FLUSH): 355 auio.uio_resid = 0; 356 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); 357 error = VOP_FSYNC(vn->sc_vp, MNT_WAIT, 0); 358 break; 359 default: 360 auio.uio_resid = 0; 361 error = 0; 362 goto breakunlocked; 363 } 364 vn_unlock(vn->sc_vp); 365 breakunlocked: 366 bp->b_resid = auio.uio_resid; 367 if (error) { 368 bp->b_error = error; 369 bp->b_flags |= B_ERROR; 370 } 371 /* operation complete */ 372 } else if (vn->sc_object) { 373 /* 374 * OBJT_SWAP I/O (handles read, write, freebuf) 375 * 376 * We have nothing to do if freeing blocks on a reserved 377 * swap area, othrewise execute the op. 378 */ 379 if (bp->b_cmd == BUF_CMD_FREEBLKS && TESTOPT(vn, VN_RESERVE)) { 380 bp->b_resid = 0; 381 /* operation complete */ 382 } else { 383 swap_pager_strategy(vn->sc_object, nbio); 384 return(0); 385 /* NOT REACHED */ 386 } 387 } else { 388 bp->b_resid = bp->b_bcount; 389 bp->b_flags |= B_ERROR | B_INVAL; 390 bp->b_error = EINVAL; 391 /* operation complete */ 392 } 393 biodone(nbio); 394 return(0); 395 } 396 397 /* ARGSUSED */ 398 static int 399 vnioctl(struct dev_ioctl_args *ap) 400 { 401 cdev_t dev = ap->a_head.a_dev; 402 struct vn_softc *vn; 403 struct vn_ioctl *vio; 404 int error; 405 u_long *f; 406 407 vn = dev->si_drv1; 408 IFOPT(vn,VN_FOLLOW) { 409 kprintf("vnioctl(%s, 0x%lx, %p, 0x%x): unit %d\n", 410 devtoname(dev), ap->a_cmd, ap->a_data, ap->a_fflag, 411 dkunit(dev)); 412 } 413 414 switch (ap->a_cmd) { 415 case VNIOCATTACH: 416 case VNIOCDETACH: 417 case VNIOCGSET: 418 case VNIOCGCLEAR: 419 case VNIOCGET: 420 case VNIOCUSET: 421 case VNIOCUCLEAR: 422 goto vn_specific; 423 } 424 425 #if 0 426 if (dkslice(dev) != WHOLE_DISK_SLICE || 427 dkpart(dev) != WHOLE_SLICE_PART) 428 return (ENOTTY); 429 #endif 430 431 vn_specific: 432 433 error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0); 434 if (error) 435 return (error); 436 437 vio = (struct vn_ioctl *)ap->a_data; 438 f = (u_long*)ap->a_data; 439 440 switch (ap->a_cmd) { 441 case VNIOCATTACH: 442 if (vn->sc_flags & VNF_INITED) 443 return(EBUSY); 444 445 if (vn->sc_flags & VNF_DESTROY) 446 return(ENXIO); 447 448 if (vio->vn_file == NULL) 449 error = vniocattach_swap(vn, vio, dev, ap->a_fflag, ap->a_cred); 450 else 451 error = vniocattach_file(vn, vio, dev, ap->a_fflag, ap->a_cred); 452 break; 453 454 case VNIOCDETACH: 455 if ((vn->sc_flags & VNF_INITED) == 0) 456 return(ENXIO); 457 /* 458 * XXX handle i/o in progress. Return EBUSY, or wait, or 459 * flush the i/o. 460 * XXX handle multiple opens of the device. Return EBUSY, 461 * or revoke the fd's. 462 * How are these problems handled for removable and failing 463 * hardware devices? (Hint: They are not) 464 */ 465 if ((disk_getopencount(&vn->sc_disk)) > 1) 466 return (EBUSY); 467 468 vnclear(vn); 469 IFOPT(vn, VN_FOLLOW) 470 kprintf("vnioctl: CLRed\n"); 471 472 if (dkunit(dev) >= VN_PREALLOCATED_UNITS) { 473 vn->sc_flags |= VNF_DESTROY; 474 } 475 476 break; 477 478 case VNIOCGET: 479 error = vnget(dev, vn, (struct vn_user *) ap->a_data); 480 break; 481 482 case VNIOCGSET: 483 vn_options |= *f; 484 *f = vn_options; 485 break; 486 487 case VNIOCGCLEAR: 488 vn_options &= ~(*f); 489 *f = vn_options; 490 break; 491 492 case VNIOCUSET: 493 vn->sc_options |= *f; 494 *f = vn->sc_options; 495 break; 496 497 case VNIOCUCLEAR: 498 vn->sc_options &= ~(*f); 499 *f = vn->sc_options; 500 break; 501 502 default: 503 error = ENOTTY; 504 break; 505 } 506 return(error); 507 } 508 509 /* 510 * vniocattach_file: 511 * 512 * Attach a file to a VN partition. Return the size in the vn_size 513 * field. 514 */ 515 516 static int 517 vniocattach_file(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, 518 int flag, struct ucred *cred) 519 { 520 struct vattr vattr; 521 struct nlookupdata nd; 522 int error, flags; 523 struct vnode *vp; 524 struct disk_info info; 525 526 flags = FREAD|FWRITE; 527 error = nlookup_init(&nd, vio->vn_file, 528 UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 529 if (error) 530 return (error); 531 if ((error = vn_open(&nd, NULL, flags, 0)) != 0) { 532 if (error != EACCES && error != EPERM && error != EROFS) 533 goto done; 534 flags &= ~FWRITE; 535 nlookup_done(&nd); 536 error = nlookup_init(&nd, vio->vn_file, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 537 if (error) 538 return (error); 539 if ((error = vn_open(&nd, NULL, flags, 0)) != 0) 540 goto done; 541 } 542 vp = nd.nl_open_vp; 543 if (vp->v_type != VREG || 544 (error = VOP_GETATTR(vp, &vattr))) { 545 if (error == 0) 546 error = EINVAL; 547 goto done; 548 } 549 vn_unlock(vp); 550 vn->sc_secsize = DEV_BSIZE; 551 vn->sc_vp = vp; 552 nd.nl_open_vp = NULL; 553 554 /* 555 * If the size is specified, override the file attributes. Note that 556 * the vn_size argument is in PAGE_SIZE sized blocks. 557 */ 558 if (vio->vn_size) 559 vn->sc_size = vio->vn_size * PAGE_SIZE / vn->sc_secsize; 560 else 561 vn->sc_size = vattr.va_size / vn->sc_secsize; 562 error = vnsetcred(vn, cred); 563 if (error) { 564 vn->sc_vp = NULL; 565 vn_close(vp, flags, NULL); 566 goto done; 567 } 568 vn->sc_flags |= VNF_INITED; 569 if (flags == FREAD) 570 vn->sc_flags |= VNF_READONLY; 571 572 /* 573 * Set the disk info so that probing is triggered 574 */ 575 bzero(&info, sizeof(struct disk_info)); 576 info.d_media_blksize = vn->sc_secsize; 577 info.d_media_blocks = vn->sc_size; 578 /* 579 * reserve mbr sector for backwards compatibility 580 * when no slices exist. 581 */ 582 info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE; 583 info.d_secpertrack = 32; 584 info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); 585 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 586 info.d_ncylinders = vn->sc_size / info.d_secpercyl; 587 disk_setdiskinfo_sync(&vn->sc_disk, &info); 588 589 error = dev_dopen(dev, flag, S_IFCHR, cred, NULL); 590 if (error) 591 vnclear(vn); 592 593 IFOPT(vn, VN_FOLLOW) 594 kprintf("vnioctl: SET vp %p size %llx blks\n", 595 vn->sc_vp, (long long)vn->sc_size); 596 done: 597 nlookup_done(&nd); 598 return(error); 599 } 600 601 /* 602 * vniocattach_swap: 603 * 604 * Attach swap backing store to a VN partition of the size specified 605 * in vn_size. 606 */ 607 608 static int 609 vniocattach_swap(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, 610 int flag, struct ucred *cred) 611 { 612 int error; 613 struct disk_info info; 614 615 /* 616 * Range check. Disallow negative sizes or any size less then the 617 * size of a page. Then round to a page. 618 */ 619 620 if (vio->vn_size <= 0) 621 return(EDOM); 622 623 /* 624 * Allocate an OBJT_SWAP object. 625 * 626 * sc_secsize is PAGE_SIZE'd 627 * 628 * vio->vn_size is in PAGE_SIZE'd chunks. 629 * sc_size must be in PAGE_SIZE'd chunks. 630 * Note the truncation. 631 */ 632 633 vn->sc_secsize = PAGE_SIZE; 634 vn->sc_size = vio->vn_size; 635 vn->sc_object = swap_pager_alloc(NULL, 636 vn->sc_secsize * (off_t)vio->vn_size, 637 VM_PROT_DEFAULT, 0); 638 vm_object_set_flag(vn->sc_object, OBJ_NOPAGEIN); 639 IFOPT(vn, VN_RESERVE) { 640 if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) { 641 vm_pager_deallocate(vn->sc_object); 642 vn->sc_object = NULL; 643 return(EDOM); 644 } 645 } 646 vn->sc_flags |= VNF_INITED; 647 648 error = vnsetcred(vn, cred); 649 if (error == 0) { 650 /* 651 * Set the disk info so that probing is triggered 652 */ 653 bzero(&info, sizeof(struct disk_info)); 654 info.d_media_blksize = vn->sc_secsize; 655 info.d_media_blocks = vn->sc_size; 656 /* 657 * reserve mbr sector for backwards compatibility 658 * when no slices exist. 659 */ 660 info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE; 661 info.d_secpertrack = 32; 662 info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); 663 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 664 info.d_ncylinders = vn->sc_size / info.d_secpercyl; 665 disk_setdiskinfo_sync(&vn->sc_disk, &info); 666 667 error = dev_dopen(dev, flag, S_IFCHR, cred, NULL); 668 } 669 if (error == 0) { 670 IFOPT(vn, VN_FOLLOW) { 671 kprintf("vnioctl: SET vp %p size %llx\n", 672 vn->sc_vp, (long long)vn->sc_size); 673 } 674 } 675 if (error) 676 vnclear(vn); 677 return(error); 678 } 679 680 /* 681 * Duplicate the current processes' credentials. Since we are called only 682 * as the result of a SET ioctl and only root can do that, any future access 683 * to this "disk" is essentially as root. Note that credentials may change 684 * if some other uid can write directly to the mapped file (NFS). 685 */ 686 static int 687 vnsetcred(struct vn_softc *vn, struct ucred *cred) 688 { 689 char *tmpbuf; 690 int error = 0; 691 692 /* 693 * Set credits in our softc 694 */ 695 696 if (vn->sc_cred) 697 crfree(vn->sc_cred); 698 vn->sc_cred = crdup(cred); 699 700 /* 701 * Horrible kludge to establish credentials for NFS XXX. 702 */ 703 704 if (vn->sc_vp) { 705 struct uio auio; 706 struct iovec aiov; 707 708 tmpbuf = kmalloc(vn->sc_secsize, M_TEMP, M_WAITOK); 709 bzero(&auio, sizeof(auio)); 710 711 aiov.iov_base = tmpbuf; 712 aiov.iov_len = vn->sc_secsize; 713 auio.uio_iov = &aiov; 714 auio.uio_iovcnt = 1; 715 auio.uio_offset = 0; 716 auio.uio_rw = UIO_READ; 717 auio.uio_segflg = UIO_SYSSPACE; 718 auio.uio_resid = aiov.iov_len; 719 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); 720 error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); 721 vn_unlock(vn->sc_vp); 722 kfree(tmpbuf, M_TEMP); 723 } 724 return (error); 725 } 726 727 static void 728 vnclear(struct vn_softc *vn) 729 { 730 IFOPT(vn, VN_FOLLOW) 731 kprintf("vnclear(%p): vp=%p\n", vn, vn->sc_vp); 732 vn->sc_flags &= ~VNF_INITED; 733 if (vn->sc_vp != NULL) { 734 vn_close(vn->sc_vp, 735 (vn->sc_flags & VNF_READONLY) ? FREAD : (FREAD|FWRITE), 736 NULL); 737 vn->sc_vp = NULL; 738 } 739 vn->sc_flags &= ~VNF_READONLY; 740 if (vn->sc_cred) { 741 crfree(vn->sc_cred); 742 vn->sc_cred = NULL; 743 } 744 if (vn->sc_object != NULL) { 745 vm_pager_deallocate(vn->sc_object); 746 vn->sc_object = NULL; 747 } 748 749 disk_unprobe(&vn->sc_disk); 750 751 vn->sc_size = 0; 752 } 753 754 /* 755 * vnget: 756 * 757 * populate a struct vn_user for the VNIOCGET ioctl. 758 * interface conventions defined in sys/sys/vnioctl.h. 759 */ 760 761 static int 762 vnget(cdev_t dev, struct vn_softc *vn, struct vn_user *vnu) 763 { 764 int error, found = 0; 765 char *freepath, *fullpath; 766 struct vattr vattr; 767 768 if (vnu->vnu_unit == -1) { 769 vnu->vnu_unit = dkunit(dev); 770 } 771 else if (vnu->vnu_unit < 0) 772 return (EINVAL); 773 774 SLIST_FOREACH(vn, &vn_list, sc_list) { 775 776 if(vn->sc_unit != vnu->vnu_unit) 777 continue; 778 779 found = 1; 780 781 if (vn->sc_flags & VNF_INITED && vn->sc_vp != NULL) { 782 783 /* note: u_cred checked in vnioctl above */ 784 error = VOP_GETATTR(vn->sc_vp, &vattr); 785 if (error) { 786 kprintf("vnget: VOP_GETATTR for %p failed\n", 787 vn->sc_vp); 788 return (error); 789 } 790 791 error = vn_fullpath(curproc, vn->sc_vp, 792 &fullpath, &freepath, 0); 793 794 if (error) { 795 kprintf("vnget: unable to resolve vp %p\n", 796 vn->sc_vp); 797 return(error); 798 } 799 800 strlcpy(vnu->vnu_file, fullpath, 801 sizeof(vnu->vnu_file)); 802 kfree(freepath, M_TEMP); 803 vnu->vnu_dev = vattr.va_fsid; 804 vnu->vnu_ino = vattr.va_fileid; 805 806 } 807 else if (vn->sc_flags & VNF_INITED && vn->sc_object != NULL){ 808 809 strlcpy(vnu->vnu_file, _VN_USER_SWAP, 810 sizeof(vnu->vnu_file)); 811 vnu->vnu_size = vn->sc_size; 812 vnu->vnu_secsize = vn->sc_secsize; 813 814 } else { 815 816 bzero(vnu->vnu_file, sizeof(vnu->vnu_file)); 817 vnu->vnu_dev = 0; 818 vnu->vnu_ino = 0; 819 820 } 821 break; 822 } 823 824 if (!found) 825 return(ENXIO); 826 827 return(0); 828 } 829 830 static int 831 vnsize(struct dev_psize_args *ap) 832 { 833 cdev_t dev = ap->a_head.a_dev; 834 struct vn_softc *vn; 835 836 vn = dev->si_drv1; 837 if (!vn) 838 return(ENXIO); 839 if ((vn->sc_flags & VNF_INITED) == 0) 840 return(ENXIO); 841 ap->a_result = (int64_t)vn->sc_size; 842 return(0); 843 } 844 845 static cdev_t 846 vn_create(int unit, struct devfs_bitmap *bitmap, int clone) 847 { 848 struct vn_softc *vn; 849 struct disk_info info; 850 cdev_t dev, ret_dev; 851 852 vn = vncreatevn(); 853 if (clone) { 854 /* 855 * For clone devices we need to return the top-level cdev, 856 * not the raw dev we'd normally work with. 857 */ 858 dev = disk_create_clone(unit, &vn->sc_disk, &vn_ops); 859 ret_dev = vn->sc_disk.d_cdev; 860 } else { 861 ret_dev = dev = disk_create(unit, &vn->sc_disk, &vn_ops); 862 } 863 vninitvn(vn, dev); 864 865 bzero(&info, sizeof(struct disk_info)); 866 info.d_media_blksize = 512; 867 info.d_media_blocks = 0; 868 info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE; 869 info.d_secpertrack = 32; 870 info.d_nheads = 64; 871 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 872 info.d_ncylinders = 0; 873 disk_setdiskinfo_sync(&vn->sc_disk, &info); 874 875 if (bitmap != NULL) 876 devfs_clone_bitmap_set(bitmap, unit); 877 878 return ret_dev; 879 } 880 881 static int 882 vn_modevent(module_t mod, int type, void *data) 883 { 884 struct vn_softc *vn; 885 static cdev_t dev = NULL; 886 int i; 887 888 switch (type) { 889 case MOD_LOAD: 890 dev = make_autoclone_dev(&vn_ops, &DEVFS_CLONE_BITMAP(vn), vnclone, UID_ROOT, 891 GID_OPERATOR, 0640, "vn"); 892 893 for (i = 0; i < VN_PREALLOCATED_UNITS; i++) { 894 vn_create(i, &DEVFS_CLONE_BITMAP(vn), 0); 895 } 896 break; 897 898 case MOD_UNLOAD: 899 case MOD_SHUTDOWN: 900 while ((vn = SLIST_FIRST(&vn_list)) != NULL) { 901 /* 902 * XXX: no idea if we can return EBUSY even in the 903 * shutdown case, so err on the side of caution 904 * and just rip stuff out on shutdown. 905 */ 906 if (type != MOD_SHUTDOWN) { 907 if (vn->sc_flags & VNF_OPENED) 908 return (EBUSY); 909 } 910 911 disk_destroy(&vn->sc_disk); 912 913 SLIST_REMOVE_HEAD(&vn_list, sc_list); 914 915 if (vn->sc_flags & VNF_INITED) 916 vnclear(vn); 917 918 kfree(vn, M_VN); 919 } 920 destroy_autoclone_dev(dev, &DEVFS_CLONE_BITMAP(vn)); 921 dev_ops_remove_all(&vn_ops); 922 break; 923 default: 924 break; 925 } 926 return 0; 927 } 928 929 DEV_MODULE(vn, vn_modevent, 0); 930