1 /* $OpenBSD: vnd.c,v 1.27 2000/07/05 07:27:12 niklas Exp $ */ 2 /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1988 University of Utah. 6 * Copyright (c) 1990, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 42 * 43 * @(#)vn.c 8.6 (Berkeley) 4/1/94 44 */ 45 46 /* 47 * Vnode disk driver. 48 * 49 * Block/character interface to a vnode. Allows one to treat a file 50 * as a disk (e.g. build a filesystem in it, mount it, etc.). 51 * 52 * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the 53 * vnode or simple VOP_READ/VOP_WRITE. The former is suitable for swapping 54 * as it doesn't distort the local buffer cache. The latter is good for 55 * building disk images as it keeps the cache consistent after the block 56 * device is closed. 57 * 58 * NOTE 2: There is a security issue involved with this driver. 59 * Once mounted all access to the contents of the "mapped" file via 60 * the special file is controlled by the permissions on the special 61 * file, the protection of the mapped file is ignored (effectively, 62 * by using root credentials in all transactions). 63 * 64 * NOTE 3: Doesn't interact with leases, should it? 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <crypto/blf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndioctl.h>

#ifdef DEBUG
int dovndcluster = 1;		/* if 0, vndstrategy ignores VOP_BMAP read-ahead (nra) */
int vnddebug = 0x00;		/* debug trace mask; see VDB_* below */
#define VDB_FOLLOW	0x01	/* trace entry into each driver routine */
#define VDB_INIT	0x02	/* trace configuration (VNDIOCSET/VNDIOCCLR) */
#define VDB_IO		0x04	/* trace individual I/O operations */
#endif

/*
 * disksort() orders on b_cylin; we sort purely by block number
 * (vndstrategy stores b_blkno there), so alias it onto b_resid,
 * which is otherwise unused in the component buffers.
 */
#define	b_cylin	b_resid

/*
 * Minor number encoding: the top bit selects "simple" mode
 * (VOP_READ/VOP_WRITE instead of VOP_BMAP/VOP_STRATEGY); the
 * remaining bits are the usual disk unit/partition encoding.
 */
#define	vndunit(x)	DISKUNIT((x) & 0x7f)
#define	vndsimple(x)	((x) & 0x80)
#define	MAKEVNDDEV(maj, unit, part)	MAKEDISKDEV(maj, unit, part)

/* Device used for reading the on-disk label: the raw partition of this unit. */
#define	VNDLABELDEV(dev) (MAKEVNDDEV(major(dev), vndunit(dev), RAW_PART))

/*
 * Component buffer for the VOP_BMAP/VOP_STRATEGY path: the buf that is
 * handed to the underlying device plus a back-pointer to the original
 * request so vndiodone() can complete it.
 */
struct vndbuf {
	struct buf	vb_buf;		/* buffer sent to the underlying device */
	struct buf	*vb_obp;	/* originating (caller's) buffer */
};

#define	getvndbuf()	\
	((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))
#define putvndbuf(vbp)	\
	free((caddr_t)(vbp), M_DEVBUF)

/* Per-unit state; vnd_softc below points at an array of numvnd of these. */
struct vnd_softc {
	struct device	 sc_dev;
	struct disk	 sc_dk;

	int		 sc_flags;	/* flags (VNF_*, below) */
	size_t		 sc_size;	/* size of vnd in DEV_BSIZE blocks */
	struct vnode	*sc_vp;		/* vnode of the mapped file */
	struct ucred	*sc_cred;	/* credentials used for all later I/O */
	int		 sc_maxactive;	/* max # of active requests */
	struct buf	 sc_tab;	/* transfer queue */
	void		*sc_keyctx;	/* Blowfish key context, or NULL */
};

/* sc_flags */
#define	VNF_ALIVE	0x0001	/* not set anywhere in this file */
#define VNF_INITED	0x0002	/* unit configured via VNDIOCSET */
#define VNF_WANTED	0x0040	/* someone is sleeping on the unit lock */
#define VNF_LOCKED	0x0080	/* unit lock held (see vndlock/vndunlock) */
#define	VNF_LABELLING	0x0100	/* disklabel update in progress */
#define	VNF_WLABEL	0x0200	/* label area writable (DIOCWLABEL) */
#define	VNF_HAVELABEL	0x0400	/* disklabel has been read */
#define VNF_BUSY	0x0800	/* simple-mode I/O in progress (recursion guard) */
#define VNF_SIMPLE	0x1000	/* unit is open via a "simple" minor */

struct vnd_softc *vnd_softc;	/* array of numvnd units; see vndattach() */
int numvnd = 0;

struct dkdriver vnddkdriver = { vndstrategy };

/* called by main() at boot time */
void	vndattach __P((int));

void	vndclear __P((struct vnd_softc *));
void	vndstart __P((struct vnd_softc *));
int	vndsetcred __P((struct vnd_softc *, struct ucred *));
void	vndthrottle __P((struct vnd_softc *, struct vnode *));
void	vndiodone __P((struct buf *));
void	vndshutdown __P((void));
void	vndgetdisklabel __P((dev_t, struct vnd_softc *));
void	vndencrypt __P((struct vnd_softc *, caddr_t, size_t, daddr_t, int));

static	int vndlock __P((struct vnd_softc *));
static	void vndunlock __P((struct vnd_softc *));

/*
 * Encrypt (encrypt != 0) or decrypt (encrypt == 0) `size' bytes at `addr'
 * in place with the unit's Blowfish key, one DEV_BSIZE sector at a time.
 * The CBC IV for each sector is derived from its disk block number
 * (starting at `off'), itself scrambled with Blowfish-ECB so that equal
 * sectors at different offsets don't yield related ciphertext.
 *
 * Note: the loop runs size/bsize times, so a partial trailing sector
 * would be left untouched; callers pass whole-sector byte counts.
 */
void
vndencrypt(vnd, addr, size, off, encrypt)
	struct vnd_softc *vnd;
	caddr_t addr;
	size_t size;
	daddr_t off;
	int encrypt;
{
	int i, bsize;
	u_char iv[8];

	bsize = dbtob(1);
	for (i = 0; i < size/bsize; i++) {
		bzero(iv, sizeof(iv));
		/* IV = block number (remaining IV bytes stay zero) */
		bcopy((u_char *)&off, iv, sizeof(off));
		blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv));
		if (encrypt)
			blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize);
		else
			blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize);

		addr += bsize;
		off++;
	}
}

/*
 * Allocate and zero the softc array for `num' units.  Called once at
 * boot; on allocation failure the driver stays unconfigured (numvnd
 * remains 0, so all opens fail with ENXIO).
 */
void
vndattach(num)
	int num;
{
	char *mem;
	register u_long size;

	if (num <= 0)
		return;
	size = num * sizeof(struct vnd_softc);
	mem = malloc(size, M_DEVBUF, M_NOWAIT);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	bzero(mem, size);
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;
}

/*
 * Open a partition of a vnd unit.  Reads the disklabel on the first open
 * of a configured unit, rejects mixing "simple" and bmap minors while any
 * partition is open, verifies the partition exists, and records the open
 * in the char/block open masks so the unit can't be unconfigured while
 * in use.  Serialized against configuration via vndlock().
 */
int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	/* First open of a configured unit: fetch the disklabel. */
	if ((sc->sc_flags & VNF_INITED) &&
	    (sc->sc_flags & VNF_HAVELABEL) == 0) {
		sc->sc_flags |= VNF_HAVELABEL;
		vndgetdisklabel(dev, sc);
	}

	part = DISKPART(dev);
	pmask = 1 << part;

	/*
	 * If any partition is open, all succeeding openings must be of the
	 * same type.
	 */
	if (sc->sc_dk.dk_openmask) {
		if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
		    (vndsimple(dev) != 0)) {
			error = EBUSY;
			goto bad;
		}
	} else if (vndsimple(dev))
		sc->sc_flags |= VNF_SIMPLE;
	else
		sc->sc_flags &= ~VNF_SIMPLE;

	/* Check that the partition exists. */
	if (part != RAW_PART &&
	    ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
	    part >= sc->sc_dk.dk_label->d_npartitions ||
	    sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
		error = ENXIO;
		goto bad;
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dk.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	vndunlock(sc);
	return (0);
bad:
	vndunlock(sc);
	return (error);
}

/*
 * Load the label information on the named device.
 *
 * Fabricates a plausible default label (fixed 512-byte sectors, fake
 * geometry, a single RAW_PART spanning the file), then lets
 * readdisklabel() overwrite it with any real label found on the
 * "disk".  Errors from readdisklabel() are silently ignored and the
 * fabricated label is kept.
 */
void
vndgetdisklabel(dev, sc)
	dev_t dev;
	struct vnd_softc *sc;
{
	struct disklabel *lp = sc->sc_dk.dk_label;
	char *errstring;

	bzero(lp, sizeof(struct disklabel));
	bzero(sc->sc_dk.dk_cpulabel, sizeof(struct cpu_disklabel));

	lp->d_secsize = 512;
	lp->d_ntracks = 1;
	lp->d_nsectors = 100;
	lp->d_ncylinders = sc->sc_size / 100;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
	if (lp->d_secpercyl == 0) {
		lp->d_secpercyl = 100;
		/* as long as it's not 0 - readdisklabel divides by it (?) */
	}

	strncpy(lp->d_typename, "vnd device", 16);
	lp->d_type = DTYPE_SCSI;
	strncpy(lp->d_packname, "fictitious", 16);
	lp->d_secperunit = sc->sc_size;
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size =
	    lp->d_secperunit * (lp->d_secsize / DEV_BSIZE);
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);

	/*
	 * Call the generic disklabel extraction routine
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp,
	    sc->sc_dk.dk_cpulabel, 0);
	if (errstring) {
		/*printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);*/
		return;
	}
}

/*
 * Close a partition: clear its bit in the char/block open masks so that
 * VNDIOCCLR can eventually unconfigure the unit once nothing is open.
 */
int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dk.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	vndunlock(sc);
	return (0);
}

/*
 * Two methods are used, the traditional buffercache bypassing and the
 * newer, cache-coherent on unmount, one.
 *
 * Former method:
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 * Note that this driver can only be used for swapping over NFS on the hp
 * since nfs_strategy on the vax cannot handle u-areas and page tables.
 *
 * Latter method:
 * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
 * access the underlying file.  Things are complicated by the fact that we
 * might get recursively called due to buffer flushes.  In those cases we
 * queue one write.
 */
void
vndstrategy(bp)
	register struct buf *bp;
{
	int unit = vndunit(bp->b_dev);
	register struct vnd_softc *vnd = &vnd_softc[unit];
	register struct vndbuf *nbp;
	register int bn, bsize;
	register caddr_t addr;
	register size_t resid;
	int sz, flags, error, s;
	struct iovec aiov;
	struct uio auio;
	struct proc *p = curproc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		biodone(bp);
		return;
	}

	bn = bp->b_blkno;
	sz = howmany(bp->b_bcount, DEV_BSIZE);
	bp->b_resid = bp->b_bcount;
	if (bn < 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(bp);
		return;
	}
	/* Clip the transfer against the partition (raw partition is exempt). */
	if (DISKPART(bp->b_dev) != RAW_PART &&
	    bounds_check_with_label(bp, vnd->sc_dk.dk_label,
	    vnd->sc_dk.dk_cpulabel, 1) == 0) {
		biodone(bp);
		return;
	}

	/* No bypassing of buffer cache?  (the VOP_READ/VOP_WRITE method) */
	if (vndsimple(bp->b_dev)) {
		/*
		 * In order to avoid "locking against myself" panics, we
		 * must be prepared to queue operations during another I/O
		 * operation.  This situation comes up where a dirty cache
		 * buffer needs to be flushed in order to provide the current
		 * operation with a fresh buffer.
		 *
		 * XXX do we really need to protect stuff relating to this with
		 * splbio?
		 */
		if (vnd->sc_flags & VNF_BUSY) {
			/* Recursive entry: push onto the queue and bail. */
			s = splbio();
			bp->b_actf = vnd->sc_tab.b_actf;
			vnd->sc_tab.b_actf = bp;
			vnd->sc_tab.b_active++;
			splx(s);
			return;
		}

		/* Loop until all queued requests are handled. */
		for (;;) {
			int part = DISKPART(bp->b_dev);
			int off = vnd->sc_dk.dk_label->d_partitions[part].p_offset;

			aiov.iov_base = bp->b_data;
			auio.uio_resid = aiov.iov_len = bp->b_bcount;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = dbtob(bp->b_blkno + off);
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_procp = NULL;

			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
			vnd->sc_flags |= VNF_BUSY;
			if (bp->b_flags & B_READ) {
				auio.uio_rw = UIO_READ;
				bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
				    vnd->sc_cred);
				/* Decrypt what we just read, if keyed. */
				if (vnd->sc_keyctx)
					vndencrypt(vnd, bp->b_data,
					    bp->b_bcount,
					    bp->b_blkno, 0);
			} else {
				/* Encrypt in place before writing out. */
				if (vnd->sc_keyctx)
					vndencrypt(vnd, bp->b_data,
					    bp->b_bcount,
					    bp->b_blkno, 1);
				auio.uio_rw = UIO_WRITE;
				bp->b_error = VOP_WRITE(vnd->sc_vp, &auio, 0,
				    vnd->sc_cred);
				/* Data in buffer cache needs to be in clear */
				if (vnd->sc_keyctx)
					vndencrypt(vnd, bp->b_data,
					    bp->b_bcount,
					    bp->b_blkno, 0);
			}
			vnd->sc_flags &= ~VNF_BUSY;
			VOP_UNLOCK(vnd->sc_vp, 0, p);
			if (bp->b_error)
				bp->b_flags |= B_ERROR;
			bp->b_resid = auio.uio_resid;
			biodone(bp);

			/* If nothing more is queued, we are done. */
			if (!vnd->sc_tab.b_active)
				return;

			/*
			 * Dequeue now since lower level strategy
			 * routine might queue using same links.
			 */
			s = splbio();
			bp = vnd->sc_tab.b_actf;
			vnd->sc_tab.b_actf = bp->b_actf;
			vnd->sc_tab.b_active--;
			splx(s);
		}
	}

	/* The old-style buffercache bypassing method. */
	bn += vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)].p_offset;
	bn = dbtob(bn);
	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = bp->b_flags | B_CALL;
	/* Split the request at filesystem-block boundaries. */
	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vnode *vp;
		daddr_t nbn;
		int off, s, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0, p);
		/* nbn == -1 means a hole in the file. */
		if (error == 0 && (long)nbn == -1)
			error = EIO;
#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		/* Piece size: up to the next block boundary, or nra+1
		 * contiguous blocks when BMAP says they follow on disk. */
		if ((off = bn % bsize) != 0)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn %x/%x sz %x\n",
			    vnd->sc_vp, vp, bn, nbn, sz);
#endif

		nbp = getvndbuf();
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			nbp->vb_buf.b_dev = vp->v_rdev;
		else
			nbp->vb_buf.b_dev = NODEV;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = vp;
		nbp->vb_buf.b_rcred = vnd->sc_cred;	/* XXX crdup? */
		nbp->vb_buf.b_wcred = vnd->sc_cred;	/* XXX crdup? */
		nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
		nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
		nbp->vb_buf.b_validoff = bp->b_validoff;
		nbp->vb_buf.b_validend = bp->b_validend;
		LIST_INIT(&nbp->vb_buf.b_dep);

		/* save a reference to the old buffer */
		nbp->vb_obp = bp;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we deal with this after the nbp allocation.
		 * This ensures that we properly clean up any operations
		 * that we have already fired off.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			nbp->vb_buf.b_error = error;
			nbp->vb_buf.b_flags |= B_ERROR;
			bp->b_resid -= (resid - sz);
			biodone(&nbp->vb_buf);
			return;
		}
		/*
		 * Just sort by block number
		 */
		nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno;
		s = splbio();
		disksort(&vnd->sc_tab, &nbp->vb_buf);
		if (vnd->sc_tab.b_active < vnd->sc_maxactive) {
			vnd->sc_tab.b_active++;
			vndstart(vnd);
		}
		splx(s);

		bn += sz;
		addr += sz;
	}
}

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 *
 * Caller holds splbio and has already accounted the request in
 * sc_tab.b_active; the queue must be non-empty.
 */
void
vndstart(vnd)
	register struct vnd_softc *vnd;
{
	register struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */
	bp = vnd->sc_tab.b_actf;
	vnd->sc_tab.b_actf = bp->b_actf;
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n",
		    vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
		    bp->b_bcount);
#endif

	/* Instrumentation. */
	disk_busy(&vnd->sc_dk);

	if ((bp->b_flags & B_READ) == 0)
		bp->b_vp->v_numoutput++;
	VOP_STRATEGY(bp);
}

/*
 * Completion handler (b_iodone) for a component buffer submitted by
 * vndstrategy's bmap path.  Propagates the error, updates the original
 * buffer's residual, frees the vndbuf, completes the original request
 * when all its pieces are done, and keeps the queue draining.
 */
void
vndiodone(bp)
	struct buf *bp;
{
	/* A vndbuf starts with its struct buf, so this cast is safe. */
	register struct vndbuf *vbp = (struct vndbuf *) bp;
	register struct buf *pbp = vbp->vb_obp;
	register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	long count;
	int s;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n",
		    vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
		    vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);
#endif

	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %p error %d\n", vbp,
			    vbp->vb_buf.b_error);
#endif
		pbp->b_flags |= B_ERROR;
		/*
		 * NOTE(review): biowait() called from an iodone handler —
		 * presumably it returns immediately here (the buffer is
		 * already complete) and is used just to extract the errno;
		 * confirm against the buffer-cache implementation.
		 */
		pbp->b_error = biowait(&vbp->vb_buf);
	}
	pbp->b_resid -= vbp->vb_buf.b_bcount;
	putvndbuf(vbp);
	count = pbp->b_bcount - pbp->b_resid;
	if (pbp->b_resid == 0) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: pbp %p iodone\n", pbp);
#endif
		biodone(pbp);
	}
	/* Launch the next queued request, or retire this slot. */
	if (vnd->sc_tab.b_active) {
		disk_unbusy(&vnd->sc_dk, count);
		if (vnd->sc_tab.b_actf)
			vndstart(vnd);
		else
			vnd->sc_tab.b_active--;
	}
	splx(s);
}

/*
 * Character-device read: funnel through physio() onto vndstrategy().
 */
/* ARGSUSED */
int
vndread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/*
 * Character-device write: funnel through physio() onto vndstrategy().
 */
/* ARGSUSED */
int
vndwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

/*
 * Ioctl entry point (root only).  VNDIOCSET binds a file to the unit
 * (opening it, capturing credentials, optionally installing a Blowfish
 * key, and attaching the disk); VNDIOCCLR tears it down; the DIOC*
 * commands provide the standard disklabel interface.
 */
/* ARGSUSED */
int
vndioctl(dev, cmd, addr, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t addr;
	int flag;
	struct proc *p;
{
	int unit = vndunit(dev);
	register struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask, s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
		    dev, cmd, addr, flag, p, unit);
#endif
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)addr;
	switch (cmd) {

	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);
		/* Encryption is only supported on "simple" minors. */
		if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen)
			return (EINVAL);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			vndunlock(vnd);
			return (error);
		}
		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0, p);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}
		VOP_UNLOCK(nd.ni_vp, 0, p);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */
		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}

		/* Optional encryption key: copy in, expand, then wipe. */
		if (vio->vnd_keylen) {
			char *key;

			key = malloc(vio->vnd_keylen, M_TEMP, M_WAITOK);
			if ((error = copyin((caddr_t)vio->vnd_key, key,
			    vio->vnd_keylen)) != 0) {
				(void) vn_close(nd.ni_vp, FREAD|FWRITE,
				    p->p_ucred, p);
				vndunlock(vnd);
				return (error);
			}

			vnd->sc_keyctx = malloc(sizeof(blf_ctx), M_DEVBUF,
			    M_WAITOK);
			blf_key(vnd->sc_keyctx, key, vio->vnd_keylen);
			bzero(key, vio->vnd_keylen);
			free((caddr_t)key, M_TEMP);
		} else
			vnd->sc_keyctx = NULL;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size %x\n",
			    vnd->sc_vp, vnd->sc_size);
#endif

		/* Attach the disk. */
		bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname));
		sprintf(vnd->sc_dev.dv_xname, "vnd%d", unit);
		vnd->sc_dk.dk_driver = &vnddkdriver;
		vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname;
		disk_attach(&vnd->sc_dk);
		dk_establish(&vnd->sc_dk, &vnd->sc_dev);

		vndunlock(vnd);

		break;

	case VNDIOCCLR:
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((vnd->sc_dk.dk_openmask & ~pmask) ||
		    ((vnd->sc_dk.dk_bopenmask & pmask) &&
		    (vnd->sc_dk.dk_copenmask & pmask))) {
			vndunlock(vnd);
			return (EBUSY);
		}

		vndclear(vnd);
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif
		/*
		 * Free crypto key.
		 * NOTE(review): vnd_keylen here comes from the caller's
		 * VNDIOCCLR argument, not the length supplied at SET time,
		 * so this bzero() may wipe the wrong number of bytes (or
		 * read past the context).  sizeof(blf_ctx) looks like the
		 * intended size — confirm before changing.
		 */
		if (vnd->sc_keyctx) {
			bzero(vnd->sc_keyctx, vio->vnd_keylen);
			free((caddr_t)vnd->sc_keyctx, M_DEVBUF);
		}

		/* Detach the disk. */
		disk_detach(&vnd->sc_dk);

		/* This must be atomic. */
		s = splhigh();
		vndunlock(vnd);
		bzero(vnd, sizeof(struct vnd_softc));
		splx(s);
		break;

	case DIOCGDINFO:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		*(struct disklabel *)addr = *(vnd->sc_dk.dk_label);
		return (0);

	case DIOCGPART:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label;
		((struct partinfo *)addr)->part =
		    &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)];
		return (0);

	case DIOCWDINFO:
	case DIOCSDINFO:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		if ((flag & FWRITE) == 0)
			return (EBADF);

		if ((error = vndlock(vnd)) != 0)
			return (error);
		vnd->sc_flags |= VNF_LABELLING;

		error = setdisklabel(vnd->sc_dk.dk_label,
		    (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0,
		    vnd->sc_dk.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO)
				error = writedisklabel(MAKEDISKDEV(major(dev),
				    DISKUNIT(dev), RAW_PART),
				    vndstrategy, vnd->sc_dk.dk_label,
				    vnd->sc_dk.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;
		vndunlock(vnd);
		return (error);

	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
		if (*(int *)addr)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		return (0);

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
int
vndsetcred(vnd, cred)
	register struct vnd_softc *vnd;
	struct ucred *cred;
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;
	struct proc *p = curproc;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	VOP_UNLOCK(vnd->sc_vp, 0, p);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type: keep it low (2) over NFS so we don't
 * flood the server, 8 otherwise.
 */
void
vndthrottle(vnd, vp)
	register struct vnd_softc *vnd;
	struct vnode *vp;
{
#ifdef NFSCLIENT
	extern int (**nfsv2_vnodeop_p) __P((void *));

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

/*
 * System shutdown hook: release every configured unit so the backing
 * files are closed cleanly.
 */
void
vndshutdown()
{
	register struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

/*
 * Unconfigure a unit: close the backing vnode, drop the duplicated
 * credentials, and reset the softc fields.  Panics if the unit has no
 * vnode (caller must only pass configured units).
 */
void
vndclear(vnd)
	register struct vnd_softc *vnd;
{
	register struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	vnd->sc_flags &= ~VNF_INITED;
	if (vp == (struct vnode *)0)
		panic("vndioctl: null vp");
	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

/*
 * Return the unit's size in DEV_BSIZE blocks, or -1 if the unit is
 * out of range or unconfigured.
 */
int
vndsize(dev)
	dev_t dev;
{
	int unit = vndunit(dev);
	register struct vnd_softc *vnd = &vnd_softc[unit];

	if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
		return (-1);
	return (vnd->sc_size);
}

/*
 * Crash-dump entry point; dumping to a vnd is not supported.
 */
int
vnddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	/* Not implemented. */
	return (ENXIO);
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(sc)
	struct vnd_softc *sc;
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		/* PCATCH: a signal aborts the wait and the lock attempt. */
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(sc)
	struct vnd_softc *sc;
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}