1 /* $NetBSD: vnd.c,v 1.89 2002/11/16 08:10:48 mrg Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Copyright (c) 1988 University of Utah. 41 * Copyright (c) 1990, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * This code is derived from software contributed to Berkeley by 45 * the Systems Programming Group of the University of Utah Computer 46 * Science Department. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Berkeley and its contributors. 60 * 4. Neither the name of the University nor the names of its contributors 61 * may be used to endorse or promote products derived from this software 62 * without specific prior written permission. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74 * SUCH DAMAGE. 75 * 76 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 77 * 78 * @(#)vn.c 8.9 (Berkeley) 5/14/95 79 */ 80 81 /* 82 * Vnode disk driver. 83 * 84 * Block/character interface to a vnode. Allows one to treat a file 85 * as a disk (e.g. build a filesystem in it, mount it, etc.). 86 * 87 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 88 * instead of a simple VOP_RDWR. We do this to avoid distorting the 89 * local buffer cache. 90 * 91 * NOTE 2: There is a security issue involved with this driver. 92 * Once mounted all access to the contents of the "mapped" file via 93 * the special file is controlled by the permissions on the special 94 * file, the protection of the mapped file is ignored (effectively, 95 * by using root credentials in all transactions). 96 * 97 * NOTE 3: Doesn't interact with leases, should it? 98 */ 99 100 #include <sys/cdefs.h> 101 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.89 2002/11/16 08:10:48 mrg Exp $"); 102 103 #if defined(_KERNEL_OPT) 104 #include "fs_nfs.h" 105 #endif 106 107 #include <sys/param.h> 108 #include <sys/systm.h> 109 #include <sys/namei.h> 110 #include <sys/proc.h> 111 #include <sys/errno.h> 112 #include <sys/buf.h> 113 #include <sys/malloc.h> 114 #include <sys/ioctl.h> 115 #include <sys/disklabel.h> 116 #include <sys/device.h> 117 #include <sys/disk.h> 118 #include <sys/stat.h> 119 #include <sys/mount.h> 120 #include <sys/vnode.h> 121 #include <sys/file.h> 122 #include <sys/uio.h> 123 #include <sys/conf.h> 124 125 #include <miscfs/specfs/specdev.h> 126 127 #include <dev/vndvar.h> 128 129 #if defined(VNDDEBUG) && !defined(DEBUG) 130 #define DEBUG 131 #endif 132 133 #ifdef DEBUG 134 int dovndcluster = 1; 135 #define VDB_FOLLOW 0x01 136 #define VDB_INIT 0x02 137 #define VDB_IO 0x04 138 #define VDB_LABEL 0x08 139 int vnddebug = 0x00; 140 #endif 141 142 #define vndunit(x) DISKUNIT(x) 143 144 struct vndxfer { 145 struct buf *vx_bp; /* Pointer to parent buffer */ 146 int vx_error; 147 int vx_pending; /* # of pending aux buffers */ 148 int vx_flags; 149 #define VX_BUSY 1 150 }; 151 152 struct vndbuf { 153 struct buf vb_buf; 154 struct vndxfer *vb_xfer; 155 }; 156 157 #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_NOWAIT) 158 #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 159 160 #define VND_GETBUF(vnd) pool_get(&(vnd)->sc_vbpool, PR_NOWAIT) 161 #define VND_PUTBUF(vnd, vb) pool_put(&(vnd)->sc_vbpool, (vb)) 162 163 struct vnd_softc *vnd_softc; 164 int numvnd = 0; 165 166 #define VNDLABELDEV(dev) \ 167 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 168 169 /* called by main() at boot time (XXX: and the LKM driver) */ 170 void vndattach __P((int)); 171 int vnddetach __P((void)); 172 173 void vndclear __P((struct vnd_softc *)); 174 void vndstart __P((struct vnd_softc *)); 175 int vndsetcred __P((struct vnd_softc *, struct ucred *)); 176 void vndthrottle __P((struct vnd_softc *, struct vnode *)); 177 void vndiodone __P((struct buf *)); 178 void vndshutdown __P((void)); 179 180 void vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *)); 181 void vndgetdisklabel __P((dev_t)); 182 183 static int vndlock __P((struct vnd_softc *)); 184 static void vndunlock __P((struct vnd_softc *)); 185 186 dev_type_open(vndopen); 187 dev_type_close(vndclose); 188 dev_type_read(vndread); 189 dev_type_write(vndwrite); 190 dev_type_ioctl(vndioctl); 191 dev_type_strategy(vndstrategy); 192 dev_type_dump(vnddump); 193 dev_type_size(vndsize); 194 195 const struct bdevsw vnd_bdevsw = { 196 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 197 }; 198 199 const struct cdevsw vnd_cdevsw = { 200 vndopen, vndclose, vndread, vndwrite, vndioctl, 201 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 202 }; 203 204 int vndattached = 0; 205 206 void 207 vndattach(num) 208 int num; 209 { 210 int i; 211 char *mem; 212 213 if (vndattached) 214 return; 215 vndattached = 1; 216 if (num <= 0) 217 return; 218 i = num * sizeof(struct vnd_softc); 219 mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO); 220 if (mem == NULL) { 221 printf("WARNING: no memory for vnode disks\n"); 222 return; 223 } 224 vnd_softc = (struct vnd_softc *)mem; 225 numvnd = num; 226 227 for (i = 0; i < numvnd; i++) 228 bufq_alloc(&vnd_softc[i].sc_tab, 229 BUFQ_DISKSORT|BUFQ_SORT_RAWBLOCK); 230 } 231 232 int 233 vnddetach() 234 { 235 int i; 236 237 /* First check we aren't in use. */ 238 for (i = 0; i < numvnd; i++) 239 if (vnd_softc[i].sc_flags & VNF_INITED) 240 return (EBUSY); 241 242 for (i = 0; i < numvnd; i++) 243 bufq_free(&vnd_softc[i].sc_tab); 244 245 free(vnd_softc, M_DEVBUF); 246 vndattached = 0; 247 248 return (0); 249 } 250 251 int 252 vndopen(dev, flags, mode, p) 253 dev_t dev; 254 int flags, mode; 255 struct proc *p; 256 { 257 int unit = vndunit(dev); 258 struct vnd_softc *sc; 259 int error = 0, part, pmask; 260 struct disklabel *lp; 261 262 #ifdef DEBUG 263 if (vnddebug & VDB_FOLLOW) 264 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 265 #endif 266 if (unit >= numvnd) 267 return (ENXIO); 268 sc = &vnd_softc[unit]; 269 270 if ((error = vndlock(sc)) != 0) 271 return (error); 272 273 lp = sc->sc_dkdev.dk_label; 274 275 part = DISKPART(dev); 276 pmask = (1 << part); 277 278 /* 279 * If we're initialized, check to see if there are any other 280 * open partitions. If not, then it's safe to update the 281 * in-core disklabel. 282 */ 283 if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0)) 284 vndgetdisklabel(dev); 285 286 /* Check that the partitions exists. */ 287 if (part != RAW_PART) { 288 if (((sc->sc_flags & VNF_INITED) == 0) || 289 ((part >= lp->d_npartitions) || 290 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 291 error = ENXIO; 292 goto done; 293 } 294 } 295 296 /* Prevent our unit from being unconfigured while open. */ 297 switch (mode) { 298 case S_IFCHR: 299 sc->sc_dkdev.dk_copenmask |= pmask; 300 break; 301 302 case S_IFBLK: 303 sc->sc_dkdev.dk_bopenmask |= pmask; 304 break; 305 } 306 sc->sc_dkdev.dk_openmask = 307 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 308 309 done: 310 vndunlock(sc); 311 return (error); 312 } 313 314 int 315 vndclose(dev, flags, mode, p) 316 dev_t dev; 317 int flags, mode; 318 struct proc *p; 319 { 320 int unit = vndunit(dev); 321 struct vnd_softc *sc; 322 int error = 0, part; 323 324 #ifdef DEBUG 325 if (vnddebug & VDB_FOLLOW) 326 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 327 #endif 328 329 if (unit >= numvnd) 330 return (ENXIO); 331 sc = &vnd_softc[unit]; 332 333 if ((error = vndlock(sc)) != 0) 334 return (error); 335 336 part = DISKPART(dev); 337 338 /* ...that much closer to allowing unconfiguration... */ 339 switch (mode) { 340 case S_IFCHR: 341 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 342 break; 343 344 case S_IFBLK: 345 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 346 break; 347 } 348 sc->sc_dkdev.dk_openmask = 349 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 350 351 vndunlock(sc); 352 return (0); 353 } 354 355 /* 356 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 357 */ 358 void 359 vndstrategy(bp) 360 struct buf *bp; 361 { 362 int unit = vndunit(bp->b_dev); 363 struct vnd_softc *vnd = &vnd_softc[unit]; 364 struct vndxfer *vnx; 365 int s, bsize, resid; 366 off_t bn; 367 caddr_t addr; 368 int sz, flags, error, wlabel; 369 struct disklabel *lp; 370 struct partition *pp; 371 372 #ifdef DEBUG 373 if (vnddebug & VDB_FOLLOW) 374 printf("vndstrategy(%p): unit %d\n", bp, unit); 375 #endif 376 if ((vnd->sc_flags & VNF_INITED) == 0) { 377 bp->b_error = ENXIO; 378 bp->b_flags |= B_ERROR; 379 goto done; 380 } 381 382 /* If it's a nil transfer, wake up the top half now. */ 383 if (bp->b_bcount == 0) 384 goto done; 385 386 lp = vnd->sc_dkdev.dk_label; 387 388 /* 389 * The transfer must be a whole number of blocks. 390 */ 391 if ((bp->b_bcount % lp->d_secsize) != 0) { 392 bp->b_error = EINVAL; 393 bp->b_flags |= B_ERROR; 394 goto done; 395 } 396 397 /* 398 * Do bounds checking and adjust transfer. If there's an error, 399 * the bounds check will flag that for us. 400 */ 401 wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING); 402 if (DISKPART(bp->b_dev) != RAW_PART) 403 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 404 goto done; 405 406 bp->b_resid = bp->b_bcount; 407 408 /* 409 * Put the block number in terms of the logical blocksize 410 * of the "device". 411 */ 412 bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 413 414 /* 415 * Translate the partition-relative block number to an absolute. 416 */ 417 if (DISKPART(bp->b_dev) != RAW_PART) { 418 pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 419 bn += pp->p_offset; 420 } 421 422 /* ...and convert to a byte offset within the file. */ 423 bn *= lp->d_secsize; 424 425 if (vnd->sc_vp->v_mount == NULL) { 426 bp->b_error = ENXIO; 427 bp->b_flags |= B_ERROR; 428 goto done; 429 } 430 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 431 addr = bp->b_data; 432 flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL; 433 434 /* Allocate a header for this transfer and link it to the buffer */ 435 s = splbio(); 436 vnx = VND_GETXFER(vnd); 437 splx(s); 438 vnx->vx_flags = VX_BUSY; 439 vnx->vx_error = 0; 440 vnx->vx_pending = 0; 441 vnx->vx_bp = bp; 442 443 for (resid = bp->b_resid; resid; resid -= sz) { 444 struct vndbuf *nbp; 445 struct vnode *vp; 446 daddr_t nbn; 447 int off, nra; 448 449 nra = 0; 450 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 451 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 452 VOP_UNLOCK(vnd->sc_vp, 0); 453 454 if (error == 0 && (long)nbn == -1) 455 error = EIO; 456 457 /* 458 * If there was an error or a hole in the file...punt. 459 * Note that we may have to wait for any operations 460 * that we have already fired off before releasing 461 * the buffer. 462 * 463 * XXX we could deal with holes here but it would be 464 * a hassle (in the write case). 465 */ 466 if (error) { 467 s = splbio(); 468 vnx->vx_error = error; 469 goto out; 470 } 471 472 #ifdef DEBUG 473 if (!dovndcluster) 474 nra = 0; 475 #endif 476 477 if ((off = bn % bsize) != 0) 478 sz = bsize - off; 479 else 480 sz = (1 + nra) * bsize; 481 if (resid < sz) 482 sz = resid; 483 #ifdef DEBUG 484 if (vnddebug & VDB_IO) 485 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%x sz 0x%x\n", 486 vnd->sc_vp, vp, (long long)bn, nbn, sz); 487 #endif 488 489 s = splbio(); 490 nbp = VND_GETBUF(vnd); 491 splx(s); 492 nbp->vb_buf.b_flags = flags; 493 nbp->vb_buf.b_bcount = sz; 494 nbp->vb_buf.b_bufsize = round_page((ulong)addr + sz) 495 - trunc_page((ulong) addr); 496 nbp->vb_buf.b_error = 0; 497 nbp->vb_buf.b_data = addr; 498 nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off); 499 nbp->vb_buf.b_proc = bp->b_proc; 500 nbp->vb_buf.b_iodone = vndiodone; 501 nbp->vb_buf.b_vp = NULLVP; 502 LIST_INIT(&nbp->vb_buf.b_dep); 503 504 nbp->vb_xfer = vnx; 505 506 /* 507 * Just sort by block number 508 */ 509 s = splbio(); 510 if (vnx->vx_error != 0) { 511 VND_PUTBUF(vnd, nbp); 512 goto out; 513 } 514 vnx->vx_pending++; 515 bgetvp(vp, &nbp->vb_buf); 516 BUFQ_PUT(&vnd->sc_tab, &nbp->vb_buf); 517 vndstart(vnd); 518 splx(s); 519 bn += sz; 520 addr += sz; 521 } 522 523 s = splbio(); 524 525 out: /* Arrive here at splbio */ 526 vnx->vx_flags &= ~VX_BUSY; 527 if (vnx->vx_pending == 0) { 528 if (vnx->vx_error != 0) { 529 bp->b_error = vnx->vx_error; 530 bp->b_flags |= B_ERROR; 531 } 532 VND_PUTXFER(vnd, vnx); 533 biodone(bp); 534 } 535 splx(s); 536 return; 537 538 done: 539 biodone(bp); 540 } 541 542 /* 543 * Feed requests sequentially. 544 * We do it this way to keep from flooding NFS servers if we are connected 545 * to an NFS file. This places the burden on the client rather than the 546 * server. 547 */ 548 void 549 vndstart(vnd) 550 struct vnd_softc *vnd; 551 { 552 struct buf *bp; 553 554 /* 555 * Dequeue now since lower level strategy routine might 556 * queue using same links 557 */ 558 559 if ((vnd->sc_flags & VNF_BUSY) != 0) 560 return; 561 562 vnd->sc_flags |= VNF_BUSY; 563 564 while (vnd->sc_active < vnd->sc_maxactive) { 565 bp = BUFQ_GET(&vnd->sc_tab); 566 if (bp == NULL) 567 break; 568 vnd->sc_active++; 569 #ifdef DEBUG 570 if (vnddebug & VDB_IO) 571 printf("vndstart(%ld): bp %p vp %p blkno 0x%x" 572 " flags %lx addr %p cnt 0x%lx\n", 573 (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno, 574 bp->b_flags, bp->b_data, bp->b_bcount); 575 #endif 576 577 /* Instrumentation. */ 578 disk_busy(&vnd->sc_dkdev); 579 580 if ((bp->b_flags & B_READ) == 0) 581 bp->b_vp->v_numoutput++; 582 VOP_STRATEGY(bp); 583 } 584 vnd->sc_flags &= ~VNF_BUSY; 585 } 586 587 void 588 vndiodone(bp) 589 struct buf *bp; 590 { 591 struct vndbuf *vbp = (struct vndbuf *) bp; 592 struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; 593 struct buf *pbp = vnx->vx_bp; 594 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 595 int s, resid; 596 597 s = splbio(); 598 #ifdef DEBUG 599 if (vnddebug & VDB_IO) 600 printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n", 601 (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp, 602 vbp->vb_buf.b_blkno, vbp->vb_buf.b_data, 603 vbp->vb_buf.b_bcount); 604 #endif 605 606 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 607 pbp->b_resid -= resid; 608 disk_unbusy(&vnd->sc_dkdev, resid, (pbp->b_flags & B_READ)); 609 vnx->vx_pending--; 610 611 if (vbp->vb_buf.b_error) { 612 #ifdef DEBUG 613 if (vnddebug & VDB_IO) 614 printf("vndiodone: vbp %p error %d\n", vbp, 615 vbp->vb_buf.b_error); 616 #endif 617 vnx->vx_error = vbp->vb_buf.b_error; 618 } 619 620 if (vbp->vb_buf.b_vp != NULLVP) 621 brelvp(&vbp->vb_buf); 622 623 VND_PUTBUF(vnd, vbp); 624 625 /* 626 * Wrap up this transaction if it has run to completion or, in 627 * case of an error, when all auxiliary buffers have returned. 628 */ 629 if (vnx->vx_error != 0) { 630 pbp->b_flags |= B_ERROR; 631 pbp->b_error = vnx->vx_error; 632 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 633 634 #ifdef DEBUG 635 if (vnddebug & VDB_IO) 636 printf("vndiodone: pbp %p iodone: error %d\n", 637 pbp, vnx->vx_error); 638 #endif 639 VND_PUTXFER(vnd, vnx); 640 biodone(pbp); 641 } 642 } else if (pbp->b_resid == 0) { 643 644 #ifdef DIAGNOSTIC 645 if (vnx->vx_pending != 0) 646 panic("vndiodone: vnx pending: %d", vnx->vx_pending); 647 #endif 648 649 if ((vnx->vx_flags & VX_BUSY) == 0) { 650 #ifdef DEBUG 651 if (vnddebug & VDB_IO) 652 printf("vndiodone: pbp %p iodone\n", pbp); 653 #endif 654 VND_PUTXFER(vnd, vnx); 655 biodone(pbp); 656 } 657 } 658 659 vnd->sc_active--; 660 vndstart(vnd); 661 splx(s); 662 } 663 664 /* ARGSUSED */ 665 int 666 vndread(dev, uio, flags) 667 dev_t dev; 668 struct uio *uio; 669 int flags; 670 { 671 int unit = vndunit(dev); 672 struct vnd_softc *sc; 673 674 #ifdef DEBUG 675 if (vnddebug & VDB_FOLLOW) 676 printf("vndread(0x%x, %p)\n", dev, uio); 677 #endif 678 679 if (unit >= numvnd) 680 return (ENXIO); 681 sc = &vnd_softc[unit]; 682 683 if ((sc->sc_flags & VNF_INITED) == 0) 684 return (ENXIO); 685 686 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 687 } 688 689 /* ARGSUSED */ 690 int 691 vndwrite(dev, uio, flags) 692 dev_t dev; 693 struct uio *uio; 694 int flags; 695 { 696 int unit = vndunit(dev); 697 struct vnd_softc *sc; 698 699 #ifdef DEBUG 700 if (vnddebug & VDB_FOLLOW) 701 printf("vndwrite(0x%x, %p)\n", dev, uio); 702 #endif 703 704 if (unit >= numvnd) 705 return (ENXIO); 706 sc = &vnd_softc[unit]; 707 708 if ((sc->sc_flags & VNF_INITED) == 0) 709 return (ENXIO); 710 711 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 712 } 713 714 /* ARGSUSED */ 715 int 716 vndioctl(dev, cmd, data, flag, p) 717 dev_t dev; 718 u_long cmd; 719 caddr_t data; 720 int flag; 721 struct proc *p; 722 { 723 int unit = vndunit(dev); 724 struct vnd_softc *vnd; 725 struct vnd_ioctl *vio; 726 struct vattr vattr; 727 struct nameidata nd; 728 int error, part, pmask; 729 size_t geomsize; 730 #ifdef __HAVE_OLD_DISKLABEL 731 struct disklabel newlabel; 732 #endif 733 734 #ifdef DEBUG 735 if (vnddebug & VDB_FOLLOW) 736 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 737 dev, cmd, data, flag, p, unit); 738 #endif 739 error = suser(p->p_ucred, &p->p_acflag); 740 if (error) 741 return (error); 742 if (unit >= numvnd) 743 return (ENXIO); 744 745 vnd = &vnd_softc[unit]; 746 vio = (struct vnd_ioctl *)data; 747 748 /* Must be open for writes for these commands... */ 749 switch (cmd) { 750 case VNDIOCSET: 751 case VNDIOCCLR: 752 case DIOCSDINFO: 753 case DIOCWDINFO: 754 #ifdef __HAVE_OLD_DISKLABEL 755 case ODIOCSDINFO: 756 case ODIOCWDINFO: 757 #endif 758 case DIOCWLABEL: 759 if ((flag & FWRITE) == 0) 760 return (EBADF); 761 } 762 763 /* Must be initialized for these... */ 764 switch (cmd) { 765 case VNDIOCCLR: 766 case DIOCGDINFO: 767 case DIOCSDINFO: 768 case DIOCWDINFO: 769 case DIOCGPART: 770 case DIOCWLABEL: 771 case DIOCGDEFLABEL: 772 #ifdef __HAVE_OLD_DISKLABEL 773 case ODIOCGDINFO: 774 case ODIOCSDINFO: 775 case ODIOCWDINFO: 776 case ODIOCGDEFLABEL: 777 #endif 778 if ((vnd->sc_flags & VNF_INITED) == 0) 779 return (ENXIO); 780 } 781 782 switch (cmd) { 783 case VNDIOCSET: 784 if (vnd->sc_flags & VNF_INITED) 785 return (EBUSY); 786 787 if ((error = vndlock(vnd)) != 0) 788 return (error); 789 790 /* 791 * Always open for read and write. 792 * This is probably bogus, but it lets vn_open() 793 * weed out directories, sockets, etc. so we don't 794 * have to worry about them. 795 */ 796 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 797 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) 798 goto unlock_and_exit; 799 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 800 VOP_UNLOCK(nd.ni_vp, 0); 801 if (error) 802 goto close_and_exit; 803 vnd->sc_vp = nd.ni_vp; 804 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 805 806 /* 807 * Use pseudo-geometry specified. If none was provided, 808 * use "standard" Adaptec fictitious geometry. 809 */ 810 if (vio->vnd_flags & VNDIOF_HASGEOM) { 811 812 memcpy(&vnd->sc_geom, &vio->vnd_geom, 813 sizeof(vio->vnd_geom)); 814 815 /* 816 * Sanity-check the sector size. 817 * XXX Don't allow secsize < DEV_BSIZE. Should 818 * XXX we? 819 */ 820 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 821 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) { 822 error = EINVAL; 823 goto close_and_exit; 824 } 825 826 /* 827 * Compute the size (in DEV_BSIZE blocks) specified 828 * by the geometry. 829 */ 830 geomsize = (vnd->sc_geom.vng_nsectors * 831 vnd->sc_geom.vng_ntracks * 832 vnd->sc_geom.vng_ncylinders) * 833 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 834 835 /* 836 * Sanity-check the size against the specified 837 * geometry. 838 */ 839 if (vnd->sc_size < geomsize) { 840 error = EINVAL; 841 goto close_and_exit; 842 } 843 } else { 844 /* 845 * Size must be at least 2048 DEV_BSIZE blocks 846 * (1M) in order to use this geometry. 847 */ 848 if (vnd->sc_size < (32 * 64)) { 849 error = EINVAL; 850 goto close_and_exit; 851 } 852 853 vnd->sc_geom.vng_secsize = DEV_BSIZE; 854 vnd->sc_geom.vng_nsectors = 32; 855 vnd->sc_geom.vng_ntracks = 64; 856 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 857 } 858 859 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) 860 goto close_and_exit; 861 vndthrottle(vnd, vnd->sc_vp); 862 vio->vnd_size = dbtob(vnd->sc_size); 863 vnd->sc_flags |= VNF_INITED; 864 #ifdef DEBUG 865 if (vnddebug & VDB_INIT) 866 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 867 vnd->sc_vp, (unsigned long) vnd->sc_size, 868 vnd->sc_geom.vng_secsize, 869 vnd->sc_geom.vng_nsectors, 870 vnd->sc_geom.vng_ntracks, 871 vnd->sc_geom.vng_ncylinders); 872 #endif 873 874 /* Attach the disk. */ 875 memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */ 876 sprintf(vnd->sc_xname, "vnd%d", unit); /* XXX */ 877 vnd->sc_dkdev.dk_name = vnd->sc_xname; 878 disk_attach(&vnd->sc_dkdev); 879 880 /* Initialize the xfer and buffer pools. */ 881 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 882 0, 0, "vndxpl", NULL); 883 pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0, 884 0, 0, "vndbpl", NULL); 885 886 /* Try and read the disklabel. */ 887 vndgetdisklabel(dev); 888 889 vndunlock(vnd); 890 891 break; 892 893 close_and_exit: 894 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 895 unlock_and_exit: 896 vndunlock(vnd); 897 return (error); 898 899 case VNDIOCCLR: 900 if ((error = vndlock(vnd)) != 0) 901 return (error); 902 903 /* 904 * Don't unconfigure if any other partitions are open 905 * or if both the character and block flavors of this 906 * partition are open. 907 */ 908 part = DISKPART(dev); 909 pmask = (1 << part); 910 if ((vnd->sc_dkdev.dk_openmask & ~pmask) || 911 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 912 (vnd->sc_dkdev.dk_copenmask & pmask))) { 913 vndunlock(vnd); 914 return (EBUSY); 915 } 916 917 vndclear(vnd); 918 #ifdef DEBUG 919 if (vnddebug & VDB_INIT) 920 printf("vndioctl: CLRed\n"); 921 #endif 922 923 /* Destroy the xfer and buffer pools. */ 924 pool_destroy(&vnd->sc_vxpool); 925 pool_destroy(&vnd->sc_vbpool); 926 927 /* Detatch the disk. */ 928 disk_detach(&vnd->sc_dkdev); 929 930 vndunlock(vnd); 931 932 break; 933 934 case VNDIOCGET: { 935 struct vnd_user *vnu; 936 struct vattr va; 937 938 vnu = (struct vnd_user *)data; 939 940 if (vnu->vnu_unit == -1) 941 vnu->vnu_unit = unit; 942 if (vnu->vnu_unit >= numvnd) 943 return (ENXIO); 944 if (vnu->vnu_unit < 0) 945 return (EINVAL); 946 947 vnd = &vnd_softc[vnu->vnu_unit]; 948 949 if (vnd->sc_flags & VNF_INITED) { 950 error = VOP_GETATTR(vnd->sc_vp, &va, p->p_ucred, p); 951 if (error) 952 return (error); 953 vnu->vnu_dev = va.va_fsid; 954 vnu->vnu_ino = va.va_fileid; 955 } 956 else { 957 /* unused is not an error */ 958 vnu->vnu_dev = 0; 959 vnu->vnu_ino = 0; 960 } 961 962 break; 963 } 964 965 case DIOCGDINFO: 966 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 967 break; 968 969 #ifdef __HAVE_OLD_DISKLABEL 970 case ODIOCGDINFO: 971 newlabel = *(vnd->sc_dkdev.dk_label); 972 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 973 return ENOTTY; 974 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 975 break; 976 #endif 977 978 case DIOCGPART: 979 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 980 ((struct partinfo *)data)->part = 981 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 982 break; 983 984 case DIOCWDINFO: 985 case DIOCSDINFO: 986 #ifdef __HAVE_OLD_DISKLABEL 987 case ODIOCWDINFO: 988 case ODIOCSDINFO: 989 #endif 990 { 991 struct disklabel *lp; 992 993 if ((error = vndlock(vnd)) != 0) 994 return (error); 995 996 vnd->sc_flags |= VNF_LABELLING; 997 998 #ifdef __HAVE_OLD_DISKLABEL 999 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1000 memset(&newlabel, 0, sizeof newlabel); 1001 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1002 lp = &newlabel; 1003 } else 1004 #endif 1005 lp = (struct disklabel *)data; 1006 1007 error = setdisklabel(vnd->sc_dkdev.dk_label, 1008 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1009 if (error == 0) { 1010 if (cmd == DIOCWDINFO 1011 #ifdef __HAVE_OLD_DISKLABEL 1012 || cmd == ODIOCWDINFO 1013 #endif 1014 ) 1015 error = writedisklabel(VNDLABELDEV(dev), 1016 vndstrategy, vnd->sc_dkdev.dk_label, 1017 vnd->sc_dkdev.dk_cpulabel); 1018 } 1019 1020 vnd->sc_flags &= ~VNF_LABELLING; 1021 1022 vndunlock(vnd); 1023 1024 if (error) 1025 return (error); 1026 break; 1027 } 1028 1029 case DIOCWLABEL: 1030 if (*(int *)data != 0) 1031 vnd->sc_flags |= VNF_WLABEL; 1032 else 1033 vnd->sc_flags &= ~VNF_WLABEL; 1034 break; 1035 1036 case DIOCGDEFLABEL: 1037 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1038 break; 1039 1040 #ifdef __HAVE_OLD_DISKLABEL 1041 case ODIOCGDEFLABEL: 1042 vndgetdefaultlabel(vnd, &newlabel); 1043 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1044 return ENOTTY; 1045 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1046 break; 1047 #endif 1048 1049 default: 1050 return (ENOTTY); 1051 } 1052 1053 return (0); 1054 } 1055 1056 /* 1057 * Duplicate the current processes' credentials. Since we are called only 1058 * as the result of a SET ioctl and only root can do that, any future access 1059 * to this "disk" is essentially as root. Note that credentials may change 1060 * if some other uid can write directly to the mapped file (NFS). 1061 */ 1062 int 1063 vndsetcred(vnd, cred) 1064 struct vnd_softc *vnd; 1065 struct ucred *cred; 1066 { 1067 struct uio auio; 1068 struct iovec aiov; 1069 char *tmpbuf; 1070 int error; 1071 1072 vnd->sc_cred = crdup(cred); 1073 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1074 1075 /* XXX: Horrible kludge to establish credentials for NFS */ 1076 aiov.iov_base = tmpbuf; 1077 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1078 auio.uio_iov = &aiov; 1079 auio.uio_iovcnt = 1; 1080 auio.uio_offset = 0; 1081 auio.uio_rw = UIO_READ; 1082 auio.uio_segflg = UIO_SYSSPACE; 1083 auio.uio_resid = aiov.iov_len; 1084 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1085 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1086 if (error == 0) { 1087 /* 1088 * Because vnd does all IO directly through the vnode 1089 * we need to flush (at least) the buffer from the above 1090 * VOP_READ from the buffer cache to prevent cache 1091 * incoherencies. Also, be careful to write dirty 1092 * buffers back to stable storage. 1093 */ 1094 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1095 curproc, 0, 0); 1096 } 1097 VOP_UNLOCK(vnd->sc_vp, 0); 1098 1099 free(tmpbuf, M_TEMP); 1100 return (error); 1101 } 1102 1103 /* 1104 * Set maxactive based on FS type 1105 */ 1106 void 1107 vndthrottle(vnd, vp) 1108 struct vnd_softc *vnd; 1109 struct vnode *vp; 1110 { 1111 #ifdef NFS 1112 extern int (**nfsv2_vnodeop_p) __P((void *)); 1113 1114 if (vp->v_op == nfsv2_vnodeop_p) 1115 vnd->sc_maxactive = 2; 1116 else 1117 #endif 1118 vnd->sc_maxactive = 8; 1119 1120 if (vnd->sc_maxactive < 1) 1121 vnd->sc_maxactive = 1; 1122 } 1123 1124 void 1125 vndshutdown() 1126 { 1127 struct vnd_softc *vnd; 1128 1129 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1130 if (vnd->sc_flags & VNF_INITED) 1131 vndclear(vnd); 1132 } 1133 1134 void 1135 vndclear(vnd) 1136 struct vnd_softc *vnd; 1137 { 1138 struct vnode *vp = vnd->sc_vp; 1139 struct proc *p = curproc; /* XXX */ 1140 1141 #ifdef DEBUG 1142 if (vnddebug & VDB_FOLLOW) 1143 printf("vndclear(%p): vp %p\n", vnd, vp); 1144 #endif 1145 vnd->sc_flags &= ~VNF_INITED; 1146 if (vp == (struct vnode *)0) 1147 panic("vndioctl: null vp"); 1148 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p); 1149 crfree(vnd->sc_cred); 1150 vnd->sc_vp = (struct vnode *)0; 1151 vnd->sc_cred = (struct ucred *)0; 1152 vnd->sc_size = 0; 1153 } 1154 1155 int 1156 vndsize(dev) 1157 dev_t dev; 1158 { 1159 struct vnd_softc *sc; 1160 struct disklabel *lp; 1161 int part, unit, omask; 1162 int size; 1163 1164 unit = vndunit(dev); 1165 if (unit >= numvnd) 1166 return (-1); 1167 sc = &vnd_softc[unit]; 1168 1169 if ((sc->sc_flags & VNF_INITED) == 0) 1170 return (-1); 1171 1172 part = DISKPART(dev); 1173 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1174 lp = sc->sc_dkdev.dk_label; 1175 1176 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc)) 1177 return (-1); 1178 1179 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1180 size = -1; 1181 else 1182 size = lp->d_partitions[part].p_size * 1183 (lp->d_secsize / DEV_BSIZE); 1184 1185 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc)) 1186 return (-1); 1187 1188 return (size); 1189 } 1190 1191 int 1192 vnddump(dev, blkno, va, size) 1193 dev_t dev; 1194 daddr_t blkno; 1195 caddr_t va; 1196 size_t size; 1197 { 1198 1199 /* Not implemented. */ 1200 return ENXIO; 1201 } 1202 1203 void 1204 vndgetdefaultlabel(sc, lp) 1205 struct vnd_softc *sc; 1206 struct disklabel *lp; 1207 { 1208 struct vndgeom *vng = &sc->sc_geom; 1209 struct partition *pp; 1210 1211 memset(lp, 0, sizeof(*lp)); 1212 1213 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1214 lp->d_secsize = vng->vng_secsize; 1215 lp->d_nsectors = vng->vng_nsectors; 1216 lp->d_ntracks = vng->vng_ntracks; 1217 lp->d_ncylinders = vng->vng_ncylinders; 1218 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1219 1220 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1221 lp->d_type = DTYPE_VND; 1222 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1223 lp->d_rpm = 3600; 1224 lp->d_interleave = 1; 1225 lp->d_flags = 0; 1226 1227 pp = &lp->d_partitions[RAW_PART]; 1228 pp->p_offset = 0; 1229 pp->p_size = lp->d_secperunit; 1230 pp->p_fstype = FS_UNUSED; 1231 lp->d_npartitions = RAW_PART + 1; 1232 1233 lp->d_magic = DISKMAGIC; 1234 lp->d_magic2 = DISKMAGIC; 1235 lp->d_checksum = dkcksum(lp); 1236 } 1237 1238 /* 1239 * Read the disklabel from a vnd. If one is not present, create a fake one. 1240 */ 1241 void 1242 vndgetdisklabel(dev) 1243 dev_t dev; 1244 { 1245 struct vnd_softc *sc = &vnd_softc[vndunit(dev)]; 1246 char *errstring; 1247 struct disklabel *lp = sc->sc_dkdev.dk_label; 1248 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1249 int i; 1250 1251 memset(clp, 0, sizeof(*clp)); 1252 1253 vndgetdefaultlabel(sc, lp); 1254 1255 /* 1256 * Call the generic disklabel extraction routine. 1257 */ 1258 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1259 if (errstring) { 1260 /* 1261 * Lack of disklabel is common, but we print the warning 1262 * anyway, since it might contain other useful information. 1263 */ 1264 printf("%s: %s\n", sc->sc_xname, errstring); 1265 1266 /* 1267 * For historical reasons, if there's no disklabel 1268 * present, all partitions must be FS_BSDFFS and 1269 * occupy the entire disk. 1270 */ 1271 for (i = 0; i < MAXPARTITIONS; i++) { 1272 /* 1273 * Don't wipe out port specific hack (such as 1274 * dos partition hack of i386 port). 1275 */ 1276 if (lp->d_partitions[i].p_fstype != FS_UNUSED) 1277 continue; 1278 1279 lp->d_partitions[i].p_size = lp->d_secperunit; 1280 lp->d_partitions[i].p_offset = 0; 1281 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1282 } 1283 1284 strncpy(lp->d_packname, "default label", 1285 sizeof(lp->d_packname)); 1286 1287 lp->d_checksum = dkcksum(lp); 1288 } 1289 } 1290 1291 /* 1292 * Wait interruptibly for an exclusive lock. 1293 * 1294 * XXX 1295 * Several drivers do this; it should be abstracted and made MP-safe. 1296 */ 1297 static int 1298 vndlock(sc) 1299 struct vnd_softc *sc; 1300 { 1301 int error; 1302 1303 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1304 sc->sc_flags |= VNF_WANTED; 1305 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1306 return (error); 1307 } 1308 sc->sc_flags |= VNF_LOCKED; 1309 return (0); 1310 } 1311 1312 /* 1313 * Unlock and wake up any waiters. 1314 */ 1315 static void 1316 vndunlock(sc) 1317 struct vnd_softc *sc; 1318 { 1319 1320 sc->sc_flags &= ~VNF_LOCKED; 1321 if ((sc->sc_flags & VNF_WANTED) != 0) { 1322 sc->sc_flags &= ~VNF_WANTED; 1323 wakeup(sc); 1324 } 1325 } 1326