1 /* 2 * Copyright (c) 1988 University of Utah. 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * %sccs.include.redist.c% 11 * 12 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 13 * 14 * @(#)vn.c 8.9 (Berkeley) 05/14/95 15 */ 16 17 /* 18 * Vnode disk driver. 19 * 20 * Block/character interface to a vnode. Allows one to treat a file 21 * as a disk (e.g. build a filesystem in it, mount it, etc.). 22 * 23 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 24 * instead of a simple VOP_RDWR. We do this to avoid distorting the 25 * local buffer cache. 26 * 27 * NOTE 2: There is a security issue involved with this driver. 28 * Once mounted all access to the contents of the "mapped" file via 29 * the special file is controlled by the permissions on the special 30 * file, the protection of the mapped file is ignored (effectively, 31 * by using root credentials in all transactions). 32 * 33 * NOTE 3: Doesn't interact with leases, should it? 34 */ 35 #include "vn.h" 36 #if NVN > 0 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/namei.h> 41 #include <sys/proc.h> 42 #include <sys/errno.h> 43 #include <sys/dkstat.h> 44 #include <sys/buf.h> 45 #include <sys/malloc.h> 46 #include <sys/ioctl.h> 47 #include <sys/mount.h> 48 #include <sys/vnode.h> 49 #include <sys/file.h> 50 #include <sys/uio.h> 51 52 #include <miscfs/specfs/specdev.h> 53 54 #include <dev/vnioctl.h> 55 56 #ifdef DEBUG 57 int dovncluster = 1; 58 int vndebug = 0x00; 59 #define VDB_FOLLOW 0x01 60 #define VDB_INIT 0x02 61 #define VDB_IO 0x04 62 #endif 63 64 #define b_cylin b_resid 65 66 #define vnunit(x) ((minor(x) >> 3) & 0x7) /* for consistency */ 67 68 #define getvnbuf() \ 69 ((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK)) 70 #define putvnbuf(bp) \ 71 free((caddr_t)(bp), M_DEVBUF) 72 73 struct vn_softc { 74 int sc_flags; /* flags */ 75 size_t sc_size; /* size of vn */ 76 struct vnode *sc_vp; /* vnode */ 77 struct ucred *sc_cred; /* credentials */ 78 int sc_maxactive; /* max # of active requests */ 79 struct buf sc_tab; /* transfer queue */ 80 }; 81 82 /* sc_flags */ 83 #define VNF_ALIVE 0x01 84 #define VNF_INITED 0x02 85 86 #if 0 /* if you need static allocation */ 87 struct vn_softc vn_softc[NVN]; 88 int numvnd = NVN; 89 #else 90 struct vn_softc *vn_softc; 91 int numvnd; 92 #endif 93 94 void 95 vnattach(num) 96 int num; 97 { 98 char *mem; 99 register u_long size; 100 101 if (num <= 0) 102 return; 103 size = num * sizeof(struct vn_softc); 104 mem = malloc(size, M_DEVBUF, M_NOWAIT); 105 if (mem == NULL) { 106 printf("WARNING: no memory for vnode disks\n"); 107 return; 108 } 109 bzero(mem, size); 110 vn_softc = (struct vn_softc *)mem; 111 numvnd = num; 112 } 113 114 int 115 vnopen(dev, flags, mode, p) 116 dev_t dev; 117 int flags, mode; 118 struct proc *p; 119 { 120 int unit = vnunit(dev); 121 122 #ifdef DEBUG 123 if (vndebug & VDB_FOLLOW) 124 printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p); 125 #endif 126 if (unit >= numvnd) 127 return(ENXIO); 128 return(0); 129 } 130 131 /* 132 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 133 * Note that this driver can only be used for swapping over NFS on the hp 134 * since nfs_strategy on the vax cannot handle u-areas and page tables. 135 */ 136 void 137 vnstrategy(bp) 138 register struct buf *bp; 139 { 140 int unit = vnunit(bp->b_dev); 141 register struct vn_softc *vn = &vn_softc[unit]; 142 register struct buf *nbp; 143 register int bn, bsize, resid; 144 register caddr_t addr; 145 int sz, flags, error; 146 extern void vniodone(); 147 148 #ifdef DEBUG 149 if (vndebug & VDB_FOLLOW) 150 printf("vnstrategy(%x): unit %d\n", bp, unit); 151 #endif 152 if ((vn->sc_flags & VNF_INITED) == 0) { 153 bp->b_error = ENXIO; 154 bp->b_flags |= B_ERROR; 155 biodone(bp); 156 return; 157 } 158 bn = bp->b_blkno; 159 sz = howmany(bp->b_bcount, DEV_BSIZE); 160 bp->b_resid = bp->b_bcount; 161 if (bn < 0 || bn + sz > vn->sc_size) { 162 if (bn != vn->sc_size) { 163 bp->b_error = EINVAL; 164 bp->b_flags |= B_ERROR; 165 } 166 biodone(bp); 167 return; 168 } 169 bn = dbtob(bn); 170 bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize; 171 addr = bp->b_data; 172 flags = bp->b_flags | B_CALL; 173 for (resid = bp->b_resid; resid; resid -= sz) { 174 struct vnode *vp; 175 daddr_t nbn; 176 int off, s, nra; 177 178 nra = 0; 179 error = VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn, &nra); 180 if (error == 0 && (long)nbn == -1) 181 error = EIO; 182 #ifdef DEBUG 183 if (!dovncluster) 184 nra = 0; 185 #endif 186 187 if (off = bn % bsize) 188 sz = bsize - off; 189 else 190 sz = (1 + nra) * bsize; 191 if (resid < sz) 192 sz = resid; 193 #ifdef DEBUG 194 if (vndebug & VDB_IO) 195 printf("vnstrategy: vp %x/%x bn %x/%x sz %x\n", 196 vn->sc_vp, vp, bn, nbn, sz); 197 #endif 198 199 nbp = getvnbuf(); 200 nbp->b_flags = flags; 201 nbp->b_bcount = sz; 202 nbp->b_bufsize = bp->b_bufsize; 203 nbp->b_error = 0; 204 if (vp->v_type == VBLK || vp->v_type == VCHR) 205 nbp->b_dev = vp->v_rdev; 206 else 207 nbp->b_dev = NODEV; 208 nbp->b_data = addr; 209 nbp->b_blkno = nbn + btodb(off); 210 nbp->b_proc = bp->b_proc; 211 nbp->b_iodone = vniodone; 212 nbp->b_vp = vp; 213 nbp->b_pfcent = (int) bp; /* XXX */ 214 nbp->b_rcred = vn->sc_cred; /* XXX crdup? */ 215 nbp->b_wcred = vn->sc_cred; /* XXX crdup? */ 216 nbp->b_dirtyoff = bp->b_dirtyoff; 217 nbp->b_dirtyend = bp->b_dirtyend; 218 nbp->b_validoff = bp->b_validoff; 219 nbp->b_validend = bp->b_validend; 220 /* 221 * If there was an error or a hole in the file...punt. 222 * Note that we deal with this after the nbp allocation. 223 * This ensures that we properly clean up any operations 224 * that we have already fired off. 225 * 226 * XXX we could deal with holes here but it would be 227 * a hassle (in the write case). 228 */ 229 if (error) { 230 nbp->b_error = error; 231 nbp->b_flags |= B_ERROR; 232 bp->b_resid -= (resid - sz); 233 biodone(nbp); 234 return; 235 } 236 /* 237 * Just sort by block number 238 */ 239 nbp->b_cylin = nbp->b_blkno; 240 s = splbio(); 241 disksort(&vn->sc_tab, nbp); 242 if (vn->sc_tab.b_active < vn->sc_maxactive) { 243 vn->sc_tab.b_active++; 244 vnstart(vn); 245 } 246 splx(s); 247 bn += sz; 248 addr += sz; 249 } 250 } 251 252 /* 253 * Feed requests sequentially. 254 * We do it this way to keep from flooding NFS servers if we are connected 255 * to an NFS file. This places the burden on the client rather than the 256 * server. 257 */ 258 vnstart(vn) 259 register struct vn_softc *vn; 260 { 261 register struct buf *bp; 262 263 /* 264 * Dequeue now since lower level strategy routine might 265 * queue using same links 266 */ 267 bp = vn->sc_tab.b_actf; 268 vn->sc_tab.b_actf = bp->b_actf; 269 #ifdef DEBUG 270 if (vndebug & VDB_IO) 271 printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n", 272 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 273 bp->b_bcount); 274 #endif 275 if ((bp->b_flags & B_READ) == 0) 276 bp->b_vp->v_numoutput++; 277 VOP_STRATEGY(bp); 278 } 279 280 void 281 vniodone(bp) 282 register struct buf *bp; 283 { 284 register struct buf *pbp = (struct buf *)bp->b_pfcent; /* XXX */ 285 register struct vn_softc *vn = &vn_softc[vnunit(pbp->b_dev)]; 286 int s; 287 288 s = splbio(); 289 #ifdef DEBUG 290 if (vndebug & VDB_IO) 291 printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n", 292 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 293 bp->b_bcount); 294 #endif 295 if (bp->b_error) { 296 #ifdef DEBUG 297 if (vndebug & VDB_IO) 298 printf("vniodone: bp %x error %d\n", bp, bp->b_error); 299 #endif 300 pbp->b_flags |= B_ERROR; 301 pbp->b_error = biowait(bp); 302 } 303 pbp->b_resid -= bp->b_bcount; 304 putvnbuf(bp); 305 if (pbp->b_resid == 0) { 306 #ifdef DEBUG 307 if (vndebug & VDB_IO) 308 printf("vniodone: pbp %x iodone\n", pbp); 309 #endif 310 biodone(pbp); 311 } 312 if (vn->sc_tab.b_actf) 313 vnstart(vn); 314 else 315 vn->sc_tab.b_active--; 316 splx(s); 317 } 318 319 vnread(dev, uio, flags, p) 320 dev_t dev; 321 struct uio *uio; 322 int flags; 323 struct proc *p; 324 { 325 326 #ifdef DEBUG 327 if (vndebug & VDB_FOLLOW) 328 printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p); 329 #endif 330 return(physio(vnstrategy, NULL, dev, B_READ, minphys, uio)); 331 } 332 333 vnwrite(dev, uio, flags, p) 334 dev_t dev; 335 struct uio *uio; 336 int flags; 337 struct proc *p; 338 { 339 340 #ifdef DEBUG 341 if (vndebug & VDB_FOLLOW) 342 printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p); 343 #endif 344 return(physio(vnstrategy, NULL, dev, B_WRITE, minphys, uio)); 345 } 346 347 /* ARGSUSED */ 348 vnioctl(dev, cmd, data, flag, p) 349 dev_t dev; 350 u_long cmd; 351 caddr_t data; 352 int flag; 353 struct proc *p; 354 { 355 int unit = vnunit(dev); 356 register struct vn_softc *vn; 357 struct vn_ioctl *vio; 358 struct vattr vattr; 359 struct nameidata nd; 360 int error; 361 362 #ifdef DEBUG 363 if (vndebug & VDB_FOLLOW) 364 printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n", 365 dev, cmd, data, flag, p, unit); 366 #endif 367 error = suser(p->p_ucred, &p->p_acflag); 368 if (error) 369 return (error); 370 if (unit >= numvnd) 371 return (ENXIO); 372 373 vn = &vn_softc[unit]; 374 vio = (struct vn_ioctl *)data; 375 switch (cmd) { 376 377 case VNIOCSET: 378 if (vn->sc_flags & VNF_INITED) 379 return(EBUSY); 380 /* 381 * Always open for read and write. 382 * This is probably bogus, but it lets vn_open() 383 * weed out directories, sockets, etc. so we don't 384 * have to worry about them. 385 */ 386 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); 387 if (error = vn_open(&nd, FREAD|FWRITE, 0)) 388 return(error); 389 if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) { 390 VOP_UNLOCK(nd.ni_vp, 0, p); 391 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 392 return(error); 393 } 394 VOP_UNLOCK(nd.ni_vp, 0, p); 395 vn->sc_vp = nd.ni_vp; 396 vn->sc_size = btodb(vattr.va_size); /* note truncation */ 397 if (error = vnsetcred(vn, p->p_ucred)) { 398 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 399 return(error); 400 } 401 vnthrottle(vn, vn->sc_vp); 402 vio->vn_size = dbtob(vn->sc_size); 403 vn->sc_flags |= VNF_INITED; 404 #ifdef DEBUG 405 if (vndebug & VDB_INIT) 406 printf("vnioctl: SET vp %x size %x\n", 407 vn->sc_vp, vn->sc_size); 408 #endif 409 break; 410 411 case VNIOCCLR: 412 if ((vn->sc_flags & VNF_INITED) == 0) 413 return(ENXIO); 414 vnclear(vn); 415 #ifdef DEBUG 416 if (vndebug & VDB_INIT) 417 printf("vnioctl: CLRed\n"); 418 #endif 419 break; 420 421 default: 422 return(ENOTTY); 423 } 424 return(0); 425 } 426 427 /* 428 * Duplicate the current processes' credentials. Since we are called only 429 * as the result of a SET ioctl and only root can do that, any future access 430 * to this "disk" is essentially as root. Note that credentials may change 431 * if some other uid can write directly to the mapped file (NFS). 432 */ 433 vnsetcred(vn, cred) 434 register struct vn_softc *vn; 435 struct ucred *cred; 436 { 437 struct uio auio; 438 struct iovec aiov; 439 char *tmpbuf; 440 int error; 441 442 vn->sc_cred = crdup(cred); 443 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 444 445 /* XXX: Horrible kludge to establish credentials for NFS */ 446 aiov.iov_base = tmpbuf; 447 aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size)); 448 auio.uio_iov = &aiov; 449 auio.uio_iovcnt = 1; 450 auio.uio_offset = 0; 451 auio.uio_rw = UIO_READ; 452 auio.uio_segflg = UIO_SYSSPACE; 453 auio.uio_resid = aiov.iov_len; 454 error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); 455 456 free(tmpbuf, M_TEMP); 457 return (error); 458 } 459 460 /* 461 * Set maxactive based on FS type 462 */ 463 vnthrottle(vn, vp) 464 register struct vn_softc *vn; 465 struct vnode *vp; 466 { 467 extern int (**nfsv2_vnodeop_p)(); 468 469 if (vp->v_op == nfsv2_vnodeop_p) 470 vn->sc_maxactive = 2; 471 else 472 vn->sc_maxactive = 8; 473 474 if (vn->sc_maxactive < 1) 475 vn->sc_maxactive = 1; 476 } 477 478 vnshutdown() 479 { 480 register struct vn_softc *vn; 481 482 for (vn = &vn_softc[0]; vn < &vn_softc[numvnd]; vn++) 483 if (vn->sc_flags & VNF_INITED) 484 vnclear(vn); 485 } 486 487 vnclear(vn) 488 register struct vn_softc *vn; 489 { 490 register struct vnode *vp = vn->sc_vp; 491 struct proc *p = curproc; /* XXX */ 492 493 #ifdef DEBUG 494 if (vndebug & VDB_FOLLOW) 495 printf("vnclear(%x): vp %x\n", vp); 496 #endif 497 vn->sc_flags &= ~VNF_INITED; 498 if (vp == (struct vnode *)0) 499 panic("vnioctl: null vp"); 500 (void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p); 501 crfree(vn->sc_cred); 502 vn->sc_vp = (struct vnode *)0; 503 vn->sc_cred = (struct ucred *)0; 504 vn->sc_size = 0; 505 } 506 507 vnsize(dev) 508 dev_t dev; 509 { 510 int unit = vnunit(dev); 511 register struct vn_softc *vn = &vn_softc[unit]; 512 513 if (unit >= numvnd || (vn->sc_flags & VNF_INITED) == 0) 514 return(-1); 515 return(vn->sc_size); 516 } 517 518 vndump(dev) 519 { 520 return(ENXIO); 521 } 522 #endif 523