1 /* 2 * Copyright (c) 1988 University of Utah. 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * %sccs.include.redist.c% 11 * 12 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 13 * 14 * @(#)vn.c 8.6 (Berkeley) 04/01/94 15 */ 16 17 /* 18 * Vnode disk driver. 19 * 20 * Block/character interface to a vnode. Allows one to treat a file 21 * as a disk (e.g. build a filesystem in it, mount it, etc.). 22 * 23 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 24 * instead of a simple VOP_RDWR. We do this to avoid distorting the 25 * local buffer cache. 26 * 27 * NOTE 2: There is a security issue involved with this driver. 28 * Once mounted all access to the contents of the "mapped" file via 29 * the special file is controlled by the permissions on the special 30 * file, the protection of the mapped file is ignored (effectively, 31 * by using root credentials in all transactions). 32 * 33 * NOTE 3: Doesn't interact with leases, should it? 34 */ 35 #include "vn.h" 36 #if NVN > 0 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/namei.h> 41 #include <sys/proc.h> 42 #include <sys/errno.h> 43 #include <sys/dkstat.h> 44 #include <sys/buf.h> 45 #include <sys/malloc.h> 46 #include <sys/ioctl.h> 47 #include <sys/mount.h> 48 #include <sys/vnode.h> 49 #include <sys/file.h> 50 #include <sys/uio.h> 51 52 #include <miscfs/specfs/specdev.h> 53 54 #include <dev/vnioctl.h> 55 56 #ifdef DEBUG 57 int dovncluster = 1; 58 int vndebug = 0x00; 59 #define VDB_FOLLOW 0x01 60 #define VDB_INIT 0x02 61 #define VDB_IO 0x04 62 #endif 63 64 #define b_cylin b_resid 65 66 #define vnunit(x) ((minor(x) >> 3) & 0x7) /* for consistency */ 67 68 #define getvnbuf() \ 69 ((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK)) 70 #define putvnbuf(bp) \ 71 free((caddr_t)(bp), M_DEVBUF) 72 73 struct vn_softc { 74 int sc_flags; /* flags */ 75 size_t sc_size; /* size of vn */ 76 struct vnode *sc_vp; /* vnode */ 77 struct ucred *sc_cred; /* credentials */ 78 int sc_maxactive; /* max # of active requests */ 79 struct buf sc_tab; /* transfer queue */ 80 }; 81 82 /* sc_flags */ 83 #define VNF_ALIVE 0x01 84 #define VNF_INITED 0x02 85 86 #if 0 /* if you need static allocation */ 87 struct vn_softc vn_softc[NVN]; 88 int numvnd = NVN; 89 #else 90 struct vn_softc *vn_softc; 91 int numvnd; 92 #endif 93 94 void 95 vnattach(num) 96 int num; 97 { 98 char *mem; 99 register u_long size; 100 101 if (num <= 0) 102 return; 103 size = num * sizeof(struct vn_softc); 104 mem = malloc(size, M_DEVBUF, M_NOWAIT); 105 if (mem == NULL) { 106 printf("WARNING: no memory for vnode disks\n"); 107 return; 108 } 109 bzero(mem, size); 110 vn_softc = (struct vn_softc *)mem; 111 numvnd = num; 112 } 113 114 int 115 vnopen(dev, flags, mode, p) 116 dev_t dev; 117 int flags, mode; 118 struct proc *p; 119 { 120 int unit = vnunit(dev); 121 122 #ifdef DEBUG 123 if (vndebug & VDB_FOLLOW) 124 printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p); 125 #endif 126 if (unit >= numvnd) 127 return(ENXIO); 128 return(0); 129 } 130 131 /* 132 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 133 * Note that this driver can only be used for swapping over NFS on the hp 134 * since nfs_strategy on the vax cannot handle u-areas and page tables. 135 */ 136 vnstrategy(bp) 137 register struct buf *bp; 138 { 139 int unit = vnunit(bp->b_dev); 140 register struct vn_softc *vn = &vn_softc[unit]; 141 register struct buf *nbp; 142 register int bn, bsize, resid; 143 register caddr_t addr; 144 int sz, flags, error; 145 extern void vniodone(); 146 147 #ifdef DEBUG 148 if (vndebug & VDB_FOLLOW) 149 printf("vnstrategy(%x): unit %d\n", bp, unit); 150 #endif 151 if ((vn->sc_flags & VNF_INITED) == 0) { 152 bp->b_error = ENXIO; 153 bp->b_flags |= B_ERROR; 154 biodone(bp); 155 return; 156 } 157 bn = bp->b_blkno; 158 sz = howmany(bp->b_bcount, DEV_BSIZE); 159 bp->b_resid = bp->b_bcount; 160 if (bn < 0 || bn + sz > vn->sc_size) { 161 if (bn != vn->sc_size) { 162 bp->b_error = EINVAL; 163 bp->b_flags |= B_ERROR; 164 } 165 biodone(bp); 166 return; 167 } 168 bn = dbtob(bn); 169 bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize; 170 addr = bp->b_data; 171 flags = bp->b_flags | B_CALL; 172 for (resid = bp->b_resid; resid; resid -= sz) { 173 struct vnode *vp; 174 daddr_t nbn; 175 int off, s, nra; 176 177 nra = 0; 178 error = VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn, &nra); 179 if (error == 0 && (long)nbn == -1) 180 error = EIO; 181 #ifdef DEBUG 182 if (!dovncluster) 183 nra = 0; 184 #endif 185 186 if (off = bn % bsize) 187 sz = bsize - off; 188 else 189 sz = (1 + nra) * bsize; 190 if (resid < sz) 191 sz = resid; 192 #ifdef DEBUG 193 if (vndebug & VDB_IO) 194 printf("vnstrategy: vp %x/%x bn %x/%x sz %x\n", 195 vn->sc_vp, vp, bn, nbn, sz); 196 #endif 197 198 nbp = getvnbuf(); 199 nbp->b_flags = flags; 200 nbp->b_bcount = sz; 201 nbp->b_bufsize = bp->b_bufsize; 202 nbp->b_error = 0; 203 if (vp->v_type == VBLK || vp->v_type == VCHR) 204 nbp->b_dev = vp->v_rdev; 205 else 206 nbp->b_dev = NODEV; 207 nbp->b_data = addr; 208 nbp->b_blkno = nbn + btodb(off); 209 nbp->b_proc = bp->b_proc; 210 nbp->b_iodone = vniodone; 211 nbp->b_vp = vp; 212 nbp->b_pfcent = (int) bp; /* XXX */ 213 nbp->b_rcred = vn->sc_cred; /* XXX crdup? */ 214 nbp->b_wcred = vn->sc_cred; /* XXX crdup? */ 215 nbp->b_dirtyoff = bp->b_dirtyoff; 216 nbp->b_dirtyend = bp->b_dirtyend; 217 nbp->b_validoff = bp->b_validoff; 218 nbp->b_validend = bp->b_validend; 219 /* 220 * If there was an error or a hole in the file...punt. 221 * Note that we deal with this after the nbp allocation. 222 * This ensures that we properly clean up any operations 223 * that we have already fired off. 224 * 225 * XXX we could deal with holes here but it would be 226 * a hassle (in the write case). 227 */ 228 if (error) { 229 nbp->b_error = error; 230 nbp->b_flags |= B_ERROR; 231 bp->b_resid -= (resid - sz); 232 biodone(nbp); 233 return; 234 } 235 /* 236 * Just sort by block number 237 */ 238 nbp->b_cylin = nbp->b_blkno; 239 s = splbio(); 240 disksort(&vn->sc_tab, nbp); 241 if (vn->sc_tab.b_active < vn->sc_maxactive) { 242 vn->sc_tab.b_active++; 243 vnstart(vn); 244 } 245 splx(s); 246 bn += sz; 247 addr += sz; 248 } 249 } 250 251 /* 252 * Feed requests sequentially. 253 * We do it this way to keep from flooding NFS servers if we are connected 254 * to an NFS file. This places the burden on the client rather than the 255 * server. 256 */ 257 vnstart(vn) 258 register struct vn_softc *vn; 259 { 260 register struct buf *bp; 261 262 /* 263 * Dequeue now since lower level strategy routine might 264 * queue using same links 265 */ 266 bp = vn->sc_tab.b_actf; 267 vn->sc_tab.b_actf = bp->b_actf; 268 #ifdef DEBUG 269 if (vndebug & VDB_IO) 270 printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n", 271 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 272 bp->b_bcount); 273 #endif 274 if ((bp->b_flags & B_READ) == 0) 275 bp->b_vp->v_numoutput++; 276 VOP_STRATEGY(bp); 277 } 278 279 void 280 vniodone(bp) 281 register struct buf *bp; 282 { 283 register struct buf *pbp = (struct buf *)bp->b_pfcent; /* XXX */ 284 register struct vn_softc *vn = &vn_softc[vnunit(pbp->b_dev)]; 285 int s; 286 287 s = splbio(); 288 #ifdef DEBUG 289 if (vndebug & VDB_IO) 290 printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n", 291 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 292 bp->b_bcount); 293 #endif 294 if (bp->b_error) { 295 #ifdef DEBUG 296 if (vndebug & VDB_IO) 297 printf("vniodone: bp %x error %d\n", bp, bp->b_error); 298 #endif 299 pbp->b_flags |= B_ERROR; 300 pbp->b_error = biowait(bp); 301 } 302 pbp->b_resid -= bp->b_bcount; 303 putvnbuf(bp); 304 if (pbp->b_resid == 0) { 305 #ifdef DEBUG 306 if (vndebug & VDB_IO) 307 printf("vniodone: pbp %x iodone\n", pbp); 308 #endif 309 biodone(pbp); 310 } 311 if (vn->sc_tab.b_actf) 312 vnstart(vn); 313 else 314 vn->sc_tab.b_active--; 315 splx(s); 316 } 317 318 vnread(dev, uio, flags, p) 319 dev_t dev; 320 struct uio *uio; 321 int flags; 322 struct proc *p; 323 { 324 325 #ifdef DEBUG 326 if (vndebug & VDB_FOLLOW) 327 printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p); 328 #endif 329 return(physio(vnstrategy, NULL, dev, B_READ, minphys, uio)); 330 } 331 332 vnwrite(dev, uio, flags, p) 333 dev_t dev; 334 struct uio *uio; 335 int flags; 336 struct proc *p; 337 { 338 339 #ifdef DEBUG 340 if (vndebug & VDB_FOLLOW) 341 printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p); 342 #endif 343 return(physio(vnstrategy, NULL, dev, B_WRITE, minphys, uio)); 344 } 345 346 /* ARGSUSED */ 347 vnioctl(dev, cmd, data, flag, p) 348 dev_t dev; 349 u_long cmd; 350 caddr_t data; 351 int flag; 352 struct proc *p; 353 { 354 int unit = vnunit(dev); 355 register struct vn_softc *vn; 356 struct vn_ioctl *vio; 357 struct vattr vattr; 358 struct nameidata nd; 359 int error; 360 361 #ifdef DEBUG 362 if (vndebug & VDB_FOLLOW) 363 printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n", 364 dev, cmd, data, flag, p, unit); 365 #endif 366 error = suser(p->p_ucred, &p->p_acflag); 367 if (error) 368 return (error); 369 if (unit >= numvnd) 370 return (ENXIO); 371 372 vn = &vn_softc[unit]; 373 vio = (struct vn_ioctl *)data; 374 switch (cmd) { 375 376 case VNIOCSET: 377 if (vn->sc_flags & VNF_INITED) 378 return(EBUSY); 379 /* 380 * Always open for read and write. 381 * This is probably bogus, but it lets vn_open() 382 * weed out directories, sockets, etc. so we don't 383 * have to worry about them. 384 */ 385 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); 386 if (error = vn_open(&nd, FREAD|FWRITE, 0)) 387 return(error); 388 if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) { 389 VOP_UNLOCK(nd.ni_vp); 390 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 391 return(error); 392 } 393 VOP_UNLOCK(nd.ni_vp); 394 vn->sc_vp = nd.ni_vp; 395 vn->sc_size = btodb(vattr.va_size); /* note truncation */ 396 if (error = vnsetcred(vn, p->p_ucred)) { 397 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 398 return(error); 399 } 400 vnthrottle(vn, vn->sc_vp); 401 vio->vn_size = dbtob(vn->sc_size); 402 vn->sc_flags |= VNF_INITED; 403 #ifdef DEBUG 404 if (vndebug & VDB_INIT) 405 printf("vnioctl: SET vp %x size %x\n", 406 vn->sc_vp, vn->sc_size); 407 #endif 408 break; 409 410 case VNIOCCLR: 411 if ((vn->sc_flags & VNF_INITED) == 0) 412 return(ENXIO); 413 vnclear(vn); 414 #ifdef DEBUG 415 if (vndebug & VDB_INIT) 416 printf("vnioctl: CLRed\n"); 417 #endif 418 break; 419 420 default: 421 return(ENXIO); 422 } 423 return(0); 424 } 425 426 /* 427 * Duplicate the current processes' credentials. Since we are called only 428 * as the result of a SET ioctl and only root can do that, any future access 429 * to this "disk" is essentially as root. Note that credentials may change 430 * if some other uid can write directly to the mapped file (NFS). 431 */ 432 vnsetcred(vn, cred) 433 register struct vn_softc *vn; 434 struct ucred *cred; 435 { 436 struct uio auio; 437 struct iovec aiov; 438 char *tmpbuf; 439 int error; 440 441 vn->sc_cred = crdup(cred); 442 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 443 444 /* XXX: Horrible kludge to establish credentials for NFS */ 445 aiov.iov_base = tmpbuf; 446 aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size)); 447 auio.uio_iov = &aiov; 448 auio.uio_iovcnt = 1; 449 auio.uio_offset = 0; 450 auio.uio_rw = UIO_READ; 451 auio.uio_segflg = UIO_SYSSPACE; 452 auio.uio_resid = aiov.iov_len; 453 error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); 454 455 free(tmpbuf, M_TEMP); 456 return (error); 457 } 458 459 /* 460 * Set maxactive based on FS type 461 */ 462 vnthrottle(vn, vp) 463 register struct vn_softc *vn; 464 struct vnode *vp; 465 { 466 extern int (**nfsv2_vnodeop_p)(); 467 468 if (vp->v_op == nfsv2_vnodeop_p) 469 vn->sc_maxactive = 2; 470 else 471 vn->sc_maxactive = 8; 472 473 if (vn->sc_maxactive < 1) 474 vn->sc_maxactive = 1; 475 } 476 477 vnshutdown() 478 { 479 register struct vn_softc *vn; 480 481 for (vn = &vn_softc[0]; vn < &vn_softc[numvnd]; vn++) 482 if (vn->sc_flags & VNF_INITED) 483 vnclear(vn); 484 } 485 486 vnclear(vn) 487 register struct vn_softc *vn; 488 { 489 register struct vnode *vp = vn->sc_vp; 490 struct proc *p = curproc; /* XXX */ 491 492 #ifdef DEBUG 493 if (vndebug & VDB_FOLLOW) 494 printf("vnclear(%x): vp %x\n", vp); 495 #endif 496 vn->sc_flags &= ~VNF_INITED; 497 if (vp == (struct vnode *)0) 498 panic("vnioctl: null vp"); 499 (void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p); 500 crfree(vn->sc_cred); 501 vn->sc_vp = (struct vnode *)0; 502 vn->sc_cred = (struct ucred *)0; 503 vn->sc_size = 0; 504 } 505 506 vnsize(dev) 507 dev_t dev; 508 { 509 int unit = vnunit(dev); 510 register struct vn_softc *vn = &vn_softc[unit]; 511 512 if (unit >= numvnd || (vn->sc_flags & VNF_INITED) == 0) 513 return(-1); 514 return(vn->sc_size); 515 } 516 517 vndump(dev) 518 { 519 return(ENXIO); 520 } 521 #endif 522