1 /* 2 * Copyright (c) 1988 University of Utah. 3 * Copyright (c) 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * %sccs.include.redist.c% 11 * 12 * from: Utah $Hdr: vn.c 1.8 92/12/20$ 13 * 14 * @(#)vn.c 7.16 (Berkeley) 04/27/93 15 */ 16 17 /* 18 * Vnode disk driver. 19 * 20 * Block/character interface to a vnode. Allows one to treat a file 21 * as a disk (e.g. build a filesystem in it, mount it, etc.). 22 * 23 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 24 * instead of a simple VOP_RDWR. We do this to avoid distorting the 25 * local buffer cache. 26 * 27 * NOTE 2: There is a security issue involved with this driver. 28 * Once mounted all access to the contents of the "mapped" file via 29 * the special file is controlled by the permissions on the special 30 * file, the protection of the mapped file is ignored (effectively, 31 * by using root credentials in all transactions). 32 * 33 * NOTE 3: Doesn't interact with leases, should it? 34 */ 35 #include "vn.h" 36 #if NVN > 0 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/namei.h> 41 #include <sys/proc.h> 42 #include <sys/errno.h> 43 #include <sys/dkstat.h> 44 #include <sys/buf.h> 45 #include <sys/malloc.h> 46 #include <sys/ioctl.h> 47 #include <sys/mount.h> 48 #include <sys/vnode.h> 49 #include <sys/file.h> 50 #include <sys/uio.h> 51 52 #include <miscfs/specfs/specdev.h> 53 54 #include <dev/vnioctl.h> 55 56 #ifdef DEBUG 57 int vndebug = 0x00; 58 #define VDB_FOLLOW 0x01 59 #define VDB_INIT 0x02 60 #define VDB_IO 0x04 61 #endif 62 63 #define b_cylin b_resid 64 65 #define vnunit(x) ((minor(x) >> 3) & 0x7) /* for consistency */ 66 67 #define getvnbuf() \ 68 ((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK)) 69 #define putvnbuf(bp) \ 70 free((caddr_t)(bp), M_DEVBUF) 71 72 struct vn_softc { 73 int sc_flags; /* flags */ 74 size_t sc_size; /* size of vn */ 75 struct vnode *sc_vp; /* vnode */ 76 struct ucred *sc_cred; /* credentials */ 77 int sc_maxactive; /* max # of active requests */ 78 struct buf sc_tab; /* transfer queue */ 79 }; 80 81 /* sc_flags */ 82 #define VNF_ALIVE 0x01 83 #define VNF_INITED 0x02 84 85 #if 0 /* if you need static allocation */ 86 struct vn_softc vn_softc[NVN]; 87 int numvnd = NVN; 88 #else 89 struct vn_softc *vn_softc; 90 int numvnd; 91 #endif 92 93 void 94 vnattach(num) 95 int num; 96 { 97 char *mem; 98 register u_long size; 99 100 if (num <= 0) 101 return; 102 size = num * sizeof(struct vn_softc); 103 mem = malloc(size, M_DEVBUF, M_NOWAIT); 104 if (mem == NULL) { 105 printf("WARNING: no memory for vnode disks\n"); 106 return; 107 } 108 bzero(mem, size); 109 vn_softc = (struct vn_softc *)mem; 110 numvnd = num; 111 } 112 113 int 114 vnopen(dev, flags, mode, p) 115 dev_t dev; 116 int flags, mode; 117 struct proc *p; 118 { 119 int unit = vnunit(dev); 120 121 #ifdef DEBUG 122 if (vndebug & VDB_FOLLOW) 123 printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p); 124 #endif 125 if (unit >= numvnd) 126 return(ENXIO); 127 return(0); 128 } 129 130 /* 131 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 132 * Note that this driver can only be used for swapping over NFS on the hp 133 * since nfs_strategy on the vax cannot handle u-areas and page tables. 134 */ 135 vnstrategy(bp) 136 register struct buf *bp; 137 { 138 int unit = vnunit(bp->b_dev); 139 register struct vn_softc *vn = &vn_softc[unit]; 140 register struct buf *nbp; 141 register int bn, bsize, resid; 142 register caddr_t addr; 143 int sz, flags; 144 extern void vniodone(); 145 146 #ifdef DEBUG 147 if (vndebug & VDB_FOLLOW) 148 printf("vnstrategy(%x): unit %d\n", bp, unit); 149 #endif 150 if ((vn->sc_flags & VNF_INITED) == 0) { 151 bp->b_error = ENXIO; 152 bp->b_flags |= B_ERROR; 153 biodone(bp); 154 return; 155 } 156 bn = bp->b_blkno; 157 sz = howmany(bp->b_bcount, DEV_BSIZE); 158 bp->b_resid = bp->b_bcount; 159 if (bn < 0 || bn + sz > vn->sc_size) { 160 if (bn != vn->sc_size) { 161 bp->b_error = EINVAL; 162 bp->b_flags |= B_ERROR; 163 } 164 biodone(bp); 165 return; 166 } 167 bn = dbtob(bn); 168 bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize; 169 addr = bp->b_un.b_addr; 170 flags = bp->b_flags | B_CALL; 171 for (resid = bp->b_resid; resid; resid -= sz) { 172 struct vnode *vp; 173 daddr_t nbn; 174 int off, s; 175 176 nbp = getvnbuf(); 177 off = bn % bsize; 178 sz = min(bsize - off, resid); 179 (void) VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn, NULL); 180 #ifdef DEBUG 181 if (vndebug & VDB_IO) 182 printf("vnstrategy: vp %x/%x bn %x/%x\n", 183 vn->sc_vp, vp, bn, nbn); 184 #endif 185 nbp->b_flags = flags; 186 nbp->b_bcount = sz; 187 nbp->b_bufsize = bp->b_bufsize; 188 nbp->b_error = 0; 189 if (vp->v_type == VBLK || vp->v_type == VCHR) 190 nbp->b_dev = vp->v_rdev; 191 else 192 nbp->b_dev = NODEV; 193 nbp->b_un.b_addr = addr; 194 nbp->b_blkno = nbn + btodb(off); 195 nbp->b_proc = bp->b_proc; 196 nbp->b_iodone = vniodone; 197 nbp->b_vp = vp; 198 nbp->b_pfcent = (int) bp; /* XXX */ 199 nbp->b_rcred = vn->sc_cred; /* XXX crdup? */ 200 nbp->b_wcred = vn->sc_cred; /* XXX crdup? */ 201 nbp->b_dirtyoff = bp->b_dirtyoff; 202 nbp->b_dirtyend = bp->b_dirtyend; 203 nbp->b_validoff = bp->b_validoff; 204 nbp->b_validend = bp->b_validend; 205 /* 206 * Just sort by block number 207 */ 208 nbp->b_cylin = nbp->b_blkno; 209 s = splbio(); 210 disksort(&vn->sc_tab, nbp); 211 if (vn->sc_tab.b_active < vn->sc_maxactive) { 212 vn->sc_tab.b_active++; 213 vnstart(vn); 214 } 215 splx(s); 216 bn += sz; 217 addr += sz; 218 } 219 } 220 221 /* 222 * Feed requests sequentially. 223 * We do it this way to keep from flooding NFS servers if we are connected 224 * to an NFS file. This places the burden on the client rather than the 225 * server. 226 */ 227 vnstart(vn) 228 register struct vn_softc *vn; 229 { 230 register struct buf *bp; 231 232 /* 233 * Dequeue now since lower level strategy routine might 234 * queue using same links 235 */ 236 bp = vn->sc_tab.b_actf; 237 vn->sc_tab.b_actf = bp->b_actf; 238 #ifdef DEBUG 239 if (vndebug & VDB_IO) 240 printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n", 241 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr, 242 bp->b_bcount); 243 #endif 244 if ((bp->b_flags & B_READ) == 0) 245 bp->b_vp->v_numoutput++; 246 VOP_STRATEGY(bp); 247 } 248 249 void 250 vniodone(bp) 251 register struct buf *bp; 252 { 253 register struct buf *pbp = (struct buf *)bp->b_pfcent; /* XXX */ 254 register struct vn_softc *vn = &vn_softc[vnunit(pbp->b_dev)]; 255 int s; 256 257 s = splbio(); 258 #ifdef DEBUG 259 if (vndebug & VDB_IO) 260 printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n", 261 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr, 262 bp->b_bcount); 263 #endif 264 if (bp->b_error) { 265 #ifdef DEBUG 266 if (vndebug & VDB_IO) 267 printf("vniodone: bp %x error %d\n", bp, bp->b_error); 268 #endif 269 pbp->b_flags |= B_ERROR; 270 pbp->b_error = biowait(bp); 271 } 272 pbp->b_resid -= bp->b_bcount; 273 putvnbuf(bp); 274 if (pbp->b_resid == 0) { 275 #ifdef DEBUG 276 if (vndebug & VDB_IO) 277 printf("vniodone: pbp %x iodone\n", pbp); 278 #endif 279 biodone(pbp); 280 } 281 if (vn->sc_tab.b_actf) 282 vnstart(vn); 283 else 284 vn->sc_tab.b_active--; 285 splx(s); 286 } 287 288 vnread(dev, uio, flags, p) 289 dev_t dev; 290 struct uio *uio; 291 int flags; 292 struct proc *p; 293 { 294 295 #ifdef DEBUG 296 if (vndebug & VDB_FOLLOW) 297 printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p); 298 #endif 299 return(physio(vnstrategy, NULL, dev, B_READ, minphys, uio)); 300 } 301 302 vnwrite(dev, uio, flags, p) 303 dev_t dev; 304 struct uio *uio; 305 int flags; 306 struct proc *p; 307 { 308 309 #ifdef DEBUG 310 if (vndebug & VDB_FOLLOW) 311 printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p); 312 #endif 313 return(physio(vnstrategy, NULL, dev, B_WRITE, minphys, uio)); 314 } 315 316 /* ARGSUSED */ 317 vnioctl(dev, cmd, data, flag, p) 318 dev_t dev; 319 u_long cmd; 320 caddr_t data; 321 int flag; 322 struct proc *p; 323 { 324 int unit = vnunit(dev); 325 register struct vn_softc *vn; 326 struct vn_ioctl *vio; 327 struct vattr vattr; 328 struct nameidata nd; 329 int error; 330 331 #ifdef DEBUG 332 if (vndebug & VDB_FOLLOW) 333 printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n", 334 dev, cmd, data, flag, p, unit); 335 #endif 336 error = suser(p->p_ucred, &p->p_acflag); 337 if (error) 338 return (error); 339 if (unit >= numvnd) 340 return (ENXIO); 341 342 vn = &vn_softc[unit]; 343 vio = (struct vn_ioctl *)data; 344 switch (cmd) { 345 346 case VNIOCSET: 347 if (vn->sc_flags & VNF_INITED) 348 return(EBUSY); 349 /* 350 * Always open for read and write. 351 * This is probably bogus, but it lets vn_open() 352 * weed out directories, sockets, etc. so we don't 353 * have to worry about them. 354 */ 355 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); 356 if (error = vn_open(&nd, FREAD|FWRITE, 0)) 357 return(error); 358 if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) { 359 VOP_UNLOCK(nd.ni_vp); 360 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 361 return(error); 362 } 363 VOP_UNLOCK(nd.ni_vp); 364 vn->sc_vp = nd.ni_vp; 365 vn->sc_size = btodb(vattr.va_size); /* note truncation */ 366 if (error = vnsetcred(vn, p->p_ucred)) { 367 (void) vn_close(vn->sc_vp, FREAD|FWRITE, p->p_ucred, p); 368 return(error); 369 } 370 vnthrottle(vn, vn->sc_vp); 371 vio->vn_size = dbtob(vn->sc_size); 372 vn->sc_flags |= VNF_INITED; 373 #ifdef DEBUG 374 if (vndebug & VDB_INIT) 375 printf("vnioctl: SET vp %x size %x\n", 376 vn->sc_vp, vn->sc_size); 377 #endif 378 break; 379 380 case VNIOCCLR: 381 if ((vn->sc_flags & VNF_INITED) == 0) 382 return(ENXIO); 383 vnclear(vn); 384 #ifdef DEBUG 385 if (vndebug & VDB_INIT) 386 printf("vnioctl: CLRed\n"); 387 #endif 388 break; 389 390 default: 391 return(ENXIO); 392 } 393 return(0); 394 } 395 396 /* 397 * Duplicate the current processes' credentials. Since we are called only 398 * as the result of a SET ioctl and only root can do that, any future access 399 * to this "disk" is essentially as root. Note that credentials may change 400 * if some other uid can write directly to the mapped file (NFS). 401 */ 402 vnsetcred(vn, cred) 403 register struct vn_softc *vn; 404 struct ucred cred; 405 { 406 struct uio auio; 407 struct iovec aiov; 408 char tmpbuf[DEV_BSIZE]; 409 410 vn->sc_cred = crdup(cred); 411 /* XXX: Horrible kludge to establish credentials for NFS */ 412 aiov.iov_base = tmpbuf; 413 aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size)); 414 auio.uio_iov = &aiov; 415 auio.uio_iovcnt = 1; 416 auio.uio_offset = 0; 417 auio.uio_rw = UIO_READ; 418 auio.uio_segflg = UIO_SYSSPACE; 419 auio.uio_resid = aiov.iov_len; 420 return(VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred)); 421 } 422 423 /* 424 * Set maxactive based on FS type 425 */ 426 vnthrottle(vn, vp) 427 register struct vn_softc *vn; 428 struct vnode *vp; 429 { 430 extern int (**nfsv2_vnodeop_p)(); 431 432 if (vp->v_op == nfsv2_vnodeop_p) 433 vn->sc_maxactive = 2; 434 else 435 vn->sc_maxactive = 8; 436 437 if (vn->sc_maxactive < 1) 438 vn->sc_maxactive = 1; 439 } 440 441 vnshutdown() 442 { 443 register struct vn_softc *vn; 444 445 for (vn = &vn_softc[0]; vn < &vn_softc[numvnd]; vn++) 446 if (vn->sc_flags & VNF_INITED) 447 vnclear(vn); 448 } 449 450 vnclear(vn) 451 register struct vn_softc *vn; 452 { 453 register struct vnode *vp = vn->sc_vp; 454 struct proc *p = curproc; /* XXX */ 455 456 #ifdef DEBUG 457 if (vndebug & VDB_FOLLOW) 458 printf("vnclear(%x): vp %x\n", vp); 459 #endif 460 vn->sc_flags &= ~VNF_INITED; 461 if (vp == (struct vnode *)0) 462 panic("vnioctl: null vp"); 463 #if 0 464 /* XXX - this doesn't work right now */ 465 (void) VOP_FSYNC(vp, 0, vn->sc_cred, MNT_WAIT, p); 466 #endif 467 (void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p); 468 crfree(vn->sc_cred); 469 vn->sc_vp = (struct vnode *)0; 470 vn->sc_cred = (struct ucred *)0; 471 vn->sc_size = 0; 472 } 473 474 vnsize(dev) 475 dev_t dev; 476 { 477 int unit = vnunit(dev); 478 register struct vn_softc *vn = &vn_softc[unit]; 479 480 if (unit >= numvnd || (vn->sc_flags & VNF_INITED) == 0) 481 return(-1); 482 return(vn->sc_size); 483 } 484 485 vndump(dev) 486 { 487 return(ENXIO); 488 } 489 #endif 490