/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.13 2002/12/29 18:19:53 dillon Exp $
 * $DragonFly: src/sys/kern/vfs_vnops.c,v 1.41 2006/06/13 08:12:03 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/nlookup.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/conf.h>
#include <sys/syslog.h>

static int vn_closefile (struct file *fp);
static int vn_ioctl (struct file *fp, u_long com, caddr_t data,
		struct ucred *cred);
static int vn_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int svn_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int vn_poll (struct file *fp, int events, struct ucred *cred);
static int vn_kqfilter (struct file *fp, struct knote *kn);
static int vn_statfile (struct file *fp, struct stat *sb, struct ucred *cred);
static int vn_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int svn_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);

struct fileops vnode_fileops = {
	NULL,	/* port */
	NULL,	/* clone */
	vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter,
	vn_statfile, vn_closefile, nofo_shutdown
};

struct fileops specvnode_fileops = {
	NULL,	/* port */
	NULL,	/* clone */
	svn_read, svn_write, vn_ioctl, vn_poll, vn_kqfilter,
	vn_statfile, vn_closefile, nofo_shutdown
};

/*
 * Shortcut the device read/write.  This avoids a lot of vnode junk.
 * Basically the specfs vnops for read and write take the locked vnode,
 * unlock it (because we can't hold the vnode locked while reading or
 * writing a device, which may block indefinitely), issue the device
 * operation, then relock the vnode before returning, plus other junk.
 * This bypasses all of that and just does the device operation.
 */
void
vn_setspecops(struct file *fp)
{
	if (vfs_fastdev && fp->f_ops == &vnode_fileops) {
		fp->f_ops = &specvnode_fileops;
	}
}
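/*
 * Illustrative sketch (not compiled): how a hypothetical open path might
 * arm the shortcut after installing a device vnode in a freshly allocated
 * file pointer.  Everything except the vn_setspecops() call itself is
 * assumed context.
 */
#if 0
	error = vn_open(nd, fp, fmode, cmode);
	if (error == 0)
		vn_setspecops(fp);	/* use specvnode_fileops if vfs_fastdev */
#endif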
/*
 * Common code for vnode open operations.  Check permissions, and call
 * the VOP_NOPEN or VOP_NCREATE routine.
 *
 * The caller is responsible for setting up nd with nlookup_init() and
 * for cleaning it up with nlookup_done(), whether we return an error
 * or not.
 *
 * On success nd->nl_open_vp will hold a referenced and, if requested,
 * locked vnode.  A locked vnode is requested via NLC_LOCKVP.  If fp
 * is non-NULL the vnode will be installed in the file pointer.
 *
 * NOTE: The vnode is referenced just once on return whether or not it
 * is also installed in the file pointer.
 */
int
vn_open(struct nlookupdata *nd, struct file *fp, int fmode, int cmode)
{
	struct vnode *vp;
	struct ucred *cred = nd->nl_cred;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct namecache *ncp;
	int mode, error;

	/*
	 * Lookup the path and create or obtain the vnode.  After a
	 * successful lookup a locked nd->nl_ncp will be returned.
	 *
	 * The result of this section should be a locked vnode.
	 *
	 * XXX with only a little work we should be able to avoid locking
	 * the vnode if FWRITE, O_CREAT, and O_TRUNC are *not* set.
	 */
	if (fmode & O_CREAT) {
		/*
		 * CONDITIONAL CREATE FILE CASE
		 *
		 * Setting NLC_CREATE causes a negative hit to store
		 * the negative hit ncp and not return an error.  Then
		 * nc_error or nc_vp may be checked to see if the ncp
		 * represents a negative hit.  NLC_CREATE also requires
		 * write permission on the governing directory or EPERM
		 * is returned.
		 */
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			nd->nl_flags |= NLC_FOLLOW;
		nd->nl_flags |= NLC_CREATE;
		bwillwrite();
		error = nlookup(nd);
	} else {
		/*
		 * NORMAL OPEN FILE CASE
		 */
		error = nlookup(nd);
	}

	if (error)
		return (error);
	ncp = nd->nl_ncp;

	/*
	 * Split case to allow us to re-resolve and retry the ncp in case
	 * we get ESTALE.
	 */
again:
	if (fmode & O_CREAT) {
		if (ncp->nc_vp == NULL) {
			VATTR_NULL(vap);
			vap->va_type = VREG;
			vap->va_mode = cmode;
			if (fmode & O_EXCL)
				vap->va_vaflags |= VA_EXCLUSIVE;
			error = VOP_NCREATE(ncp, &vp, nd->nl_cred, vap);
			if (error)
				return (error);
			fmode &= ~O_TRUNC;
			ASSERT_VOP_LOCKED(vp, "create");
			/* locked vnode is returned */
		} else {
			if (fmode & O_EXCL) {
				error = EEXIST;
			} else {
				error = cache_vget(ncp, cred,
						   LK_EXCLUSIVE, &vp);
			}
			if (error)
				return (error);
			fmode &= ~O_CREAT;
		}
	} else {
		error = cache_vget(ncp, cred, LK_EXCLUSIVE, &vp);
		if (error)
			return (error);
	}

	/*
	 * We have a locked vnode and ncp now.  Note that the ncp will
	 * be cleaned up by the caller if nd->nl_ncp is left intact.
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((fmode & O_CREAT) == 0) {
		mode = 0;
		if (fmode & (FWRITE | O_TRUNC)) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			error = vn_writechk(vp);
			if (error) {
				/*
				 * Special stale handling, re-resolve the
				 * vnode.
				 */
				if (error == ESTALE) {
					vput(vp);
					vp = NULL;
					cache_setunresolved(ncp);
					error = cache_resolve(ncp, cred);
					if (error == 0)
						goto again;
				}
				goto bad;
			}
			mode |= VWRITE;
		}
		if (fmode & FREAD)
			mode |= VREAD;
		if (mode) {
			error = VOP_ACCESS(vp, mode, cred);
			if (error) {
				/*
				 * Special stale handling, re-resolve the
				 * vnode.
				 */
				if (error == ESTALE) {
					vput(vp);
					vp = NULL;
					cache_setunresolved(ncp);
					error = cache_resolve(ncp, cred);
					if (error == 0)
						goto again;
				}
				goto bad;
			}
		}
	}
	if (fmode & O_TRUNC) {
		VOP_UNLOCK(vp, 0);			/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, cred);
		if (error)
			goto bad;
	}

	/*
	 * Set up the fp so VOP_OPEN can override it.  No descriptor has
	 * been associated with the fp yet so we own it clean.  f_ncp
	 * inherits nl_ncp.
	 */
	if (fp) {
		if (vp->v_type == VDIR) {
			fp->f_ncp = nd->nl_ncp;
			nd->nl_ncp = NULL;
			cache_unlock(fp->f_ncp);
		}
	}

	/*
	 * Get rid of nl_ncp.  vn_open does not return it (it returns the
	 * vnode or the file pointer).  Note: we can't leave nl_ncp locked
	 * through the VOP_OPEN anyway since the VOP_OPEN may block, e.g.
	 * on /dev/ttyd0
	 */
	if (nd->nl_ncp) {
		cache_put(nd->nl_ncp);
		nd->nl_ncp = NULL;
	}

	error = VOP_OPEN(vp, fmode, cred, fp);
	if (error) {
		/*
		 * Setting f_ops to &badfileops will prevent the descriptor
		 * code from trying to close and release the vnode; since
		 * the open failed we do not want to call close.
		 */
		if (fp) {
			fp->f_data = NULL;
			fp->f_ops = &badfileops;
		}
		goto bad;
	}

#if 0
	/*
	 * Assert that VREG files have been setup for vmio.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("vn_open: regular file was not VMIO enabled!"));
#endif

	/*
	 * Return the vnode.  XXX needs some cleaning up.  The vnode is
	 * only returned in the fp == NULL case.
	 */
	if (fp == NULL) {
		nd->nl_open_vp = vp;
		nd->nl_vp_fmode = fmode;
		if ((nd->nl_flags & NLC_LOCKVP) == 0)
			VOP_UNLOCK(vp, 0);
	} else {
		vput(vp);
	}
	return (0);
bad:
	if (vp)
		vput(vp);
	return (error);
}
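/*
 * Illustrative sketch (not compiled): the calling convention described
 * in the comment above vn_open().  The path and open flags are
 * hypothetical; only nlookup_init()/nlookup_done() and the nl_open_vp
 * hand-off are taken from this file's contract.
 */
#if 0
	struct nlookupdata nd;
	struct vnode *vp = NULL;
	int error;

	error = nlookup_init(&nd, "/some/path", UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = vn_open(&nd, NULL, FREAD, 0);
	if (error == 0) {
		vp = nd.nl_open_vp;	/* referenced, unlocked (no NLC_LOCKVP) */
		nd.nl_open_vp = NULL;	/* take ownership from nd */
	}
	nlookup_done(&nd);		/* always, error or not */
	if (error == 0)
		vn_close(vp, FREAD);	/* drops the reference */
#endif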
/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(struct vnode *vp)
{
	/*
	 * If there's shared text associated with
	 * the vnode, try to free it up once.  If
	 * we fail, we can't allow writing.
	 */
	if (vp->v_flag & VTEXT)
		return (ETXTBSY);
	return (0);
}

/*
 * Vnode close call
 */
int
vn_close(struct vnode *vp, int flags)
{
	int error;

	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) == 0) {
		error = VOP_CLOSE(vp, flags);
		VOP_UNLOCK(vp, 0);
	}
	vrele(vp);
	return (error);
}

static __inline
int
sequential_heuristic(struct uio *uio, struct file *fp)
{
	/*
	 * Sequential heuristic - detect sequential operation
	 */
	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
	    uio->uio_offset == fp->f_nextoff) {
		int tmpseq = fp->f_seqcount;
		/*
		 * XXX we assume that the filesystem block size is
		 * the default.  Not true, but still gives us a pretty
		 * good indicator of how sequential the read operations
		 * are.
		 */
		tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
		if (tmpseq > IO_SEQMAX)
			tmpseq = IO_SEQMAX;
		fp->f_seqcount = tmpseq;
		return (fp->f_seqcount << IO_SEQSHIFT);
	}

	/*
	 * Not sequential, quick draw-down of seqcount
	 */
	if (fp->f_seqcount > 1)
		fp->f_seqcount = 1;
	else
		fp->f_seqcount = 0;
	return (0);
}
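/*
 * Worked example for the heuristic above (assumes the stock 16KB
 * BKVASIZE): a process issuing back-to-back 64KB reads satisfies the
 * uio_offset == f_nextoff test on every call, so f_seqcount grows by
 * 64KB / 16KB = 4 per read until it saturates at IO_SEQMAX.  The value
 * returned, f_seqcount shifted up by IO_SEQSHIFT, rides along in ioflag
 * where the filesystem can use it to scale read-ahead.  A single random
 * seek draws f_seqcount back down toward 0.
 */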
/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
	off_t offset, enum uio_seg segflg, int ioflg,
	struct ucred *cred, int *aresid)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_td = curthread;
	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp, 0);
	return (error);
}
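/*
 * Illustrative sketch (not compiled): read the first 512 bytes of a
 * referenced, unlocked vnode into a kernel buffer.  The buffer and the
 * surrounding variables are hypothetical; the argument order matches
 * vn_rdwr() above.  Passing a non-NULL aresid makes short transfers
 * visible to the caller instead of being converted to EIO.
 */
#if 0
	char buf[512];
	int resid;

	error = vn_rdwr(UIO_READ, vp, buf, sizeof(buf), (off_t)0,
			UIO_SYSSPACE, 0, cred, &resid);
#endif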
/*
 * Package up an I/O request on a vnode into a uio and do it.  The I/O
 * request is split up into smaller chunks and we try to avoid saturating
 * the buffer cache while potentially holding a vnode locked, so we
 * call bwillwrite() before each vn_rdwr().  We also call uio_yield()
 * to give other processes a chance to lock the vnode (either other
 * processes core'ing the same binary, or unrelated processes scanning
 * the directory).
 */
int
vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
		 off_t offset, enum uio_seg segflg, int ioflg,
		 struct ucred *cred, int *aresid)
{
	int error = 0;

	do {
		int chunk;

		/*
		 * Force `offset' to a multiple of MAXBSIZE except possibly
		 * for the first chunk, so that filesystems only need to
		 * write full blocks except possibly for the first and last
		 * chunks.
		 */
		chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;

		if (chunk > len)
			chunk = len;
		if (rw != UIO_READ && vp->v_type == VREG)
			bwillwrite();
		error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
				ioflg, cred, aresid);
		len -= chunk;	/* aresid calc already includes length */
		if (error)
			break;
		offset += chunk;
		base += chunk;
		uio_yield();
	} while (len);
	if (aresid)
		*aresid += len;
	return (error);
}
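/*
 * Worked example of the chunking arithmetic (assumes the stock 64KB
 * MAXBSIZE): for a transfer of 200000 bytes starting at offset 100000,
 * the first chunk is 65536 - (100000 % 65536) = 31072 bytes, which
 * advances the offset to 131072, an exact multiple of MAXBSIZE.  The
 * next two chunks are full 65536-byte blocks and the final chunk is the
 * remaining 37856 bytes.
 */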
/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct vnode *vp;
	int error, ioflag;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));
	vp = (struct vnode *)fp->f_data;

	ioflag = 0;
	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY);
	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	ioflag |= sequential_heuristic(uio, fp);

	error = VOP_READ(vp, uio, ioflag, cred);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
	VOP_UNLOCK(vp, 0);
	rel_mplock();
	return (error);
}

/*
 * Device-optimized file table vnode read routine.
 *
 * This bypasses the VOP table and talks directly to the device.  Most
 * filesystems just route to specfs and can make this optimization.
 *
 * MPALMOSTSAFE - acquires mplock
 */
static int
svn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct vnode *vp;
	int ioflag;
	int error;
	dev_t dev;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));

	vp = (struct vnode *)fp->f_data;
	if (vp == NULL || vp->v_type == VBAD) {
		error = EBADF;
		goto done;
	}

	if ((dev = vp->v_rdev) == NULL) {
		error = EBADF;
		goto done;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto done;
	}
	reference_dev(dev);

	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	ioflag = 0;
	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	ioflag |= sequential_heuristic(uio, fp);

	error = dev_dread(dev, uio, ioflag);

	release_dev(dev);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
done:
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct vnode *vp;
	int error, ioflag;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type == VREG)
		bwillwrite();
	vp = (struct vnode *)fp->f_data;	/* XXX needed? */

	ioflag = IO_UNIT;
	if (vp->v_type == VREG &&
	    ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
		ioflag |= IO_APPEND;
	}

	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	if (flags & O_FASYNCWRITE) {
		/* ioflag &= ~IO_SYNC; */
	} else if (flags & O_FSYNCWRITE) {
		ioflag |= IO_SYNC;
	} else if (fp->f_flag & O_FSYNC) {
		ioflag |= IO_SYNC;
	}

	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
		ioflag |= IO_SYNC;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;
	ioflag |= sequential_heuristic(uio, fp);
	error = VOP_WRITE(vp, uio, ioflag, cred);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
	VOP_UNLOCK(vp, 0);
	rel_mplock();
	return (error);
}

/*
 * Device-optimized file table vnode write routine.
 *
 * This bypasses the VOP table and talks directly to the device.  Most
 * filesystems just route to specfs and can make this optimization.
 *
 * MPALMOSTSAFE - acquires mplock
 */
static int
svn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct vnode *vp;
	int ioflag;
	int error;
	dev_t dev;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));

	vp = (struct vnode *)fp->f_data;
	if (vp == NULL || vp->v_type == VBAD) {
		error = EBADF;
		goto done;
	}
	if (vp->v_type == VREG)
		bwillwrite();
	vp = (struct vnode *)fp->f_data;	/* XXX needed? */

	if ((dev = vp->v_rdev) == NULL) {
		error = EBADF;
		goto done;
	}
	reference_dev(dev);

	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	ioflag = IO_UNIT;
	if (vp->v_type == VREG &&
	    ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
		ioflag |= IO_APPEND;
	}

	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	if (flags & O_FASYNCWRITE) {
		/* ioflag &= ~IO_SYNC; */
	} else if (flags & O_FSYNCWRITE) {
		ioflag |= IO_SYNC;
	} else if (fp->f_flag & O_FSYNC) {
		ioflag |= IO_SYNC;
	}

	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
		ioflag |= IO_SYNC;
	ioflag |= sequential_heuristic(uio, fp);

	error = dev_dwrite(dev, uio, ioflag);

	release_dev(dev);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
done:
	rel_mplock();
	return (error);
}
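/*
 * Worked example of the ioflag construction shared by the four routines
 * above: on the write path, a descriptor opened with O_DIRECT and O_FSYNC
 * whose vnode lives on a MNT_SYNCHRONOUS mount ends up with ioflag =
 * IO_UNIT | IO_DIRECT | IO_SYNC, plus whatever sequential_heuristic()
 * contributes.  In each pair of tests the per-call O_F* flags override
 * the descriptor's f_flag settings, e.g. O_FBUFFERED suppresses an
 * O_DIRECT descriptor flag for that one call.
 */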
/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_statfile(struct file *fp, struct stat *sb, struct ucred *cred)
{
	struct vnode *vp;
	int error;

	get_mplock();
	vp = (struct vnode *)fp->f_data;
	error = vn_stat(vp, sb, cred);
	rel_mplock();
	return (error);
}

int
vn_stat(struct vnode *vp, struct stat *sb, struct ucred *cred)
{
	struct vattr vattr;
	struct vattr *vap;
	int error;
	u_short mode;
	dev_t dev;

	vap = &vattr;
	error = VOP_GETATTR(vp, vap);
	if (error)
		return (error);

	/*
	 * Zero the spare stat fields
	 */
	sb->st_lspare = 0;
	sb->st_qspare = 0;

	/*
	 * Copy from vattr table
	 */
	if (vap->va_fsid != VNOVAL)
		sb->st_dev = vap->va_fsid;
	else
		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
	sb->st_ino = vap->va_fileid;
	mode = vap->va_mode;
	switch (vap->va_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		/* This is a cosmetic change, symlinks do not have a mode. */
		if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
			mode &= ~ACCESSPERMS;	/* 0000 */
		else
			mode |= ACCESSPERMS;	/* 0777 */
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = vap->va_nlink;
	sb->st_uid = vap->va_uid;
	sb->st_gid = vap->va_gid;
	sb->st_rdev = vap->va_rdev;
	sb->st_size = vap->va_size;
	sb->st_atimespec = vap->va_atime;
	sb->st_mtimespec = vap->va_mtime;
	sb->st_ctimespec = vap->va_ctime;

	/*
	 * A VCHR and VBLK device may track the last access and last
	 * modified time independently of the filesystem.  This is
	 * particularly true because device read and write calls may
	 * bypass the filesystem.
	 */
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		if ((dev = vp->v_rdev) != NULL) {
			if (dev->si_lastread) {
				sb->st_atimespec.tv_sec = dev->si_lastread;
				sb->st_atimespec.tv_nsec = 0;
			}
			if (dev->si_lastwrite) {
				sb->st_mtimespec.tv_sec = dev->si_lastwrite;
				sb->st_mtimespec.tv_nsec = 0;
			}
		}
	}

	/*
	 * According to www.opengroup.org, the meaning of st_blksize is
	 *   "a filesystem-specific preferred I/O block size for this
	 *    object.  In some filesystem types, this may vary from file
	 *    to file"
	 * Default to PAGE_SIZE after much discussion.
	 */

	if (vap->va_type == VREG) {
		sb->st_blksize = vap->va_blocksize;
	} else if (vn_isdisk(vp, NULL)) {
		/*
		 * XXX this is broken.  If the device is not yet open (aka
		 * stat() call, aka v_rdev == NULL), how are we supposed
		 * to get a valid block size out of it?
		 */
		dev_t dev;

		if ((dev = vp->v_rdev) == NULL)
			dev = udev2dev(vp->v_udev, vp->v_type == VBLK);
		sb->st_blksize = dev->si_bsize_best;
		if (sb->st_blksize < dev->si_bsize_phys)
			sb->st_blksize = dev->si_bsize_phys;
		if (sb->st_blksize < BLKDEV_IOSIZE)
			sb->st_blksize = BLKDEV_IOSIZE;
	} else {
		sb->st_blksize = PAGE_SIZE;
	}

	sb->st_flags = vap->va_flags;
	if (suser_cred(cred, 0))
		sb->st_gen = 0;
	else
		sb->st_gen = vap->va_gen;

#if (S_BLKSIZE == 512)
	/* Optimize this case */
	sb->st_blocks = vap->va_bytes >> 9;
#else
	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
#endif
	sb->st_fsmid = vap->va_fsmid;
	return (0);
}
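/*
 * Worked example for the st_blocks conversion above: with the usual
 * 512-byte S_BLKSIZE, a file holding va_bytes = 1048576 (1MB) reports
 * st_blocks = 1048576 >> 9 = 2048, the same result the generic division
 * by S_BLKSIZE would produce.
 */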
/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred)
{
	struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vnode *ovp;
	struct vattr vattr;
	int error;

	get_mplock();

	switch (vp->v_type) {
	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			if ((error = VOP_GETATTR(vp, &vattr)) != 0)
				break;
			*(int *)data = vattr.va_size - fp->f_offset;
			error = 0;
			break;
		}
		if (com == FIOASYNC) {			/* XXX */
			error = 0;			/* XXX */
			break;
		}
		/* fall into ... */
	default:
#if 0
		return (ENOTTY);
#endif
	case VFIFO:
	case VCHR:
	case VBLK:
		if (com == FIODTYPE) {
			if (vp->v_type != VCHR && vp->v_type != VBLK) {
				error = ENOTTY;
				break;
			}
			*(int *)data = dev_dflags(vp->v_rdev) & D_TYPEMASK;
			error = 0;
			break;
		}
		error = VOP_IOCTL(vp, com, data, fp->f_flag, ucred);
		if (error == 0 && com == TIOCSCTTY) {
			struct proc *p = curthread->td_proc;
			struct session *sess;

			if (p == NULL) {
				error = ENOTTY;
				break;
			}

			sess = p->p_session;
			/* Do nothing if reassigning same control tty */
			if (sess->s_ttyvp == vp) {
				error = 0;
				break;
			}

			/* Get rid of reference to old control tty */
			ovp = sess->s_ttyvp;
			vref(vp);
			sess->s_ttyvp = vp;
			if (ovp)
				vrele(ovp);
		}
		break;
	}
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_poll(struct file *fp, int events, struct ucred *cred)
{
	int error;

	get_mplock();
	error = VOP_POLL(((struct vnode *)fp->f_data), events, cred);
	rel_mplock();
	return (error);
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
#ifndef	DEBUG_LOCKS
vn_lock(struct vnode *vp, int flags)
#else
debug_vn_lock(struct vnode *vp, int flags, const char *filename, int line)
#endif
{
	int error;

	do {
#ifdef	DEBUG_LOCKS
		vp->filename = filename;
		vp->line = line;
#endif
		error = VOP_LOCK(vp, flags | LK_NOPAUSE);
		if (error == 0)
			break;
	} while (flags & LK_RETRY);

	/*
	 * Because we (had better!) have a ref on the vnode, once it
	 * goes to VRECLAIMED state it will not be recycled until all
	 * refs go away.  So we can just check the flag.
	 */
	if (error == 0 && (vp->v_flag & VRECLAIMED)) {
		VOP_UNLOCK(vp, 0);
		error = ENOENT;
	}
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_closefile(struct file *fp)
{
	int error;

	get_mplock();
	fp->f_ops = &badfileops;
	error = vn_close(((struct vnode *)fp->f_data), fp->f_flag);
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_kqfilter(struct file *fp, struct knote *kn)
{
	int error;

	get_mplock();
	error = VOP_KQFILTER(((struct vnode *)fp->f_data), kn);
	rel_mplock();
	return (error);
}
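/*
 * Illustrative sketch (not compiled): the vn_lock() contract above means
 * a caller holding only a reference must still be prepared for ENOENT,
 * which reports that the vnode was reclaimed while it was unlocked.  The
 * surrounding code is hypothetical.
 */
#if 0
	vref(vp);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_RETRY) == 0) {
		/* locked and not VRECLAIMED; safe to operate on vp */
		VOP_UNLOCK(vp, 0);
	} else {
		/* ENOENT: vp was reclaimed while we held only a ref */
	}
	vrele(vp);
#endif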