1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * (c) UNIX System Laboratories, Inc. 13 * All or some portions of this file are derived from material licensed 14 * to the University of California by American Telephone and Telegraph 15 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 16 * the permission of UNIX System Laboratories, Inc. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 3. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 43 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 44 * $FreeBSD$ 45 */ 46 47 #include "opt_suiddir.h" 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/kernel.h> 52 #include <sys/fcntl.h> 53 #include <sys/filio.h> 54 #include <sys/limits.h> 55 #include <sys/stat.h> 56 #include <sys/bio.h> 57 #include <sys/buf2.h> 58 #include <sys/endian.h> 59 #include <sys/caps.h> 60 #include <sys/mount.h> 61 #include <sys/unistd.h> 62 #include <sys/time.h> 63 #include <sys/vnode.h> 64 #include <sys/namei.h> 65 #include <sys/lockf.h> 66 #include <sys/event.h> 67 #include <sys/conf.h> 68 #include <sys/file.h> 69 #include <sys/vmmeter.h> 70 #include <sys/vfsops.h> 71 #include <sys/malloc.h> 72 #include <sys/uio.h> 73 #include <sys/jail.h> 74 75 #include <vm/vm.h> 76 #include <vm/vm_param.h> 77 #include <vm/vm_extern.h> 78 #include <vm/vm_object.h> 79 #include <vm/vm_page2.h> 80 #include <vm/vm_pager.h> 81 #include <vm/vnode_pager.h> 82 83 #include <vfs/ufs/dir.h> 84 #include <vfs/fifofs/fifo.h> 85 86 #include <vfs/ext2fs/fs.h> 87 #include <vfs/ext2fs/inode.h> 88 #include <vfs/ext2fs/ext2fs.h> 89 #include <vfs/ext2fs/ext2_extern.h> 90 #include <vfs/ext2fs/ext2_dinode.h> 91 #include <vfs/ext2fs/ext2_dir.h> 92 #include <vfs/ext2fs/ext2_mount.h> 93 #include <vfs/ext2fs/ext2_extents.h> 94 95 SDT_PROVIDER_DECLARE(ext2fs); 96 /* 97 * ext2fs trace probe: 98 * arg0: verbosity. Higher numbers give more verbose messages 99 * arg1: Textual message 100 */ 101 SDT_PROBE_DEFINE2(ext2fs, , vnops, trace, "int", "char*"); 102 103 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 104 105 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); 106 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, 107 struct thread *); 108 109 /* 110 * A virgin directory (no blushing please). 111 * Note that the type and namlen fields are reversed relative to ext2. 112 * Also, we don't use `struct odirtemplate', since it would just cause 113 * endianness problems. 114 */ 115 static struct dirtemplate mastertemplate = { 116 0, htole16(12), 1, EXT2_FT_DIR, ".", 117 0, htole16(DIRBLKSIZ - 12), 2, EXT2_FT_DIR, ".." 118 }; 119 static struct dirtemplate omastertemplate = { 120 0, htole16(12), 1, EXT2_FT_UNKNOWN, ".", 121 0, htole16(DIRBLKSIZ - 12), 2, EXT2_FT_UNKNOWN, ".." 122 }; 123 124 void 125 ext2_itimes(struct vnode *vp) 126 { 127 struct inode *ip; 128 struct timespec ts; 129 130 ip = VTOI(vp); 131 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 132 return; 133 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 134 ip->i_flag |= IN_LAZYMOD; 135 else 136 ip->i_flag |= IN_MODIFIED; 137 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 138 vfs_timestamp(&ts); 139 if (ip->i_flag & IN_ACCESS) { 140 ip->i_atime = ts.tv_sec; 141 ip->i_atimensec = ts.tv_nsec; 142 } 143 if (ip->i_flag & IN_UPDATE) { 144 ip->i_mtime = ts.tv_sec; 145 ip->i_mtimensec = ts.tv_nsec; 146 ip->i_modrev++; 147 } 148 if (ip->i_flag & IN_CHANGE) { 149 ip->i_ctime = ts.tv_sec; 150 ip->i_ctimensec = ts.tv_nsec; 151 } 152 } 153 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 154 } 155 156 /* 157 * Create a regular file 158 */ 159 static int 160 ext2_create(struct vop_old_create_args *ap) 161 { 162 int error; 163 164 error = 165 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 166 ap->a_dvp, ap->a_vpp, ap->a_cnp); 167 if (error != 0) 168 return (error); 169 return (0); 170 } 171 172 static int 173 ext2_open(struct vop_open_args *ap) 174 { 175 176 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) 177 return (EOPNOTSUPP); 178 179 /* 180 * Files marked append-only must be opened for appending. 181 */ 182 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 183 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 184 return (EPERM); 185 186 return (vop_stdopen(ap)); 187 } 188 189 /* 190 * Close called. 191 * 192 * Update the times on the inode. 193 */ 194 static int 195 ext2_close(struct vop_close_args *ap) 196 { 197 struct vnode *vp = ap->a_vp; 198 199 if (VREFCNT(vp) > 1) 200 ext2_itimes(vp); 201 return (vop_stdclose(ap)); 202 } 203 204 static int 205 ext2_access(struct vop_access_args *ap) 206 { 207 struct vnode *vp = ap->a_vp; 208 struct inode *ip = VTOI(vp); 209 int error; 210 211 if (vp->v_type == VBLK || vp->v_type == VCHR) 212 return (EOPNOTSUPP); 213 214 error = vop_helper_access(ap, ip->i_uid, ip->i_gid, ip->i_mode, 215 ip->i_flags); 216 return (error); 217 } 218 219 static int 220 ext2_getattr(struct vop_getattr_args *ap) 221 { 222 struct vnode *vp = ap->a_vp; 223 struct inode *ip = VTOI(vp); 224 struct vattr *vap = ap->a_vap; 225 226 ext2_itimes(vp); 227 /* 228 * Copy from inode table 229 */ 230 vap->va_fsid = devid_from_dev(ip->i_dev); 231 vap->va_fileid = ip->i_number; 232 vap->va_mode = ip->i_mode & ~IFMT; 233 vap->va_nlink = ip->i_nlink; 234 vap->va_uid = ip->i_uid; 235 vap->va_gid = ip->i_gid; 236 vap->va_size = ip->i_size; 237 vap->va_atime.tv_sec = ip->i_atime; 238 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0; 239 vap->va_mtime.tv_sec = ip->i_mtime; 240 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0; 241 vap->va_ctime.tv_sec = ip->i_ctime; 242 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0; 243 vap->va_flags = ip->i_flags; 244 vap->va_gen = ip->i_gen; 245 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 246 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); 247 vap->va_type = IFTOVT(ip->i_mode); 248 vap->va_filerev = ip->i_modrev; 249 return (0); 250 } 251 252 /* 253 * Set attribute vnode op. called from several syscalls 254 */ 255 static int 256 ext2_setattr(struct vop_setattr_args *ap) 257 { 258 struct vattr *vap = ap->a_vap; 259 struct vnode *vp = ap->a_vp; 260 struct inode *ip = VTOI(vp); 261 struct ucred *cred = ap->a_cred; 262 struct thread *td = curthread; 263 int error; 264 265 /* 266 * Check for unsettable attributes. 267 */ 268 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 269 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 270 (vap->va_blocksize != VNOVAL) || (vap->va_rmajor != VNOVAL) || 271 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 272 return (EINVAL); 273 } 274 if (vap->va_flags != VNOVAL) { 275 /* Disallow flags not supported by ext2fs. */ 276 if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) 277 return (EOPNOTSUPP); 278 279 if (vp->v_mount->mnt_flag & MNT_RDONLY) 280 return (EROFS); 281 if (cred->cr_uid != ip->i_uid && 282 (error = caps_priv_check(cred, SYSCAP_NOVFS_SETATTR))) 283 { 284 return (error); 285 } 286 287 /* 288 * Note that a root chflags becomes a user chflags when 289 * we are jailed, unless the jail vfs_chflags sysctl 290 * is set. 291 */ 292 if (cred->cr_uid == 0 && 293 (!jailed(cred) || PRISON_CAP_ISSET(cred->cr_prison->pr_caps, 294 PRISON_CAP_VFS_CHFLAGS))) { 295 if ((ip->i_flags 296 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) && 297 securelevel > 0) 298 return (EPERM); 299 ip->i_flags = vap->va_flags; 300 } else { 301 if (ip->i_flags 302 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 303 (vap->va_flags & UF_SETTABLE) != vap->va_flags) 304 return (EPERM); 305 ip->i_flags &= SF_SETTABLE; 306 ip->i_flags |= (vap->va_flags & UF_SETTABLE); 307 } 308 ip->i_flag |= IN_CHANGE; 309 if (vap->va_flags & (IMMUTABLE | APPEND)) 310 return (0); 311 } 312 if (ip->i_flags & (IMMUTABLE | APPEND)) 313 return (EPERM); 314 /* 315 * Go through the fields and update iff not VNOVAL. 316 */ 317 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 318 if (vp->v_mount->mnt_flag & MNT_RDONLY) 319 return (EROFS); 320 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, 321 td)) != 0) 322 return (error); 323 } 324 if (vap->va_size != VNOVAL) { 325 /* 326 * Disallow write attempts on read-only file systems; 327 * unless the file is a socket, fifo, or a block or 328 * character device resident on the file system. 329 */ 330 switch (vp->v_type) { 331 case VDIR: 332 return (EISDIR); 333 case VLNK: 334 case VREG: 335 if (vp->v_mount->mnt_flag & MNT_RDONLY) 336 return (EROFS); 337 break; 338 default: 339 break; 340 } 341 if ((error = ext2_truncate(vp, vap->va_size, 0, cred)) != 0) 342 return (error); 343 } 344 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 345 if (vp->v_mount->mnt_flag & MNT_RDONLY) 346 return (EROFS); 347 if (cred->cr_uid != ip->i_uid && 348 (error = caps_priv_check(cred, SYSCAP_NOVFS_SETATTR)) && 349 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 350 (error = VOP_EACCESS(vp, VWRITE, cred)))) 351 { 352 return (error); 353 } 354 ip->i_flag |= IN_CHANGE | IN_MODIFIED; 355 if (vap->va_atime.tv_sec != VNOVAL) { 356 ip->i_flag &= ~IN_ACCESS; 357 ip->i_atime = vap->va_atime.tv_sec; 358 ip->i_atimensec = vap->va_atime.tv_nsec; 359 } 360 if (vap->va_mtime.tv_sec != VNOVAL) { 361 ip->i_flag &= ~IN_UPDATE; 362 ip->i_mtime = vap->va_mtime.tv_sec; 363 ip->i_mtimensec = vap->va_mtime.tv_nsec; 364 } 365 error = ext2_update(vp, 0); 366 if (error) 367 return (error); 368 } 369 error = 0; 370 if (vap->va_mode != (mode_t)VNOVAL) { 371 if (vp->v_mount->mnt_flag & MNT_RDONLY) 372 return (EROFS); 373 error = ext2_chmod(vp, (int)vap->va_mode, cred, td); 374 } 375 return (error); 376 } 377 378 /* 379 * Change the mode on a file. 380 * Inode must be locked before calling. 381 */ 382 static int 383 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) 384 { 385 struct inode *ip = VTOI(vp); 386 int error; 387 388 if (cred->cr_uid != ip->i_uid) { 389 error = caps_priv_check(cred, SYSCAP_NOVFS_CHMOD); 390 if (error) 391 return (error); 392 } 393 if (cred->cr_uid) { 394 if (vp->v_type != VDIR && (mode & S_ISTXT)) 395 return (EFTYPE); 396 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) 397 return (EPERM); 398 } 399 ip->i_mode &= ~ALLPERMS; 400 ip->i_mode |= (mode & ALLPERMS); 401 ip->i_flag |= IN_CHANGE; 402 return (0); 403 } 404 405 /* 406 * Perform chown operation on inode ip; 407 * inode must be locked prior to call. 408 */ 409 static int 410 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, 411 struct thread *td) 412 { 413 struct inode *ip = VTOI(vp); 414 uid_t ouid; 415 gid_t ogid; 416 int error = 0; 417 418 if (uid == (uid_t)VNOVAL) 419 uid = ip->i_uid; 420 if (gid == (gid_t)VNOVAL) 421 gid = ip->i_gid; 422 /* 423 * If we don't own the file, are trying to change the owner 424 * of the file, or are not a member of the target group, 425 * the caller must be superuser or the call fails. 426 */ 427 if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid || 428 (gid != ip->i_gid && !(cred->cr_gid == gid || 429 groupmember(gid, cred)))) && 430 (error = caps_priv_check(cred, SYSCAP_NOVFS_CHOWN))) 431 { 432 return (error); 433 } 434 435 ogid = ip->i_gid; 436 ouid = ip->i_uid; 437 ip->i_gid = gid; 438 ip->i_uid = uid; 439 ip->i_flag |= IN_CHANGE; 440 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 441 if (caps_priv_check(cred, SYSCAP_NOVFS_RETAINSUGID) != 0) 442 ip->i_mode &= ~(ISUID | ISGID); 443 } 444 return (0); 445 } 446 447 struct ext2_fsync_bp_info { 448 struct vnode *vp; 449 int waitfor; 450 }; 451 452 static int 453 ext2_fsync_bp(struct buf *bp, void *data) 454 { 455 struct ext2_fsync_bp_info *info = data; 456 457 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) 458 return (0); 459 if ((bp->b_flags & B_DELWRI) == 0) 460 panic("ext2_fsync: not dirty"); 461 bremfree(bp); 462 463 /* 464 * Wait for I/O associated with indirect blocks to complete, 465 * since there is no way to quickly wait for them below. 466 */ 467 if (bp->b_vp == info->vp || (info->waitfor & MNT_NOWAIT)) 468 bawrite(bp); 469 else 470 bwrite(bp); 471 return (1); 472 } 473 474 /* 475 * Synch an open file. 476 */ 477 /* ARGSUSED */ 478 static int 479 ext2_fsync(struct vop_fsync_args *ap) 480 { 481 struct ext2_fsync_bp_info info; 482 struct vnode *vp = ap->a_vp; 483 int count; 484 485 /* 486 * XXX why is all this fs specific? 487 */ 488 489 /* 490 * Flush all dirty buffers associated with a vnode. 491 */ 492 lwkt_gettoken(&vp->v_token); 493 info.vp = vp; 494 loop: 495 info.waitfor = ap->a_waitfor; 496 count = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL, ext2_fsync_bp, 497 &info); 498 if (count) 499 goto loop; 500 501 if (ap->a_waitfor == MNT_WAIT) { 502 bio_track_wait(&vp->v_track_write, 0, 0); 503 #ifdef DIAGNOSTIC 504 if (!RB_EMPTY(&vp->v_rbdirty_tree)) { 505 vprint("ext2_fsync: dirty", vp); 506 goto loop; 507 } 508 #endif 509 } 510 lwkt_reltoken(&vp->v_token); 511 512 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); 513 } 514 515 /* 516 * Mknod vnode call 517 */ 518 /* ARGSUSED */ 519 static int 520 ext2_mknod(struct vop_old_mknod_args *ap) 521 { 522 struct vattr *vap = ap->a_vap; 523 struct vnode **vpp = ap->a_vpp; 524 struct inode *ip; 525 ino_t ino; 526 int error; 527 528 if (vap->va_rmajor != VNOVAL && 529 makeudev(vap->va_rmajor, vap->va_rminor) == NOUDEV) { 530 return (EINVAL); 531 } 532 533 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 534 ap->a_dvp, vpp, ap->a_cnp); 535 if (error) 536 return (error); 537 ip = VTOI(*vpp); 538 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 539 if (vap->va_rmajor != VNOVAL) { 540 /* 541 * Want to be able to use this to make badblock 542 * inodes, so don't truncate the dev number. 543 */ 544 ip->i_rdev = makeudev(vap->va_rmajor, vap->va_rminor); 545 } 546 /* 547 * Remove inode, then reload it through VFS_VGET so it is 548 * checked to see if it is an alias of an existing entry in 549 * the inode cache. XXX I don't believe this is necessary now. 550 */ 551 (*vpp)->v_type = VNON; 552 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 553 vgone_vxlocked(*vpp); 554 vput(*vpp); 555 error = VFS_VGET(ap->a_dvp->v_mount, NULL, ino, vpp); 556 if (error) { 557 *vpp = NULL; 558 return (error); 559 } 560 return (0); 561 } 562 563 static int 564 ext2_remove(struct vop_old_remove_args *ap) 565 { 566 struct inode *ip; 567 struct vnode *vp = ap->a_vp; 568 struct vnode *dvp = ap->a_dvp; 569 int error; 570 571 ip = VTOI(vp); 572 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 573 (VTOI(dvp)->i_flags & APPEND)) { 574 error = EPERM; 575 goto out; 576 } 577 error = ext2_dirremove(dvp, ap->a_cnp); 578 if (error == 0) { 579 ip->i_nlink--; 580 ip->i_flag |= IN_CHANGE; 581 } 582 out: 583 return (error); 584 } 585 586 /* 587 * link vnode call 588 */ 589 static int 590 ext2_link(struct vop_old_link_args *ap) 591 { 592 struct vnode *vp = ap->a_vp; 593 struct vnode *tdvp = ap->a_tdvp; 594 struct componentname *cnp = ap->a_cnp; 595 struct inode *ip; 596 int error; 597 598 if (tdvp->v_mount != vp->v_mount) { 599 error = EXDEV; 600 goto out2; 601 } 602 if (tdvp != vp) { 603 error = vn_lock(vp, LK_EXCLUSIVE | LK_FAILRECLAIM); 604 if (error) 605 goto out2; 606 } 607 ip = VTOI(vp); 608 if ((nlink_t)ip->i_nlink >= EXT4_LINK_MAX) { 609 error = EMLINK; 610 goto out; 611 } 612 if (ip->i_flags & (IMMUTABLE | APPEND)) { 613 error = EPERM; 614 goto out; 615 } 616 ip->i_nlink++; 617 ip->i_flag |= IN_CHANGE; 618 error = ext2_update(vp, !DOINGASYNC(vp)); 619 if (!error) 620 error = ext2_direnter(ip, tdvp, cnp); 621 if (error) { 622 ip->i_nlink--; 623 ip->i_flag |= IN_CHANGE; 624 } 625 out: 626 if (tdvp != vp) 627 vn_unlock(vp); 628 out2: 629 return (error); 630 } 631 632 static int 633 ext2_inc_nlink(struct inode *ip) 634 { 635 636 ip->i_nlink++; 637 638 if (S_ISDIR(ip->i_mode) && 639 EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK) && 640 ip->i_nlink > 1) { 641 if (ip->i_nlink >= EXT4_LINK_MAX || ip->i_nlink == 2) 642 ip->i_nlink = 1; 643 } else if (ip->i_nlink > EXT4_LINK_MAX) { 644 ip->i_nlink--; 645 return (EMLINK); 646 } 647 648 return (0); 649 } 650 651 static void 652 ext2_dec_nlink(struct inode *ip) 653 { 654 655 if (!S_ISDIR(ip->i_mode) || ip->i_nlink > 2) 656 ip->i_nlink--; 657 } 658 659 /* 660 * Rename system call. 661 * rename("foo", "bar"); 662 * is essentially 663 * unlink("bar"); 664 * link("foo", "bar"); 665 * unlink("foo"); 666 * but ``atomically''. Can't do full commit without saving state in the 667 * inode on disk which isn't feasible at this time. Best we can do is 668 * always guarantee the target exists. 669 * 670 * Basic algorithm is: 671 * 672 * 1) Bump link count on source while we're linking it to the 673 * target. This also ensure the inode won't be deleted out 674 * from underneath us while we work (it may be truncated by 675 * a concurrent `trunc' or `open' for creation). 676 * 2) Link source to destination. If destination already exists, 677 * delete it first. 678 * 3) Unlink source reference to inode if still around. If a 679 * directory was moved and the parent of the destination 680 * is different from the source, patch the ".." entry in the 681 * directory. 682 */ 683 static int 684 ext2_rename(struct vop_old_rename_args *ap) 685 { 686 struct vnode *tvp = ap->a_tvp; 687 struct vnode *tdvp = ap->a_tdvp; 688 struct vnode *fvp = ap->a_fvp; 689 struct vnode *fdvp = ap->a_fdvp; 690 struct componentname *tcnp = ap->a_tcnp; 691 struct componentname *fcnp = ap->a_fcnp; 692 struct inode *ip, *xp, *dp; 693 struct dirtemplate *dirbuf; 694 int doingdirectory = 0, oldparent = 0, newparent = 0; 695 int error = 0; 696 u_char namlen; 697 698 /* 699 * Check for cross-device rename. 700 */ 701 if ((fvp->v_mount != tdvp->v_mount) || 702 (tvp && (fvp->v_mount != tvp->v_mount))) { 703 error = EXDEV; 704 abortit: 705 if (tdvp == tvp) 706 vrele(tdvp); 707 else 708 vput(tdvp); 709 if (tvp) 710 vput(tvp); 711 vrele(fdvp); 712 vrele(fvp); 713 return (error); 714 } 715 716 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 717 (VTOI(tdvp)->i_flags & APPEND))) { 718 error = EPERM; 719 goto abortit; 720 } 721 722 /* 723 * Renaming a file to itself has no effect. The upper layers should 724 * not call us in that case. Temporarily just warn if they do. 725 */ 726 if (fvp == tvp) { 727 SDT_PROBE2(ext2fs, , vnops, trace, 1, 728 "rename: fvp == tvp (can't happen)"); 729 error = 0; 730 goto abortit; 731 } 732 733 if ((error = vn_lock(fvp, LK_EXCLUSIVE | LK_FAILRECLAIM)) != 0) 734 goto abortit; 735 dp = VTOI(fdvp); 736 ip = VTOI(fvp); 737 if (ip->i_nlink >= EXT4_LINK_MAX && 738 !EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK)) { 739 vn_unlock(fvp); 740 error = EMLINK; 741 goto abortit; 742 } 743 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 744 || (dp->i_flags & APPEND)) { 745 vn_unlock(fvp); 746 error = EPERM; 747 goto abortit; 748 } 749 if ((ip->i_mode & IFMT) == IFDIR) { 750 /* 751 * Avoid ".", "..", and aliases of "." for obvious reasons. 752 */ 753 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 754 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & CNP_ISDOTDOT || 755 (ip->i_flag & IN_RENAME)) { 756 vn_unlock(fvp); 757 error = EINVAL; 758 goto abortit; 759 } 760 ip->i_flag |= IN_RENAME; 761 oldparent = dp->i_number; 762 doingdirectory++; 763 } 764 //vrele(fdvp); XXX 765 766 /* 767 * When the target exists, both the directory 768 * and target vnodes are returned locked. 769 */ 770 dp = VTOI(tdvp); 771 xp = NULL; 772 if (tvp) 773 xp = VTOI(tvp); 774 775 /* 776 * 1) Bump link count while we're moving stuff 777 * around. If we crash somewhere before 778 * completing our work, the link count 779 * may be wrong, but correctable. 780 */ 781 ext2_inc_nlink(ip); 782 ip->i_flag |= IN_CHANGE; 783 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) { 784 vn_unlock(fvp); 785 goto bad; 786 } 787 788 /* 789 * If ".." must be changed (ie the directory gets a new 790 * parent) then the source directory must not be in the 791 * directory hierarchy above the target, as this would 792 * orphan everything below the source directory. Also 793 * the user must have write permission in the source so 794 * as to be able to change "..". We must repeat the call 795 * to namei, as the parent directory is unlocked by the 796 * call to checkpath(). 797 */ 798 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred); 799 vn_unlock(fvp); 800 801 /* 802 * tvp (if not NULL) and tdvp are locked. fvp and fdvp are not. 803 * dp and xp are set according to tdvp and tvp. 804 */ 805 if (oldparent != dp->i_number) 806 newparent = dp->i_number; 807 if (doingdirectory && newparent) { 808 if (error) /* write access check above */ 809 goto bad; 810 811 /* 812 * Prepare for relookup, get rid of xp 813 */ 814 if (xp != NULL) { 815 vput(tvp); 816 xp = NULL; 817 } 818 819 /* 820 * checkpath vput()'s tdvp (VTOI(dp)) on return no matter what, 821 * get an extra ref so we wind up with just an unlocked, ref'd 822 * tdvp. The 'out' target skips xp and tdvp cleanups. Our 823 * tdvp is now unlocked so we have to clean it up ourselves. 824 */ 825 vref(tdvp); 826 error = ext2_checkpath(ip, dp, tcnp->cn_cred); 827 tcnp->cn_flags |= CNP_PDIRUNLOCK; 828 if (error) { 829 vrele(tdvp); 830 goto out; 831 } 832 /* 833 * relookup no longer messes with the ref count. An unlocked 834 * tdvp must be passed and if no error occurs a locked tdvp 835 * will be returned. We have to use the out target again. 836 */ 837 error = relookup(tdvp, &tvp, tcnp); 838 if (error) { 839 if (tcnp->cn_flags & CNP_PDIRUNLOCK) 840 vrele(tdvp); 841 else 842 vput(tdvp); 843 goto out; 844 } 845 846 /* 847 * tdvp is locked at this point. in the RENAME case tvp may 848 * be NULL without an error, assign xp accordingly. The 849 * 'bad' target can be used again after this. 850 */ 851 dp = VTOI(tdvp); 852 if (tvp) 853 xp = VTOI(tvp); 854 } 855 856 /* 857 * 2) If target doesn't exist, link the target 858 * to the source and unlink the source. 859 * Otherwise, rewrite the target directory 860 * entry to reference the source inode and 861 * expunge the original entry's existence. 862 */ 863 if (xp == NULL) { 864 if (dp->i_devvp != ip->i_devvp) 865 panic("ext2_rename: EXDEV"); 866 /* 867 * Account for ".." in new directory. 868 * When source and destination have the same 869 * parent we don't fool with the link count. 870 */ 871 if (doingdirectory && newparent) { 872 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 873 error = EMLINK; 874 goto bad; 875 } 876 error = ext2_inc_nlink(dp); 877 if (error) 878 goto bad; 879 880 dp->i_flag |= IN_CHANGE; 881 error = ext2_update(tdvp, !DOINGASYNC(tdvp)); 882 if (error) 883 goto bad; 884 } 885 error = ext2_direnter(ip, tdvp, tcnp); 886 if (error) { 887 if (doingdirectory && newparent) { 888 ext2_dec_nlink(dp); 889 dp->i_flag |= IN_CHANGE; 890 (void)ext2_update(tdvp, 1); 891 } 892 goto bad; 893 } 894 vput(tdvp); 895 } else { 896 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) 897 panic("ext2_rename: EXDEV"); 898 /* 899 * Short circuit rename(foo, foo). 900 */ 901 if (xp->i_number == ip->i_number) 902 panic("ext2_rename: same file"); 903 /* 904 * If the parent directory is "sticky", then the user must 905 * own the parent directory, or the destination of the rename, 906 * otherwise the destination may not be changed (except by 907 * root). This implements append-only directories. 908 */ 909 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 910 tcnp->cn_cred->cr_uid != dp->i_uid && 911 xp->i_uid != tcnp->cn_cred->cr_uid) { 912 error = EPERM; 913 goto bad; 914 } 915 /* 916 * Target must be empty if a directory and have no links 917 * to it. Also, ensure source and target are compatible 918 * (both directories, or both not directories). 919 */ 920 if ((xp->i_mode & IFMT) == IFDIR) { 921 if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred)) { 922 error = ENOTEMPTY; 923 goto bad; 924 } 925 if (!doingdirectory) { 926 error = ENOTDIR; 927 goto bad; 928 } 929 } else if (doingdirectory) { 930 error = EISDIR; 931 goto bad; 932 } 933 error = ext2_dirrewrite(dp, ip, tcnp); 934 if (error) 935 goto bad; 936 /* 937 * If the target directory is in the same 938 * directory as the source directory, 939 * decrement the link count on the parent 940 * of the target directory. 941 */ 942 if (doingdirectory && !newparent) { 943 ext2_dec_nlink(dp); 944 dp->i_flag |= IN_CHANGE; 945 } 946 vput(tdvp); 947 /* 948 * Adjust the link count of the target to 949 * reflect the dirrewrite above. If this is 950 * a directory it is empty and there are 951 * no links to it, so we can squash the inode and 952 * any space associated with it. We disallowed 953 * renaming over top of a directory with links to 954 * it above, as the remaining link would point to 955 * a directory without "." or ".." entries. 956 */ 957 ext2_dec_nlink(xp); 958 if (doingdirectory) { 959 if (xp->i_nlink > 2) 960 panic("ext2_rename: linked directory"); 961 error = ext2_truncate(tvp, (off_t)0, IO_SYNC, 962 tcnp->cn_cred); 963 xp->i_nlink = 0; 964 } 965 xp->i_flag |= IN_CHANGE; 966 vput(tvp); 967 xp = NULL; 968 } 969 970 /* 971 * 3) Unlink the source. 972 */ 973 fcnp->cn_flags &= ~CNP_MODMASK; 974 fcnp->cn_flags |= CNP_LOCKPARENT; 975 //vref(fdvp); XXX 976 error = relookup(fdvp, &fvp, fcnp); 977 if (error) { 978 /* 979 * From name has disappeared. 980 */ 981 if (doingdirectory) 982 panic("ext2_rename: lost dir entry"); 983 /* ip->i_flag only sets IN_RENAME if doingdirectory */ 984 vrele(ap->a_fvp); 985 if (fcnp->cn_flags & CNP_PDIRUNLOCK) 986 vrele(fdvp); 987 else 988 vput(fdvp); 989 return (0); 990 } 991 KKASSERT((fcnp->cn_flags & CNP_PDIRUNLOCK) == 0); 992 993 /* 994 * This case shouldn't occur 995 */ 996 if (fvp == NULL) { 997 /* 998 * From name has disappeared. 999 */ 1000 if (doingdirectory) 1001 panic("ext2_rename: lost dir entry"); 1002 /* ip->i_flag only sets IN_RENAME if doingdirectory */ 1003 vrele(ap->a_fvp); 1004 vput(fvp); 1005 vput(fdvp); 1006 return (0); 1007 } 1008 1009 /* 1010 * fvp and fdvp are both ref'd and locked. 1011 */ 1012 xp = VTOI(fvp); 1013 dp = VTOI(fdvp); 1014 1015 /* 1016 * Ensure that the directory entry still exists and has not 1017 * changed while the new name has been entered. If the source is 1018 * a file then the entry may have been unlinked or renamed. In 1019 * either case there is no further work to be done. If the source 1020 * is a directory then it cannot have been rmdir'ed; its link 1021 * count of three would cause a rmdir to fail with ENOTEMPTY. 1022 * The IN_RENAME flag ensures that it cannot be moved by another 1023 * rename. 1024 */ 1025 if (xp != ip) { 1026 /* 1027 * From name resolves to a different inode. IN_RENAME is 1028 * not sufficient protection against timing window races 1029 * so we can't panic here. 1030 */ 1031 } else { 1032 /* 1033 * If the source is a directory with a 1034 * new parent, the link count of the old 1035 * parent directory must be decremented 1036 * and ".." set to point to the new parent. 1037 */ 1038 if (doingdirectory && newparent) { 1039 ext2_dec_nlink(dp); 1040 dp->i_flag |= IN_CHANGE; 1041 dirbuf = malloc(dp->i_e2fs->e2fs_bsize, M_TEMP, M_WAITOK | M_ZERO); 1042 error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, 1043 sizeof (struct dirtemplate), (off_t)0, 1044 UIO_SYSSPACE, IO_NODELOCKED, 1045 tcnp->cn_cred, NULL); 1046 if (error == 0) { 1047 /* Like ufs little-endian: */ 1048 namlen = dirbuf->dotdot_type; 1049 if (namlen != 2 || 1050 dirbuf->dotdot_name[0] != '.' || 1051 dirbuf->dotdot_name[1] != '.') { 1052 ext2_dirbad(xp, (doff_t)12, 1053 "rename: mangled dir"); 1054 } else { 1055 dirbuf->dotdot_ino = htole32(newparent); 1056 /* 1057 * dirblock 0 could be htree root, 1058 * try both csum update functions. 1059 */ 1060 ext2_dirent_csum_set(ip, 1061 (struct ext2fs_direct_2 *)dirbuf); 1062 ext2_dx_csum_set(ip, 1063 (struct ext2fs_direct_2 *)dirbuf); 1064 vn_rdwr(UIO_WRITE, fvp, 1065 (caddr_t)&dirbuf, 1066 sizeof (struct dirtemplate), 1067 (off_t)0, UIO_SYSSPACE, 1068 IO_NODELOCKED | IO_SYNC, 1069 tcnp->cn_cred, NULL); 1070 } 1071 } 1072 free(dirbuf, M_TEMP); 1073 } 1074 error = ext2_dirremove(fdvp, fcnp); 1075 if (!error) { 1076 ext2_dec_nlink(xp); 1077 xp->i_flag |= IN_CHANGE; 1078 } 1079 xp->i_flag &= ~IN_RENAME; 1080 } 1081 if (dp) 1082 vput(fdvp); 1083 if (xp) 1084 vput(fvp); 1085 vrele(ap->a_fvp); 1086 return (error); 1087 1088 bad: 1089 if (xp) 1090 vput(ITOV(xp)); 1091 vput(ITOV(dp)); 1092 out: 1093 if (doingdirectory) 1094 ip->i_flag &= ~IN_RENAME; 1095 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1096 ext2_dec_nlink(ip); 1097 ip->i_flag |= IN_CHANGE; 1098 ip->i_flag &= ~IN_RENAME; 1099 vput(fvp); 1100 } else 1101 vrele(fvp); 1102 return (error); 1103 } 1104 1105 /* 1106 * Mkdir system call 1107 */ 1108 static int 1109 ext2_mkdir(struct vop_old_mkdir_args *ap) 1110 { 1111 struct m_ext2fs *fs; 1112 struct vnode *dvp = ap->a_dvp; 1113 struct vattr *vap = ap->a_vap; 1114 struct componentname *cnp = ap->a_cnp; 1115 struct inode *ip, *dp; 1116 struct vnode *tvp; 1117 struct dirtemplate dirtemplate, *dtp; 1118 char *buf = NULL; 1119 int error, dmode; 1120 1121 dp = VTOI(dvp); 1122 if ((nlink_t)dp->i_nlink >= EXT4_LINK_MAX && 1123 !EXT2_HAS_RO_COMPAT_FEATURE(dp->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK)) { 1124 error = EMLINK; 1125 goto out; 1126 } 1127 dmode = vap->va_mode & 0777; 1128 dmode |= IFDIR; 1129 /* 1130 * Must simulate part of ext2_makeinode here to acquire the inode, 1131 * but not have it entered in the parent directory. The entry is 1132 * made later after writing "." and ".." entries. 1133 */ 1134 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); 1135 if (error) 1136 goto out; 1137 ip = VTOI(tvp); 1138 fs = ip->i_e2fs; 1139 ip->i_gid = dp->i_gid; 1140 #ifdef SUIDDIR 1141 { 1142 /* 1143 * if we are hacking owners here, (only do this where told to) 1144 * and we are not giving it TOO root, (would subvert quotas) 1145 * then go ahead and give it to the other user. 1146 * The new directory also inherits the SUID bit. 1147 * If user's UID and dir UID are the same, 1148 * 'give it away' so that the SUID is still forced on. 1149 */ 1150 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1151 (dp->i_mode & ISUID) && dp->i_uid) { 1152 dmode |= ISUID; 1153 ip->i_uid = dp->i_uid; 1154 } else { 1155 ip->i_uid = cnp->cn_cred->cr_uid; 1156 } 1157 } 1158 #else 1159 ip->i_uid = cnp->cn_cred->cr_uid; 1160 #endif 1161 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1162 ip->i_mode = dmode; 1163 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1164 ip->i_nlink = 2; 1165 if (cnp->cn_flags & CNP_ISWHITEOUT) 1166 ip->i_flags |= UF_OPAQUE; 1167 error = ext2_update(tvp, 1); 1168 1169 /* 1170 * The vnode must have a VM object in order to issue buffer cache 1171 * ops on it. 1172 */ 1173 vinitvmio(tvp, 0, PAGE_SIZE, -1); 1174 1175 /* 1176 * Bump link count in parent directory 1177 * to reflect work done below. Should 1178 * be done before reference is created 1179 * so reparation is possible if we crash. 1180 */ 1181 ext2_inc_nlink(dp); 1182 dp->i_flag |= IN_CHANGE; 1183 error = ext2_update(dvp, !DOINGASYNC(dvp)); 1184 if (error) 1185 goto bad; 1186 1187 /* Initialize directory with "." and ".." from static template. */ 1188 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, 1189 EXT2F_INCOMPAT_FTYPE)) 1190 dtp = &mastertemplate; 1191 else 1192 dtp = &omastertemplate; 1193 dirtemplate = *dtp; 1194 dirtemplate.dot_ino = htole32(ip->i_number); 1195 dirtemplate.dotdot_ino = htole32(dp->i_number); 1196 /* 1197 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's 1198 * just redefine it - for this function only 1199 */ 1200 #undef DIRBLKSIZ 1201 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize 1202 dirtemplate.dotdot_reclen = htole16(DIRBLKSIZ - 12); 1203 buf = malloc(DIRBLKSIZ, M_TEMP, M_WAITOK | M_ZERO); 1204 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1205 dirtemplate.dotdot_reclen = 1206 htole16(le16toh(dirtemplate.dotdot_reclen) - 1207 sizeof(struct ext2fs_direct_tail)); 1208 ext2_init_dirent_tail(EXT2_DIRENT_TAIL(buf, DIRBLKSIZ)); 1209 } 1210 memcpy(buf, &dirtemplate, sizeof(dirtemplate)); 1211 ext2_dirent_csum_set(ip, (struct ext2fs_direct_2 *)buf); 1212 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)buf, 1213 DIRBLKSIZ, (off_t)0, UIO_SYSSPACE, 1214 IO_NODELOCKED | IO_SYNC, cnp->cn_cred, NULL); 1215 if (error) { 1216 ext2_dec_nlink(dp); 1217 dp->i_flag |= IN_CHANGE; 1218 goto bad; 1219 } 1220 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 1221 /* XXX should grow with balloc() */ 1222 panic("ext2_mkdir: blksize"); 1223 else { 1224 ip->i_size = DIRBLKSIZ; 1225 ip->i_flag |= IN_CHANGE; 1226 } 1227 1228 /* Directory set up, now install its entry in the parent directory. */ 1229 error = ext2_direnter(ip, dvp, cnp); 1230 if (error) { 1231 ext2_dec_nlink(dp); 1232 dp->i_flag |= IN_CHANGE; 1233 } 1234 bad: 1235 /* 1236 * No need to do an explicit VOP_TRUNCATE here, vrele will do this 1237 * for us because we set the link count to 0. 1238 */ 1239 if (error) { 1240 ip->i_nlink = 0; 1241 ip->i_flag |= IN_CHANGE; 1242 vput(tvp); 1243 } else 1244 *ap->a_vpp = tvp; 1245 out: 1246 free(buf, M_TEMP); 1247 return (error); 1248 #undef DIRBLKSIZ 1249 #define DIRBLKSIZ DEV_BSIZE 1250 } 1251 1252 /* 1253 * Rmdir system call. 1254 */ 1255 static int 1256 ext2_rmdir(struct vop_old_rmdir_args *ap) 1257 { 1258 struct vnode *vp = ap->a_vp; 1259 struct vnode *dvp = ap->a_dvp; 1260 struct componentname *cnp = ap->a_cnp; 1261 struct inode *ip, *dp; 1262 int error; 1263 1264 ip = VTOI(vp); 1265 dp = VTOI(dvp); 1266 1267 /* 1268 * Verify the directory is empty (and valid). 1269 * (Rmdir ".." won't be valid since 1270 * ".." will contain a reference to 1271 * the current directory and thus be 1272 * non-empty.) 1273 */ 1274 if (!ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1275 error = ENOTEMPTY; 1276 goto out; 1277 } 1278 if ((dp->i_flags & APPEND) 1279 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1280 error = EPERM; 1281 goto out; 1282 } 1283 /* 1284 * Delete reference to directory before purging 1285 * inode. If we crash in between, the directory 1286 * will be reattached to lost+found, 1287 */ 1288 error = ext2_dirremove(dvp, cnp); 1289 if (error) 1290 goto out; 1291 ext2_dec_nlink(dp); 1292 dp->i_flag |= IN_CHANGE; 1293 vn_unlock(dvp); 1294 /* 1295 * Truncate inode. The only stuff left 1296 * in the directory is "." and "..". 1297 */ 1298 ip->i_nlink = 0; 1299 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred); 1300 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1301 out: 1302 return (error); 1303 } 1304 1305 /* 1306 * symlink -- make a symbolic link 1307 */ 1308 static int 1309 ext2_symlink(struct vop_old_symlink_args *ap) 1310 { 1311 struct vnode *vp, **vpp = ap->a_vpp; 1312 struct inode *ip; 1313 int len, error; 1314 1315 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1316 vpp, ap->a_cnp); 1317 if (error) 1318 return (error); 1319 vp = *vpp; 1320 len = strlen(ap->a_target); 1321 if (len < vp->v_mount->mnt_maxsymlinklen) { 1322 ip = VTOI(vp); 1323 bcopy(ap->a_target, (char *)ip->i_shortlink, len); 1324 ip->i_size = len; 1325 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1326 } else { 1327 /* 1328 * Make sure we have a VM object in order to use 1329 * the buffer cache. 1330 */ 1331 if (vp->v_object == NULL) 1332 vinitvmio(vp, 0, PAGE_SIZE, -1); 1333 1334 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1335 UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL); 1336 } 1337 if (error) 1338 vput(vp); 1339 return (error); 1340 } 1341 1342 /* 1343 * Return target name of a symbolic link 1344 */ 1345 static int 1346 ext2_readlink(struct vop_readlink_args *ap) 1347 { 1348 struct vnode *vp = ap->a_vp; 1349 struct inode *ip = VTOI(vp); 1350 int isize; 1351 1352 isize = ip->i_size; 1353 if (isize < vp->v_mount->mnt_maxsymlinklen) { 1354 uiomove((char *)ip->i_shortlink, isize, ap->a_uio); 1355 return (0); 1356 } 1357 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1358 } 1359 1360 /* 1361 * Calculate the logical to physical mapping if not done already, 1362 * then call the device strategy routine. 1363 * 1364 * In order to be able to swap to a file, the ext2_bmaparray() operation may not 1365 * deadlock on memory. See ext2_bmap() for details. 1366 */ 1367 static int 1368 ext2_strategy(struct vop_strategy_args *ap) 1369 { 1370 struct bio *bio = ap->a_bio; 1371 struct bio *nbio; 1372 struct buf *bp = bio->bio_buf; 1373 struct vnode *vp = ap->a_vp; 1374 struct inode *ip; 1375 int error; 1376 1377 ip = VTOI(vp); 1378 if (vp->v_type == VBLK || vp->v_type == VCHR) 1379 panic("ext2_strategy: spec"); 1380 nbio = push_bio(bio); 1381 if (nbio->bio_offset == NOOFFSET) { 1382 error = VOP_BMAP(vp, bio->bio_offset, &nbio->bio_offset, NULL, 1383 NULL, bp->b_cmd); 1384 if (error) { 1385 bp->b_error = error; 1386 bp->b_flags |= B_ERROR; 1387 /* I/O was never started on nbio, must biodone(bio) */ 1388 biodone(bio); 1389 return (error); 1390 } 1391 if (nbio->bio_offset == NOOFFSET) 1392 vfs_bio_clrbuf(bp); 1393 } 1394 if (nbio->bio_offset == NOOFFSET) { 1395 /* I/O was never started on nbio, must biodone(bio) */ 1396 biodone(bio); 1397 return (0); 1398 } 1399 vn_strategy(ip->i_devvp, nbio); 1400 return (0); 1401 } 1402 1403 /* 1404 * Print out the contents of an inode. 1405 */ 1406 static int 1407 ext2_print(struct vop_print_args *ap) 1408 { 1409 struct vnode *vp = ap->a_vp; 1410 struct inode *ip = VTOI(vp); 1411 1412 printf("tag VT_EXT2FS, ino %lu, on dev %s (%d, %d)", 1413 (u_long)ip->i_number, devtoname(ip->i_dev), major(ip->i_dev), 1414 minor(ip->i_dev)); 1415 if (vp->v_type == VFIFO) 1416 fifo_printinfo(vp); 1417 lockmgr_printinfo(&vp->v_lock); 1418 printf("\n"); 1419 return (0); 1420 } 1421 1422 /* 1423 * Read wrapper for fifos. 1424 */ 1425 static 1426 int 1427 ext2fifo_read(struct vop_read_args *ap) 1428 { 1429 int error, resid; 1430 struct inode *ip; 1431 struct uio *uio; 1432 1433 uio = ap->a_uio; 1434 resid = uio->uio_resid; 1435 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 1436 ip = VTOI(ap->a_vp); 1437 if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL && 1438 (uio->uio_resid != resid || (error == 0 && resid != 0))) 1439 VTOI(ap->a_vp)->i_flag |= IN_ACCESS; 1440 return (error); 1441 } 1442 1443 /* 1444 * Write wrapper for fifos. 1445 */ 1446 static 1447 int 1448 ext2fifo_write(struct vop_write_args *ap) 1449 { 1450 int error, resid; 1451 struct inode *ip; 1452 struct uio *uio; 1453 1454 uio = ap->a_uio; 1455 resid = uio->uio_resid; 1456 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 1457 ip = VTOI(ap->a_vp); 1458 if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) 1459 VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; 1460 return (error); 1461 } 1462 1463 /* 1464 * Close wrapper for fifos. 1465 * 1466 * Update the times on the inode then do device close. 1467 */ 1468 static int 1469 ext2fifo_close(struct vop_close_args *ap) 1470 { 1471 struct vnode *vp = ap->a_vp; 1472 1473 if (VREFCNT(vp) > 1) 1474 ext2_itimes(vp); 1475 return (VOCALL(&fifo_vnode_vops, &ap->a_head)); 1476 } 1477 1478 static void 1479 filt_ext2detach(struct knote *kn) 1480 { 1481 struct vnode *vp = (struct vnode *)kn->kn_hook; 1482 1483 lwkt_gettoken(&vp->v_token); 1484 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 1485 lwkt_reltoken(&vp->v_token); 1486 } 1487 1488 /*ARGSUSED*/ 1489 static int 1490 filt_ext2read(struct knote *kn, long hint) 1491 { 1492 struct vnode *vp = (struct vnode *)kn->kn_hook; 1493 struct inode *ip = VTOI(vp); 1494 off_t off; 1495 1496 /* 1497 * filesystem is gone, so set the EOF flag and schedule 1498 * the knote for deletion. 1499 */ 1500 if (hint == NOTE_REVOKE) { 1501 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 1502 return (1); 1503 } 1504 off = ip->i_size - kn->kn_fp->f_offset; 1505 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX; 1506 if (kn->kn_sfflags & NOTE_OLDAPI) 1507 return (1); 1508 return (kn->kn_data != 0); 1509 } 1510 1511 /*ARGSUSED*/ 1512 static int 1513 filt_ext2write(struct knote *kn, long hint) 1514 { 1515 /* 1516 * filesystem is gone, so set the EOF flag and schedule 1517 * the knote for deletion. 1518 */ 1519 if (hint == NOTE_REVOKE) 1520 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 1521 1522 kn->kn_data = 0; 1523 return (1); 1524 } 1525 1526 static int 1527 filt_ext2vnode(struct knote *kn, long hint) 1528 { 1529 if (kn->kn_sfflags & hint) 1530 kn->kn_fflags |= hint; 1531 if (hint == NOTE_REVOKE) { 1532 kn->kn_flags |= (EV_EOF | EV_NODATA); 1533 return (1); 1534 } 1535 return (kn->kn_fflags != 0); 1536 } 1537 1538 static struct filterops ext2read_filtops = 1539 { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, filt_ext2detach, filt_ext2read }; 1540 static struct filterops ext2write_filtops = 1541 { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, filt_ext2detach, filt_ext2write }; 1542 static struct filterops ext2vnode_filtops = 1543 { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, filt_ext2detach, filt_ext2vnode }; 1544 1545 static int 1546 ext2_kqfilter(struct vop_kqfilter_args *ap) 1547 { 1548 struct vnode *vp = ap->a_vp; 1549 struct knote *kn = ap->a_kn; 1550 1551 switch (kn->kn_filter) { 1552 case EVFILT_READ: 1553 kn->kn_fop = &ext2read_filtops; 1554 break; 1555 case EVFILT_WRITE: 1556 kn->kn_fop = &ext2write_filtops; 1557 break; 1558 case EVFILT_VNODE: 1559 kn->kn_fop = &ext2vnode_filtops; 1560 break; 1561 default: 1562 return (EOPNOTSUPP); 1563 } 1564 1565 kn->kn_hook = (caddr_t)vp; 1566 1567 /* XXX: kq token actually protects the list */ 1568 lwkt_gettoken(&vp->v_token); 1569 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 1570 lwkt_reltoken(&vp->v_token); 1571 1572 return (0); 1573 } 1574 1575 /* 1576 * Kqfilter wrapper for fifos. 1577 * 1578 * Fall through to ext2 kqfilter routines if needed 1579 */ 1580 static int 1581 ext2fifo_kqfilter(struct vop_kqfilter_args *ap) 1582 { 1583 int error; 1584 1585 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 1586 if (error) 1587 error = ext2_kqfilter(ap); 1588 return (error); 1589 } 1590 1591 /* 1592 * Return POSIX pathconf information applicable to ext2 filesystems. 1593 */ 1594 static int 1595 ext2_pathconf(struct vop_pathconf_args *ap) 1596 { 1597 int error = 0; 1598 1599 switch (ap->a_name) { 1600 case _PC_LINK_MAX: 1601 if (EXT2_HAS_RO_COMPAT_FEATURE(VTOI(ap->a_vp)->i_e2fs, 1602 EXT2F_ROCOMPAT_DIR_NLINK)) 1603 *ap->a_retval = INT_MAX; 1604 else 1605 *ap->a_retval = EXT4_LINK_MAX; 1606 break; 1607 case _PC_NAME_MAX: 1608 *ap->a_retval = NAME_MAX; 1609 break; 1610 case _PC_PATH_MAX: 1611 *ap->a_retval = PATH_MAX; 1612 break; 1613 case _PC_PIPE_BUF: 1614 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 1615 *ap->a_retval = PIPE_BUF; 1616 else 1617 error = EINVAL; 1618 break; 1619 case _PC_CHOWN_RESTRICTED: 1620 *ap->a_retval = 1; 1621 break; 1622 case _PC_NO_TRUNC: 1623 *ap->a_retval = 1; 1624 break; 1625 case _PC_MIN_HOLE_SIZE: 1626 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1627 break; 1628 case _PC_PRIO_IO: 1629 *ap->a_retval = 0; 1630 break; 1631 case _PC_SYNC_IO: 1632 *ap->a_retval = 0; 1633 break; 1634 case _PC_ALLOC_SIZE_MIN: 1635 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 1636 break; 1637 case _PC_FILESIZEBITS: 1638 *ap->a_retval = 64; 1639 break; 1640 case _PC_REC_INCR_XFER_SIZE: 1641 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1642 break; 1643 case _PC_REC_MAX_XFER_SIZE: 1644 *ap->a_retval = -1; /* means ``unlimited'' */ 1645 break; 1646 case _PC_REC_MIN_XFER_SIZE: 1647 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1648 break; 1649 case _PC_REC_XFER_ALIGN: 1650 *ap->a_retval = PAGE_SIZE; 1651 break; 1652 case _PC_SYMLINK_MAX: 1653 *ap->a_retval = MAXPATHLEN; 1654 break; 1655 1656 default: 1657 error = vop_stdpathconf(ap); 1658 break; 1659 } 1660 return (error); 1661 } 1662 1663 /* 1664 * Initialize the vnode associated with a new inode, handle aliased vnodes. 1665 */ 1666 int 1667 ext2_vinit(struct mount *mntp, struct vnode **vpp) 1668 { 1669 struct inode *ip; 1670 struct vnode *vp; 1671 1672 vp = *vpp; 1673 ip = VTOI(vp); 1674 1675 switch (vp->v_type = IFTOVT(ip->i_mode)) { 1676 case VCHR: 1677 case VBLK: 1678 vp->v_ops = &mntp->mnt_vn_spec_ops; 1679 addaliasu(vp, umajor(ip->i_rdev), uminor(ip->i_rdev)); 1680 break; 1681 case VFIFO: 1682 vp->v_ops = &mntp->mnt_vn_fifo_ops; 1683 break; 1684 case VDIR: 1685 case VREG: 1686 vinitvmio(vp, ip->i_size, PAGE_SIZE, -1); /* XXX */ 1687 break; 1688 case VLNK: 1689 if ((ip->i_size >= vp->v_mount->mnt_maxsymlinklen) && 1690 ip->i_blocks != 0) { 1691 vinitvmio(vp, ip->i_size, PAGE_SIZE, -1); 1692 } 1693 break; 1694 default: 1695 break; 1696 } 1697 1698 /* 1699 * Only unallocated inodes should be of type VNON. 1700 */ 1701 if (ip->i_mode != 0 && vp->v_type == VNON) 1702 return (EINVAL); 1703 1704 if (ip->i_number == EXT2_ROOTINO) 1705 vp->v_flag |= VROOT; 1706 /* 1707 * Initialize modrev times. 1708 */ 1709 ip->i_modrev = init_va_filerev(); 1710 *vpp = vp; 1711 return (0); 1712 } 1713 1714 /* 1715 * Allocate a new inode. 1716 */ 1717 static int 1718 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, 1719 struct componentname *cnp) 1720 { 1721 struct inode *ip, *pdir; 1722 struct vnode *tvp; 1723 int error; 1724 1725 pdir = VTOI(dvp); 1726 *vpp = NULL; 1727 if ((mode & IFMT) == 0) 1728 mode |= IFREG; 1729 1730 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); 1731 if (error) { 1732 return (error); 1733 } 1734 ip = VTOI(tvp); 1735 ip->i_gid = pdir->i_gid; 1736 #ifdef SUIDDIR 1737 { 1738 /* 1739 * if we are 1740 * not the owner of the directory, 1741 * and we are hacking owners here, (only do this where told to) 1742 * and we are not giving it TOO root, (would subvert quotas) 1743 * then go ahead and give it to the other user. 1744 * Note that this drops off the execute bits for security. 1745 */ 1746 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1747 (pdir->i_mode & ISUID) && 1748 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 1749 ip->i_uid = pdir->i_uid; 1750 mode &= ~07111; 1751 } else { 1752 ip->i_uid = cnp->cn_cred->cr_uid; 1753 } 1754 } 1755 #else 1756 ip->i_uid = cnp->cn_cred->cr_uid; 1757 #endif 1758 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1759 ip->i_mode = mode; 1760 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1761 ip->i_nlink = 1; 1762 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { 1763 if (caps_priv_check(cnp->cn_cred, SYSCAP_NOVFS_RETAINSUGID)) 1764 ip->i_mode &= ~ISGID; 1765 } 1766 1767 if (cnp->cn_flags & CNP_ISWHITEOUT) 1768 ip->i_flags |= UF_OPAQUE; 1769 1770 /* 1771 * Regular files and directories need VM objects. Softlinks do 1772 * not (not immediately anyway). 1773 */ 1774 if (tvp->v_type == VREG || tvp->v_type == VDIR) 1775 vinitvmio(tvp, 0, PAGE_SIZE, -1); 1776 1777 /* 1778 * Make sure inode goes to disk before directory entry. 1779 */ 1780 error = ext2_update(tvp, !DOINGASYNC(tvp)); 1781 if (error) 1782 goto bad; 1783 1784 error = ext2_direnter(ip, dvp, cnp); 1785 if (error) 1786 goto bad; 1787 1788 *vpp = tvp; 1789 return (0); 1790 1791 bad: 1792 /* 1793 * Write error occurred trying to update the inode 1794 * or the directory so must deallocate the inode. 1795 */ 1796 ip->i_nlink = 0; 1797 ip->i_flag |= IN_CHANGE; 1798 vput(tvp); 1799 return (error); 1800 } 1801 1802 /* 1803 * Vnode op for reading. 1804 */ 1805 static int 1806 ext2_read(struct vop_read_args *ap) 1807 { 1808 struct vnode *vp; 1809 struct inode *ip; 1810 struct uio *uio; 1811 struct m_ext2fs *fs; 1812 struct buf *bp; 1813 daddr_t lbn; 1814 off_t nextlbn; 1815 off_t nextloffset; 1816 off_t bytesinfile; 1817 long size, xfersize, blkoffset; 1818 int error, orig_resid, seqcount; 1819 int ioflag; 1820 1821 vp = ap->a_vp; 1822 uio = ap->a_uio; 1823 ioflag = ap->a_ioflag; 1824 1825 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 1826 ip = VTOI(vp); 1827 1828 #ifdef INVARIANTS 1829 if (uio->uio_rw != UIO_READ) 1830 panic("%s: mode", "ext2_read"); 1831 1832 if (vp->v_type == VLNK) { 1833 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 1834 panic("%s: short symlink", "ext2_read"); 1835 } else if (vp->v_type != VREG && vp->v_type != VDIR) 1836 panic("%s: type %d", "ext2_read", vp->v_type); 1837 #endif 1838 orig_resid = uio->uio_resid; 1839 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); 1840 if (orig_resid == 0) 1841 return (0); 1842 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); 1843 fs = ip->i_e2fs; 1844 if (uio->uio_offset < ip->i_size && 1845 uio->uio_offset >= fs->e2fs_maxfilesize) 1846 return (EOVERFLOW); 1847 1848 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 1849 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 1850 break; 1851 lbn = lblkno(fs, uio->uio_offset); 1852 nextlbn = lbn + 1; 1853 nextloffset = lblktodoff(fs, nextlbn); 1854 size = blksize(fs, ip, lbn); 1855 blkoffset = blkoff(fs, uio->uio_offset); 1856 1857 xfersize = fs->e2fs_fsize - blkoffset; 1858 if (uio->uio_resid < xfersize) 1859 xfersize = uio->uio_resid; 1860 if (bytesinfile < xfersize) 1861 xfersize = bytesinfile; 1862 1863 if (nextloffset >= ip->i_size) 1864 error = bread(vp, lblktodoff(fs, lbn), size, &bp); 1865 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 1866 error = cluster_read(vp, (off_t)ip->i_size, 1867 lblktodoff(fs, lbn), size, uio->uio_resid, 1868 (ap->a_ioflag >> IO_SEQSHIFT) * MAXBSIZE, &bp); 1869 } else if (seqcount > 1) { 1870 u_int nextsize = blksize(fs, ip, nextlbn); 1871 1872 error = breadn(vp, lblktodoff(fs, lbn), size, 1873 &nextloffset, &nextsize, 1, &bp); 1874 } else 1875 error = bread(vp, lblktodoff(fs, lbn), size, &bp); 1876 if (error) { 1877 brelse(bp); 1878 bp = NULL; 1879 break; 1880 } 1881 1882 /* 1883 * We should only get non-zero b_resid when an I/O error 1884 * has occurred, which should cause us to break above. 1885 * However, if the short read did not cause an error, 1886 * then we want to ensure that we do not uiomove bad 1887 * or uninitialized data. 1888 */ 1889 size -= bp->b_resid; 1890 if (size < xfersize) { 1891 if (size == 0) 1892 break; 1893 xfersize = size; 1894 } 1895 error = uiomove((char *)bp->b_data + blkoffset, 1896 (int)xfersize, uio); 1897 if (error) 1898 break; 1899 bqrelse(bp); 1900 } 1901 1902 /* 1903 * This can only happen in the case of an error because the loop 1904 * above resets bp to NULL on each iteration and on normal 1905 * completion has not set a new value into it. so it must have come 1906 * from a 'break' statement 1907 */ 1908 if (bp != NULL) 1909 bqrelse(bp); 1910 1911 if ((error == 0 || uio->uio_resid != orig_resid) && 1912 (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) 1913 ip->i_flag |= IN_ACCESS; 1914 return (error); 1915 } 1916 1917 /* 1918 * Vnode op for writing. 1919 */ 1920 static int 1921 ext2_write(struct vop_write_args *ap) 1922 { 1923 struct vnode *vp; 1924 struct uio *uio; 1925 struct inode *ip; 1926 struct m_ext2fs *fs; 1927 struct buf *bp; 1928 struct thread *td; 1929 daddr_t lbn; 1930 off_t osize; 1931 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; 1932 1933 ioflag = ap->a_ioflag; 1934 uio = ap->a_uio; 1935 vp = ap->a_vp; 1936 1937 seqcount = ioflag >> IO_SEQSHIFT; 1938 ip = VTOI(vp); 1939 1940 #ifdef INVARIANTS 1941 if (uio->uio_rw != UIO_WRITE) 1942 panic("%s: mode", "ext2_write"); 1943 #endif 1944 1945 switch (vp->v_type) { 1946 case VREG: 1947 if (ioflag & IO_APPEND) 1948 uio->uio_offset = ip->i_size; 1949 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 1950 return (EPERM); 1951 /* FALLTHROUGH */ 1952 case VLNK: 1953 break; 1954 case VDIR: 1955 /* XXX differs from ffs -- this is called from ext2_mkdir(). */ 1956 if ((ioflag & IO_SYNC) == 0) 1957 panic("ext2_write: nonsync dir write"); 1958 break; 1959 default: 1960 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, 1961 vp->v_type, (intmax_t)uio->uio_offset, 1962 (intmax_t)uio->uio_resid); 1963 } 1964 1965 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); 1966 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); 1967 fs = ip->i_e2fs; 1968 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) 1969 return (EFBIG); 1970 /* 1971 * Maybe this should be above the vnode op call, but so long as 1972 * file servers have no limits, I don't think it matters. 1973 */ 1974 td = uio->uio_td; 1975 if (vp->v_type == VREG && td && td->td_proc && 1976 uio->uio_offset + uio->uio_resid > 1977 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 1978 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 1979 return (EFBIG); 1980 } 1981 1982 resid = uio->uio_resid; 1983 osize = ip->i_size; 1984 if (seqcount > BA_SEQMAX) 1985 flags = BA_SEQMAX << BA_SEQSHIFT; 1986 else 1987 flags = seqcount << BA_SEQSHIFT; 1988 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 1989 flags |= IO_SYNC; 1990 1991 for (error = 0; uio->uio_resid > 0;) { 1992 lbn = lblkno(fs, uio->uio_offset); 1993 blkoffset = blkoff(fs, uio->uio_offset); 1994 xfersize = fs->e2fs_fsize - blkoffset; 1995 if (uio->uio_resid < xfersize) 1996 xfersize = uio->uio_resid; 1997 if (uio->uio_offset + xfersize > ip->i_size) 1998 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 1999 2000 /* 2001 * We must perform a read-before-write if the transfer size 2002 * does not cover the entire buffer. 2003 */ 2004 if (fs->e2fs_bsize > xfersize) 2005 flags |= BA_CLRBUF; 2006 else 2007 flags &= ~BA_CLRBUF; 2008 error = ext2_balloc(ip, lbn, blkoffset + xfersize, 2009 ap->a_cred, &bp, flags); 2010 if (error != 0) 2011 break; 2012 2013 if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL)) 2014 bp->b_flags |= B_NOCACHE; 2015 if (uio->uio_offset + xfersize > ip->i_size) 2016 ip->i_size = uio->uio_offset + xfersize; 2017 size = blksize(fs, ip, lbn) - bp->b_resid; 2018 if (size < xfersize) 2019 xfersize = size; 2020 2021 error = 2022 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 2023 if ((ioflag & IO_VMIO) && 2024 LIST_FIRST(&bp->b_dep) == NULL) /* in ext2fs? */ 2025 bp->b_flags |= B_RELBUF; 2026 /* 2027 * If the buffer is not already filled and we encounter an 2028 * error while trying to fill it, we have to clear out any 2029 * garbage data from the pages instantiated for the buffer. 2030 * If we do not, a failed uiomove() during a write can leave 2031 * the prior contents of the pages exposed to a userland mmap. 2032 * 2033 * Note that we need only clear buffers with a transfer size 2034 * equal to the block size because buffers with a shorter 2035 * transfer size were cleared above by the call to ext2_balloc() 2036 * with the BA_CLRBUF flag set. 2037 * 2038 * If the source region for uiomove identically mmaps the 2039 * buffer, uiomove() performed the NOP copy, and the buffer 2040 * content remains valid because the page fault handler 2041 * validated the pages. 2042 */ 2043 if (error != 0 && (bp->b_flags & B_CACHE) == 0 && 2044 fs->e2fs_bsize == xfersize) 2045 vfs_bio_clrbuf(bp); 2046 2047 /* 2048 * If IO_SYNC each buffer is written synchronously. Otherwise 2049 * if we have a severe page deficiency write the buffer 2050 * asynchronously. Otherwise try to cluster, and if that 2051 * doesn't do it then either do an async write (if O_DIRECT), 2052 * or a delayed write (if not). 2053 */ 2054 if (ioflag & IO_SYNC) { 2055 (void)bwrite(bp); 2056 } else if (vm_paging_severe() || 2057 buf_dirty_count_severe() || 2058 (ioflag & IO_ASYNC)) 2059 { 2060 bp->b_flags |= B_CLUSTEROK; 2061 bawrite(bp); 2062 } else if (xfersize + blkoffset == fs->e2fs_fsize) { 2063 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 2064 bp->b_flags |= B_CLUSTEROK; 2065 cluster_write(bp, (off_t)ip->i_size, 2066 vp->v_mount->mnt_stat.f_iosize, seqcount); 2067 } else { 2068 bawrite(bp); 2069 } 2070 } else if (ioflag & IO_DIRECT) { 2071 bp->b_flags |= B_CLUSTEROK; 2072 bawrite(bp); 2073 } else { 2074 bp->b_flags |= B_CLUSTEROK; 2075 bdwrite(bp); 2076 } 2077 if (error || xfersize == 0) 2078 break; 2079 } 2080 /* 2081 * If we successfully wrote any data, and we are not the superuser 2082 * we clear the setuid and setgid bits as a precaution against 2083 * tampering. 2084 */ 2085 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && 2086 ap->a_cred) { 2087 if (caps_priv_check(ap->a_cred, SYSCAP_NOVFS_RETAINSUGID)) 2088 ip->i_mode &= ~(ISUID | ISGID); 2089 } 2090 if (error) { 2091 if (ioflag & IO_UNIT) { 2092 (void)ext2_truncate(vp, osize, ioflag & IO_SYNC, 2093 ap->a_cred); 2094 uio->uio_offset -= resid - uio->uio_resid; 2095 uio->uio_resid = resid; 2096 } 2097 } 2098 if (uio->uio_resid != resid) { 2099 ip->i_flag |= IN_CHANGE | IN_UPDATE; 2100 if (ioflag & IO_SYNC) 2101 error = ext2_update(vp, 1); 2102 } 2103 return (error); 2104 } 2105 2106 /* Global vfs data structures for ext2. */ 2107 struct vop_ops ext2_vnodeops = { 2108 .vop_default = vop_defaultop, 2109 .vop_access = ext2_access, 2110 .vop_bmap = ext2_bmap, 2111 .vop_old_lookup = ext2_lookup, 2112 .vop_close = ext2_close, 2113 .vop_old_create = ext2_create, 2114 .vop_fsync = ext2_fsync, 2115 .vop_getpages = vop_stdgetpages, 2116 .vop_putpages = vop_stdputpages, 2117 .vop_getattr = ext2_getattr, 2118 .vop_inactive = ext2_inactive, 2119 .vop_old_link = ext2_link, 2120 .vop_old_lookup = ext2_lookup, 2121 .vop_old_mkdir = ext2_mkdir, 2122 .vop_old_mknod = ext2_mknod, 2123 .vop_open = ext2_open, 2124 .vop_pathconf = ext2_pathconf, 2125 .vop_print = ext2_print, 2126 .vop_read = ext2_read, 2127 .vop_readdir = ext2_readdir, 2128 .vop_readlink = ext2_readlink, 2129 .vop_reallocblks = ext2_reallocblks, 2130 .vop_reclaim = ext2_reclaim, 2131 .vop_old_remove = ext2_remove, 2132 .vop_old_rename = ext2_rename, 2133 .vop_old_rmdir = ext2_rmdir, 2134 .vop_setattr = ext2_setattr, 2135 .vop_strategy = ext2_strategy, 2136 .vop_old_symlink = ext2_symlink, 2137 .vop_write = ext2_write, 2138 }; 2139 2140 struct vop_ops ext2_specops = { 2141 .vop_default = vop_defaultop, 2142 .vop_access = ext2_access, 2143 .vop_close = ext2_close, 2144 .vop_fsync = ext2_fsync, 2145 .vop_getattr = ext2_getattr, 2146 .vop_inactive = ext2_inactive, 2147 .vop_pathconf = ext2_pathconf, 2148 .vop_print = ext2_print, 2149 .vop_read = vop_stdnoread, 2150 .vop_reclaim = ext2_reclaim, 2151 .vop_setattr = ext2_setattr, 2152 .vop_write = vop_stdnowrite 2153 }; 2154 2155 struct vop_ops ext2_fifoops = { 2156 .vop_default = fifo_vnoperate, 2157 .vop_access = ext2_access, 2158 .vop_close = ext2fifo_close, 2159 .vop_fsync = ext2_fsync, 2160 .vop_getattr = ext2_getattr, 2161 .vop_inactive = ext2_inactive, 2162 .vop_kqfilter = ext2fifo_kqfilter, 2163 .vop_pathconf = ext2_pathconf, 2164 .vop_print = ext2_print, 2165 .vop_read = ext2fifo_read, 2166 .vop_reclaim = ext2_reclaim, 2167 .vop_setattr = ext2_setattr, 2168 .vop_write = ext2fifo_write 2169 }; 2170 2171 VNODEOP_SET(ext2_vnodeops); 2172 VNODEOP_SET(ext2_specops); 2173 VNODEOP_SET(ext2_fifoops); 2174