1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1989, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_quota.h" 43 #include "opt_suiddir.h" 44 #include "opt_ufs.h" 45 #include "opt_ffs.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/namei.h> 51 #include <sys/kernel.h> 52 #include <sys/fcntl.h> 53 #include <sys/filio.h> 54 #include <sys/stat.h> 55 #include <sys/bio.h> 56 #include <sys/buf.h> 57 #include <sys/mount.h> 58 #include <sys/priv.h> 59 #include <sys/refcount.h> 60 #include <sys/unistd.h> 61 #include <sys/vnode.h> 62 #include <sys/dirent.h> 63 #include <sys/lockf.h> 64 #include <sys/conf.h> 65 #include <sys/acl.h> 66 #include <sys/smr.h> 67 68 #include <security/audit/audit.h> 69 #include <security/mac/mac_framework.h> 70 71 #include <sys/file.h> /* XXX */ 72 73 #include <vm/vm.h> 74 #include <vm/vm_extern.h> 75 76 #include <ufs/ufs/acl.h> 77 #include <ufs/ufs/extattr.h> 78 #include <ufs/ufs/quota.h> 79 #include <ufs/ufs/inode.h> 80 #include <ufs/ufs/dir.h> 81 #include <ufs/ufs/ufsmount.h> 82 #include <ufs/ufs/ufs_extern.h> 83 #ifdef UFS_DIRHASH 84 #include <ufs/ufs/dirhash.h> 85 #endif 86 #ifdef UFS_GJOURNAL 87 #include <ufs/ufs/gjournal.h> 88 FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); 89 #endif 90 91 #ifdef QUOTA 92 FEATURE(ufs_quota, "UFS disk quotas support"); 93 FEATURE(ufs_quota64, "64bit UFS disk quotas support"); 94 #endif 95 96 #ifdef SUIDDIR 97 FEATURE(suiddir, 98 "Give all new files in directory the same ownership as the directory"); 99 #endif 100 101 VFS_SMR_DECLARE; 102 103 #include <ufs/ffs/ffs_extern.h> 104 105 static vop_accessx_t ufs_accessx; 106 static vop_fplookup_vexec_t ufs_fplookup_vexec; 107 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); 108 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, 109 struct thread *); 110 static vop_close_t ufs_close; 111 static vop_create_t ufs_create; 112 static vop_stat_t ufs_stat; 113 static vop_getattr_t ufs_getattr; 114 static vop_ioctl_t ufs_ioctl; 115 static vop_link_t ufs_link; 116 static int ufs_makeinode(int mode, struct vnode *, struct vnode **, 117 struct componentname *, const char *); 118 static vop_mmapped_t ufs_mmapped; 119 static vop_mkdir_t ufs_mkdir; 120 static vop_mknod_t ufs_mknod; 121 static vop_open_t ufs_open; 122 static vop_pathconf_t ufs_pathconf; 123 static vop_print_t ufs_print; 124 static vop_readlink_t ufs_readlink; 125 static vop_remove_t ufs_remove; 126 static vop_rename_t ufs_rename; 127 static vop_rmdir_t ufs_rmdir; 128 static vop_setattr_t ufs_setattr; 129 static vop_strategy_t ufs_strategy; 130 static vop_symlink_t ufs_symlink; 131 static vop_whiteout_t ufs_whiteout; 132 static vop_close_t ufsfifo_close; 133 134 SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 135 "UFS filesystem"); 136 137 /* 138 * A virgin directory (no blushing please). 139 */ 140 static struct dirtemplate mastertemplate = { 141 0, 12, DT_DIR, 1, ".", 142 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." 143 }; 144 static struct odirtemplate omastertemplate = { 145 0, 12, 1, ".", 146 0, DIRBLKSIZ - 12, 2, ".." 147 }; 148 149 static void 150 ufs_itimes_locked(struct vnode *vp) 151 { 152 struct inode *ip; 153 struct timespec ts; 154 155 ASSERT_VI_LOCKED(vp, __func__); 156 157 ip = VTOI(vp); 158 if (UFS_RDONLY(ip)) 159 goto out; 160 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 161 return; 162 163 if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) 164 UFS_INODE_SET_FLAG(ip, IN_LAZYMOD); 165 else if (((vp->v_mount->mnt_kern_flag & 166 (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || 167 (ip->i_flag & (IN_CHANGE | IN_UPDATE))) 168 UFS_INODE_SET_FLAG(ip, IN_MODIFIED); 169 else if (ip->i_flag & IN_ACCESS) 170 UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS); 171 vfs_timestamp(&ts); 172 if (ip->i_flag & IN_ACCESS) { 173 DIP_SET(ip, i_atime, ts.tv_sec); 174 DIP_SET(ip, i_atimensec, ts.tv_nsec); 175 } 176 if (ip->i_flag & IN_UPDATE) { 177 DIP_SET(ip, i_mtime, ts.tv_sec); 178 DIP_SET(ip, i_mtimensec, ts.tv_nsec); 179 } 180 if (ip->i_flag & IN_CHANGE) { 181 DIP_SET(ip, i_ctime, ts.tv_sec); 182 DIP_SET(ip, i_ctimensec, ts.tv_nsec); 183 DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); 184 } 185 186 out: 187 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 188 } 189 190 void 191 ufs_itimes(struct vnode *vp) 192 { 193 194 VI_LOCK(vp); 195 ufs_itimes_locked(vp); 196 VI_UNLOCK(vp); 197 } 198 199 /* 200 * Create a regular file 201 */ 202 static int 203 ufs_create(ap) 204 struct vop_create_args /* { 205 struct vnode *a_dvp; 206 struct vnode **a_vpp; 207 struct componentname *a_cnp; 208 struct vattr *a_vap; 209 } */ *ap; 210 { 211 int error; 212 213 error = 214 ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 215 ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); 216 if (error != 0) 217 return (error); 218 if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) 219 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); 220 return (0); 221 } 222 223 /* 224 * Mknod vnode call 225 */ 226 /* ARGSUSED */ 227 static int 228 ufs_mknod(ap) 229 struct vop_mknod_args /* { 230 struct vnode *a_dvp; 231 struct vnode **a_vpp; 232 struct componentname *a_cnp; 233 struct vattr *a_vap; 234 } */ *ap; 235 { 236 struct vattr *vap = ap->a_vap; 237 struct vnode **vpp = ap->a_vpp; 238 struct inode *ip; 239 ino_t ino; 240 int error; 241 242 error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 243 ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); 244 if (error) 245 return (error); 246 ip = VTOI(*vpp); 247 UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); 248 if (vap->va_rdev != VNOVAL) { 249 /* 250 * Want to be able to use this to make badblock 251 * inodes, so don't truncate the dev number. 252 */ 253 DIP_SET(ip, i_rdev, vap->va_rdev); 254 } 255 /* 256 * Remove inode, then reload it through VFS_VGET(). This is 257 * needed to do further inode initialization, for instance 258 * fifo, which was too early for VFS_VGET() done as part of 259 * UFS_VALLOC(). 260 */ 261 (*vpp)->v_type = VNON; 262 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 263 vgone(*vpp); 264 vput(*vpp); 265 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 266 if (error) { 267 *vpp = NULL; 268 return (error); 269 } 270 return (0); 271 } 272 273 /* 274 * Open called. 275 */ 276 /* ARGSUSED */ 277 static int 278 ufs_open(struct vop_open_args *ap) 279 { 280 struct vnode *vp = ap->a_vp; 281 struct inode *ip; 282 283 if (vp->v_type == VCHR || vp->v_type == VBLK) 284 return (EOPNOTSUPP); 285 286 ip = VTOI(vp); 287 vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); 288 if (vp->v_type == VREG && (vn_irflag_read(vp) & VIRF_PGREAD) == 0) { 289 vn_irflag_set_cond(vp, VIRF_PGREAD); 290 } 291 292 /* 293 * Files marked append-only must be opened for appending. 294 */ 295 if ((ip->i_flags & APPEND) && 296 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 297 return (EPERM); 298 299 return (0); 300 } 301 302 /* 303 * Close called. 304 * 305 * Update the times on the inode. 306 */ 307 /* ARGSUSED */ 308 static int 309 ufs_close(ap) 310 struct vop_close_args /* { 311 struct vnode *a_vp; 312 int a_fflag; 313 struct ucred *a_cred; 314 struct thread *a_td; 315 } */ *ap; 316 { 317 struct vnode *vp = ap->a_vp; 318 int usecount; 319 320 VI_LOCK(vp); 321 usecount = vp->v_usecount; 322 if (usecount > 1) 323 ufs_itimes_locked(vp); 324 VI_UNLOCK(vp); 325 return (0); 326 } 327 328 static int 329 ufs_accessx(ap) 330 struct vop_accessx_args /* { 331 struct vnode *a_vp; 332 accmode_t a_accmode; 333 struct ucred *a_cred; 334 struct thread *a_td; 335 } */ *ap; 336 { 337 struct vnode *vp = ap->a_vp; 338 struct inode *ip = VTOI(vp); 339 accmode_t accmode = ap->a_accmode; 340 int error; 341 #ifdef UFS_ACL 342 struct acl *acl; 343 acl_type_t type; 344 #endif 345 346 /* 347 * Disallow write attempts on read-only filesystems; 348 * unless the file is a socket, fifo, or a block or 349 * character device resident on the filesystem. 350 */ 351 if (accmode & VMODIFY_PERMS) { 352 switch (vp->v_type) { 353 case VDIR: 354 case VLNK: 355 case VREG: 356 if (vp->v_mount->mnt_flag & MNT_RDONLY) 357 return (EROFS); 358 #ifdef QUOTA 359 /* 360 * Inode is accounted in the quotas only if struct 361 * dquot is attached to it. VOP_ACCESS() is called 362 * from vn_open_cred() and provides a convenient 363 * point to call getinoquota(). The lock mode is 364 * exclusive when the file is opening for write. 365 */ 366 if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 367 error = getinoquota(ip); 368 if (error != 0) 369 return (error); 370 } 371 #endif 372 break; 373 default: 374 break; 375 } 376 } 377 378 /* 379 * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" 380 * permits the owner of the file to remove the IMMUTABLE flag. 381 */ 382 if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && 383 (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) 384 return (EPERM); 385 386 #ifdef UFS_ACL 387 if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { 388 if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) 389 type = ACL_TYPE_NFS4; 390 else 391 type = ACL_TYPE_ACCESS; 392 393 acl = acl_alloc(M_WAITOK); 394 if (type == ACL_TYPE_NFS4) 395 error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); 396 else 397 error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); 398 switch (error) { 399 case 0: 400 if (type == ACL_TYPE_NFS4) { 401 error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, 402 ip->i_gid, acl, accmode, ap->a_cred); 403 } else { 404 error = vfs_unixify_accmode(&accmode); 405 if (error == 0) 406 error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, 407 ip->i_gid, acl, accmode, ap->a_cred); 408 } 409 break; 410 default: 411 if (error != EOPNOTSUPP) 412 printf( 413 "ufs_accessx(): Error retrieving ACL on object (%d).\n", 414 error); 415 /* 416 * XXX: Fall back until debugged. Should 417 * eventually possibly log an error, and return 418 * EPERM for safety. 419 */ 420 error = vfs_unixify_accmode(&accmode); 421 if (error == 0) 422 error = vaccess(vp->v_type, ip->i_mode, 423 ip->i_uid, ip->i_gid, accmode, ap->a_cred); 424 } 425 acl_free(acl); 426 427 return (error); 428 } 429 #endif /* !UFS_ACL */ 430 error = vfs_unixify_accmode(&accmode); 431 if (error == 0) 432 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 433 accmode, ap->a_cred); 434 return (error); 435 } 436 437 /* 438 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see 439 * the comment above cache_fplookup for details. 440 */ 441 static int 442 ufs_fplookup_vexec(ap) 443 struct vop_fplookup_vexec_args /* { 444 struct vnode *a_vp; 445 struct ucred *a_cred; 446 struct thread *a_td; 447 } */ *ap; 448 { 449 struct vnode *vp; 450 struct inode *ip; 451 struct ucred *cred; 452 mode_t all_x, mode; 453 454 vp = ap->a_vp; 455 ip = VTOI_SMR(vp); 456 if (__predict_false(ip == NULL)) 457 return (EAGAIN); 458 459 /* 460 * XXX ACL race 461 * 462 * ACLs are not supported and UFS clears/sets this flag on mount and 463 * remount. However, we may still be racing with seeing them and there 464 * is no provision to make sure they were accounted for. This matches 465 * the behavior of the locked case, since the lookup there is also 466 * racy: mount takes no measures to block anyone from progressing. 467 */ 468 all_x = S_IXUSR | S_IXGRP | S_IXOTH; 469 mode = atomic_load_short(&ip->i_mode); 470 if (__predict_true((mode & all_x) == all_x)) 471 return (0); 472 473 cred = ap->a_cred; 474 return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred)); 475 } 476 477 /* ARGSUSED */ 478 static int 479 ufs_stat(struct vop_stat_args *ap) 480 { 481 struct vnode *vp = ap->a_vp; 482 struct inode *ip = VTOI(vp); 483 struct stat *sb = ap->a_sb; 484 int error; 485 486 error = vop_stat_helper_pre(ap); 487 if (__predict_false(error)) 488 return (error); 489 490 VI_LOCK(vp); 491 ufs_itimes_locked(vp); 492 if (I_IS_UFS1(ip)) { 493 sb->st_atim.tv_sec = ip->i_din1->di_atime; 494 sb->st_atim.tv_nsec = ip->i_din1->di_atimensec; 495 } else { 496 sb->st_atim.tv_sec = ip->i_din2->di_atime; 497 sb->st_atim.tv_nsec = ip->i_din2->di_atimensec; 498 } 499 VI_UNLOCK(vp); 500 501 sb->st_dev = dev2udev(ITOUMP(ip)->um_dev); 502 sb->st_ino = ip->i_number; 503 sb->st_mode = (ip->i_mode & ~IFMT) | VTTOIF(vp->v_type); 504 sb->st_nlink = ip->i_effnlink; 505 sb->st_uid = ip->i_uid; 506 sb->st_gid = ip->i_gid; 507 if (I_IS_UFS1(ip)) { 508 sb->st_rdev = ip->i_din1->di_rdev; 509 sb->st_size = ip->i_din1->di_size; 510 sb->st_mtim.tv_sec = ip->i_din1->di_mtime; 511 sb->st_mtim.tv_nsec = ip->i_din1->di_mtimensec; 512 sb->st_ctim.tv_sec = ip->i_din1->di_ctime; 513 sb->st_ctim.tv_nsec = ip->i_din1->di_ctimensec; 514 sb->st_birthtim.tv_sec = -1; 515 sb->st_birthtim.tv_nsec = 0; 516 sb->st_blocks = dbtob((u_quad_t)ip->i_din1->di_blocks) / S_BLKSIZE; 517 } else { 518 sb->st_rdev = ip->i_din2->di_rdev; 519 sb->st_size = ip->i_din2->di_size; 520 sb->st_mtim.tv_sec = ip->i_din2->di_mtime; 521 sb->st_mtim.tv_nsec = ip->i_din2->di_mtimensec; 522 sb->st_ctim.tv_sec = ip->i_din2->di_ctime; 523 sb->st_ctim.tv_nsec = ip->i_din2->di_ctimensec; 524 sb->st_birthtim.tv_sec = ip->i_din2->di_birthtime; 525 sb->st_birthtim.tv_nsec = ip->i_din2->di_birthnsec; 526 sb->st_blocks = dbtob((u_quad_t)ip->i_din2->di_blocks) / S_BLKSIZE; 527 } 528 529 sb->st_blksize = max(PAGE_SIZE, vp->v_mount->mnt_stat.f_iosize); 530 sb->st_flags = ip->i_flags; 531 sb->st_gen = ip->i_gen; 532 533 return (vop_stat_helper_post(ap, error)); 534 } 535 536 /* ARGSUSED */ 537 static int 538 ufs_getattr(ap) 539 struct vop_getattr_args /* { 540 struct vnode *a_vp; 541 struct vattr *a_vap; 542 struct ucred *a_cred; 543 } */ *ap; 544 { 545 struct vnode *vp = ap->a_vp; 546 struct inode *ip = VTOI(vp); 547 struct vattr *vap = ap->a_vap; 548 549 VI_LOCK(vp); 550 ufs_itimes_locked(vp); 551 if (I_IS_UFS1(ip)) { 552 vap->va_atime.tv_sec = ip->i_din1->di_atime; 553 vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; 554 } else { 555 vap->va_atime.tv_sec = ip->i_din2->di_atime; 556 vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; 557 } 558 VI_UNLOCK(vp); 559 /* 560 * Copy from inode table 561 */ 562 vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); 563 vap->va_fileid = ip->i_number; 564 vap->va_mode = ip->i_mode & ~IFMT; 565 vap->va_nlink = ip->i_effnlink; 566 vap->va_uid = ip->i_uid; 567 vap->va_gid = ip->i_gid; 568 if (I_IS_UFS1(ip)) { 569 vap->va_rdev = ip->i_din1->di_rdev; 570 vap->va_size = ip->i_din1->di_size; 571 vap->va_mtime.tv_sec = ip->i_din1->di_mtime; 572 vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; 573 vap->va_ctime.tv_sec = ip->i_din1->di_ctime; 574 vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; 575 vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); 576 vap->va_filerev = ip->i_din1->di_modrev; 577 } else { 578 vap->va_rdev = ip->i_din2->di_rdev; 579 vap->va_size = ip->i_din2->di_size; 580 vap->va_mtime.tv_sec = ip->i_din2->di_mtime; 581 vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; 582 vap->va_ctime.tv_sec = ip->i_din2->di_ctime; 583 vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; 584 vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; 585 vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; 586 vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); 587 vap->va_filerev = ip->i_din2->di_modrev; 588 } 589 vap->va_flags = ip->i_flags; 590 vap->va_gen = ip->i_gen; 591 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 592 vap->va_type = IFTOVT(ip->i_mode); 593 return (0); 594 } 595 596 /* 597 * Set attribute vnode op. called from several syscalls 598 */ 599 static int 600 ufs_setattr(ap) 601 struct vop_setattr_args /* { 602 struct vnode *a_vp; 603 struct vattr *a_vap; 604 struct ucred *a_cred; 605 } */ *ap; 606 { 607 struct vattr *vap = ap->a_vap; 608 struct vnode *vp = ap->a_vp; 609 struct inode *ip = VTOI(vp); 610 struct ucred *cred = ap->a_cred; 611 struct thread *td = curthread; 612 int error; 613 614 /* 615 * Check for unsettable attributes. 616 */ 617 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 618 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 619 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 620 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 621 return (EINVAL); 622 } 623 if (vap->va_flags != VNOVAL) { 624 if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | 625 SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | 626 UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | 627 UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | 628 UF_SPARSE | UF_SYSTEM)) != 0) 629 return (EOPNOTSUPP); 630 if (vp->v_mount->mnt_flag & MNT_RDONLY) 631 return (EROFS); 632 /* 633 * Callers may only modify the file flags on objects they 634 * have VADMIN rights for. 635 */ 636 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 637 return (error); 638 /* 639 * Unprivileged processes are not permitted to unset system 640 * flags, or modify flags if any system flags are set. 641 * Privileged non-jail processes may not modify system flags 642 * if securelevel > 0 and any existing system flags are set. 643 * Privileged jail processes behave like privileged non-jail 644 * processes if the PR_ALLOW_CHFLAGS permission bit is set; 645 * otherwise, they behave like unprivileged processes. 646 */ 647 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { 648 if (ip->i_flags & 649 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 650 error = securelevel_gt(cred, 0); 651 if (error) 652 return (error); 653 } 654 /* The snapshot flag cannot be toggled. */ 655 if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) 656 return (EPERM); 657 } else { 658 if (ip->i_flags & 659 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 660 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 661 return (EPERM); 662 } 663 ip->i_flags = vap->va_flags; 664 DIP_SET(ip, i_flags, vap->va_flags); 665 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 666 error = UFS_UPDATE(vp, 0); 667 if (ip->i_flags & (IMMUTABLE | APPEND)) 668 return (error); 669 } 670 /* 671 * If immutable or append, no one can change any of its attributes 672 * except the ones already handled (in some cases, file flags 673 * including the immutability flags themselves for the superuser). 674 */ 675 if (ip->i_flags & (IMMUTABLE | APPEND)) 676 return (EPERM); 677 /* 678 * Go through the fields and update iff not VNOVAL. 679 */ 680 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 681 if (vp->v_mount->mnt_flag & MNT_RDONLY) 682 return (EROFS); 683 if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, 684 td)) != 0) 685 return (error); 686 } 687 if (vap->va_size != VNOVAL) { 688 /* 689 * XXX most of the following special cases should be in 690 * callers instead of in N filesystems. The VDIR check 691 * mostly already is. 692 */ 693 switch (vp->v_type) { 694 case VDIR: 695 return (EISDIR); 696 case VLNK: 697 case VREG: 698 /* 699 * Truncation should have an effect in these cases. 700 * Disallow it if the filesystem is read-only or 701 * the file is being snapshotted. 702 */ 703 if (vp->v_mount->mnt_flag & MNT_RDONLY) 704 return (EROFS); 705 if ((ip->i_flags & SF_SNAPSHOT) != 0) 706 return (EPERM); 707 break; 708 default: 709 /* 710 * According to POSIX, the result is unspecified 711 * for file types other than regular files, 712 * directories and shared memory objects. We 713 * don't support shared memory objects in the file 714 * system, and have dubious support for truncating 715 * symlinks. Just ignore the request in other cases. 716 */ 717 return (0); 718 } 719 if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | 720 ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0), 721 cred)) != 0) 722 return (error); 723 } 724 if (vap->va_atime.tv_sec != VNOVAL || 725 vap->va_mtime.tv_sec != VNOVAL || 726 vap->va_birthtime.tv_sec != VNOVAL) { 727 if (vp->v_mount->mnt_flag & MNT_RDONLY) 728 return (EROFS); 729 if ((ip->i_flags & SF_SNAPSHOT) != 0) 730 return (EPERM); 731 error = vn_utimes_perm(vp, vap, cred, td); 732 if (error != 0) 733 return (error); 734 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED); 735 if (vap->va_atime.tv_sec != VNOVAL) { 736 ip->i_flag &= ~IN_ACCESS; 737 DIP_SET(ip, i_atime, vap->va_atime.tv_sec); 738 DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); 739 } 740 if (vap->va_mtime.tv_sec != VNOVAL) { 741 ip->i_flag &= ~IN_UPDATE; 742 DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); 743 DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); 744 } 745 if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { 746 ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; 747 ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; 748 } 749 error = UFS_UPDATE(vp, 0); 750 if (error) 751 return (error); 752 } 753 error = 0; 754 if (vap->va_mode != (mode_t)VNOVAL) { 755 if (vp->v_mount->mnt_flag & MNT_RDONLY) 756 return (EROFS); 757 if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & 758 (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) 759 return (EPERM); 760 error = ufs_chmod(vp, (int)vap->va_mode, cred, td); 761 } 762 return (error); 763 } 764 765 #ifdef UFS_ACL 766 static int 767 ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, 768 int file_owner_id, struct ucred *cred, struct thread *td) 769 { 770 int error; 771 struct acl *aclp; 772 773 aclp = acl_alloc(M_WAITOK); 774 error = ufs_getacl_nfs4_internal(vp, aclp, td); 775 /* 776 * We don't have to handle EOPNOTSUPP here, as the filesystem claims 777 * it supports ACLs. 778 */ 779 if (error) 780 goto out; 781 782 acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); 783 error = ufs_setacl_nfs4_internal(vp, aclp, td); 784 785 out: 786 acl_free(aclp); 787 return (error); 788 } 789 #endif /* UFS_ACL */ 790 791 static int 792 ufs_mmapped(ap) 793 struct vop_mmapped_args /* { 794 struct vnode *a_vp; 795 } */ *ap; 796 { 797 struct vnode *vp; 798 struct inode *ip; 799 struct mount *mp; 800 801 vp = ap->a_vp; 802 ip = VTOI(vp); 803 mp = vp->v_mount; 804 805 if ((mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) 806 UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS); 807 /* 808 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. 809 */ 810 return (0); 811 } 812 813 /* 814 * Change the mode on a file. 815 * Inode must be locked before calling. 816 */ 817 static int 818 ufs_chmod(vp, mode, cred, td) 819 struct vnode *vp; 820 int mode; 821 struct ucred *cred; 822 struct thread *td; 823 { 824 struct inode *ip = VTOI(vp); 825 int newmode, error; 826 827 /* 828 * To modify the permissions on a file, must possess VADMIN 829 * for that file. 830 */ 831 if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) 832 return (error); 833 /* 834 * Privileged processes may set the sticky bit on non-directories, 835 * as well as set the setgid bit on a file with a group that the 836 * process is not a member of. Both of these are allowed in 837 * jail(8). 838 */ 839 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 840 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 841 return (EFTYPE); 842 } 843 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 844 error = priv_check_cred(cred, PRIV_VFS_SETGID); 845 if (error) 846 return (error); 847 } 848 849 /* 850 * Deny setting setuid if we are not the file owner. 851 */ 852 if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { 853 error = priv_check_cred(cred, PRIV_VFS_ADMIN); 854 if (error) 855 return (error); 856 } 857 858 newmode = ip->i_mode & ~ALLPERMS; 859 newmode |= (mode & ALLPERMS); 860 UFS_INODE_SET_MODE(ip, newmode); 861 DIP_SET(ip, i_mode, ip->i_mode); 862 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 863 #ifdef UFS_ACL 864 if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) 865 error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); 866 #endif 867 if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) 868 error = UFS_UPDATE(vp, 0); 869 870 return (error); 871 } 872 873 /* 874 * Perform chown operation on inode ip; 875 * inode must be locked prior to call. 876 */ 877 static int 878 ufs_chown(vp, uid, gid, cred, td) 879 struct vnode *vp; 880 uid_t uid; 881 gid_t gid; 882 struct ucred *cred; 883 struct thread *td; 884 { 885 struct inode *ip = VTOI(vp); 886 uid_t ouid; 887 gid_t ogid; 888 int error = 0; 889 #ifdef QUOTA 890 int i; 891 ufs2_daddr_t change; 892 #endif 893 894 if (uid == (uid_t)VNOVAL) 895 uid = ip->i_uid; 896 if (gid == (gid_t)VNOVAL) 897 gid = ip->i_gid; 898 /* 899 * To modify the ownership of a file, must possess VADMIN for that 900 * file. 901 */ 902 if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) 903 return (error); 904 /* 905 * To change the owner of a file, or change the group of a file to a 906 * group of which we are not a member, the caller must have 907 * privilege. 908 */ 909 if (((uid != ip->i_uid && uid != cred->cr_uid) || 910 (gid != ip->i_gid && !groupmember(gid, cred))) && 911 (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) 912 return (error); 913 ogid = ip->i_gid; 914 ouid = ip->i_uid; 915 #ifdef QUOTA 916 if ((error = getinoquota(ip)) != 0) 917 return (error); 918 if (ouid == uid) { 919 dqrele(vp, ip->i_dquot[USRQUOTA]); 920 ip->i_dquot[USRQUOTA] = NODQUOT; 921 } 922 if (ogid == gid) { 923 dqrele(vp, ip->i_dquot[GRPQUOTA]); 924 ip->i_dquot[GRPQUOTA] = NODQUOT; 925 } 926 change = DIP(ip, i_blocks); 927 (void) chkdq(ip, -change, cred, CHOWN|FORCE); 928 (void) chkiq(ip, -1, cred, CHOWN|FORCE); 929 for (i = 0; i < MAXQUOTAS; i++) { 930 dqrele(vp, ip->i_dquot[i]); 931 ip->i_dquot[i] = NODQUOT; 932 } 933 #endif 934 ip->i_gid = gid; 935 DIP_SET(ip, i_gid, gid); 936 ip->i_uid = uid; 937 DIP_SET(ip, i_uid, uid); 938 #ifdef QUOTA 939 if ((error = getinoquota(ip)) == 0) { 940 if (ouid == uid) { 941 dqrele(vp, ip->i_dquot[USRQUOTA]); 942 ip->i_dquot[USRQUOTA] = NODQUOT; 943 } 944 if (ogid == gid) { 945 dqrele(vp, ip->i_dquot[GRPQUOTA]); 946 ip->i_dquot[GRPQUOTA] = NODQUOT; 947 } 948 if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { 949 if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) 950 goto good; 951 else 952 (void) chkdq(ip, -change, cred, CHOWN|FORCE); 953 } 954 for (i = 0; i < MAXQUOTAS; i++) { 955 dqrele(vp, ip->i_dquot[i]); 956 ip->i_dquot[i] = NODQUOT; 957 } 958 } 959 ip->i_gid = ogid; 960 DIP_SET(ip, i_gid, ogid); 961 ip->i_uid = ouid; 962 DIP_SET(ip, i_uid, ouid); 963 if (getinoquota(ip) == 0) { 964 if (ouid == uid) { 965 dqrele(vp, ip->i_dquot[USRQUOTA]); 966 ip->i_dquot[USRQUOTA] = NODQUOT; 967 } 968 if (ogid == gid) { 969 dqrele(vp, ip->i_dquot[GRPQUOTA]); 970 ip->i_dquot[GRPQUOTA] = NODQUOT; 971 } 972 (void) chkdq(ip, change, cred, FORCE|CHOWN); 973 (void) chkiq(ip, 1, cred, FORCE|CHOWN); 974 (void) getinoquota(ip); 975 } 976 return (error); 977 good: 978 if (getinoquota(ip)) 979 panic("ufs_chown: lost quota"); 980 #endif /* QUOTA */ 981 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 982 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 983 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { 984 UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); 985 DIP_SET(ip, i_mode, ip->i_mode); 986 } 987 } 988 error = UFS_UPDATE(vp, 0); 989 return (error); 990 } 991 992 static int 993 ufs_remove(ap) 994 struct vop_remove_args /* { 995 struct vnode *a_dvp; 996 struct vnode *a_vp; 997 struct componentname *a_cnp; 998 } */ *ap; 999 { 1000 struct inode *ip; 1001 struct vnode *vp = ap->a_vp; 1002 struct vnode *dvp = ap->a_dvp; 1003 int error; 1004 struct thread *td; 1005 1006 td = curthread; 1007 ip = VTOI(vp); 1008 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1009 (VTOI(dvp)->i_flags & APPEND)) 1010 return (EPERM); 1011 if (DOINGSUJ(dvp)) { 1012 error = softdep_prelink(dvp, vp, ap->a_cnp); 1013 if (error != 0) { 1014 MPASS(error == ERELOOKUP); 1015 return (error); 1016 } 1017 } 1018 1019 #ifdef UFS_GJOURNAL 1020 ufs_gjournal_orphan(vp); 1021 #endif 1022 error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); 1023 if (ip->i_nlink <= 0) 1024 vp->v_vflag |= VV_NOSYNC; 1025 if ((ip->i_flags & SF_SNAPSHOT) != 0) { 1026 /* 1027 * Avoid deadlock where another thread is trying to 1028 * update the inodeblock for dvp and is waiting on 1029 * snaplk. Temporary unlock the vnode lock for the 1030 * unlinked file and sync the directory. This should 1031 * allow vput() of the directory to not block later on 1032 * while holding the snapshot vnode locked, assuming 1033 * that the directory hasn't been unlinked too. 1034 */ 1035 VOP_UNLOCK(vp); 1036 (void) VOP_FSYNC(dvp, MNT_WAIT, td); 1037 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1038 } 1039 return (error); 1040 } 1041 1042 static void 1043 print_bad_link_count(const char *funcname, struct vnode *dvp) 1044 { 1045 struct inode *dip; 1046 1047 dip = VTOI(dvp); 1048 uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", 1049 funcname, dip->i_effnlink, (intmax_t)dip->i_number, 1050 dvp->v_mount->mnt_stat.f_mntonname); 1051 } 1052 1053 /* 1054 * link vnode call 1055 */ 1056 static int 1057 ufs_link(ap) 1058 struct vop_link_args /* { 1059 struct vnode *a_tdvp; 1060 struct vnode *a_vp; 1061 struct componentname *a_cnp; 1062 } */ *ap; 1063 { 1064 struct vnode *vp = ap->a_vp; 1065 struct vnode *tdvp = ap->a_tdvp; 1066 struct componentname *cnp = ap->a_cnp; 1067 struct inode *ip; 1068 struct direct newdir; 1069 int error; 1070 1071 #ifdef INVARIANTS 1072 if ((cnp->cn_flags & HASBUF) == 0) 1073 panic("ufs_link: no name"); 1074 #endif 1075 1076 if (DOINGSUJ(tdvp)) { 1077 error = softdep_prelink(tdvp, vp, cnp); 1078 if (error != 0) { 1079 MPASS(error == ERELOOKUP); 1080 return (error); 1081 } 1082 } 1083 1084 if (VTOI(tdvp)->i_effnlink < 2) { 1085 print_bad_link_count("ufs_link", tdvp); 1086 error = EINVAL; 1087 goto out; 1088 } 1089 ip = VTOI(vp); 1090 if (ip->i_nlink >= UFS_LINK_MAX) { 1091 error = EMLINK; 1092 goto out; 1093 } 1094 /* 1095 * The file may have been removed after namei droped the original 1096 * lock. 1097 */ 1098 if (ip->i_effnlink == 0) { 1099 error = ENOENT; 1100 goto out; 1101 } 1102 if (ip->i_flags & (IMMUTABLE | APPEND)) { 1103 error = EPERM; 1104 goto out; 1105 } 1106 1107 ip->i_effnlink++; 1108 ip->i_nlink++; 1109 DIP_SET(ip, i_nlink, ip->i_nlink); 1110 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 1111 if (DOINGSOFTDEP(vp)) 1112 softdep_setup_link(VTOI(tdvp), ip); 1113 error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp)); 1114 if (!error) { 1115 ufs_makedirentry(ip, cnp, &newdir); 1116 error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); 1117 } 1118 1119 if (error) { 1120 ip->i_effnlink--; 1121 ip->i_nlink--; 1122 DIP_SET(ip, i_nlink, ip->i_nlink); 1123 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 1124 if (DOINGSOFTDEP(vp)) 1125 softdep_revert_link(VTOI(tdvp), ip); 1126 } 1127 out: 1128 return (error); 1129 } 1130 1131 /* 1132 * whiteout vnode call 1133 */ 1134 static int 1135 ufs_whiteout(ap) 1136 struct vop_whiteout_args /* { 1137 struct vnode *a_dvp; 1138 struct componentname *a_cnp; 1139 int a_flags; 1140 } */ *ap; 1141 { 1142 struct vnode *dvp = ap->a_dvp; 1143 struct componentname *cnp = ap->a_cnp; 1144 struct direct newdir; 1145 int error = 0; 1146 1147 if (DOINGSUJ(dvp) && (ap->a_flags == CREATE || 1148 ap->a_flags == DELETE)) { 1149 error = softdep_prelink(dvp, NULL, cnp); 1150 if (error != 0) { 1151 MPASS(error == ERELOOKUP); 1152 return (error); 1153 } 1154 } 1155 1156 switch (ap->a_flags) { 1157 case LOOKUP: 1158 /* 4.4 format directories support whiteout operations */ 1159 if (!OFSFMT(dvp)) 1160 return (0); 1161 return (EOPNOTSUPP); 1162 1163 case CREATE: 1164 /* create a new directory whiteout */ 1165 #ifdef INVARIANTS 1166 if ((cnp->cn_flags & SAVENAME) == 0) 1167 panic("ufs_whiteout: missing name"); 1168 if (OFSFMT(dvp)) 1169 panic("ufs_whiteout: old format filesystem"); 1170 #endif 1171 1172 newdir.d_ino = UFS_WINO; 1173 newdir.d_namlen = cnp->cn_namelen; 1174 bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); 1175 newdir.d_type = DT_WHT; 1176 error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); 1177 break; 1178 1179 case DELETE: 1180 /* remove an existing directory whiteout */ 1181 #ifdef INVARIANTS 1182 if (OFSFMT(dvp)) 1183 panic("ufs_whiteout: old format filesystem"); 1184 #endif 1185 1186 cnp->cn_flags &= ~DOWHITEOUT; 1187 error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); 1188 break; 1189 default: 1190 panic("ufs_whiteout: unknown op"); 1191 } 1192 return (error); 1193 } 1194 1195 static volatile int rename_restarts; 1196 SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, 1197 __DEVOLATILE(int *, &rename_restarts), 0, 1198 "Times rename had to restart due to lock contention"); 1199 1200 /* 1201 * Rename system call. 1202 * rename("foo", "bar"); 1203 * is essentially 1204 * unlink("bar"); 1205 * link("foo", "bar"); 1206 * unlink("foo"); 1207 * but ``atomically''. Can't do full commit without saving state in the 1208 * inode on disk which isn't feasible at this time. Best we can do is 1209 * always guarantee the target exists. 1210 * 1211 * Basic algorithm is: 1212 * 1213 * 1) Bump link count on source while we're linking it to the 1214 * target. This also ensure the inode won't be deleted out 1215 * from underneath us while we work (it may be truncated by 1216 * a concurrent `trunc' or `open' for creation). 1217 * 2) Link source to destination. If destination already exists, 1218 * delete it first. 1219 * 3) Unlink source reference to inode if still around. If a 1220 * directory was moved and the parent of the destination 1221 * is different from the source, patch the ".." entry in the 1222 * directory. 1223 */ 1224 static int 1225 ufs_rename(ap) 1226 struct vop_rename_args /* { 1227 struct vnode *a_fdvp; 1228 struct vnode *a_fvp; 1229 struct componentname *a_fcnp; 1230 struct vnode *a_tdvp; 1231 struct vnode *a_tvp; 1232 struct componentname *a_tcnp; 1233 } */ *ap; 1234 { 1235 struct vnode *tvp = ap->a_tvp; 1236 struct vnode *tdvp = ap->a_tdvp; 1237 struct vnode *fvp = ap->a_fvp; 1238 struct vnode *fdvp = ap->a_fdvp; 1239 struct vnode *nvp; 1240 struct componentname *tcnp = ap->a_tcnp; 1241 struct componentname *fcnp = ap->a_fcnp; 1242 struct thread *td = curthread; 1243 struct inode *fip, *tip, *tdp, *fdp; 1244 struct direct newdir; 1245 off_t endoff; 1246 int doingdirectory, newparent; 1247 int error = 0; 1248 struct mount *mp; 1249 ino_t ino; 1250 seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s; 1251 bool checkpath_locked, want_seqc_end; 1252 1253 checkpath_locked = want_seqc_end = false; 1254 1255 #ifdef INVARIANTS 1256 if ((tcnp->cn_flags & HASBUF) == 0 || 1257 (fcnp->cn_flags & HASBUF) == 0) 1258 panic("ufs_rename: no name"); 1259 #endif 1260 endoff = 0; 1261 mp = tdvp->v_mount; 1262 VOP_UNLOCK(tdvp); 1263 if (tvp && tvp != tdvp) 1264 VOP_UNLOCK(tvp); 1265 /* 1266 * Check for cross-device rename. 1267 */ 1268 if ((fvp->v_mount != tdvp->v_mount) || 1269 (tvp && (fvp->v_mount != tvp->v_mount))) { 1270 error = EXDEV; 1271 mp = NULL; 1272 goto releout; 1273 } 1274 1275 fdvp_s = fvp_s = tdvp_s = tvp_s = SEQC_MOD; 1276 relock: 1277 /* 1278 * We need to acquire 2 to 4 locks depending on whether tvp is NULL 1279 * and fdvp and tdvp are the same directory. Subsequently we need 1280 * to double-check all paths and in the directory rename case we 1281 * need to verify that we are not creating a directory loop. To 1282 * handle this we acquire all but fdvp using non-blocking 1283 * acquisitions. If we fail to acquire any lock in the path we will 1284 * drop all held locks, acquire the new lock in a blocking fashion, 1285 * and then release it and restart the rename. This acquire/release 1286 * step ensures that we do not spin on a lock waiting for release. 1287 */ 1288 error = vn_lock(fdvp, LK_EXCLUSIVE); 1289 if (error) 1290 goto releout; 1291 if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1292 VOP_UNLOCK(fdvp); 1293 error = vn_lock(tdvp, LK_EXCLUSIVE); 1294 if (error) 1295 goto releout; 1296 VOP_UNLOCK(tdvp); 1297 atomic_add_int(&rename_restarts, 1); 1298 goto relock; 1299 } 1300 /* 1301 * Re-resolve fvp to be certain it still exists and fetch the 1302 * correct vnode. 1303 */ 1304 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); 1305 if (error) { 1306 VOP_UNLOCK(fdvp); 1307 VOP_UNLOCK(tdvp); 1308 goto releout; 1309 } 1310 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); 1311 if (error) { 1312 VOP_UNLOCK(fdvp); 1313 VOP_UNLOCK(tdvp); 1314 if (error != EBUSY) 1315 goto releout; 1316 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); 1317 if (error != 0) 1318 goto releout; 1319 VOP_UNLOCK(nvp); 1320 vrele(fvp); 1321 fvp = nvp; 1322 atomic_add_int(&rename_restarts, 1); 1323 goto relock; 1324 } 1325 vrele(fvp); 1326 fvp = nvp; 1327 /* 1328 * Re-resolve tvp and acquire the vnode lock if present. 1329 */ 1330 error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); 1331 if (error != 0 && error != EJUSTRETURN) { 1332 VOP_UNLOCK(fdvp); 1333 VOP_UNLOCK(tdvp); 1334 VOP_UNLOCK(fvp); 1335 goto releout; 1336 } 1337 /* 1338 * If tvp disappeared we just carry on. 1339 */ 1340 if (error == EJUSTRETURN && tvp != NULL) { 1341 vrele(tvp); 1342 tvp = NULL; 1343 } 1344 /* 1345 * Get the tvp ino if the lookup succeeded. We may have to restart 1346 * if the non-blocking acquire fails. 1347 */ 1348 if (error == 0) { 1349 nvp = NULL; 1350 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); 1351 if (tvp) 1352 vrele(tvp); 1353 tvp = nvp; 1354 if (error) { 1355 VOP_UNLOCK(fdvp); 1356 VOP_UNLOCK(tdvp); 1357 VOP_UNLOCK(fvp); 1358 if (error != EBUSY) 1359 goto releout; 1360 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); 1361 if (error != 0) 1362 goto releout; 1363 vput(nvp); 1364 atomic_add_int(&rename_restarts, 1); 1365 goto relock; 1366 } 1367 } 1368 1369 if (DOINGSUJ(fdvp) && 1370 (seqc_in_modify(fdvp_s) || !vn_seqc_consistent(fdvp, fdvp_s) || 1371 seqc_in_modify(fvp_s) || !vn_seqc_consistent(fvp, fvp_s) || 1372 seqc_in_modify(tdvp_s) || !vn_seqc_consistent(tdvp, tdvp_s) || 1373 (tvp != NULL && (seqc_in_modify(tvp_s) || 1374 !vn_seqc_consistent(tvp, tvp_s))))) { 1375 error = softdep_prerename(fdvp, fvp, tdvp, tvp); 1376 if (error != 0) { 1377 if (error == ERELOOKUP) { 1378 fdvp_s = vn_seqc_read_any(fdvp); 1379 fvp_s = vn_seqc_read_any(fvp); 1380 tdvp_s = vn_seqc_read_any(tdvp); 1381 if (tvp != NULL) 1382 tvp_s = vn_seqc_read_any(tvp); 1383 atomic_add_int(&rename_restarts, 1); 1384 goto relock; 1385 } 1386 goto releout; 1387 } 1388 } 1389 1390 fdp = VTOI(fdvp); 1391 fip = VTOI(fvp); 1392 tdp = VTOI(tdvp); 1393 tip = NULL; 1394 if (tvp) 1395 tip = VTOI(tvp); 1396 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1397 (VTOI(tdvp)->i_flags & APPEND))) { 1398 error = EPERM; 1399 goto unlockout; 1400 } 1401 /* 1402 * Renaming a file to itself has no effect. The upper layers should 1403 * not call us in that case. However, things could change after 1404 * we drop the locks above. 1405 */ 1406 if (fvp == tvp) { 1407 error = 0; 1408 goto unlockout; 1409 } 1410 doingdirectory = 0; 1411 newparent = 0; 1412 ino = fip->i_number; 1413 if (fip->i_nlink >= UFS_LINK_MAX) { 1414 error = EMLINK; 1415 goto unlockout; 1416 } 1417 if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 1418 || (fdp->i_flags & APPEND)) { 1419 error = EPERM; 1420 goto unlockout; 1421 } 1422 if ((fip->i_mode & IFMT) == IFDIR) { 1423 /* 1424 * Avoid ".", "..", and aliases of "." for obvious reasons. 1425 */ 1426 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 1427 fdp == fip || 1428 (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 1429 error = EINVAL; 1430 goto unlockout; 1431 } 1432 if (fdp->i_number != tdp->i_number) 1433 newparent = tdp->i_number; 1434 doingdirectory = 1; 1435 } 1436 if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || 1437 (tvp != NULL && tvp->v_type == VDIR && 1438 tvp->v_mountedhere != NULL)) { 1439 error = EXDEV; 1440 goto unlockout; 1441 } 1442 1443 /* 1444 * If ".." must be changed (ie the directory gets a new 1445 * parent) then the source directory must not be in the 1446 * directory hierarchy above the target, as this would 1447 * orphan everything below the source directory. Also 1448 * the user must have write permission in the source so 1449 * as to be able to change "..". 1450 */ 1451 if (doingdirectory && newparent) { 1452 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread); 1453 if (error) 1454 goto unlockout; 1455 1456 sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock); 1457 checkpath_locked = true; 1458 error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, 1459 &ino); 1460 /* 1461 * We encountered a lock that we have to wait for. Unlock 1462 * everything else and VGET before restarting. 1463 */ 1464 if (ino) { 1465 sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); 1466 checkpath_locked = false; 1467 VOP_UNLOCK(fdvp); 1468 VOP_UNLOCK(fvp); 1469 VOP_UNLOCK(tdvp); 1470 if (tvp) 1471 VOP_UNLOCK(tvp); 1472 error = VFS_VGET(mp, ino, LK_SHARED, &nvp); 1473 if (error == 0) 1474 vput(nvp); 1475 atomic_add_int(&rename_restarts, 1); 1476 goto relock; 1477 } 1478 if (error) 1479 goto unlockout; 1480 if ((tcnp->cn_flags & SAVESTART) == 0) 1481 panic("ufs_rename: lost to startdir"); 1482 } 1483 if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || 1484 tdp->i_effnlink == 0) 1485 panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); 1486 1487 if (tvp != NULL) 1488 vn_seqc_write_begin(tvp); 1489 vn_seqc_write_begin(tdvp); 1490 vn_seqc_write_begin(fvp); 1491 vn_seqc_write_begin(fdvp); 1492 want_seqc_end = true; 1493 1494 /* 1495 * 1) Bump link count while we're moving stuff 1496 * around. If we crash somewhere before 1497 * completing our work, the link count 1498 * may be wrong, but correctable. 1499 */ 1500 fip->i_effnlink++; 1501 fip->i_nlink++; 1502 DIP_SET(fip, i_nlink, fip->i_nlink); 1503 UFS_INODE_SET_FLAG(fip, IN_CHANGE); 1504 if (DOINGSOFTDEP(fvp)) 1505 softdep_setup_link(tdp, fip); 1506 error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp)); 1507 if (error) 1508 goto bad; 1509 1510 /* 1511 * 2) If target doesn't exist, link the target 1512 * to the source and unlink the source. 1513 * Otherwise, rewrite the target directory 1514 * entry to reference the source inode and 1515 * expunge the original entry's existence. 1516 */ 1517 if (tip == NULL) { 1518 if (ITODEV(tdp) != ITODEV(fip)) 1519 panic("ufs_rename: EXDEV"); 1520 if (doingdirectory && newparent) { 1521 /* 1522 * Account for ".." in new directory. 1523 * When source and destination have the same 1524 * parent we don't adjust the link count. The 1525 * actual link modification is completed when 1526 * .. is rewritten below. 1527 */ 1528 if (tdp->i_nlink >= UFS_LINK_MAX) { 1529 error = EMLINK; 1530 goto bad; 1531 } 1532 } 1533 ufs_makedirentry(fip, tcnp, &newdir); 1534 error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); 1535 if (error) 1536 goto bad; 1537 /* Setup tdvp for directory compaction if needed. */ 1538 if (I_COUNT(tdp) != 0 && I_ENDOFF(tdp) != 0 && 1539 I_ENDOFF(tdp) < tdp->i_size) 1540 endoff = I_ENDOFF(tdp); 1541 } else { 1542 if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) 1543 panic("ufs_rename: EXDEV"); 1544 /* 1545 * Short circuit rename(foo, foo). 1546 */ 1547 if (tip->i_number == fip->i_number) 1548 panic("ufs_rename: same file"); 1549 /* 1550 * If the parent directory is "sticky", then the caller 1551 * must possess VADMIN for the parent directory, or the 1552 * destination of the rename. This implements append-only 1553 * directories. 1554 */ 1555 if ((tdp->i_mode & S_ISTXT) && 1556 VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && 1557 VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { 1558 error = EPERM; 1559 goto bad; 1560 } 1561 /* 1562 * Target must be empty if a directory and have no links 1563 * to it. Also, ensure source and target are compatible 1564 * (both directories, or both not directories). 1565 */ 1566 if ((tip->i_mode & IFMT) == IFDIR) { 1567 if ((tip->i_effnlink > 2) || 1568 !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { 1569 error = ENOTEMPTY; 1570 goto bad; 1571 } 1572 if (!doingdirectory) { 1573 error = ENOTDIR; 1574 goto bad; 1575 } 1576 cache_purge(tdvp); 1577 } else if (doingdirectory) { 1578 error = EISDIR; 1579 goto bad; 1580 } 1581 if (doingdirectory) { 1582 if (!newparent) { 1583 tdp->i_effnlink--; 1584 if (DOINGSOFTDEP(tdvp)) 1585 softdep_change_linkcnt(tdp); 1586 } 1587 tip->i_effnlink--; 1588 if (DOINGSOFTDEP(tvp)) 1589 softdep_change_linkcnt(tip); 1590 } 1591 error = ufs_dirrewrite(tdp, tip, fip->i_number, 1592 IFTODT(fip->i_mode), 1593 (doingdirectory && newparent) ? newparent : doingdirectory); 1594 if (error) { 1595 if (doingdirectory) { 1596 if (!newparent) { 1597 tdp->i_effnlink++; 1598 if (DOINGSOFTDEP(tdvp)) 1599 softdep_change_linkcnt(tdp); 1600 } 1601 tip->i_effnlink++; 1602 if (DOINGSOFTDEP(tvp)) 1603 softdep_change_linkcnt(tip); 1604 } 1605 goto bad; 1606 } 1607 if (doingdirectory && !DOINGSOFTDEP(tvp)) { 1608 /* 1609 * The only stuff left in the directory is "." 1610 * and "..". The "." reference is inconsequential 1611 * since we are quashing it. We have removed the "." 1612 * reference and the reference in the parent directory, 1613 * but there may be other hard links. The soft 1614 * dependency code will arrange to do these operations 1615 * after the parent directory entry has been deleted on 1616 * disk, so when running with that code we avoid doing 1617 * them now. 1618 */ 1619 if (!newparent) { 1620 tdp->i_nlink--; 1621 DIP_SET(tdp, i_nlink, tdp->i_nlink); 1622 UFS_INODE_SET_FLAG(tdp, IN_CHANGE); 1623 } 1624 tip->i_nlink--; 1625 DIP_SET(tip, i_nlink, tip->i_nlink); 1626 UFS_INODE_SET_FLAG(tip, IN_CHANGE); 1627 } 1628 } 1629 1630 /* 1631 * 3) Unlink the source. We have to resolve the path again to 1632 * fixup the directory offset and count for ufs_dirremove. 1633 */ 1634 if (fdvp == tdvp) { 1635 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); 1636 if (error) 1637 panic("ufs_rename: from entry went away!"); 1638 if (ino != fip->i_number) 1639 panic("ufs_rename: ino mismatch %ju != %ju\n", 1640 (uintmax_t)ino, (uintmax_t)fip->i_number); 1641 } 1642 /* 1643 * If the source is a directory with a 1644 * new parent, the link count of the old 1645 * parent directory must be decremented 1646 * and ".." set to point to the new parent. 1647 */ 1648 if (doingdirectory && newparent) { 1649 /* 1650 * If tip exists we simply use its link, otherwise we must 1651 * add a new one. 1652 */ 1653 if (tip == NULL) { 1654 tdp->i_effnlink++; 1655 tdp->i_nlink++; 1656 DIP_SET(tdp, i_nlink, tdp->i_nlink); 1657 UFS_INODE_SET_FLAG(tdp, IN_CHANGE); 1658 if (DOINGSOFTDEP(tdvp)) 1659 softdep_setup_dotdot_link(tdp, fip); 1660 error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) && 1661 !DOINGASYNC(tdvp)); 1662 /* Don't go to bad here as the new link exists. */ 1663 if (error) 1664 goto unlockout; 1665 } else if (DOINGSUJ(tdvp)) 1666 /* Journal must account for each new link. */ 1667 softdep_setup_dotdot_link(tdp, fip); 1668 SET_I_OFFSET(fip, mastertemplate.dot_reclen); 1669 ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); 1670 cache_purge(fdvp); 1671 } 1672 error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); 1673 /* 1674 * The kern_renameat() looks up the fvp using the DELETE flag, which 1675 * causes the removal of the name cache entry for fvp. 1676 * As the relookup of the fvp is done in two steps: 1677 * ufs_lookup_ino() and then VFS_VGET(), another thread might do a 1678 * normal lookup of the from name just before the VFS_VGET() call, 1679 * causing the cache entry to be re-instantiated. 1680 * 1681 * The same issue also applies to tvp if it exists as 1682 * otherwise we may have a stale name cache entry for the new 1683 * name that references the old i-node if it has other links 1684 * or open file descriptors. 1685 */ 1686 cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp); 1687 1688 unlockout: 1689 if (want_seqc_end) { 1690 if (tvp != NULL) 1691 vn_seqc_write_end(tvp); 1692 vn_seqc_write_end(tdvp); 1693 vn_seqc_write_end(fvp); 1694 vn_seqc_write_end(fdvp); 1695 } 1696 1697 if (checkpath_locked) 1698 sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); 1699 1700 vput(fdvp); 1701 vput(fvp); 1702 1703 /* 1704 * If compaction or fsync was requested do it in 1705 * ffs_vput_pair() now that other locks are no longer needed. 1706 */ 1707 if (error == 0 && endoff != 0) { 1708 UFS_INODE_SET_FLAG(tdp, IN_ENDOFF); 1709 SET_I_ENDOFF(tdp, endoff); 1710 } 1711 VOP_VPUT_PAIR(tdvp, &tvp, true); 1712 return (error); 1713 1714 bad: 1715 fip->i_effnlink--; 1716 fip->i_nlink--; 1717 DIP_SET(fip, i_nlink, fip->i_nlink); 1718 UFS_INODE_SET_FLAG(fip, IN_CHANGE); 1719 if (DOINGSOFTDEP(fvp)) 1720 softdep_revert_link(tdp, fip); 1721 goto unlockout; 1722 1723 releout: 1724 if (want_seqc_end) { 1725 if (tvp != NULL) 1726 vn_seqc_write_end(tvp); 1727 vn_seqc_write_end(tdvp); 1728 vn_seqc_write_end(fvp); 1729 vn_seqc_write_end(fdvp); 1730 } 1731 1732 vrele(fdvp); 1733 vrele(fvp); 1734 vrele(tdvp); 1735 if (tvp) 1736 vrele(tvp); 1737 1738 return (error); 1739 } 1740 1741 #ifdef UFS_ACL 1742 static int 1743 ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, 1744 mode_t dmode, struct ucred *cred, struct thread *td) 1745 { 1746 int error; 1747 struct inode *ip = VTOI(tvp); 1748 struct acl *dacl, *acl; 1749 1750 acl = acl_alloc(M_WAITOK); 1751 dacl = acl_alloc(M_WAITOK); 1752 1753 /* 1754 * Retrieve default ACL from parent, if any. 1755 */ 1756 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1757 switch (error) { 1758 case 0: 1759 /* 1760 * Retrieved a default ACL, so merge mode and ACL if 1761 * necessary. If the ACL is empty, fall through to 1762 * the "not defined or available" case. 1763 */ 1764 if (acl->acl_cnt != 0) { 1765 dmode = acl_posix1e_newfilemode(dmode, acl); 1766 UFS_INODE_SET_MODE(ip, dmode); 1767 DIP_SET(ip, i_mode, dmode); 1768 *dacl = *acl; 1769 ufs_sync_acl_from_inode(ip, acl); 1770 break; 1771 } 1772 /* FALLTHROUGH */ 1773 1774 case EOPNOTSUPP: 1775 /* 1776 * Just use the mode as-is. 1777 */ 1778 UFS_INODE_SET_MODE(ip, dmode); 1779 DIP_SET(ip, i_mode, dmode); 1780 error = 0; 1781 goto out; 1782 1783 default: 1784 goto out; 1785 } 1786 1787 /* 1788 * XXX: If we abort now, will Soft Updates notify the extattr 1789 * code that the EAs for the file need to be released? 1790 */ 1791 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1792 if (error == 0) 1793 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); 1794 switch (error) { 1795 case 0: 1796 break; 1797 1798 case EOPNOTSUPP: 1799 /* 1800 * XXX: This should not happen, as EOPNOTSUPP above 1801 * was supposed to free acl. 1802 */ 1803 printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1804 /* 1805 panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); 1806 */ 1807 break; 1808 1809 default: 1810 goto out; 1811 } 1812 1813 out: 1814 acl_free(acl); 1815 acl_free(dacl); 1816 1817 return (error); 1818 } 1819 1820 static int 1821 ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, 1822 mode_t mode, struct ucred *cred, struct thread *td) 1823 { 1824 int error; 1825 struct inode *ip = VTOI(tvp); 1826 struct acl *acl; 1827 1828 acl = acl_alloc(M_WAITOK); 1829 1830 /* 1831 * Retrieve default ACL for parent, if any. 1832 */ 1833 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1834 switch (error) { 1835 case 0: 1836 /* 1837 * Retrieved a default ACL, so merge mode and ACL if 1838 * necessary. 1839 */ 1840 if (acl->acl_cnt != 0) { 1841 /* 1842 * Two possible ways for default ACL to not 1843 * be present. First, the EA can be 1844 * undefined, or second, the default ACL can 1845 * be blank. If it's blank, fall through to 1846 * the it's not defined case. 1847 */ 1848 mode = acl_posix1e_newfilemode(mode, acl); 1849 UFS_INODE_SET_MODE(ip, mode); 1850 DIP_SET(ip, i_mode, mode); 1851 ufs_sync_acl_from_inode(ip, acl); 1852 break; 1853 } 1854 /* FALLTHROUGH */ 1855 1856 case EOPNOTSUPP: 1857 /* 1858 * Just use the mode as-is. 1859 */ 1860 UFS_INODE_SET_MODE(ip, mode); 1861 DIP_SET(ip, i_mode, mode); 1862 error = 0; 1863 goto out; 1864 1865 default: 1866 goto out; 1867 } 1868 1869 /* 1870 * XXX: If we abort now, will Soft Updates notify the extattr 1871 * code that the EAs for the file need to be released? 1872 */ 1873 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1874 switch (error) { 1875 case 0: 1876 break; 1877 1878 case EOPNOTSUPP: 1879 /* 1880 * XXX: This should not happen, as EOPNOTSUPP above was 1881 * supposed to free acl. 1882 */ 1883 printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1884 "but no VOP_SETACL()\n"); 1885 /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1886 "but no VOP_SETACL()"); */ 1887 break; 1888 1889 default: 1890 goto out; 1891 } 1892 1893 out: 1894 acl_free(acl); 1895 1896 return (error); 1897 } 1898 1899 static int 1900 ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, 1901 mode_t child_mode, struct ucred *cred, struct thread *td) 1902 { 1903 int error; 1904 struct acl *parent_aclp, *child_aclp; 1905 1906 parent_aclp = acl_alloc(M_WAITOK); 1907 child_aclp = acl_alloc(M_WAITOK | M_ZERO); 1908 1909 error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); 1910 if (error) 1911 goto out; 1912 acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, 1913 child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); 1914 error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); 1915 if (error) 1916 goto out; 1917 out: 1918 acl_free(parent_aclp); 1919 acl_free(child_aclp); 1920 1921 return (error); 1922 } 1923 #endif 1924 1925 /* 1926 * Mkdir system call 1927 */ 1928 static int 1929 ufs_mkdir(ap) 1930 struct vop_mkdir_args /* { 1931 struct vnode *a_dvp; 1932 struct vnode **a_vpp; 1933 struct componentname *a_cnp; 1934 struct vattr *a_vap; 1935 } */ *ap; 1936 { 1937 struct vnode *dvp = ap->a_dvp; 1938 struct vattr *vap = ap->a_vap; 1939 struct componentname *cnp = ap->a_cnp; 1940 struct inode *ip, *dp; 1941 struct vnode *tvp; 1942 struct buf *bp; 1943 struct dirtemplate dirtemplate, *dtp; 1944 struct direct newdir; 1945 int error, dmode; 1946 long blkoff; 1947 1948 #ifdef INVARIANTS 1949 if ((cnp->cn_flags & HASBUF) == 0) 1950 panic("ufs_mkdir: no name"); 1951 #endif 1952 dp = VTOI(dvp); 1953 if (dp->i_nlink >= UFS_LINK_MAX) { 1954 error = EMLINK; 1955 goto out; 1956 } 1957 dmode = vap->va_mode & 0777; 1958 dmode |= IFDIR; 1959 1960 /* 1961 * Must simulate part of ufs_makeinode here to acquire the inode, 1962 * but not have it entered in the parent directory. The entry is 1963 * made later after writing "." and ".." entries. 1964 */ 1965 if (dp->i_effnlink < 2) { 1966 print_bad_link_count("ufs_mkdir", dvp); 1967 error = EINVAL; 1968 goto out; 1969 } 1970 1971 if (DOINGSUJ(dvp)) { 1972 error = softdep_prelink(dvp, NULL, cnp); 1973 if (error != 0) { 1974 MPASS(error == ERELOOKUP); 1975 return (error); 1976 } 1977 } 1978 1979 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); 1980 if (error) 1981 goto out; 1982 vn_seqc_write_begin(tvp); 1983 ip = VTOI(tvp); 1984 ip->i_gid = dp->i_gid; 1985 DIP_SET(ip, i_gid, dp->i_gid); 1986 #ifdef SUIDDIR 1987 { 1988 #ifdef QUOTA 1989 struct ucred ucred, *ucp; 1990 gid_t ucred_group; 1991 ucp = cnp->cn_cred; 1992 #endif 1993 /* 1994 * If we are hacking owners here, (only do this where told to) 1995 * and we are not giving it TO root, (would subvert quotas) 1996 * then go ahead and give it to the other user. 1997 * The new directory also inherits the SUID bit. 1998 * If user's UID and dir UID are the same, 1999 * 'give it away' so that the SUID is still forced on. 2000 */ 2001 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 2002 (dp->i_mode & ISUID) && dp->i_uid) { 2003 dmode |= ISUID; 2004 ip->i_uid = dp->i_uid; 2005 DIP_SET(ip, i_uid, dp->i_uid); 2006 #ifdef QUOTA 2007 if (dp->i_uid != cnp->cn_cred->cr_uid) { 2008 /* 2009 * Make sure the correct user gets charged 2010 * for the space. 2011 * Make a dummy credential for the victim. 2012 * XXX This seems to never be accessed out of 2013 * our context so a stack variable is ok. 2014 */ 2015 refcount_init(&ucred.cr_ref, 1); 2016 ucred.cr_uid = ip->i_uid; 2017 ucred.cr_ngroups = 1; 2018 ucred.cr_groups = &ucred_group; 2019 ucred.cr_groups[0] = dp->i_gid; 2020 ucp = &ucred; 2021 } 2022 #endif 2023 } else { 2024 ip->i_uid = cnp->cn_cred->cr_uid; 2025 DIP_SET(ip, i_uid, ip->i_uid); 2026 } 2027 #ifdef QUOTA 2028 if ((error = getinoquota(ip)) || 2029 (error = chkiq(ip, 1, ucp, 0))) { 2030 if (DOINGSOFTDEP(tvp)) 2031 softdep_revert_link(dp, ip); 2032 UFS_VFREE(tvp, ip->i_number, dmode); 2033 vn_seqc_write_end(tvp); 2034 vgone(tvp); 2035 vput(tvp); 2036 return (error); 2037 } 2038 #endif 2039 } 2040 #else /* !SUIDDIR */ 2041 ip->i_uid = cnp->cn_cred->cr_uid; 2042 DIP_SET(ip, i_uid, ip->i_uid); 2043 #ifdef QUOTA 2044 if ((error = getinoquota(ip)) || 2045 (error = chkiq(ip, 1, cnp->cn_cred, 0))) { 2046 if (DOINGSOFTDEP(tvp)) 2047 softdep_revert_link(dp, ip); 2048 UFS_VFREE(tvp, ip->i_number, dmode); 2049 vn_seqc_write_end(tvp); 2050 vgone(tvp); 2051 vput(tvp); 2052 return (error); 2053 } 2054 #endif 2055 #endif /* !SUIDDIR */ 2056 UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); 2057 UFS_INODE_SET_MODE(ip, dmode); 2058 DIP_SET(ip, i_mode, dmode); 2059 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 2060 ip->i_effnlink = 2; 2061 ip->i_nlink = 2; 2062 DIP_SET(ip, i_nlink, 2); 2063 2064 if (cnp->cn_flags & ISWHITEOUT) { 2065 ip->i_flags |= UF_OPAQUE; 2066 DIP_SET(ip, i_flags, ip->i_flags); 2067 } 2068 2069 /* 2070 * Bump link count in parent directory to reflect work done below. 2071 * Should be done before reference is created so cleanup is 2072 * possible if we crash. 2073 */ 2074 dp->i_effnlink++; 2075 dp->i_nlink++; 2076 DIP_SET(dp, i_nlink, dp->i_nlink); 2077 UFS_INODE_SET_FLAG(dp, IN_CHANGE); 2078 if (DOINGSOFTDEP(dvp)) 2079 softdep_setup_mkdir(dp, ip); 2080 error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)); 2081 if (error) 2082 goto bad; 2083 #ifdef MAC 2084 if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { 2085 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, 2086 dvp, tvp, cnp); 2087 if (error) 2088 goto bad; 2089 } 2090 #endif 2091 #ifdef UFS_ACL 2092 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 2093 error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 2094 cnp->cn_cred, curthread); 2095 if (error) 2096 goto bad; 2097 } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { 2098 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, 2099 cnp->cn_cred, curthread); 2100 if (error) 2101 goto bad; 2102 } 2103 #endif /* !UFS_ACL */ 2104 2105 /* 2106 * Initialize directory with "." and ".." from static template. 2107 */ 2108 if (!OFSFMT(dvp)) 2109 dtp = &mastertemplate; 2110 else 2111 dtp = (struct dirtemplate *)&omastertemplate; 2112 dirtemplate = *dtp; 2113 dirtemplate.dot_ino = ip->i_number; 2114 dirtemplate.dotdot_ino = dp->i_number; 2115 vnode_pager_setsize(tvp, DIRBLKSIZ); 2116 if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, 2117 BA_CLRBUF, &bp)) != 0) 2118 goto bad; 2119 ip->i_size = DIRBLKSIZ; 2120 DIP_SET(ip, i_size, DIRBLKSIZ); 2121 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 2122 bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); 2123 if (DOINGSOFTDEP(tvp)) { 2124 /* 2125 * Ensure that the entire newly allocated block is a 2126 * valid directory so that future growth within the 2127 * block does not have to ensure that the block is 2128 * written before the inode. 2129 */ 2130 blkoff = DIRBLKSIZ; 2131 while (blkoff < bp->b_bcount) { 2132 ((struct direct *) 2133 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 2134 blkoff += DIRBLKSIZ; 2135 } 2136 } 2137 if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && 2138 !DOINGASYNC(tvp))) != 0) { 2139 (void)bwrite(bp); 2140 goto bad; 2141 } 2142 /* 2143 * Directory set up, now install its entry in the parent directory. 2144 * 2145 * If we are not doing soft dependencies, then we must write out the 2146 * buffer containing the new directory body before entering the new 2147 * name in the parent. If we are doing soft dependencies, then the 2148 * buffer containing the new directory body will be passed to and 2149 * released in the soft dependency code after the code has attached 2150 * an appropriate ordering dependency to the buffer which ensures that 2151 * the buffer is written before the new name is written in the parent. 2152 */ 2153 if (DOINGASYNC(dvp)) 2154 bdwrite(bp); 2155 else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) 2156 goto bad; 2157 ufs_makedirentry(ip, cnp, &newdir); 2158 error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); 2159 2160 bad: 2161 if (error == 0) { 2162 *ap->a_vpp = tvp; 2163 vn_seqc_write_end(tvp); 2164 } else { 2165 dp->i_effnlink--; 2166 dp->i_nlink--; 2167 DIP_SET(dp, i_nlink, dp->i_nlink); 2168 UFS_INODE_SET_FLAG(dp, IN_CHANGE); 2169 /* 2170 * No need to do an explicit VOP_TRUNCATE here, vrele will 2171 * do this for us because we set the link count to 0. 2172 */ 2173 ip->i_effnlink = 0; 2174 ip->i_nlink = 0; 2175 DIP_SET(ip, i_nlink, 0); 2176 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 2177 if (DOINGSOFTDEP(tvp)) 2178 softdep_revert_mkdir(dp, ip); 2179 vn_seqc_write_end(tvp); 2180 vgone(tvp); 2181 vput(tvp); 2182 } 2183 out: 2184 return (error); 2185 } 2186 2187 /* 2188 * Rmdir system call. 2189 */ 2190 static int 2191 ufs_rmdir(ap) 2192 struct vop_rmdir_args /* { 2193 struct vnode *a_dvp; 2194 struct vnode *a_vp; 2195 struct componentname *a_cnp; 2196 } */ *ap; 2197 { 2198 struct vnode *vp = ap->a_vp; 2199 struct vnode *dvp = ap->a_dvp; 2200 struct componentname *cnp = ap->a_cnp; 2201 struct inode *ip, *dp; 2202 int error; 2203 2204 ip = VTOI(vp); 2205 dp = VTOI(dvp); 2206 2207 /* 2208 * Do not remove a directory that is in the process of being renamed. 2209 * Verify the directory is empty (and valid). Rmdir ".." will not be 2210 * valid since ".." will contain a reference to the current directory 2211 * and thus be non-empty. Do not allow the removal of mounted on 2212 * directories (this can happen when an NFS exported filesystem 2213 * tries to remove a locally mounted on directory). 2214 */ 2215 error = 0; 2216 if (dp->i_effnlink <= 2) { 2217 if (dp->i_effnlink == 2) 2218 print_bad_link_count("ufs_rmdir", dvp); 2219 error = EINVAL; 2220 goto out; 2221 } 2222 if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { 2223 error = ENOTEMPTY; 2224 goto out; 2225 } 2226 if ((dp->i_flags & APPEND) 2227 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 2228 error = EPERM; 2229 goto out; 2230 } 2231 if (vp->v_mountedhere != 0) { 2232 error = EINVAL; 2233 goto out; 2234 } 2235 if (DOINGSUJ(dvp)) { 2236 error = softdep_prelink(dvp, vp, cnp); 2237 if (error != 0) { 2238 MPASS(error == ERELOOKUP); 2239 return (error); 2240 } 2241 } 2242 2243 #ifdef UFS_GJOURNAL 2244 ufs_gjournal_orphan(vp); 2245 #endif 2246 /* 2247 * Delete reference to directory before purging 2248 * inode. If we crash in between, the directory 2249 * will be reattached to lost+found, 2250 */ 2251 dp->i_effnlink--; 2252 ip->i_effnlink--; 2253 if (DOINGSOFTDEP(vp)) 2254 softdep_setup_rmdir(dp, ip); 2255 error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); 2256 if (error) { 2257 dp->i_effnlink++; 2258 ip->i_effnlink++; 2259 if (DOINGSOFTDEP(vp)) 2260 softdep_revert_rmdir(dp, ip); 2261 goto out; 2262 } 2263 /* 2264 * The only stuff left in the directory is "." and "..". The "." 2265 * reference is inconsequential since we are quashing it. The soft 2266 * dependency code will arrange to do these operations after 2267 * the parent directory entry has been deleted on disk, so 2268 * when running with that code we avoid doing them now. 2269 */ 2270 if (!DOINGSOFTDEP(vp)) { 2271 dp->i_nlink--; 2272 DIP_SET(dp, i_nlink, dp->i_nlink); 2273 UFS_INODE_SET_FLAG(dp, IN_CHANGE); 2274 error = UFS_UPDATE(dvp, 0); 2275 ip->i_nlink--; 2276 DIP_SET(ip, i_nlink, ip->i_nlink); 2277 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 2278 } 2279 cache_vop_rmdir(dvp, vp); 2280 #ifdef UFS_DIRHASH 2281 /* Kill any active hash; i_effnlink == 0, so it will not come back. */ 2282 if (ip->i_dirhash != NULL) 2283 ufsdirhash_free(ip); 2284 #endif 2285 out: 2286 return (error); 2287 } 2288 2289 /* 2290 * symlink -- make a symbolic link 2291 */ 2292 static int 2293 ufs_symlink(ap) 2294 struct vop_symlink_args /* { 2295 struct vnode *a_dvp; 2296 struct vnode **a_vpp; 2297 struct componentname *a_cnp; 2298 struct vattr *a_vap; 2299 const char *a_target; 2300 } */ *ap; 2301 { 2302 struct vnode *vp, **vpp = ap->a_vpp; 2303 struct inode *ip; 2304 int len, error; 2305 2306 error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 2307 vpp, ap->a_cnp, "ufs_symlink"); 2308 if (error) 2309 return (error); 2310 vp = *vpp; 2311 len = strlen(ap->a_target); 2312 if (len < VFSTOUFS(vp->v_mount)->um_maxsymlinklen) { 2313 ip = VTOI(vp); 2314 bcopy(ap->a_target, SHORTLINK(ip), len); 2315 ip->i_size = len; 2316 DIP_SET(ip, i_size, len); 2317 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 2318 error = UFS_UPDATE(vp, 0); 2319 } else 2320 error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target), 2321 len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 2322 ap->a_cnp->cn_cred, NOCRED, NULL, NULL); 2323 if (error) 2324 vput(vp); 2325 return (error); 2326 } 2327 2328 /* 2329 * Vnode op for reading directories. 2330 */ 2331 int 2332 ufs_readdir(ap) 2333 struct vop_readdir_args /* { 2334 struct vnode *a_vp; 2335 struct uio *a_uio; 2336 struct ucred *a_cred; 2337 int *a_eofflag; 2338 int *a_ncookies; 2339 uint64_t **a_cookies; 2340 } */ *ap; 2341 { 2342 struct vnode *vp = ap->a_vp; 2343 struct uio *uio = ap->a_uio; 2344 struct buf *bp; 2345 struct inode *ip; 2346 struct direct *dp, *edp; 2347 uint64_t *cookies; 2348 struct dirent dstdp; 2349 off_t offset, startoffset; 2350 size_t readcnt, skipcnt; 2351 ssize_t startresid; 2352 u_int ncookies; 2353 int error; 2354 2355 if (uio->uio_offset < 0) 2356 return (EINVAL); 2357 ip = VTOI(vp); 2358 if (ip->i_effnlink == 0) 2359 return (0); 2360 if (ap->a_ncookies != NULL) { 2361 if (uio->uio_resid < 0) 2362 ncookies = 0; 2363 else 2364 ncookies = uio->uio_resid; 2365 if (uio->uio_offset >= ip->i_size) 2366 ncookies = 0; 2367 else if (ip->i_size - uio->uio_offset < ncookies) 2368 ncookies = ip->i_size - uio->uio_offset; 2369 ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; 2370 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); 2371 *ap->a_ncookies = ncookies; 2372 *ap->a_cookies = cookies; 2373 } else { 2374 ncookies = 0; 2375 cookies = NULL; 2376 } 2377 offset = startoffset = uio->uio_offset; 2378 startresid = uio->uio_resid; 2379 error = 0; 2380 while (error == 0 && uio->uio_resid > 0 && 2381 uio->uio_offset < ip->i_size) { 2382 error = UFS_BLKATOFF(vp, uio->uio_offset, NULL, &bp); 2383 if (error) 2384 break; 2385 if (bp->b_offset + bp->b_bcount > ip->i_size) 2386 readcnt = ip->i_size - bp->b_offset; 2387 else 2388 readcnt = bp->b_bcount; 2389 skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & 2390 ~(size_t)(DIRBLKSIZ - 1); 2391 offset = bp->b_offset + skipcnt; 2392 dp = (struct direct *)&bp->b_data[skipcnt]; 2393 edp = (struct direct *)&bp->b_data[readcnt]; 2394 while (error == 0 && uio->uio_resid > 0 && dp < edp) { 2395 if (dp->d_reclen <= offsetof(struct direct, d_name) || 2396 (caddr_t)dp + dp->d_reclen > (caddr_t)edp) { 2397 error = EIO; 2398 break; 2399 } 2400 #if BYTE_ORDER == LITTLE_ENDIAN 2401 /* Old filesystem format. */ 2402 if (OFSFMT(vp)) { 2403 dstdp.d_namlen = dp->d_type; 2404 dstdp.d_type = dp->d_namlen; 2405 } else 2406 #endif 2407 { 2408 dstdp.d_namlen = dp->d_namlen; 2409 dstdp.d_type = dp->d_type; 2410 } 2411 if (offsetof(struct direct, d_name) + dstdp.d_namlen > 2412 dp->d_reclen) { 2413 error = EIO; 2414 break; 2415 } 2416 if (offset < startoffset || dp->d_ino == 0) 2417 goto nextentry; 2418 dstdp.d_fileno = dp->d_ino; 2419 dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); 2420 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 2421 /* NOTE: d_off is the offset of the *next* entry. */ 2422 dstdp.d_off = offset + dp->d_reclen; 2423 dirent_terminate(&dstdp); 2424 if (dstdp.d_reclen > uio->uio_resid) { 2425 if (uio->uio_resid == startresid) 2426 error = EINVAL; 2427 else 2428 error = EJUSTRETURN; 2429 break; 2430 } 2431 /* Advance dp. */ 2432 error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); 2433 if (error) 2434 break; 2435 if (cookies != NULL) { 2436 KASSERT(ncookies > 0, 2437 ("ufs_readdir: cookies buffer too small")); 2438 *cookies = offset + dp->d_reclen; 2439 cookies++; 2440 ncookies--; 2441 } 2442 nextentry: 2443 offset += dp->d_reclen; 2444 dp = (struct direct *)((caddr_t)dp + dp->d_reclen); 2445 } 2446 bqrelse(bp); 2447 uio->uio_offset = offset; 2448 } 2449 /* We need to correct uio_offset. */ 2450 uio->uio_offset = offset; 2451 if (error == EJUSTRETURN) 2452 error = 0; 2453 if (ap->a_ncookies != NULL) { 2454 if (error == 0) { 2455 ap->a_ncookies -= ncookies; 2456 } else { 2457 free(*ap->a_cookies, M_TEMP); 2458 *ap->a_ncookies = 0; 2459 *ap->a_cookies = NULL; 2460 } 2461 } 2462 if (error == 0 && ap->a_eofflag) 2463 *ap->a_eofflag = ip->i_size <= uio->uio_offset; 2464 return (error); 2465 } 2466 2467 /* 2468 * Return target name of a symbolic link 2469 */ 2470 static int 2471 ufs_readlink(ap) 2472 struct vop_readlink_args /* { 2473 struct vnode *a_vp; 2474 struct uio *a_uio; 2475 struct ucred *a_cred; 2476 } */ *ap; 2477 { 2478 struct vnode *vp = ap->a_vp; 2479 struct inode *ip = VTOI(vp); 2480 doff_t isize; 2481 2482 isize = ip->i_size; 2483 if (isize < VFSTOUFS(vp->v_mount)->um_maxsymlinklen) 2484 return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); 2485 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 2486 } 2487 2488 /* 2489 * Calculate the logical to physical mapping if not done already, 2490 * then call the device strategy routine. 2491 * 2492 * In order to be able to swap to a file, the ufs_bmaparray() operation may not 2493 * deadlock on memory. See ufs_bmap() for details. 2494 */ 2495 static int 2496 ufs_strategy(ap) 2497 struct vop_strategy_args /* { 2498 struct vnode *a_vp; 2499 struct buf *a_bp; 2500 } */ *ap; 2501 { 2502 struct buf *bp = ap->a_bp; 2503 struct vnode *vp = ap->a_vp; 2504 ufs2_daddr_t blkno; 2505 int error; 2506 2507 if (bp->b_blkno == bp->b_lblkno) { 2508 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); 2509 bp->b_blkno = blkno; 2510 if (error) { 2511 bp->b_error = error; 2512 bp->b_ioflags |= BIO_ERROR; 2513 bufdone(bp); 2514 return (0); 2515 } 2516 if ((long)bp->b_blkno == -1) 2517 vfs_bio_clrbuf(bp); 2518 } 2519 if ((long)bp->b_blkno == -1) { 2520 bufdone(bp); 2521 return (0); 2522 } 2523 bp->b_iooffset = dbtob(bp->b_blkno); 2524 BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); 2525 return (0); 2526 } 2527 2528 /* 2529 * Print out the contents of an inode. 2530 */ 2531 static int 2532 ufs_print(ap) 2533 struct vop_print_args /* { 2534 struct vnode *a_vp; 2535 } */ *ap; 2536 { 2537 struct vnode *vp = ap->a_vp; 2538 struct inode *ip = VTOI(vp); 2539 2540 printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink, 2541 ip->i_effnlink, (intmax_t)ip->i_size); 2542 if (I_IS_UFS2(ip)) 2543 printf(", extsize %d", ip->i_din2->di_extsize); 2544 printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n", 2545 (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid, 2546 (u_int)ip->i_flags, PRINT_INODE_FLAGS); 2547 printf("\tino %lu, on dev %s", (u_long)ip->i_number, 2548 devtoname(ITODEV(ip))); 2549 if (vp->v_type == VFIFO) 2550 fifo_printinfo(vp); 2551 printf("\n"); 2552 return (0); 2553 } 2554 2555 /* 2556 * Close wrapper for fifos. 2557 * 2558 * Update the times on the inode then do device close. 2559 */ 2560 static int 2561 ufsfifo_close(ap) 2562 struct vop_close_args /* { 2563 struct vnode *a_vp; 2564 int a_fflag; 2565 struct ucred *a_cred; 2566 struct thread *a_td; 2567 } */ *ap; 2568 { 2569 struct vnode *vp = ap->a_vp; 2570 int usecount; 2571 2572 VI_LOCK(vp); 2573 usecount = vp->v_usecount; 2574 if (usecount > 1) 2575 ufs_itimes_locked(vp); 2576 VI_UNLOCK(vp); 2577 return (fifo_specops.vop_close(ap)); 2578 } 2579 2580 /* 2581 * Return POSIX pathconf information applicable to ufs filesystems. 2582 */ 2583 static int 2584 ufs_pathconf(ap) 2585 struct vop_pathconf_args /* { 2586 struct vnode *a_vp; 2587 int a_name; 2588 int *a_retval; 2589 } */ *ap; 2590 { 2591 int error; 2592 2593 error = 0; 2594 switch (ap->a_name) { 2595 case _PC_LINK_MAX: 2596 *ap->a_retval = UFS_LINK_MAX; 2597 break; 2598 case _PC_NAME_MAX: 2599 *ap->a_retval = UFS_MAXNAMLEN; 2600 break; 2601 case _PC_PIPE_BUF: 2602 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 2603 *ap->a_retval = PIPE_BUF; 2604 else 2605 error = EINVAL; 2606 break; 2607 case _PC_CHOWN_RESTRICTED: 2608 *ap->a_retval = 1; 2609 break; 2610 case _PC_NO_TRUNC: 2611 *ap->a_retval = 1; 2612 break; 2613 #ifdef UFS_ACL 2614 case _PC_ACL_EXTENDED: 2615 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 2616 *ap->a_retval = 1; 2617 else 2618 *ap->a_retval = 0; 2619 break; 2620 case _PC_ACL_NFS4: 2621 if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) 2622 *ap->a_retval = 1; 2623 else 2624 *ap->a_retval = 0; 2625 break; 2626 #endif 2627 case _PC_ACL_PATH_MAX: 2628 #ifdef UFS_ACL 2629 if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) 2630 *ap->a_retval = ACL_MAX_ENTRIES; 2631 else 2632 *ap->a_retval = 3; 2633 #else 2634 *ap->a_retval = 3; 2635 #endif 2636 break; 2637 #ifdef MAC 2638 case _PC_MAC_PRESENT: 2639 if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) 2640 *ap->a_retval = 1; 2641 else 2642 *ap->a_retval = 0; 2643 break; 2644 #endif 2645 case _PC_MIN_HOLE_SIZE: 2646 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2647 break; 2648 case _PC_PRIO_IO: 2649 *ap->a_retval = 0; 2650 break; 2651 case _PC_SYNC_IO: 2652 *ap->a_retval = 0; 2653 break; 2654 case _PC_ALLOC_SIZE_MIN: 2655 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 2656 break; 2657 case _PC_FILESIZEBITS: 2658 *ap->a_retval = 64; 2659 break; 2660 case _PC_REC_INCR_XFER_SIZE: 2661 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2662 break; 2663 case _PC_REC_MAX_XFER_SIZE: 2664 *ap->a_retval = -1; /* means ``unlimited'' */ 2665 break; 2666 case _PC_REC_MIN_XFER_SIZE: 2667 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2668 break; 2669 case _PC_REC_XFER_ALIGN: 2670 *ap->a_retval = PAGE_SIZE; 2671 break; 2672 case _PC_SYMLINK_MAX: 2673 *ap->a_retval = MAXPATHLEN; 2674 break; 2675 2676 default: 2677 error = vop_stdpathconf(ap); 2678 break; 2679 } 2680 return (error); 2681 } 2682 2683 /* 2684 * Initialize the vnode associated with a new inode, handle aliased 2685 * vnodes. 2686 */ 2687 int 2688 ufs_vinit(mntp, fifoops, vpp) 2689 struct mount *mntp; 2690 struct vop_vector *fifoops; 2691 struct vnode **vpp; 2692 { 2693 struct inode *ip; 2694 struct vnode *vp; 2695 2696 vp = *vpp; 2697 ASSERT_VOP_LOCKED(vp, "ufs_vinit"); 2698 ip = VTOI(vp); 2699 vp->v_type = IFTOVT(ip->i_mode); 2700 /* 2701 * Only unallocated inodes should be of type VNON. 2702 */ 2703 if (ip->i_mode != 0 && vp->v_type == VNON) 2704 return (EINVAL); 2705 if (vp->v_type == VFIFO) 2706 vp->v_op = fifoops; 2707 if (ip->i_number == UFS_ROOTINO) 2708 vp->v_vflag |= VV_ROOT; 2709 *vpp = vp; 2710 return (0); 2711 } 2712 2713 /* 2714 * Allocate a new inode. 2715 * Vnode dvp must be locked. 2716 */ 2717 static int 2718 ufs_makeinode(mode, dvp, vpp, cnp, callfunc) 2719 int mode; 2720 struct vnode *dvp; 2721 struct vnode **vpp; 2722 struct componentname *cnp; 2723 const char *callfunc; 2724 { 2725 struct inode *ip, *pdir; 2726 struct direct newdir; 2727 struct vnode *tvp; 2728 int error; 2729 2730 pdir = VTOI(dvp); 2731 #ifdef INVARIANTS 2732 if ((cnp->cn_flags & HASBUF) == 0) 2733 panic("%s: no name", callfunc); 2734 #endif 2735 *vpp = NULL; 2736 if ((mode & IFMT) == 0) 2737 mode |= IFREG; 2738 2739 if (pdir->i_effnlink < 2) { 2740 print_bad_link_count(callfunc, dvp); 2741 return (EINVAL); 2742 } 2743 if (DOINGSUJ(dvp)) { 2744 error = softdep_prelink(dvp, NULL, cnp); 2745 if (error != 0) { 2746 MPASS(error == ERELOOKUP); 2747 return (error); 2748 } 2749 } 2750 error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); 2751 if (error) 2752 return (error); 2753 ip = VTOI(tvp); 2754 ip->i_gid = pdir->i_gid; 2755 DIP_SET(ip, i_gid, pdir->i_gid); 2756 #ifdef SUIDDIR 2757 { 2758 #ifdef QUOTA 2759 struct ucred ucred, *ucp; 2760 gid_t ucred_group; 2761 ucp = cnp->cn_cred; 2762 #endif 2763 /* 2764 * If we are not the owner of the directory, 2765 * and we are hacking owners here, (only do this where told to) 2766 * and we are not giving it TO root, (would subvert quotas) 2767 * then go ahead and give it to the other user. 2768 * Note that this drops off the execute bits for security. 2769 */ 2770 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 2771 (pdir->i_mode & ISUID) && 2772 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 2773 ip->i_uid = pdir->i_uid; 2774 DIP_SET(ip, i_uid, ip->i_uid); 2775 mode &= ~07111; 2776 #ifdef QUOTA 2777 /* 2778 * Make sure the correct user gets charged 2779 * for the space. 2780 * Quickly knock up a dummy credential for the victim. 2781 * XXX This seems to never be accessed out of our 2782 * context so a stack variable is ok. 2783 */ 2784 refcount_init(&ucred.cr_ref, 1); 2785 ucred.cr_uid = ip->i_uid; 2786 ucred.cr_ngroups = 1; 2787 ucred.cr_groups = &ucred_group; 2788 ucred.cr_groups[0] = pdir->i_gid; 2789 ucp = &ucred; 2790 #endif 2791 } else { 2792 ip->i_uid = cnp->cn_cred->cr_uid; 2793 DIP_SET(ip, i_uid, ip->i_uid); 2794 } 2795 2796 #ifdef QUOTA 2797 if ((error = getinoquota(ip)) || 2798 (error = chkiq(ip, 1, ucp, 0))) { 2799 if (DOINGSOFTDEP(tvp)) 2800 softdep_revert_link(pdir, ip); 2801 UFS_VFREE(tvp, ip->i_number, mode); 2802 vgone(tvp); 2803 vput(tvp); 2804 return (error); 2805 } 2806 #endif 2807 } 2808 #else /* !SUIDDIR */ 2809 ip->i_uid = cnp->cn_cred->cr_uid; 2810 DIP_SET(ip, i_uid, ip->i_uid); 2811 #ifdef QUOTA 2812 if ((error = getinoquota(ip)) || 2813 (error = chkiq(ip, 1, cnp->cn_cred, 0))) { 2814 if (DOINGSOFTDEP(tvp)) 2815 softdep_revert_link(pdir, ip); 2816 UFS_VFREE(tvp, ip->i_number, mode); 2817 vgone(tvp); 2818 vput(tvp); 2819 return (error); 2820 } 2821 #endif 2822 #endif /* !SUIDDIR */ 2823 vn_seqc_write_begin(tvp); /* Mostly to cover asserts */ 2824 UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); 2825 UFS_INODE_SET_MODE(ip, mode); 2826 DIP_SET(ip, i_mode, mode); 2827 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 2828 ip->i_effnlink = 1; 2829 ip->i_nlink = 1; 2830 DIP_SET(ip, i_nlink, 1); 2831 if (DOINGSOFTDEP(tvp)) 2832 softdep_setup_create(VTOI(dvp), ip); 2833 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && 2834 priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) { 2835 UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID); 2836 DIP_SET(ip, i_mode, ip->i_mode); 2837 } 2838 2839 if (cnp->cn_flags & ISWHITEOUT) { 2840 ip->i_flags |= UF_OPAQUE; 2841 DIP_SET(ip, i_flags, ip->i_flags); 2842 } 2843 2844 /* 2845 * Make sure inode goes to disk before directory entry. 2846 */ 2847 error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp)); 2848 if (error) 2849 goto bad; 2850 #ifdef MAC 2851 if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { 2852 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, 2853 dvp, tvp, cnp); 2854 if (error) 2855 goto bad; 2856 } 2857 #endif 2858 #ifdef UFS_ACL 2859 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 2860 error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, 2861 cnp->cn_cred, curthread); 2862 if (error) 2863 goto bad; 2864 } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { 2865 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, 2866 cnp->cn_cred, curthread); 2867 if (error) 2868 goto bad; 2869 } 2870 #endif /* !UFS_ACL */ 2871 ufs_makedirentry(ip, cnp, &newdir); 2872 error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); 2873 if (error) 2874 goto bad; 2875 vn_seqc_write_end(tvp); 2876 *vpp = tvp; 2877 return (0); 2878 2879 bad: 2880 /* 2881 * Write error occurred trying to update the inode 2882 * or the directory so must deallocate the inode. 2883 */ 2884 ip->i_effnlink = 0; 2885 ip->i_nlink = 0; 2886 DIP_SET(ip, i_nlink, 0); 2887 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 2888 if (DOINGSOFTDEP(tvp)) 2889 softdep_revert_create(VTOI(dvp), ip); 2890 vn_seqc_write_end(tvp); 2891 vgone(tvp); 2892 vput(tvp); 2893 return (error); 2894 } 2895 2896 static int 2897 ufs_ioctl(struct vop_ioctl_args *ap) 2898 { 2899 struct vnode *vp; 2900 int error; 2901 2902 vp = ap->a_vp; 2903 switch (ap->a_command) { 2904 case FIOSEEKDATA: 2905 error = vn_lock(vp, LK_SHARED); 2906 if (error == 0) { 2907 error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data); 2908 VOP_UNLOCK(vp); 2909 } else 2910 error = EBADF; 2911 return (error); 2912 case FIOSEEKHOLE: 2913 return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data, 2914 ap->a_cred)); 2915 default: 2916 return (ENOTTY); 2917 } 2918 } 2919 2920 static int 2921 ufs_read_pgcache(struct vop_read_pgcache_args *ap) 2922 { 2923 struct uio *uio; 2924 struct vnode *vp; 2925 2926 uio = ap->a_uio; 2927 vp = ap->a_vp; 2928 VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp); 2929 2930 if (uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 || 2931 (ap->a_ioflag & IO_DIRECT) != 0) 2932 return (EJUSTRETURN); 2933 return (vn_read_from_obj(vp, uio)); 2934 } 2935 2936 /* Global vfs data structures for ufs. */ 2937 struct vop_vector ufs_vnodeops = { 2938 .vop_default = &default_vnodeops, 2939 .vop_fsync = VOP_PANIC, 2940 .vop_read = VOP_PANIC, 2941 .vop_reallocblks = VOP_PANIC, 2942 .vop_write = VOP_PANIC, 2943 .vop_accessx = ufs_accessx, 2944 .vop_bmap = ufs_bmap, 2945 .vop_fplookup_vexec = ufs_fplookup_vexec, 2946 .vop_fplookup_symlink = VOP_EAGAIN, 2947 .vop_cachedlookup = ufs_lookup, 2948 .vop_close = ufs_close, 2949 .vop_create = ufs_create, 2950 .vop_stat = ufs_stat, 2951 .vop_getattr = ufs_getattr, 2952 .vop_inactive = ufs_inactive, 2953 .vop_ioctl = ufs_ioctl, 2954 .vop_link = ufs_link, 2955 .vop_lookup = vfs_cache_lookup, 2956 .vop_mmapped = ufs_mmapped, 2957 .vop_mkdir = ufs_mkdir, 2958 .vop_mknod = ufs_mknod, 2959 .vop_need_inactive = ufs_need_inactive, 2960 .vop_open = ufs_open, 2961 .vop_pathconf = ufs_pathconf, 2962 .vop_poll = vop_stdpoll, 2963 .vop_print = ufs_print, 2964 .vop_read_pgcache = ufs_read_pgcache, 2965 .vop_readdir = ufs_readdir, 2966 .vop_readlink = ufs_readlink, 2967 .vop_reclaim = ufs_reclaim, 2968 .vop_remove = ufs_remove, 2969 .vop_rename = ufs_rename, 2970 .vop_rmdir = ufs_rmdir, 2971 .vop_setattr = ufs_setattr, 2972 #ifdef MAC 2973 .vop_setlabel = vop_stdsetlabel_ea, 2974 #endif 2975 .vop_strategy = ufs_strategy, 2976 .vop_symlink = ufs_symlink, 2977 .vop_whiteout = ufs_whiteout, 2978 #ifdef UFS_EXTATTR 2979 .vop_getextattr = ufs_getextattr, 2980 .vop_deleteextattr = ufs_deleteextattr, 2981 .vop_setextattr = ufs_setextattr, 2982 #endif 2983 #ifdef UFS_ACL 2984 .vop_getacl = ufs_getacl, 2985 .vop_setacl = ufs_setacl, 2986 .vop_aclcheck = ufs_aclcheck, 2987 #endif 2988 }; 2989 VFS_VOP_VECTOR_REGISTER(ufs_vnodeops); 2990 2991 struct vop_vector ufs_fifoops = { 2992 .vop_default = &fifo_specops, 2993 .vop_fsync = VOP_PANIC, 2994 .vop_accessx = ufs_accessx, 2995 .vop_close = ufsfifo_close, 2996 .vop_getattr = ufs_getattr, 2997 .vop_inactive = ufs_inactive, 2998 .vop_pathconf = ufs_pathconf, 2999 .vop_print = ufs_print, 3000 .vop_read = VOP_PANIC, 3001 .vop_reclaim = ufs_reclaim, 3002 .vop_setattr = ufs_setattr, 3003 #ifdef MAC 3004 .vop_setlabel = vop_stdsetlabel_ea, 3005 #endif 3006 .vop_write = VOP_PANIC, 3007 #ifdef UFS_EXTATTR 3008 .vop_getextattr = ufs_getextattr, 3009 .vop_deleteextattr = ufs_deleteextattr, 3010 .vop_setextattr = ufs_setextattr, 3011 #endif 3012 #ifdef UFS_ACL 3013 .vop_getacl = ufs_getacl, 3014 .vop_setacl = ufs_setacl, 3015 .vop_aclcheck = ufs_aclcheck, 3016 #endif 3017 }; 3018 VFS_VOP_VECTOR_REGISTER(ufs_fifoops); 3019