1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1989, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_quota.h" 43 #include "opt_suiddir.h" 44 #include "opt_ufs.h" 45 #include "opt_ffs.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/namei.h> 51 #include <sys/kernel.h> 52 #include <sys/fcntl.h> 53 #include <sys/filio.h> 54 #include <sys/stat.h> 55 #include <sys/bio.h> 56 #include <sys/buf.h> 57 #include <sys/mount.h> 58 #include <sys/priv.h> 59 #include <sys/refcount.h> 60 #include <sys/unistd.h> 61 #include <sys/vnode.h> 62 #include <sys/dirent.h> 63 #include <sys/lockf.h> 64 #include <sys/conf.h> 65 #include <sys/acl.h> 66 #include <sys/smr.h> 67 68 #include <security/audit/audit.h> 69 #include <security/mac/mac_framework.h> 70 71 #include <sys/file.h> /* XXX */ 72 73 #include <vm/vm.h> 74 #include <vm/vm_extern.h> 75 76 #include <ufs/ufs/acl.h> 77 #include <ufs/ufs/extattr.h> 78 #include <ufs/ufs/quota.h> 79 #include <ufs/ufs/inode.h> 80 #include <ufs/ufs/dir.h> 81 #include <ufs/ufs/ufsmount.h> 82 #include <ufs/ufs/ufs_extern.h> 83 #ifdef UFS_DIRHASH 84 #include <ufs/ufs/dirhash.h> 85 #endif 86 #ifdef UFS_GJOURNAL 87 #include <ufs/ufs/gjournal.h> 88 FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); 89 #endif 90 91 #ifdef QUOTA 92 FEATURE(ufs_quota, "UFS disk quotas support"); 93 FEATURE(ufs_quota64, "64bit UFS disk quotas support"); 94 
#endif 95 96 #ifdef SUIDDIR 97 FEATURE(suiddir, 98 "Give all new files in directory the same ownership as the directory"); 99 #endif 100 101 VFS_SMR_DECLARE; 102 103 #include <ufs/ffs/ffs_extern.h> 104 105 static vop_accessx_t ufs_accessx; 106 static vop_fplookup_vexec_t ufs_fplookup_vexec; 107 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); 108 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, 109 struct thread *); 110 static vop_close_t ufs_close; 111 static vop_create_t ufs_create; 112 static vop_stat_t ufs_stat; 113 static vop_getattr_t ufs_getattr; 114 static vop_ioctl_t ufs_ioctl; 115 static vop_link_t ufs_link; 116 static int ufs_makeinode(int mode, struct vnode *, struct vnode **, 117 struct componentname *, const char *); 118 static vop_mmapped_t ufs_mmapped; 119 static vop_mkdir_t ufs_mkdir; 120 static vop_mknod_t ufs_mknod; 121 static vop_open_t ufs_open; 122 static vop_pathconf_t ufs_pathconf; 123 static vop_print_t ufs_print; 124 static vop_readlink_t ufs_readlink; 125 static vop_remove_t ufs_remove; 126 static vop_rename_t ufs_rename; 127 static vop_rmdir_t ufs_rmdir; 128 static vop_setattr_t ufs_setattr; 129 static vop_strategy_t ufs_strategy; 130 static vop_symlink_t ufs_symlink; 131 static vop_whiteout_t ufs_whiteout; 132 static vop_close_t ufsfifo_close; 133 134 SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 135 "UFS filesystem"); 136 137 /* 138 * A virgin directory (no blushing please). 139 */ 140 static struct dirtemplate mastertemplate = { 141 0, 12, DT_DIR, 1, ".", 142 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." 143 }; 144 static struct odirtemplate omastertemplate = { 145 0, 12, 1, ".", 146 0, DIRBLKSIZ - 12, 2, ".." 
147 }; 148 149 static void 150 ufs_itimes_locked(struct vnode *vp) 151 { 152 struct inode *ip; 153 struct timespec ts; 154 155 ASSERT_VI_LOCKED(vp, __func__); 156 157 ip = VTOI(vp); 158 if (UFS_RDONLY(ip)) 159 goto out; 160 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 161 return; 162 163 if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) 164 UFS_INODE_SET_FLAG(ip, IN_LAZYMOD); 165 else if (((vp->v_mount->mnt_kern_flag & 166 (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || 167 (ip->i_flag & (IN_CHANGE | IN_UPDATE))) 168 UFS_INODE_SET_FLAG(ip, IN_MODIFIED); 169 else if (ip->i_flag & IN_ACCESS) 170 UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS); 171 vfs_timestamp(&ts); 172 if (ip->i_flag & IN_ACCESS) { 173 DIP_SET(ip, i_atime, ts.tv_sec); 174 DIP_SET(ip, i_atimensec, ts.tv_nsec); 175 } 176 if (ip->i_flag & IN_UPDATE) { 177 DIP_SET(ip, i_mtime, ts.tv_sec); 178 DIP_SET(ip, i_mtimensec, ts.tv_nsec); 179 } 180 if (ip->i_flag & IN_CHANGE) { 181 DIP_SET(ip, i_ctime, ts.tv_sec); 182 DIP_SET(ip, i_ctimensec, ts.tv_nsec); 183 DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); 184 } 185 186 out: 187 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 188 } 189 190 void 191 ufs_itimes(struct vnode *vp) 192 { 193 194 VI_LOCK(vp); 195 ufs_itimes_locked(vp); 196 VI_UNLOCK(vp); 197 } 198 199 static int 200 ufs_sync_nlink(struct vnode *vp, struct vnode *vp1) 201 { 202 struct inode *ip; 203 struct mount *mp; 204 int error; 205 206 ip = VTOI(vp); 207 if (ip->i_nlink < UFS_LINK_MAX) 208 return (0); 209 if (!DOINGSOFTDEP(vp) || ip->i_effnlink >= UFS_LINK_MAX) 210 return (EMLINK); 211 212 mp = vp->v_mount; 213 vfs_ref(mp); 214 VOP_UNLOCK(vp); 215 if (vp1 != NULL) 216 VOP_UNLOCK(vp1); 217 error = vfs_busy(mp, 0); 218 if (error == 0) { 219 VFS_SYNC(mp, MNT_WAIT); 220 vfs_unbusy(mp); 221 error = ERELOOKUP; 222 } 223 vfs_rel(mp); 224 vn_lock_pair(vp, false, vp1, false); 225 return (error); 226 } 227 228 /* 229 * Create a regular file 230 */ 231 static int 232 ufs_create(ap) 233 
struct vop_create_args /* { 234 struct vnode *a_dvp; 235 struct vnode **a_vpp; 236 struct componentname *a_cnp; 237 struct vattr *a_vap; 238 } */ *ap; 239 { 240 int error; 241 242 error = 243 ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 244 ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); 245 if (error != 0) 246 return (error); 247 if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) 248 cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); 249 return (0); 250 } 251 252 /* 253 * Mknod vnode call 254 */ 255 /* ARGSUSED */ 256 static int 257 ufs_mknod(ap) 258 struct vop_mknod_args /* { 259 struct vnode *a_dvp; 260 struct vnode **a_vpp; 261 struct componentname *a_cnp; 262 struct vattr *a_vap; 263 } */ *ap; 264 { 265 struct vattr *vap = ap->a_vap; 266 struct vnode **vpp = ap->a_vpp; 267 struct inode *ip; 268 ino_t ino; 269 int error; 270 271 error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 272 ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); 273 if (error) 274 return (error); 275 ip = VTOI(*vpp); 276 UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); 277 if (vap->va_rdev != VNOVAL) { 278 /* 279 * Want to be able to use this to make badblock 280 * inodes, so don't truncate the dev number. 281 */ 282 DIP_SET(ip, i_rdev, vap->va_rdev); 283 } 284 /* 285 * Remove inode, then reload it through VFS_VGET(). This is 286 * needed to do further inode initialization, for instance 287 * fifo, which was too early for VFS_VGET() done as part of 288 * UFS_VALLOC(). 289 */ 290 (*vpp)->v_type = VNON; 291 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 292 vgone(*vpp); 293 vput(*vpp); 294 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 295 if (error) { 296 *vpp = NULL; 297 return (error); 298 } 299 return (0); 300 } 301 302 /* 303 * Open called. 
304 */ 305 /* ARGSUSED */ 306 static int 307 ufs_open(struct vop_open_args *ap) 308 { 309 struct vnode *vp = ap->a_vp; 310 struct inode *ip; 311 312 if (vp->v_type == VCHR || vp->v_type == VBLK) 313 return (EOPNOTSUPP); 314 315 ip = VTOI(vp); 316 vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); 317 if (vp->v_type == VREG && (vn_irflag_read(vp) & VIRF_PGREAD) == 0 && 318 ip->i_ump->um_bsize >= PAGE_SIZE) { 319 vn_irflag_set_cond(vp, VIRF_PGREAD); 320 } 321 322 /* 323 * Files marked append-only must be opened for appending. 324 */ 325 if ((ip->i_flags & APPEND) && 326 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 327 return (EPERM); 328 329 return (0); 330 } 331 332 /* 333 * Close called. 334 * 335 * Update the times on the inode. 336 */ 337 /* ARGSUSED */ 338 static int 339 ufs_close(ap) 340 struct vop_close_args /* { 341 struct vnode *a_vp; 342 int a_fflag; 343 struct ucred *a_cred; 344 struct thread *a_td; 345 } */ *ap; 346 { 347 struct vnode *vp = ap->a_vp; 348 int usecount; 349 350 VI_LOCK(vp); 351 usecount = vp->v_usecount; 352 if (usecount > 1) 353 ufs_itimes_locked(vp); 354 VI_UNLOCK(vp); 355 return (0); 356 } 357 358 static int 359 ufs_accessx(ap) 360 struct vop_accessx_args /* { 361 struct vnode *a_vp; 362 accmode_t a_accmode; 363 struct ucred *a_cred; 364 struct thread *a_td; 365 } */ *ap; 366 { 367 struct vnode *vp = ap->a_vp; 368 struct inode *ip = VTOI(vp); 369 accmode_t accmode = ap->a_accmode; 370 int error; 371 #ifdef UFS_ACL 372 struct acl *acl; 373 acl_type_t type; 374 #endif 375 376 /* 377 * Disallow write attempts on read-only filesystems; 378 * unless the file is a socket, fifo, or a block or 379 * character device resident on the filesystem. 380 */ 381 if (accmode & VMODIFY_PERMS) { 382 switch (vp->v_type) { 383 case VDIR: 384 case VLNK: 385 case VREG: 386 if (vp->v_mount->mnt_flag & MNT_RDONLY) 387 return (EROFS); 388 #ifdef QUOTA 389 /* 390 * Inode is accounted in the quotas only if struct 391 * dquot is attached to it. 
VOP_ACCESS() is called 392 * from vn_open_cred() and provides a convenient 393 * point to call getinoquota(). The lock mode is 394 * exclusive when the file is opening for write. 395 */ 396 if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 397 error = getinoquota(ip); 398 if (error != 0) 399 return (error); 400 } 401 #endif 402 break; 403 default: 404 break; 405 } 406 } 407 408 /* 409 * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" 410 * permits the owner of the file to remove the IMMUTABLE flag. 411 */ 412 if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && 413 (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) 414 return (EPERM); 415 416 #ifdef UFS_ACL 417 if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { 418 if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) 419 type = ACL_TYPE_NFS4; 420 else 421 type = ACL_TYPE_ACCESS; 422 423 acl = acl_alloc(M_WAITOK); 424 if (type == ACL_TYPE_NFS4) 425 error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); 426 else 427 error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); 428 switch (error) { 429 case 0: 430 if (type == ACL_TYPE_NFS4) { 431 error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, 432 ip->i_gid, acl, accmode, ap->a_cred); 433 } else { 434 error = vfs_unixify_accmode(&accmode); 435 if (error == 0) 436 error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, 437 ip->i_gid, acl, accmode, ap->a_cred); 438 } 439 break; 440 default: 441 if (error != EOPNOTSUPP) 442 printf( 443 "ufs_accessx(): Error retrieving ACL on object (%d).\n", 444 error); 445 /* 446 * XXX: Fall back until debugged. Should 447 * eventually possibly log an error, and return 448 * EPERM for safety. 
449 */ 450 error = vfs_unixify_accmode(&accmode); 451 if (error == 0) 452 error = vaccess(vp->v_type, ip->i_mode, 453 ip->i_uid, ip->i_gid, accmode, ap->a_cred); 454 } 455 acl_free(acl); 456 457 return (error); 458 } 459 #endif /* !UFS_ACL */ 460 error = vfs_unixify_accmode(&accmode); 461 if (error == 0) 462 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 463 accmode, ap->a_cred); 464 return (error); 465 } 466 467 /* 468 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see 469 * the comment above cache_fplookup for details. 470 */ 471 static int 472 ufs_fplookup_vexec(ap) 473 struct vop_fplookup_vexec_args /* { 474 struct vnode *a_vp; 475 struct ucred *a_cred; 476 struct thread *a_td; 477 } */ *ap; 478 { 479 struct vnode *vp; 480 struct inode *ip; 481 struct ucred *cred; 482 mode_t all_x, mode; 483 484 vp = ap->a_vp; 485 ip = VTOI_SMR(vp); 486 if (__predict_false(ip == NULL)) 487 return (EAGAIN); 488 489 /* 490 * XXX ACL race 491 * 492 * ACLs are not supported and UFS clears/sets this flag on mount and 493 * remount. However, we may still be racing with seeing them and there 494 * is no provision to make sure they were accounted for. This matches 495 * the behavior of the locked case, since the lookup there is also 496 * racy: mount takes no measures to block anyone from progressing. 
497 */ 498 all_x = S_IXUSR | S_IXGRP | S_IXOTH; 499 mode = atomic_load_short(&ip->i_mode); 500 if (__predict_true((mode & all_x) == all_x)) 501 return (0); 502 503 cred = ap->a_cred; 504 return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred)); 505 } 506 507 /* ARGSUSED */ 508 static int 509 ufs_stat(struct vop_stat_args *ap) 510 { 511 struct vnode *vp = ap->a_vp; 512 struct inode *ip = VTOI(vp); 513 struct stat *sb = ap->a_sb; 514 int error; 515 516 error = vop_stat_helper_pre(ap); 517 if (__predict_false(error)) 518 return (error); 519 520 VI_LOCK(vp); 521 ufs_itimes_locked(vp); 522 if (I_IS_UFS1(ip)) { 523 sb->st_atim.tv_sec = ip->i_din1->di_atime; 524 sb->st_atim.tv_nsec = ip->i_din1->di_atimensec; 525 } else { 526 sb->st_atim.tv_sec = ip->i_din2->di_atime; 527 sb->st_atim.tv_nsec = ip->i_din2->di_atimensec; 528 } 529 VI_UNLOCK(vp); 530 531 sb->st_dev = dev2udev(ITOUMP(ip)->um_dev); 532 sb->st_ino = ip->i_number; 533 sb->st_mode = (ip->i_mode & ~IFMT) | VTTOIF(vp->v_type); 534 sb->st_nlink = ip->i_effnlink; 535 sb->st_uid = ip->i_uid; 536 sb->st_gid = ip->i_gid; 537 if (I_IS_UFS1(ip)) { 538 sb->st_rdev = ip->i_din1->di_rdev; 539 sb->st_size = ip->i_din1->di_size; 540 sb->st_mtim.tv_sec = ip->i_din1->di_mtime; 541 sb->st_mtim.tv_nsec = ip->i_din1->di_mtimensec; 542 sb->st_ctim.tv_sec = ip->i_din1->di_ctime; 543 sb->st_ctim.tv_nsec = ip->i_din1->di_ctimensec; 544 sb->st_birthtim.tv_sec = -1; 545 sb->st_birthtim.tv_nsec = 0; 546 sb->st_blocks = dbtob((u_quad_t)ip->i_din1->di_blocks) / S_BLKSIZE; 547 } else { 548 sb->st_rdev = ip->i_din2->di_rdev; 549 sb->st_size = ip->i_din2->di_size; 550 sb->st_mtim.tv_sec = ip->i_din2->di_mtime; 551 sb->st_mtim.tv_nsec = ip->i_din2->di_mtimensec; 552 sb->st_ctim.tv_sec = ip->i_din2->di_ctime; 553 sb->st_ctim.tv_nsec = ip->i_din2->di_ctimensec; 554 sb->st_birthtim.tv_sec = ip->i_din2->di_birthtime; 555 sb->st_birthtim.tv_nsec = ip->i_din2->di_birthnsec; 556 sb->st_blocks = dbtob((u_quad_t)ip->i_din2->di_blocks) / S_BLKSIZE; 
557 } 558 559 sb->st_blksize = max(PAGE_SIZE, vp->v_mount->mnt_stat.f_iosize); 560 sb->st_flags = ip->i_flags; 561 sb->st_gen = ip->i_gen; 562 563 return (vop_stat_helper_post(ap, error)); 564 } 565 566 /* ARGSUSED */ 567 static int 568 ufs_getattr(ap) 569 struct vop_getattr_args /* { 570 struct vnode *a_vp; 571 struct vattr *a_vap; 572 struct ucred *a_cred; 573 } */ *ap; 574 { 575 struct vnode *vp = ap->a_vp; 576 struct inode *ip = VTOI(vp); 577 struct vattr *vap = ap->a_vap; 578 579 VI_LOCK(vp); 580 ufs_itimes_locked(vp); 581 if (I_IS_UFS1(ip)) { 582 vap->va_atime.tv_sec = ip->i_din1->di_atime; 583 vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; 584 } else { 585 vap->va_atime.tv_sec = ip->i_din2->di_atime; 586 vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; 587 } 588 VI_UNLOCK(vp); 589 /* 590 * Copy from inode table 591 */ 592 vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); 593 vap->va_fileid = ip->i_number; 594 vap->va_mode = ip->i_mode & ~IFMT; 595 vap->va_nlink = ip->i_effnlink; 596 vap->va_uid = ip->i_uid; 597 vap->va_gid = ip->i_gid; 598 if (I_IS_UFS1(ip)) { 599 vap->va_rdev = ip->i_din1->di_rdev; 600 vap->va_size = ip->i_din1->di_size; 601 vap->va_mtime.tv_sec = ip->i_din1->di_mtime; 602 vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; 603 vap->va_ctime.tv_sec = ip->i_din1->di_ctime; 604 vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; 605 vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); 606 vap->va_filerev = ip->i_din1->di_modrev; 607 } else { 608 vap->va_rdev = ip->i_din2->di_rdev; 609 vap->va_size = ip->i_din2->di_size; 610 vap->va_mtime.tv_sec = ip->i_din2->di_mtime; 611 vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; 612 vap->va_ctime.tv_sec = ip->i_din2->di_ctime; 613 vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; 614 vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; 615 vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; 616 vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); 617 vap->va_filerev = ip->i_din2->di_modrev; 618 } 
619 vap->va_flags = ip->i_flags; 620 vap->va_gen = ip->i_gen; 621 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 622 vap->va_type = IFTOVT(ip->i_mode); 623 return (0); 624 } 625 626 /* 627 * Set attribute vnode op. called from several syscalls 628 */ 629 static int 630 ufs_setattr(ap) 631 struct vop_setattr_args /* { 632 struct vnode *a_vp; 633 struct vattr *a_vap; 634 struct ucred *a_cred; 635 } */ *ap; 636 { 637 struct vattr *vap = ap->a_vap; 638 struct vnode *vp = ap->a_vp; 639 struct inode *ip = VTOI(vp); 640 struct ucred *cred = ap->a_cred; 641 struct thread *td = curthread; 642 int error; 643 644 /* 645 * Check for unsettable attributes. 646 */ 647 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 648 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 649 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 650 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 651 return (EINVAL); 652 } 653 if (vap->va_flags != VNOVAL) { 654 if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | 655 SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | 656 UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | 657 UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | 658 UF_SPARSE | UF_SYSTEM)) != 0) 659 return (EOPNOTSUPP); 660 if (vp->v_mount->mnt_flag & MNT_RDONLY) 661 return (EROFS); 662 /* 663 * Callers may only modify the file flags on objects they 664 * have VADMIN rights for. 665 */ 666 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 667 return (error); 668 /* 669 * Unprivileged processes are not permitted to unset system 670 * flags, or modify flags if any system flags are set. 671 * Privileged non-jail processes may not modify system flags 672 * if securelevel > 0 and any existing system flags are set. 673 * Privileged jail processes behave like privileged non-jail 674 * processes if the PR_ALLOW_CHFLAGS permission bit is set; 675 * otherwise, they behave like unprivileged processes. 
676 */ 677 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { 678 if (ip->i_flags & 679 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 680 error = securelevel_gt(cred, 0); 681 if (error) 682 return (error); 683 } 684 /* The snapshot flag cannot be toggled. */ 685 if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) 686 return (EPERM); 687 } else { 688 if (ip->i_flags & 689 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 690 ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) 691 return (EPERM); 692 } 693 ip->i_flags = vap->va_flags; 694 DIP_SET(ip, i_flags, vap->va_flags); 695 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 696 error = UFS_UPDATE(vp, 0); 697 if (ip->i_flags & (IMMUTABLE | APPEND)) 698 return (error); 699 } 700 /* 701 * If immutable or append, no one can change any of its attributes 702 * except the ones already handled (in some cases, file flags 703 * including the immutability flags themselves for the superuser). 704 */ 705 if (ip->i_flags & (IMMUTABLE | APPEND)) 706 return (EPERM); 707 /* 708 * Go through the fields and update iff not VNOVAL. 709 */ 710 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 711 if (vp->v_mount->mnt_flag & MNT_RDONLY) 712 return (EROFS); 713 if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, 714 td)) != 0) 715 return (error); 716 } 717 if (vap->va_size != VNOVAL) { 718 /* 719 * XXX most of the following special cases should be in 720 * callers instead of in N filesystems. The VDIR check 721 * mostly already is. 722 */ 723 switch (vp->v_type) { 724 case VDIR: 725 return (EISDIR); 726 case VLNK: 727 case VREG: 728 /* 729 * Truncation should have an effect in these cases. 730 * Disallow it if the filesystem is read-only or 731 * the file is being snapshotted. 
732 */ 733 if (vp->v_mount->mnt_flag & MNT_RDONLY) 734 return (EROFS); 735 if (IS_SNAPSHOT(ip)) 736 return (EPERM); 737 break; 738 default: 739 /* 740 * According to POSIX, the result is unspecified 741 * for file types other than regular files, 742 * directories and shared memory objects. We 743 * don't support shared memory objects in the file 744 * system, and have dubious support for truncating 745 * symlinks. Just ignore the request in other cases. 746 */ 747 return (0); 748 } 749 if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | 750 ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0), 751 cred)) != 0) 752 return (error); 753 } 754 if (vap->va_atime.tv_sec != VNOVAL || 755 vap->va_mtime.tv_sec != VNOVAL || 756 vap->va_birthtime.tv_sec != VNOVAL) { 757 if (vp->v_mount->mnt_flag & MNT_RDONLY) 758 return (EROFS); 759 if (IS_SNAPSHOT(ip)) 760 return (EPERM); 761 error = vn_utimes_perm(vp, vap, cred, td); 762 if (error != 0) 763 return (error); 764 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED); 765 if (vap->va_atime.tv_sec != VNOVAL) { 766 ip->i_flag &= ~IN_ACCESS; 767 DIP_SET(ip, i_atime, vap->va_atime.tv_sec); 768 DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); 769 } 770 if (vap->va_mtime.tv_sec != VNOVAL) { 771 ip->i_flag &= ~IN_UPDATE; 772 DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); 773 DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); 774 } 775 if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { 776 ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; 777 ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; 778 } 779 error = UFS_UPDATE(vp, 0); 780 if (error) 781 return (error); 782 } 783 error = 0; 784 if (vap->va_mode != (mode_t)VNOVAL) { 785 if (vp->v_mount->mnt_flag & MNT_RDONLY) 786 return (EROFS); 787 if (IS_SNAPSHOT(ip) && (vap->va_mode & (S_IXUSR | S_IWUSR | 788 S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)) != 0) 789 return (EPERM); 790 error = ufs_chmod(vp, (int)vap->va_mode, cred, td); 791 } 792 return (error); 793 } 794 795 #ifdef UFS_ACL 796 
static int 797 ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, 798 int file_owner_id, struct ucred *cred, struct thread *td) 799 { 800 int error; 801 struct acl *aclp; 802 803 aclp = acl_alloc(M_WAITOK); 804 error = ufs_getacl_nfs4_internal(vp, aclp, td); 805 /* 806 * We don't have to handle EOPNOTSUPP here, as the filesystem claims 807 * it supports ACLs. 808 */ 809 if (error) 810 goto out; 811 812 acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); 813 error = ufs_setacl_nfs4_internal(vp, aclp, td); 814 815 out: 816 acl_free(aclp); 817 return (error); 818 } 819 #endif /* UFS_ACL */ 820 821 static int 822 ufs_mmapped(ap) 823 struct vop_mmapped_args /* { 824 struct vnode *a_vp; 825 } */ *ap; 826 { 827 struct vnode *vp; 828 struct inode *ip; 829 struct mount *mp; 830 831 vp = ap->a_vp; 832 ip = VTOI(vp); 833 mp = vp->v_mount; 834 835 if ((mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) 836 UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS); 837 /* 838 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. 839 */ 840 return (0); 841 } 842 843 /* 844 * Change the mode on a file. 845 * Inode must be locked before calling. 846 */ 847 static int 848 ufs_chmod(vp, mode, cred, td) 849 struct vnode *vp; 850 int mode; 851 struct ucred *cred; 852 struct thread *td; 853 { 854 struct inode *ip = VTOI(vp); 855 int newmode, error; 856 857 /* 858 * To modify the permissions on a file, must possess VADMIN 859 * for that file. 860 */ 861 if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) 862 return (error); 863 /* 864 * Privileged processes may set the sticky bit on non-directories, 865 * as well as set the setgid bit on a file with a group that the 866 * process is not a member of. Both of these are allowed in 867 * jail(8). 
868 */ 869 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 870 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 871 return (EFTYPE); 872 } 873 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 874 error = priv_check_cred(cred, PRIV_VFS_SETGID); 875 if (error) 876 return (error); 877 } 878 879 /* 880 * Deny setting setuid if we are not the file owner. 881 */ 882 if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { 883 error = priv_check_cred(cred, PRIV_VFS_ADMIN); 884 if (error) 885 return (error); 886 } 887 888 newmode = ip->i_mode & ~ALLPERMS; 889 newmode |= (mode & ALLPERMS); 890 UFS_INODE_SET_MODE(ip, newmode); 891 DIP_SET(ip, i_mode, ip->i_mode); 892 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 893 #ifdef UFS_ACL 894 if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) 895 error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); 896 #endif 897 if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) 898 error = UFS_UPDATE(vp, 0); 899 900 return (error); 901 } 902 903 /* 904 * Perform chown operation on inode ip; 905 * inode must be locked prior to call. 906 */ 907 static int 908 ufs_chown(vp, uid, gid, cred, td) 909 struct vnode *vp; 910 uid_t uid; 911 gid_t gid; 912 struct ucred *cred; 913 struct thread *td; 914 { 915 struct inode *ip = VTOI(vp); 916 uid_t ouid; 917 gid_t ogid; 918 int error = 0; 919 #ifdef QUOTA 920 int i; 921 ufs2_daddr_t change; 922 #endif 923 924 if (uid == (uid_t)VNOVAL) 925 uid = ip->i_uid; 926 if (gid == (gid_t)VNOVAL) 927 gid = ip->i_gid; 928 /* 929 * To modify the ownership of a file, must possess VADMIN for that 930 * file. 931 */ 932 if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) 933 return (error); 934 /* 935 * To change the owner of a file, or change the group of a file to a 936 * group of which we are not a member, the caller must have 937 * privilege. 
938 */ 939 if (((uid != ip->i_uid && uid != cred->cr_uid) || 940 (gid != ip->i_gid && !groupmember(gid, cred))) && 941 (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) 942 return (error); 943 ogid = ip->i_gid; 944 ouid = ip->i_uid; 945 #ifdef QUOTA 946 if ((error = getinoquota(ip)) != 0) 947 return (error); 948 if (ouid == uid) { 949 dqrele(vp, ip->i_dquot[USRQUOTA]); 950 ip->i_dquot[USRQUOTA] = NODQUOT; 951 } 952 if (ogid == gid) { 953 dqrele(vp, ip->i_dquot[GRPQUOTA]); 954 ip->i_dquot[GRPQUOTA] = NODQUOT; 955 } 956 change = DIP(ip, i_blocks); 957 (void) chkdq(ip, -change, cred, CHOWN|FORCE); 958 (void) chkiq(ip, -1, cred, CHOWN|FORCE); 959 for (i = 0; i < MAXQUOTAS; i++) { 960 dqrele(vp, ip->i_dquot[i]); 961 ip->i_dquot[i] = NODQUOT; 962 } 963 #endif 964 ip->i_gid = gid; 965 DIP_SET(ip, i_gid, gid); 966 ip->i_uid = uid; 967 DIP_SET(ip, i_uid, uid); 968 #ifdef QUOTA 969 if ((error = getinoquota(ip)) == 0) { 970 if (ouid == uid) { 971 dqrele(vp, ip->i_dquot[USRQUOTA]); 972 ip->i_dquot[USRQUOTA] = NODQUOT; 973 } 974 if (ogid == gid) { 975 dqrele(vp, ip->i_dquot[GRPQUOTA]); 976 ip->i_dquot[GRPQUOTA] = NODQUOT; 977 } 978 if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { 979 if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) 980 goto good; 981 else 982 (void) chkdq(ip, -change, cred, CHOWN|FORCE); 983 } 984 for (i = 0; i < MAXQUOTAS; i++) { 985 dqrele(vp, ip->i_dquot[i]); 986 ip->i_dquot[i] = NODQUOT; 987 } 988 } 989 ip->i_gid = ogid; 990 DIP_SET(ip, i_gid, ogid); 991 ip->i_uid = ouid; 992 DIP_SET(ip, i_uid, ouid); 993 if (getinoquota(ip) == 0) { 994 if (ouid == uid) { 995 dqrele(vp, ip->i_dquot[USRQUOTA]); 996 ip->i_dquot[USRQUOTA] = NODQUOT; 997 } 998 if (ogid == gid) { 999 dqrele(vp, ip->i_dquot[GRPQUOTA]); 1000 ip->i_dquot[GRPQUOTA] = NODQUOT; 1001 } 1002 (void) chkdq(ip, change, cred, FORCE|CHOWN); 1003 (void) chkiq(ip, 1, cred, FORCE|CHOWN); 1004 (void) getinoquota(ip); 1005 } 1006 return (error); 1007 good: 1008 if (getinoquota(ip)) 1009 panic("ufs_chown: lost 
quota"); 1010 #endif /* QUOTA */ 1011 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 1012 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 1013 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { 1014 UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); 1015 DIP_SET(ip, i_mode, ip->i_mode); 1016 } 1017 } 1018 error = UFS_UPDATE(vp, 0); 1019 return (error); 1020 } 1021 1022 static int 1023 ufs_remove(ap) 1024 struct vop_remove_args /* { 1025 struct vnode *a_dvp; 1026 struct vnode *a_vp; 1027 struct componentname *a_cnp; 1028 } */ *ap; 1029 { 1030 struct inode *ip; 1031 struct vnode *vp = ap->a_vp; 1032 struct vnode *dvp = ap->a_dvp; 1033 int error; 1034 struct thread *td; 1035 1036 td = curthread; 1037 ip = VTOI(vp); 1038 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1039 (VTOI(dvp)->i_flags & APPEND)) 1040 return (EPERM); 1041 if (DOINGSUJ(dvp)) { 1042 error = softdep_prelink(dvp, vp, ap->a_cnp); 1043 if (error != 0) { 1044 MPASS(error == ERELOOKUP); 1045 return (error); 1046 } 1047 } 1048 1049 #ifdef UFS_GJOURNAL 1050 ufs_gjournal_orphan(vp); 1051 #endif 1052 error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); 1053 if (ip->i_nlink <= 0) 1054 vp->v_vflag |= VV_NOSYNC; 1055 if (IS_SNAPSHOT(ip)) { 1056 /* 1057 * Avoid deadlock where another thread is trying to 1058 * update the inodeblock for dvp and is waiting on 1059 * snaplk. Temporary unlock the vnode lock for the 1060 * unlinked file and sync the directory. This should 1061 * allow vput() of the directory to not block later on 1062 * while holding the snapshot vnode locked, assuming 1063 * that the directory hasn't been unlinked too. 
1064 */ 1065 VOP_UNLOCK(vp); 1066 (void) VOP_FSYNC(dvp, MNT_WAIT, td); 1067 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1068 } 1069 return (error); 1070 } 1071 1072 static void 1073 print_bad_link_count(const char *funcname, struct vnode *dvp) 1074 { 1075 struct inode *dip; 1076 1077 dip = VTOI(dvp); 1078 uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", 1079 funcname, dip->i_effnlink, (intmax_t)dip->i_number, 1080 dvp->v_mount->mnt_stat.f_mntonname); 1081 } 1082 1083 /* 1084 * link vnode call 1085 */ 1086 static int 1087 ufs_link(ap) 1088 struct vop_link_args /* { 1089 struct vnode *a_tdvp; 1090 struct vnode *a_vp; 1091 struct componentname *a_cnp; 1092 } */ *ap; 1093 { 1094 struct vnode *vp = ap->a_vp; 1095 struct vnode *tdvp = ap->a_tdvp; 1096 struct componentname *cnp = ap->a_cnp; 1097 struct inode *ip; 1098 struct direct newdir; 1099 int error; 1100 1101 #ifdef INVARIANTS 1102 if ((cnp->cn_flags & HASBUF) == 0) 1103 panic("ufs_link: no name"); 1104 #endif 1105 1106 if (DOINGSUJ(tdvp)) { 1107 error = softdep_prelink(tdvp, vp, cnp); 1108 if (error != 0) { 1109 MPASS(error == ERELOOKUP); 1110 return (error); 1111 } 1112 } 1113 1114 if (VTOI(tdvp)->i_effnlink < 2) { 1115 print_bad_link_count("ufs_link", tdvp); 1116 error = EINVAL; 1117 goto out; 1118 } 1119 error = ufs_sync_nlink(vp, tdvp); 1120 if (error != 0) 1121 goto out; 1122 ip = VTOI(vp); 1123 1124 /* 1125 * The file may have been removed after namei dropped the original 1126 * lock. 
1127 */ 1128 if (ip->i_effnlink == 0) { 1129 error = ENOENT; 1130 goto out; 1131 } 1132 if (ip->i_flags & (IMMUTABLE | APPEND)) { 1133 error = EPERM; 1134 goto out; 1135 } 1136 1137 ip->i_effnlink++; 1138 ip->i_nlink++; 1139 DIP_SET(ip, i_nlink, ip->i_nlink); 1140 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 1141 if (DOINGSOFTDEP(vp)) 1142 softdep_setup_link(VTOI(tdvp), ip); 1143 error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp)); 1144 if (!error) { 1145 ufs_makedirentry(ip, cnp, &newdir); 1146 error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); 1147 } 1148 1149 if (error) { 1150 ip->i_effnlink--; 1151 ip->i_nlink--; 1152 DIP_SET(ip, i_nlink, ip->i_nlink); 1153 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 1154 if (DOINGSOFTDEP(vp)) 1155 softdep_revert_link(VTOI(tdvp), ip); 1156 } 1157 out: 1158 return (error); 1159 } 1160 1161 /* 1162 * whiteout vnode call 1163 */ 1164 static int 1165 ufs_whiteout(ap) 1166 struct vop_whiteout_args /* { 1167 struct vnode *a_dvp; 1168 struct componentname *a_cnp; 1169 int a_flags; 1170 } */ *ap; 1171 { 1172 struct vnode *dvp = ap->a_dvp; 1173 struct componentname *cnp = ap->a_cnp; 1174 struct direct newdir; 1175 int error = 0; 1176 1177 if (DOINGSUJ(dvp) && (ap->a_flags == CREATE || 1178 ap->a_flags == DELETE)) { 1179 error = softdep_prelink(dvp, NULL, cnp); 1180 if (error != 0) { 1181 MPASS(error == ERELOOKUP); 1182 return (error); 1183 } 1184 } 1185 1186 switch (ap->a_flags) { 1187 case LOOKUP: 1188 /* 4.4 format directories support whiteout operations */ 1189 if (!OFSFMT(dvp)) 1190 return (0); 1191 return (EOPNOTSUPP); 1192 1193 case CREATE: 1194 /* create a new directory whiteout */ 1195 #ifdef INVARIANTS 1196 if ((cnp->cn_flags & SAVENAME) == 0) 1197 panic("ufs_whiteout: missing name"); 1198 if (OFSFMT(dvp)) 1199 panic("ufs_whiteout: old format filesystem"); 1200 #endif 1201 1202 newdir.d_ino = UFS_WINO; 1203 newdir.d_namlen = cnp->cn_namelen; 1204 bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); 1205 newdir.d_type = 
DT_WHT; 1206 error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); 1207 break; 1208 1209 case DELETE: 1210 /* remove an existing directory whiteout */ 1211 #ifdef INVARIANTS 1212 if (OFSFMT(dvp)) 1213 panic("ufs_whiteout: old format filesystem"); 1214 #endif 1215 1216 cnp->cn_flags &= ~DOWHITEOUT; 1217 error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); 1218 break; 1219 default: 1220 panic("ufs_whiteout: unknown op"); 1221 } 1222 return (error); 1223 } 1224 1225 static volatile int rename_restarts; 1226 SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, 1227 __DEVOLATILE(int *, &rename_restarts), 0, 1228 "Times rename had to restart due to lock contention"); 1229 1230 /* 1231 * Rename system call. 1232 * rename("foo", "bar"); 1233 * is essentially 1234 * unlink("bar"); 1235 * link("foo", "bar"); 1236 * unlink("foo"); 1237 * but ``atomically''. Can't do full commit without saving state in the 1238 * inode on disk which isn't feasible at this time. Best we can do is 1239 * always guarantee the target exists. 1240 * 1241 * Basic algorithm is: 1242 * 1243 * 1) Bump link count on source while we're linking it to the 1244 * target. This also ensure the inode won't be deleted out 1245 * from underneath us while we work (it may be truncated by 1246 * a concurrent `trunc' or `open' for creation). 1247 * 2) Link source to destination. If destination already exists, 1248 * delete it first. 1249 * 3) Unlink source reference to inode if still around. If a 1250 * directory was moved and the parent of the destination 1251 * is different from the source, patch the ".." entry in the 1252 * directory. 
1253 */ 1254 static int 1255 ufs_rename(ap) 1256 struct vop_rename_args /* { 1257 struct vnode *a_fdvp; 1258 struct vnode *a_fvp; 1259 struct componentname *a_fcnp; 1260 struct vnode *a_tdvp; 1261 struct vnode *a_tvp; 1262 struct componentname *a_tcnp; 1263 } */ *ap; 1264 { 1265 struct vnode *tvp = ap->a_tvp; 1266 struct vnode *tdvp = ap->a_tdvp; 1267 struct vnode *fvp = ap->a_fvp; 1268 struct vnode *fdvp = ap->a_fdvp; 1269 struct vnode *nvp; 1270 struct componentname *tcnp = ap->a_tcnp; 1271 struct componentname *fcnp = ap->a_fcnp; 1272 struct thread *td = curthread; 1273 struct inode *fip, *tip, *tdp, *fdp; 1274 struct direct newdir; 1275 off_t endoff; 1276 int doingdirectory, newparent; 1277 int error = 0; 1278 struct mount *mp; 1279 ino_t ino; 1280 seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s; 1281 bool checkpath_locked, want_seqc_end; 1282 1283 checkpath_locked = want_seqc_end = false; 1284 1285 #ifdef INVARIANTS 1286 if ((tcnp->cn_flags & HASBUF) == 0 || 1287 (fcnp->cn_flags & HASBUF) == 0) 1288 panic("ufs_rename: no name"); 1289 #endif 1290 endoff = 0; 1291 mp = tdvp->v_mount; 1292 VOP_UNLOCK(tdvp); 1293 if (tvp && tvp != tdvp) 1294 VOP_UNLOCK(tvp); 1295 /* 1296 * Check for cross-device rename. 1297 */ 1298 if ((fvp->v_mount != tdvp->v_mount) || 1299 (tvp && (fvp->v_mount != tvp->v_mount))) { 1300 error = EXDEV; 1301 mp = NULL; 1302 goto releout; 1303 } 1304 1305 fdvp_s = fvp_s = tdvp_s = tvp_s = SEQC_MOD; 1306 relock: 1307 /* 1308 * We need to acquire 2 to 4 locks depending on whether tvp is NULL 1309 * and fdvp and tdvp are the same directory. Subsequently we need 1310 * to double-check all paths and in the directory rename case we 1311 * need to verify that we are not creating a directory loop. To 1312 * handle this we acquire all but fdvp using non-blocking 1313 * acquisitions. If we fail to acquire any lock in the path we will 1314 * drop all held locks, acquire the new lock in a blocking fashion, 1315 * and then release it and restart the rename. 
This acquire/release 1316 * step ensures that we do not spin on a lock waiting for release. 1317 */ 1318 error = vn_lock(fdvp, LK_EXCLUSIVE); 1319 if (error) 1320 goto releout; 1321 if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1322 VOP_UNLOCK(fdvp); 1323 error = vn_lock(tdvp, LK_EXCLUSIVE); 1324 if (error) 1325 goto releout; 1326 VOP_UNLOCK(tdvp); 1327 atomic_add_int(&rename_restarts, 1); 1328 goto relock; 1329 } 1330 /* 1331 * Re-resolve fvp to be certain it still exists and fetch the 1332 * correct vnode. 1333 */ 1334 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); 1335 if (error) { 1336 VOP_UNLOCK(fdvp); 1337 VOP_UNLOCK(tdvp); 1338 goto releout; 1339 } 1340 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); 1341 if (error) { 1342 VOP_UNLOCK(fdvp); 1343 VOP_UNLOCK(tdvp); 1344 if (error != EBUSY) 1345 goto releout; 1346 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); 1347 if (error != 0) 1348 goto releout; 1349 VOP_UNLOCK(nvp); 1350 vrele(fvp); 1351 fvp = nvp; 1352 atomic_add_int(&rename_restarts, 1); 1353 goto relock; 1354 } 1355 vrele(fvp); 1356 fvp = nvp; 1357 /* 1358 * Re-resolve tvp and acquire the vnode lock if present. 1359 */ 1360 error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); 1361 if (error != 0 && error != EJUSTRETURN) { 1362 VOP_UNLOCK(fdvp); 1363 VOP_UNLOCK(tdvp); 1364 VOP_UNLOCK(fvp); 1365 goto releout; 1366 } 1367 /* 1368 * If tvp disappeared we just carry on. 1369 */ 1370 if (error == EJUSTRETURN && tvp != NULL) { 1371 vrele(tvp); 1372 tvp = NULL; 1373 } 1374 /* 1375 * Get the tvp ino if the lookup succeeded. We may have to restart 1376 * if the non-blocking acquire fails. 
1377 */ 1378 if (error == 0) { 1379 nvp = NULL; 1380 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); 1381 if (tvp) 1382 vrele(tvp); 1383 tvp = nvp; 1384 if (error) { 1385 VOP_UNLOCK(fdvp); 1386 VOP_UNLOCK(tdvp); 1387 VOP_UNLOCK(fvp); 1388 if (error != EBUSY) 1389 goto releout; 1390 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); 1391 if (error != 0) 1392 goto releout; 1393 vput(nvp); 1394 atomic_add_int(&rename_restarts, 1); 1395 goto relock; 1396 } 1397 } 1398 1399 if (DOINGSUJ(fdvp) && 1400 (seqc_in_modify(fdvp_s) || !vn_seqc_consistent(fdvp, fdvp_s) || 1401 seqc_in_modify(fvp_s) || !vn_seqc_consistent(fvp, fvp_s) || 1402 seqc_in_modify(tdvp_s) || !vn_seqc_consistent(tdvp, tdvp_s) || 1403 (tvp != NULL && (seqc_in_modify(tvp_s) || 1404 !vn_seqc_consistent(tvp, tvp_s))))) { 1405 error = softdep_prerename(fdvp, fvp, tdvp, tvp); 1406 if (error != 0) { 1407 if (error == ERELOOKUP) { 1408 fdvp_s = vn_seqc_read_any(fdvp); 1409 fvp_s = vn_seqc_read_any(fvp); 1410 tdvp_s = vn_seqc_read_any(tdvp); 1411 if (tvp != NULL) 1412 tvp_s = vn_seqc_read_any(tvp); 1413 atomic_add_int(&rename_restarts, 1); 1414 goto relock; 1415 } 1416 goto releout; 1417 } 1418 } 1419 1420 fdp = VTOI(fdvp); 1421 fip = VTOI(fvp); 1422 tdp = VTOI(tdvp); 1423 tip = NULL; 1424 if (tvp) 1425 tip = VTOI(tvp); 1426 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1427 (VTOI(tdvp)->i_flags & APPEND))) { 1428 error = EPERM; 1429 goto unlockout; 1430 } 1431 /* 1432 * Renaming a file to itself has no effect. The upper layers should 1433 * not call us in that case. However, things could change after 1434 * we drop the locks above. 
1435 */ 1436 if (fvp == tvp) { 1437 error = 0; 1438 goto unlockout; 1439 } 1440 doingdirectory = 0; 1441 newparent = 0; 1442 ino = fip->i_number; 1443 if (fip->i_nlink >= UFS_LINK_MAX) { 1444 error = EMLINK; 1445 goto unlockout; 1446 } 1447 if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 1448 || (fdp->i_flags & APPEND)) { 1449 error = EPERM; 1450 goto unlockout; 1451 } 1452 if ((fip->i_mode & IFMT) == IFDIR) { 1453 /* 1454 * Avoid ".", "..", and aliases of "." for obvious reasons. 1455 */ 1456 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 1457 fdp == fip || 1458 (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 1459 error = EINVAL; 1460 goto unlockout; 1461 } 1462 if (fdp->i_number != tdp->i_number) 1463 newparent = tdp->i_number; 1464 doingdirectory = 1; 1465 } 1466 if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || 1467 (tvp != NULL && tvp->v_type == VDIR && 1468 tvp->v_mountedhere != NULL)) { 1469 error = EXDEV; 1470 goto unlockout; 1471 } 1472 1473 /* 1474 * If ".." must be changed (ie the directory gets a new 1475 * parent) then the source directory must not be in the 1476 * directory hierarchy above the target, as this would 1477 * orphan everything below the source directory. Also 1478 * the user must have write permission in the source so 1479 * as to be able to change "..". 1480 */ 1481 if (doingdirectory && newparent) { 1482 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread); 1483 if (error) 1484 goto unlockout; 1485 1486 sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock); 1487 checkpath_locked = true; 1488 error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, 1489 &ino); 1490 /* 1491 * We encountered a lock that we have to wait for. Unlock 1492 * everything else and VGET before restarting. 
1493 */ 1494 if (ino) { 1495 sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); 1496 checkpath_locked = false; 1497 VOP_UNLOCK(fdvp); 1498 VOP_UNLOCK(fvp); 1499 VOP_UNLOCK(tdvp); 1500 if (tvp) 1501 VOP_UNLOCK(tvp); 1502 error = VFS_VGET(mp, ino, LK_SHARED, &nvp); 1503 if (error == 0) 1504 vput(nvp); 1505 atomic_add_int(&rename_restarts, 1); 1506 goto relock; 1507 } 1508 if (error) 1509 goto unlockout; 1510 if ((tcnp->cn_flags & SAVESTART) == 0) 1511 panic("ufs_rename: lost to startdir"); 1512 } 1513 if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || 1514 tdp->i_effnlink == 0) 1515 panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); 1516 1517 if (tvp != NULL) 1518 vn_seqc_write_begin(tvp); 1519 vn_seqc_write_begin(tdvp); 1520 vn_seqc_write_begin(fvp); 1521 vn_seqc_write_begin(fdvp); 1522 want_seqc_end = true; 1523 1524 /* 1525 * 1) Bump link count while we're moving stuff 1526 * around. If we crash somewhere before 1527 * completing our work, the link count 1528 * may be wrong, but correctable. 1529 */ 1530 fip->i_effnlink++; 1531 fip->i_nlink++; 1532 DIP_SET(fip, i_nlink, fip->i_nlink); 1533 UFS_INODE_SET_FLAG(fip, IN_CHANGE); 1534 if (DOINGSOFTDEP(fvp)) 1535 softdep_setup_link(tdp, fip); 1536 error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp)); 1537 if (error) 1538 goto bad; 1539 1540 /* 1541 * 2) If target doesn't exist, link the target 1542 * to the source and unlink the source. 1543 * Otherwise, rewrite the target directory 1544 * entry to reference the source inode and 1545 * expunge the original entry's existence. 1546 */ 1547 if (tip == NULL) { 1548 if (ITODEV(tdp) != ITODEV(fip)) 1549 panic("ufs_rename: EXDEV"); 1550 if (doingdirectory && newparent) { 1551 /* 1552 * Account for ".." in new directory. 1553 * When source and destination have the same 1554 * parent we don't adjust the link count. The 1555 * actual link modification is completed when 1556 * .. is rewritten below. 
1557 */ 1558 if (tdp->i_nlink >= UFS_LINK_MAX) { 1559 error = EMLINK; 1560 goto bad; 1561 } 1562 } 1563 ufs_makedirentry(fip, tcnp, &newdir); 1564 error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); 1565 if (error) 1566 goto bad; 1567 /* Setup tdvp for directory compaction if needed. */ 1568 if (I_COUNT(tdp) != 0 && I_ENDOFF(tdp) != 0 && 1569 I_ENDOFF(tdp) < tdp->i_size) 1570 endoff = I_ENDOFF(tdp); 1571 } else { 1572 if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) 1573 panic("ufs_rename: EXDEV"); 1574 /* 1575 * Short circuit rename(foo, foo). 1576 */ 1577 if (tip->i_number == fip->i_number) 1578 panic("ufs_rename: same file"); 1579 /* 1580 * If the parent directory is "sticky", then the caller 1581 * must possess VADMIN for the parent directory, or the 1582 * destination of the rename. This implements append-only 1583 * directories. 1584 */ 1585 if ((tdp->i_mode & S_ISTXT) && 1586 VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && 1587 VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { 1588 error = EPERM; 1589 goto bad; 1590 } 1591 /* 1592 * Target must be empty if a directory and have no links 1593 * to it. Also, ensure source and target are compatible 1594 * (both directories, or both not directories). 1595 */ 1596 if ((tip->i_mode & IFMT) == IFDIR) { 1597 if ((tip->i_effnlink > 2) || 1598 !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { 1599 error = ENOTEMPTY; 1600 goto bad; 1601 } 1602 if (!doingdirectory) { 1603 error = ENOTDIR; 1604 goto bad; 1605 } 1606 cache_purge(tdvp); 1607 } else if (doingdirectory) { 1608 error = EISDIR; 1609 goto bad; 1610 } 1611 if (doingdirectory) { 1612 if (!newparent) { 1613 tdp->i_effnlink--; 1614 if (DOINGSOFTDEP(tdvp)) 1615 softdep_change_linkcnt(tdp); 1616 } 1617 tip->i_effnlink--; 1618 if (DOINGSOFTDEP(tvp)) 1619 softdep_change_linkcnt(tip); 1620 } 1621 error = ufs_dirrewrite(tdp, tip, fip->i_number, 1622 IFTODT(fip->i_mode), 1623 (doingdirectory && newparent) ? 
newparent : doingdirectory); 1624 if (error) { 1625 if (doingdirectory) { 1626 if (!newparent) { 1627 tdp->i_effnlink++; 1628 if (DOINGSOFTDEP(tdvp)) 1629 softdep_change_linkcnt(tdp); 1630 } 1631 tip->i_effnlink++; 1632 if (DOINGSOFTDEP(tvp)) 1633 softdep_change_linkcnt(tip); 1634 } 1635 goto bad; 1636 } 1637 if (doingdirectory && !DOINGSOFTDEP(tvp)) { 1638 /* 1639 * The only stuff left in the directory is "." 1640 * and "..". The "." reference is inconsequential 1641 * since we are quashing it. We have removed the "." 1642 * reference and the reference in the parent directory, 1643 * but there may be other hard links. The soft 1644 * dependency code will arrange to do these operations 1645 * after the parent directory entry has been deleted on 1646 * disk, so when running with that code we avoid doing 1647 * them now. 1648 */ 1649 if (!newparent) { 1650 tdp->i_nlink--; 1651 DIP_SET(tdp, i_nlink, tdp->i_nlink); 1652 UFS_INODE_SET_FLAG(tdp, IN_CHANGE); 1653 } 1654 tip->i_nlink--; 1655 DIP_SET(tip, i_nlink, tip->i_nlink); 1656 UFS_INODE_SET_FLAG(tip, IN_CHANGE); 1657 } 1658 } 1659 1660 /* 1661 * 3) Unlink the source. We have to resolve the path again to 1662 * fixup the directory offset and count for ufs_dirremove. 1663 */ 1664 if (fdvp == tdvp) { 1665 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); 1666 if (error) 1667 panic("ufs_rename: from entry went away!"); 1668 if (ino != fip->i_number) 1669 panic("ufs_rename: ino mismatch %ju != %ju\n", 1670 (uintmax_t)ino, (uintmax_t)fip->i_number); 1671 } 1672 /* 1673 * If the source is a directory with a 1674 * new parent, the link count of the old 1675 * parent directory must be decremented 1676 * and ".." set to point to the new parent. 1677 */ 1678 if (doingdirectory && newparent) { 1679 /* 1680 * If tip exists we simply use its link, otherwise we must 1681 * add a new one. 
1682 */ 1683 if (tip == NULL) { 1684 tdp->i_effnlink++; 1685 tdp->i_nlink++; 1686 DIP_SET(tdp, i_nlink, tdp->i_nlink); 1687 UFS_INODE_SET_FLAG(tdp, IN_CHANGE); 1688 if (DOINGSOFTDEP(tdvp)) 1689 softdep_setup_dotdot_link(tdp, fip); 1690 error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) && 1691 !DOINGASYNC(tdvp)); 1692 /* Don't go to bad here as the new link exists. */ 1693 if (error) 1694 goto unlockout; 1695 } else if (DOINGSUJ(tdvp)) 1696 /* Journal must account for each new link. */ 1697 softdep_setup_dotdot_link(tdp, fip); 1698 SET_I_OFFSET(fip, mastertemplate.dot_reclen); 1699 ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); 1700 cache_purge(fdvp); 1701 } 1702 error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); 1703 /* 1704 * The kern_renameat() looks up the fvp using the DELETE flag, which 1705 * causes the removal of the name cache entry for fvp. 1706 * As the relookup of the fvp is done in two steps: 1707 * ufs_lookup_ino() and then VFS_VGET(), another thread might do a 1708 * normal lookup of the from name just before the VFS_VGET() call, 1709 * causing the cache entry to be re-instantiated. 1710 * 1711 * The same issue also applies to tvp if it exists as 1712 * otherwise we may have a stale name cache entry for the new 1713 * name that references the old i-node if it has other links 1714 * or open file descriptors. 1715 */ 1716 cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp); 1717 1718 unlockout: 1719 if (want_seqc_end) { 1720 if (tvp != NULL) 1721 vn_seqc_write_end(tvp); 1722 vn_seqc_write_end(tdvp); 1723 vn_seqc_write_end(fvp); 1724 vn_seqc_write_end(fdvp); 1725 } 1726 1727 if (checkpath_locked) 1728 sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); 1729 1730 vput(fdvp); 1731 vput(fvp); 1732 1733 /* 1734 * If compaction or fsync was requested do it in 1735 * ffs_vput_pair() now that other locks are no longer needed. 
1736 */ 1737 if (error == 0 && endoff != 0) { 1738 UFS_INODE_SET_FLAG(tdp, IN_ENDOFF); 1739 SET_I_ENDOFF(tdp, endoff); 1740 } 1741 VOP_VPUT_PAIR(tdvp, &tvp, true); 1742 return (error); 1743 1744 bad: 1745 fip->i_effnlink--; 1746 fip->i_nlink--; 1747 DIP_SET(fip, i_nlink, fip->i_nlink); 1748 UFS_INODE_SET_FLAG(fip, IN_CHANGE); 1749 if (DOINGSOFTDEP(fvp)) 1750 softdep_revert_link(tdp, fip); 1751 goto unlockout; 1752 1753 releout: 1754 if (want_seqc_end) { 1755 if (tvp != NULL) 1756 vn_seqc_write_end(tvp); 1757 vn_seqc_write_end(tdvp); 1758 vn_seqc_write_end(fvp); 1759 vn_seqc_write_end(fdvp); 1760 } 1761 1762 vrele(fdvp); 1763 vrele(fvp); 1764 vrele(tdvp); 1765 if (tvp) 1766 vrele(tvp); 1767 1768 return (error); 1769 } 1770 1771 #ifdef UFS_ACL 1772 static int 1773 ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, 1774 mode_t dmode, struct ucred *cred, struct thread *td) 1775 { 1776 int error; 1777 struct inode *ip = VTOI(tvp); 1778 struct acl *dacl, *acl; 1779 1780 acl = acl_alloc(M_WAITOK); 1781 dacl = acl_alloc(M_WAITOK); 1782 1783 /* 1784 * Retrieve default ACL from parent, if any. 1785 */ 1786 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1787 switch (error) { 1788 case 0: 1789 /* 1790 * Retrieved a default ACL, so merge mode and ACL if 1791 * necessary. If the ACL is empty, fall through to 1792 * the "not defined or available" case. 1793 */ 1794 if (acl->acl_cnt != 0) { 1795 dmode = acl_posix1e_newfilemode(dmode, acl); 1796 UFS_INODE_SET_MODE(ip, dmode); 1797 DIP_SET(ip, i_mode, dmode); 1798 *dacl = *acl; 1799 ufs_sync_acl_from_inode(ip, acl); 1800 break; 1801 } 1802 /* FALLTHROUGH */ 1803 1804 case EOPNOTSUPP: 1805 /* 1806 * Just use the mode as-is. 
1807 */ 1808 UFS_INODE_SET_MODE(ip, dmode); 1809 DIP_SET(ip, i_mode, dmode); 1810 error = 0; 1811 goto out; 1812 1813 default: 1814 goto out; 1815 } 1816 1817 /* 1818 * XXX: If we abort now, will Soft Updates notify the extattr 1819 * code that the EAs for the file need to be released? 1820 */ 1821 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1822 if (error == 0) 1823 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); 1824 switch (error) { 1825 case 0: 1826 break; 1827 1828 case EOPNOTSUPP: 1829 /* 1830 * XXX: This should not happen, as EOPNOTSUPP above 1831 * was supposed to free acl. 1832 */ 1833 printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1834 /* 1835 panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); 1836 */ 1837 break; 1838 1839 default: 1840 goto out; 1841 } 1842 1843 out: 1844 acl_free(acl); 1845 acl_free(dacl); 1846 1847 return (error); 1848 } 1849 1850 static int 1851 ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, 1852 mode_t mode, struct ucred *cred, struct thread *td) 1853 { 1854 int error; 1855 struct inode *ip = VTOI(tvp); 1856 struct acl *acl; 1857 1858 acl = acl_alloc(M_WAITOK); 1859 1860 /* 1861 * Retrieve default ACL for parent, if any. 1862 */ 1863 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1864 switch (error) { 1865 case 0: 1866 /* 1867 * Retrieved a default ACL, so merge mode and ACL if 1868 * necessary. 1869 */ 1870 if (acl->acl_cnt != 0) { 1871 /* 1872 * Two possible ways for default ACL to not 1873 * be present. First, the EA can be 1874 * undefined, or second, the default ACL can 1875 * be blank. If it's blank, fall through to 1876 * the it's not defined case. 1877 */ 1878 mode = acl_posix1e_newfilemode(mode, acl); 1879 UFS_INODE_SET_MODE(ip, mode); 1880 DIP_SET(ip, i_mode, mode); 1881 ufs_sync_acl_from_inode(ip, acl); 1882 break; 1883 } 1884 /* FALLTHROUGH */ 1885 1886 case EOPNOTSUPP: 1887 /* 1888 * Just use the mode as-is. 
1889 */ 1890 UFS_INODE_SET_MODE(ip, mode); 1891 DIP_SET(ip, i_mode, mode); 1892 error = 0; 1893 goto out; 1894 1895 default: 1896 goto out; 1897 } 1898 1899 /* 1900 * XXX: If we abort now, will Soft Updates notify the extattr 1901 * code that the EAs for the file need to be released? 1902 */ 1903 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1904 switch (error) { 1905 case 0: 1906 break; 1907 1908 case EOPNOTSUPP: 1909 /* 1910 * XXX: This should not happen, as EOPNOTSUPP above was 1911 * supposed to free acl. 1912 */ 1913 printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1914 "but no VOP_SETACL()\n"); 1915 /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " 1916 "but no VOP_SETACL()"); */ 1917 break; 1918 1919 default: 1920 goto out; 1921 } 1922 1923 out: 1924 acl_free(acl); 1925 1926 return (error); 1927 } 1928 1929 static int 1930 ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, 1931 mode_t child_mode, struct ucred *cred, struct thread *td) 1932 { 1933 int error; 1934 struct acl *parent_aclp, *child_aclp; 1935 1936 parent_aclp = acl_alloc(M_WAITOK); 1937 child_aclp = acl_alloc(M_WAITOK | M_ZERO); 1938 1939 error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); 1940 if (error) 1941 goto out; 1942 acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, 1943 child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); 1944 error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); 1945 if (error) 1946 goto out; 1947 out: 1948 acl_free(parent_aclp); 1949 acl_free(child_aclp); 1950 1951 return (error); 1952 } 1953 #endif 1954 1955 /* 1956 * Mkdir system call 1957 */ 1958 static int 1959 ufs_mkdir(ap) 1960 struct vop_mkdir_args /* { 1961 struct vnode *a_dvp; 1962 struct vnode **a_vpp; 1963 struct componentname *a_cnp; 1964 struct vattr *a_vap; 1965 } */ *ap; 1966 { 1967 struct vnode *dvp = ap->a_dvp; 1968 struct vattr *vap = ap->a_vap; 1969 struct componentname *cnp = ap->a_cnp; 1970 struct inode *ip, *dp; 1971 struct vnode *tvp; 
1972 struct buf *bp; 1973 struct dirtemplate dirtemplate, *dtp; 1974 struct direct newdir; 1975 int error, dmode; 1976 long blkoff; 1977 1978 #ifdef INVARIANTS 1979 if ((cnp->cn_flags & HASBUF) == 0) 1980 panic("ufs_mkdir: no name"); 1981 #endif 1982 dp = VTOI(dvp); 1983 error = ufs_sync_nlink(dvp, NULL); 1984 if (error != 0) 1985 goto out; 1986 dmode = vap->va_mode & 0777; 1987 dmode |= IFDIR; 1988 1989 /* 1990 * Must simulate part of ufs_makeinode here to acquire the inode, 1991 * but not have it entered in the parent directory. The entry is 1992 * made later after writing "." and ".." entries. 1993 */ 1994 if (dp->i_effnlink < 2) { 1995 print_bad_link_count("ufs_mkdir", dvp); 1996 error = EINVAL; 1997 goto out; 1998 } 1999 2000 if (DOINGSUJ(dvp)) { 2001 error = softdep_prelink(dvp, NULL, cnp); 2002 if (error != 0) { 2003 MPASS(error == ERELOOKUP); 2004 return (error); 2005 } 2006 } 2007 2008 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); 2009 if (error) 2010 goto out; 2011 vn_seqc_write_begin(tvp); 2012 ip = VTOI(tvp); 2013 ip->i_gid = dp->i_gid; 2014 DIP_SET(ip, i_gid, dp->i_gid); 2015 #ifdef SUIDDIR 2016 { 2017 #ifdef QUOTA 2018 struct ucred ucred, *ucp; 2019 gid_t ucred_group; 2020 ucp = cnp->cn_cred; 2021 #endif 2022 /* 2023 * If we are hacking owners here, (only do this where told to) 2024 * and we are not giving it TO root, (would subvert quotas) 2025 * then go ahead and give it to the other user. 2026 * The new directory also inherits the SUID bit. 2027 * If user's UID and dir UID are the same, 2028 * 'give it away' so that the SUID is still forced on. 2029 */ 2030 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 2031 (dp->i_mode & ISUID) && dp->i_uid) { 2032 dmode |= ISUID; 2033 ip->i_uid = dp->i_uid; 2034 DIP_SET(ip, i_uid, dp->i_uid); 2035 #ifdef QUOTA 2036 if (dp->i_uid != cnp->cn_cred->cr_uid) { 2037 /* 2038 * Make sure the correct user gets charged 2039 * for the space. 2040 * Make a dummy credential for the victim. 
2041 * XXX This seems to never be accessed out of 2042 * our context so a stack variable is ok. 2043 */ 2044 refcount_init(&ucred.cr_ref, 1); 2045 ucred.cr_uid = ip->i_uid; 2046 ucred.cr_ngroups = 1; 2047 ucred.cr_groups = &ucred_group; 2048 ucred.cr_groups[0] = dp->i_gid; 2049 ucp = &ucred; 2050 } 2051 #endif 2052 } else { 2053 ip->i_uid = cnp->cn_cred->cr_uid; 2054 DIP_SET(ip, i_uid, ip->i_uid); 2055 } 2056 #ifdef QUOTA 2057 if ((error = getinoquota(ip)) || 2058 (error = chkiq(ip, 1, ucp, 0))) { 2059 if (DOINGSOFTDEP(tvp)) 2060 softdep_revert_link(dp, ip); 2061 UFS_VFREE(tvp, ip->i_number, dmode); 2062 vn_seqc_write_end(tvp); 2063 vgone(tvp); 2064 vput(tvp); 2065 return (error); 2066 } 2067 #endif 2068 } 2069 #else /* !SUIDDIR */ 2070 ip->i_uid = cnp->cn_cred->cr_uid; 2071 DIP_SET(ip, i_uid, ip->i_uid); 2072 #ifdef QUOTA 2073 if ((error = getinoquota(ip)) || 2074 (error = chkiq(ip, 1, cnp->cn_cred, 0))) { 2075 if (DOINGSOFTDEP(tvp)) 2076 softdep_revert_link(dp, ip); 2077 UFS_VFREE(tvp, ip->i_number, dmode); 2078 vn_seqc_write_end(tvp); 2079 vgone(tvp); 2080 vput(tvp); 2081 return (error); 2082 } 2083 #endif 2084 #endif /* !SUIDDIR */ 2085 UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); 2086 UFS_INODE_SET_MODE(ip, dmode); 2087 DIP_SET(ip, i_mode, dmode); 2088 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 2089 ip->i_effnlink = 2; 2090 ip->i_nlink = 2; 2091 DIP_SET(ip, i_nlink, 2); 2092 2093 if (cnp->cn_flags & ISWHITEOUT) { 2094 ip->i_flags |= UF_OPAQUE; 2095 DIP_SET(ip, i_flags, ip->i_flags); 2096 } 2097 2098 /* 2099 * Bump link count in parent directory to reflect work done below. 2100 * Should be done before reference is created so cleanup is 2101 * possible if we crash. 
2102 */ 2103 dp->i_effnlink++; 2104 dp->i_nlink++; 2105 DIP_SET(dp, i_nlink, dp->i_nlink); 2106 UFS_INODE_SET_FLAG(dp, IN_CHANGE); 2107 if (DOINGSOFTDEP(dvp)) 2108 softdep_setup_mkdir(dp, ip); 2109 error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)); 2110 if (error) 2111 goto bad; 2112 #ifdef MAC 2113 if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { 2114 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, 2115 dvp, tvp, cnp); 2116 if (error) 2117 goto bad; 2118 } 2119 #endif 2120 #ifdef UFS_ACL 2121 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 2122 error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 2123 cnp->cn_cred, curthread); 2124 if (error) 2125 goto bad; 2126 } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { 2127 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, 2128 cnp->cn_cred, curthread); 2129 if (error) 2130 goto bad; 2131 } 2132 #endif /* !UFS_ACL */ 2133 2134 /* 2135 * Initialize directory with "." and ".." from static template. 2136 */ 2137 if (!OFSFMT(dvp)) 2138 dtp = &mastertemplate; 2139 else 2140 dtp = (struct dirtemplate *)&omastertemplate; 2141 dirtemplate = *dtp; 2142 dirtemplate.dot_ino = ip->i_number; 2143 dirtemplate.dotdot_ino = dp->i_number; 2144 vnode_pager_setsize(tvp, DIRBLKSIZ); 2145 if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, 2146 BA_CLRBUF, &bp)) != 0) 2147 goto bad; 2148 ip->i_size = DIRBLKSIZ; 2149 DIP_SET(ip, i_size, DIRBLKSIZ); 2150 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); 2151 bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); 2152 if (DOINGSOFTDEP(tvp)) { 2153 /* 2154 * Ensure that the entire newly allocated block is a 2155 * valid directory so that future growth within the 2156 * block does not have to ensure that the block is 2157 * written before the inode. 
2158 */ 2159 blkoff = DIRBLKSIZ; 2160 while (blkoff < bp->b_bcount) { 2161 ((struct direct *) 2162 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 2163 blkoff += DIRBLKSIZ; 2164 } 2165 } 2166 if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && 2167 !DOINGASYNC(tvp))) != 0) { 2168 (void)bwrite(bp); 2169 goto bad; 2170 } 2171 /* 2172 * Directory set up, now install its entry in the parent directory. 2173 * 2174 * If we are not doing soft dependencies, then we must write out the 2175 * buffer containing the new directory body before entering the new 2176 * name in the parent. If we are doing soft dependencies, then the 2177 * buffer containing the new directory body will be passed to and 2178 * released in the soft dependency code after the code has attached 2179 * an appropriate ordering dependency to the buffer which ensures that 2180 * the buffer is written before the new name is written in the parent. 2181 */ 2182 if (DOINGASYNC(dvp)) 2183 bdwrite(bp); 2184 else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) 2185 goto bad; 2186 ufs_makedirentry(ip, cnp, &newdir); 2187 error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); 2188 2189 bad: 2190 if (error == 0) { 2191 *ap->a_vpp = tvp; 2192 vn_seqc_write_end(tvp); 2193 } else { 2194 dp->i_effnlink--; 2195 dp->i_nlink--; 2196 DIP_SET(dp, i_nlink, dp->i_nlink); 2197 UFS_INODE_SET_FLAG(dp, IN_CHANGE); 2198 /* 2199 * No need to do an explicit VOP_TRUNCATE here, vrele will 2200 * do this for us because we set the link count to 0. 2201 */ 2202 ip->i_effnlink = 0; 2203 ip->i_nlink = 0; 2204 DIP_SET(ip, i_nlink, 0); 2205 UFS_INODE_SET_FLAG(ip, IN_CHANGE); 2206 if (DOINGSOFTDEP(tvp)) 2207 softdep_revert_mkdir(dp, ip); 2208 vn_seqc_write_end(tvp); 2209 vgone(tvp); 2210 vput(tvp); 2211 } 2212 out: 2213 return (error); 2214 } 2215 2216 /* 2217 * Rmdir system call. 
 */
/*
 * ufs_rmdir: remove directory ap->a_vp (inode ip) from its parent
 * ap->a_dvp (inode dp).
 *
 * Returns 0 on success, or:
 *   EINVAL     parent effective link count is <= 2, or vp is mounted on
 *   ENOTEMPTY  ufs_dirempty() reports the directory is not empty
 *   EPERM      parent is APPEND, or vp is NOUNLINK/IMMUTABLE/APPEND
 *   other      whatever softdep_prelink(), ufs_dirremove() or the
 *              UFS_UPDATE() of the parent returned
 */
static int
ufs_rmdir(ap)
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct inode *ip, *dp;
	int error;

	ip = VTOI(vp);
	dp = VTOI(dvp);

	/*
	 * Do not remove a directory that is in the process of being renamed.
	 * Verify the directory is empty (and valid). Rmdir ".." will not be
	 * valid since ".." will contain a reference to the current directory
	 * and thus be non-empty. Do not allow the removal of mounted on
	 * directories (this can happen when an NFS exported filesystem
	 * tries to remove a locally mounted on directory).
	 */
	error = 0;
	/*
	 * A parent that holds a subdirectory is expected to have more than
	 * two effective links; exactly two is reported as a bad link count.
	 */
	if (dp->i_effnlink <= 2) {
		if (dp->i_effnlink == 2)
			print_bad_link_count("ufs_rmdir", dvp);
		error = EINVAL;
		goto out;
	}
	if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
		error = ENOTEMPTY;
		goto out;
	}
	if ((dp->i_flags & APPEND)
	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}
	if (vp->v_mountedhere != 0) {
		error = EINVAL;
		goto out;
	}
	/*
	 * With journaled soft updates the pre-link step may fail; the only
	 * failure expected here (asserted by MPASS) is ERELOOKUP.
	 */
	if (DOINGSUJ(dvp)) {
		error = softdep_prelink(dvp, vp, cnp);
		if (error != 0) {
			MPASS(error == ERELOOKUP);
			return (error);
		}
	}

#ifdef UFS_GJOURNAL
	ufs_gjournal_orphan(vp);
#endif
	/*
	 * Delete reference to directory before purging
	 * inode.  If we crash in between, the directory
	 * will be reattached to lost+found.
	 */
	dp->i_effnlink--;
	ip->i_effnlink--;
	if (DOINGSOFTDEP(vp))
		softdep_setup_rmdir(dp, ip);
	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
	if (error) {
		/* Undo the effective link count changes made above. */
		dp->i_effnlink++;
		ip->i_effnlink++;
		if (DOINGSOFTDEP(vp))
			softdep_revert_rmdir(dp, ip);
		goto out;
	}
	/*
	 * The only stuff left in the directory is "." and "..". The "."
	 * reference is inconsequential since we are quashing it. The soft
	 * dependency code will arrange to do these operations after
	 * the parent directory entry has been deleted on disk, so
	 * when running with that code we avoid doing them now.
	 */
	if (!DOINGSOFTDEP(vp)) {
		dp->i_nlink--;
		DIP_SET(dp, i_nlink, dp->i_nlink);
		UFS_INODE_SET_FLAG(dp, IN_CHANGE);
		/* The result of this update becomes the return value. */
		error = UFS_UPDATE(dvp, 0);
		ip->i_nlink--;
		DIP_SET(ip, i_nlink, ip->i_nlink);
		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
	}
	cache_vop_rmdir(dvp, vp);
#ifdef UFS_DIRHASH
	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
	if (ip->i_dirhash != NULL)
		ufsdirhash_free(ip);
#endif
out:
	return (error);
}

/*
 * symlink -- make a symbolic link
 *
 * Creates an IFLNK inode via ufs_makeinode() and stores ap->a_target in it.
 * A target shorter than the mount's um_maxsymlinklen is copied straight
 * into the inode's i_shortlink area; anything longer is written through
 * vn_rdwr().  If storing the target fails the new vnode is released with
 * vput() and the error is returned.
 */
static int
ufs_symlink(ap)
	struct vop_symlink_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		const char *a_target;
	} */ *ap;
{
	struct vnode *vp, **vpp = ap->a_vpp;
	struct inode *ip;
	int len, error;

	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
	    vpp, ap->a_cnp, "ufs_symlink");
	if (error)
		return (error);
	vp = *vpp;
	len = strlen(ap->a_target);
	if (len < VFSTOUFS(vp->v_mount)->um_maxsymlinklen) {
		/* Short link: keep the target inside the inode itself. */
		ip = VTOI(vp);
		bcopy(ap->a_target, DIP(ip, i_shortlink), len);
		ip->i_size = len;
		DIP_SET(ip, i_size, len);
		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
		error = UFS_UPDATE(vp, 0);
	} else
		/* Long link: write the target as ordinary file data. */
		error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target),
		    len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
	if (error)
		vput(vp);
	return (error);
}

/*
 * Vnode op for reading directories.
2359 */ 2360 int 2361 ufs_readdir(ap) 2362 struct vop_readdir_args /* { 2363 struct vnode *a_vp; 2364 struct uio *a_uio; 2365 struct ucred *a_cred; 2366 int *a_eofflag; 2367 int *a_ncookies; 2368 uint64_t **a_cookies; 2369 } */ *ap; 2370 { 2371 struct vnode *vp = ap->a_vp; 2372 struct uio *uio = ap->a_uio; 2373 struct buf *bp; 2374 struct inode *ip; 2375 struct direct *dp, *edp; 2376 uint64_t *cookies; 2377 struct dirent dstdp; 2378 off_t offset, startoffset; 2379 size_t readcnt, skipcnt; 2380 ssize_t startresid; 2381 u_int ncookies; 2382 int error; 2383 2384 if (uio->uio_offset < 0) 2385 return (EINVAL); 2386 ip = VTOI(vp); 2387 if (ip->i_effnlink == 0) 2388 return (0); 2389 if (ap->a_ncookies != NULL) { 2390 if (uio->uio_resid < 0) 2391 ncookies = 0; 2392 else 2393 ncookies = uio->uio_resid; 2394 if (uio->uio_offset >= ip->i_size) 2395 ncookies = 0; 2396 else if (ip->i_size - uio->uio_offset < ncookies) 2397 ncookies = ip->i_size - uio->uio_offset; 2398 ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; 2399 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); 2400 *ap->a_ncookies = ncookies; 2401 *ap->a_cookies = cookies; 2402 } else { 2403 ncookies = 0; 2404 cookies = NULL; 2405 } 2406 offset = startoffset = uio->uio_offset; 2407 startresid = uio->uio_resid; 2408 error = 0; 2409 while (error == 0 && uio->uio_resid > 0 && 2410 uio->uio_offset < ip->i_size) { 2411 error = UFS_BLKATOFF(vp, uio->uio_offset, NULL, &bp); 2412 if (error) 2413 break; 2414 if (bp->b_offset + bp->b_bcount > ip->i_size) 2415 readcnt = ip->i_size - bp->b_offset; 2416 else 2417 readcnt = bp->b_bcount; 2418 skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & 2419 ~(size_t)(DIRBLKSIZ - 1); 2420 offset = bp->b_offset + skipcnt; 2421 dp = (struct direct *)&bp->b_data[skipcnt]; 2422 edp = (struct direct *)&bp->b_data[readcnt]; 2423 while (error == 0 && uio->uio_resid > 0 && dp < edp) { 2424 if (dp->d_reclen <= offsetof(struct direct, d_name) || 2425 (caddr_t)dp + 
dp->d_reclen > (caddr_t)edp) { 2426 error = EIO; 2427 break; 2428 } 2429 #if BYTE_ORDER == LITTLE_ENDIAN 2430 /* Old filesystem format. */ 2431 if (OFSFMT(vp)) { 2432 dstdp.d_namlen = dp->d_type; 2433 dstdp.d_type = dp->d_namlen; 2434 } else 2435 #endif 2436 { 2437 dstdp.d_namlen = dp->d_namlen; 2438 dstdp.d_type = dp->d_type; 2439 } 2440 if (offsetof(struct direct, d_name) + dstdp.d_namlen > 2441 dp->d_reclen) { 2442 error = EIO; 2443 break; 2444 } 2445 if (offset < startoffset || dp->d_ino == 0) 2446 goto nextentry; 2447 dstdp.d_fileno = dp->d_ino; 2448 dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); 2449 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 2450 /* NOTE: d_off is the offset of the *next* entry. */ 2451 dstdp.d_off = offset + dp->d_reclen; 2452 dirent_terminate(&dstdp); 2453 if (dstdp.d_reclen > uio->uio_resid) { 2454 if (uio->uio_resid == startresid) 2455 error = EINVAL; 2456 else 2457 error = EJUSTRETURN; 2458 break; 2459 } 2460 /* Advance dp. */ 2461 error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); 2462 if (error) 2463 break; 2464 if (cookies != NULL) { 2465 KASSERT(ncookies > 0, 2466 ("ufs_readdir: cookies buffer too small")); 2467 *cookies = offset + dp->d_reclen; 2468 cookies++; 2469 ncookies--; 2470 } 2471 nextentry: 2472 offset += dp->d_reclen; 2473 dp = (struct direct *)((caddr_t)dp + dp->d_reclen); 2474 } 2475 bqrelse(bp); 2476 uio->uio_offset = offset; 2477 } 2478 /* We need to correct uio_offset. 
*/ 2479 uio->uio_offset = offset; 2480 if (error == EJUSTRETURN) 2481 error = 0; 2482 if (ap->a_ncookies != NULL) { 2483 if (error == 0) { 2484 ap->a_ncookies -= ncookies; 2485 } else { 2486 free(*ap->a_cookies, M_TEMP); 2487 *ap->a_ncookies = 0; 2488 *ap->a_cookies = NULL; 2489 } 2490 } 2491 if (error == 0 && ap->a_eofflag) 2492 *ap->a_eofflag = ip->i_size <= uio->uio_offset; 2493 return (error); 2494 } 2495 2496 /* 2497 * Return target name of a symbolic link 2498 */ 2499 static int 2500 ufs_readlink(ap) 2501 struct vop_readlink_args /* { 2502 struct vnode *a_vp; 2503 struct uio *a_uio; 2504 struct ucred *a_cred; 2505 } */ *ap; 2506 { 2507 struct vnode *vp = ap->a_vp; 2508 struct inode *ip = VTOI(vp); 2509 doff_t isize; 2510 2511 isize = ip->i_size; 2512 if (isize < VFSTOUFS(vp->v_mount)->um_maxsymlinklen) 2513 return (uiomove(DIP(ip, i_shortlink), isize, ap->a_uio)); 2514 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 2515 } 2516 2517 /* 2518 * Calculate the logical to physical mapping if not done already, 2519 * then call the device strategy routine. 2520 * 2521 * In order to be able to swap to a file, the ufs_bmaparray() operation may not 2522 * deadlock on memory. See ufs_bmap() for details. 
2523 */ 2524 static int 2525 ufs_strategy(ap) 2526 struct vop_strategy_args /* { 2527 struct vnode *a_vp; 2528 struct buf *a_bp; 2529 } */ *ap; 2530 { 2531 struct buf *bp = ap->a_bp; 2532 struct vnode *vp = ap->a_vp; 2533 ufs2_daddr_t blkno; 2534 int error; 2535 2536 if (bp->b_blkno == bp->b_lblkno) { 2537 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); 2538 bp->b_blkno = blkno; 2539 if (error) { 2540 bp->b_error = error; 2541 bp->b_ioflags |= BIO_ERROR; 2542 bufdone(bp); 2543 return (0); 2544 } 2545 if ((long)bp->b_blkno == -1) 2546 vfs_bio_clrbuf(bp); 2547 } 2548 if ((long)bp->b_blkno == -1) { 2549 bufdone(bp); 2550 return (0); 2551 } 2552 bp->b_iooffset = dbtob(bp->b_blkno); 2553 BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); 2554 return (0); 2555 } 2556 2557 /* 2558 * Print out the contents of an inode. 2559 */ 2560 static int 2561 ufs_print(ap) 2562 struct vop_print_args /* { 2563 struct vnode *a_vp; 2564 } */ *ap; 2565 { 2566 struct vnode *vp = ap->a_vp; 2567 struct inode *ip = VTOI(vp); 2568 2569 printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink, 2570 ip->i_effnlink, (intmax_t)ip->i_size); 2571 if (I_IS_UFS2(ip)) 2572 printf(", extsize %d", ip->i_din2->di_extsize); 2573 printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n", 2574 (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid, 2575 (u_int)ip->i_flags, PRINT_INODE_FLAGS); 2576 printf("\tino %lu, on dev %s", (u_long)ip->i_number, 2577 devtoname(ITODEV(ip))); 2578 if (vp->v_type == VFIFO) 2579 fifo_printinfo(vp); 2580 printf("\n"); 2581 return (0); 2582 } 2583 2584 /* 2585 * Close wrapper for fifos. 2586 * 2587 * Update the times on the inode then do device close. 
2588 */ 2589 static int 2590 ufsfifo_close(ap) 2591 struct vop_close_args /* { 2592 struct vnode *a_vp; 2593 int a_fflag; 2594 struct ucred *a_cred; 2595 struct thread *a_td; 2596 } */ *ap; 2597 { 2598 struct vnode *vp = ap->a_vp; 2599 int usecount; 2600 2601 VI_LOCK(vp); 2602 usecount = vp->v_usecount; 2603 if (usecount > 1) 2604 ufs_itimes_locked(vp); 2605 VI_UNLOCK(vp); 2606 return (fifo_specops.vop_close(ap)); 2607 } 2608 2609 /* 2610 * Return POSIX pathconf information applicable to ufs filesystems. 2611 */ 2612 static int 2613 ufs_pathconf(ap) 2614 struct vop_pathconf_args /* { 2615 struct vnode *a_vp; 2616 int a_name; 2617 int *a_retval; 2618 } */ *ap; 2619 { 2620 int error; 2621 2622 error = 0; 2623 switch (ap->a_name) { 2624 case _PC_LINK_MAX: 2625 *ap->a_retval = UFS_LINK_MAX; 2626 break; 2627 case _PC_NAME_MAX: 2628 *ap->a_retval = UFS_MAXNAMLEN; 2629 break; 2630 case _PC_PIPE_BUF: 2631 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 2632 *ap->a_retval = PIPE_BUF; 2633 else 2634 error = EINVAL; 2635 break; 2636 case _PC_CHOWN_RESTRICTED: 2637 *ap->a_retval = 1; 2638 break; 2639 case _PC_NO_TRUNC: 2640 *ap->a_retval = 1; 2641 break; 2642 #ifdef UFS_ACL 2643 case _PC_ACL_EXTENDED: 2644 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 2645 *ap->a_retval = 1; 2646 else 2647 *ap->a_retval = 0; 2648 break; 2649 case _PC_ACL_NFS4: 2650 if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) 2651 *ap->a_retval = 1; 2652 else 2653 *ap->a_retval = 0; 2654 break; 2655 #endif 2656 case _PC_ACL_PATH_MAX: 2657 #ifdef UFS_ACL 2658 if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) 2659 *ap->a_retval = ACL_MAX_ENTRIES; 2660 else 2661 *ap->a_retval = 3; 2662 #else 2663 *ap->a_retval = 3; 2664 #endif 2665 break; 2666 #ifdef MAC 2667 case _PC_MAC_PRESENT: 2668 if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) 2669 *ap->a_retval = 1; 2670 else 2671 *ap->a_retval = 0; 2672 break; 2673 #endif 2674 case _PC_MIN_HOLE_SIZE: 2675 *ap->a_retval = 
ap->a_vp->v_mount->mnt_stat.f_iosize; 2676 break; 2677 case _PC_PRIO_IO: 2678 *ap->a_retval = 0; 2679 break; 2680 case _PC_SYNC_IO: 2681 *ap->a_retval = 0; 2682 break; 2683 case _PC_ALLOC_SIZE_MIN: 2684 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 2685 break; 2686 case _PC_FILESIZEBITS: 2687 *ap->a_retval = 64; 2688 break; 2689 case _PC_REC_INCR_XFER_SIZE: 2690 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2691 break; 2692 case _PC_REC_MAX_XFER_SIZE: 2693 *ap->a_retval = -1; /* means ``unlimited'' */ 2694 break; 2695 case _PC_REC_MIN_XFER_SIZE: 2696 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2697 break; 2698 case _PC_REC_XFER_ALIGN: 2699 *ap->a_retval = PAGE_SIZE; 2700 break; 2701 case _PC_SYMLINK_MAX: 2702 *ap->a_retval = MAXPATHLEN; 2703 break; 2704 2705 default: 2706 error = vop_stdpathconf(ap); 2707 break; 2708 } 2709 return (error); 2710 } 2711 2712 /* 2713 * Initialize the vnode associated with a new inode, handle aliased 2714 * vnodes. 2715 */ 2716 int 2717 ufs_vinit(mntp, fifoops, vpp) 2718 struct mount *mntp; 2719 struct vop_vector *fifoops; 2720 struct vnode **vpp; 2721 { 2722 struct inode *ip; 2723 struct vnode *vp; 2724 2725 vp = *vpp; 2726 ASSERT_VOP_LOCKED(vp, "ufs_vinit"); 2727 ip = VTOI(vp); 2728 vp->v_type = IFTOVT(ip->i_mode); 2729 /* 2730 * Only unallocated inodes should be of type VNON. 2731 */ 2732 if (ip->i_mode != 0 && vp->v_type == VNON) 2733 return (EINVAL); 2734 if (vp->v_type == VFIFO) 2735 vp->v_op = fifoops; 2736 if (ip->i_number == UFS_ROOTINO) 2737 vp->v_vflag |= VV_ROOT; 2738 *vpp = vp; 2739 return (0); 2740 } 2741 2742 /* 2743 * Allocate a new inode. 2744 * Vnode dvp must be locked. 
 */
/*
 * ufs_makeinode: allocate and initialize a new inode of type/permissions
 * "mode" and enter it under the name in cnp in directory dvp.
 *
 * mode      file type and permission bits; IFREG is assumed when no type
 *           bits are set
 * dvp       parent directory vnode
 * vpp       set to NULL on entry and to the new vnode on success
 * cnp       name to enter (must carry HASBUF; checked under INVARIANTS)
 * callfunc  caller's name, used only in diagnostics
 *
 * Returns 0 on success.  Returns EINVAL when the parent's effective link
 * count is below 2, or the error from softdep_prelink(), UFS_VALLOC(), the
 * quota checks, UFS_UPDATE(), MAC/ACL setup or ufs_direnter().  On every
 * failure after allocation the new vnode is disposed of with vgone() and
 * vput().
 */
static int
ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
	int mode;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	const char *callfunc;
{
	struct inode *ip, *pdir;
	struct direct newdir;
	struct vnode *tvp;
	int error;

	pdir = VTOI(dvp);
#ifdef INVARIANTS
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("%s: no name", callfunc);
#endif
	*vpp = NULL;
	if ((mode & IFMT) == 0)
		mode |= IFREG;

	if (pdir->i_effnlink < 2) {
		print_bad_link_count(callfunc, dvp);
		return (EINVAL);
	}
	/*
	 * With journaled soft updates the pre-link step may fail; the only
	 * failure expected here (asserted by MPASS) is ERELOOKUP.
	 */
	if (DOINGSUJ(dvp)) {
		error = softdep_prelink(dvp, NULL, cnp);
		if (error != 0) {
			MPASS(error == ERELOOKUP);
			return (error);
		}
	}
	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
	if (error)
		return (error);
	ip = VTOI(tvp);
	/* The new inode takes its group from the parent directory. */
	ip->i_gid = pdir->i_gid;
	DIP_SET(ip, i_gid, pdir->i_gid);
#ifdef SUIDDIR
	{
#ifdef QUOTA
		struct ucred ucred, *ucp;
		gid_t ucred_group;
		ucp = cnp->cn_cred;
#endif
		/*
		 * If we are not the owner of the directory,
		 * and we are hacking owners here, (only do this where told to)
		 * and we are not giving it TO root, (would subvert quotas)
		 * then go ahead and give it to the other user.
		 * Note that this drops off the execute bits for security.
		 */
		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
		    (pdir->i_mode & ISUID) &&
		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
			ip->i_uid = pdir->i_uid;
			DIP_SET(ip, i_uid, ip->i_uid);
			mode &= ~07111;
#ifdef QUOTA
			/*
			 * Make sure the correct user gets charged
			 * for the space.
			 * Quickly knock up a dummy credential for the victim.
			 * XXX This seems to never be accessed out of our
			 * context so a stack variable is ok.
			 */
			refcount_init(&ucred.cr_ref, 1);
			ucred.cr_uid = ip->i_uid;
			ucred.cr_ngroups = 1;
			ucred.cr_groups = &ucred_group;
			ucred.cr_groups[0] = pdir->i_gid;
			ucp = &ucred;
#endif
		} else {
			ip->i_uid = cnp->cn_cred->cr_uid;
			DIP_SET(ip, i_uid, ip->i_uid);
		}

#ifdef QUOTA
		/* Charge the inode; on failure give the inode back. */
		if ((error = getinoquota(ip)) ||
		    (error = chkiq(ip, 1, ucp, 0))) {
			if (DOINGSOFTDEP(tvp))
				softdep_revert_link(pdir, ip);
			UFS_VFREE(tvp, ip->i_number, mode);
			vgone(tvp);
			vput(tvp);
			return (error);
		}
#endif
	}
#else	/* !SUIDDIR */
	ip->i_uid = cnp->cn_cred->cr_uid;
	DIP_SET(ip, i_uid, ip->i_uid);
#ifdef QUOTA
	/* Charge the inode; on failure give the inode back. */
	if ((error = getinoquota(ip)) ||
	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
		if (DOINGSOFTDEP(tvp))
			softdep_revert_link(pdir, ip);
		UFS_VFREE(tvp, ip->i_number, mode);
		vgone(tvp);
		vput(tvp);
		return (error);
	}
#endif
#endif	/* !SUIDDIR */
	vn_seqc_write_begin(tvp); /* Mostly to cover asserts */
	UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
	UFS_INODE_SET_MODE(ip, mode);
	DIP_SET(ip, i_mode, mode);
	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
	ip->i_effnlink = 1;
	ip->i_nlink = 1;
	DIP_SET(ip, i_nlink, 1);
	if (DOINGSOFTDEP(tvp))
		softdep_setup_create(VTOI(dvp), ip);
	/*
	 * Strip the set-group-id bit when the creator is not a member of
	 * the inode's group and the PRIV_VFS_SETGID check does not succeed.
	 */
	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
	    priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) {
		UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID);
		DIP_SET(ip, i_mode, ip->i_mode);
	}

	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_SET(ip, i_flags, ip->i_flags);
	}

	/*
	 * Make sure inode goes to disk before directory entry.
	 * The update is synchronous only when neither soft updates nor
	 * async mode is in effect for the new vnode.
	 */
	error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp));
	if (error)
		goto bad;
#ifdef MAC
	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
		    dvp, tvp, cnp);
		if (error)
			goto bad;
	}
#endif
#ifdef UFS_ACL
	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
		error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
		    cnp->cn_cred, curthread);
		if (error)
			goto bad;
	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode,
		    cnp->cn_cred, curthread);
		if (error)
			goto bad;
	}
#endif /* UFS_ACL */
	ufs_makedirentry(ip, cnp, &newdir);
	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
	if (error)
		goto bad;
	vn_seqc_write_end(tvp);
	*vpp = tvp;
	return (0);

bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	ip->i_effnlink = 0;
	ip->i_nlink = 0;
	DIP_SET(ip, i_nlink, 0);
	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
	if (DOINGSOFTDEP(tvp))
		softdep_revert_create(VTOI(dvp), ip);
	vn_seqc_write_end(tvp);
	vgone(tvp);
	vput(tvp);
	return (error);
}

/*
 * ioctl vnode op.  Only the data/hole seek commands are handled:
 * FIOSEEKDATA takes a shared vnode lock and calls ufs_bmap_seekdata()
 * (a failure to lock is reported as EBADF); FIOSEEKHOLE is delegated to
 * vn_bmap_seekhole().  Every other command returns ENOTTY.
 */
static int
ufs_ioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;
	int error;

	vp = ap->a_vp;
	switch (ap->a_command) {
	case FIOSEEKDATA:
		error = vn_lock(vp, LK_SHARED);
		if (error == 0) {
			error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data);
			VOP_UNLOCK(vp);
		} else
			error = EBADF;
		return (error);
	case FIOSEEKHOLE:
		return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data,
		    ap->a_cred));
	default:
		return (ENOTTY);
	}
}

/*
 * Page-cache read vnode op.  The vnode must have VIRF_PGREAD set
 * (asserted).  Requests larger than ptoa(io_hold_cnt), with a negative
 * offset, or with IO_DIRECT set are declined with EJUSTRETURN; everything
 * else is served by vn_read_from_obj().
 * NOTE(review): EJUSTRETURN presumably makes the caller fall back to the
 * regular read path -- confirm against the VOP_READ_PGCACHE caller.
 */
static int
ufs_read_pgcache(struct vop_read_pgcache_args *ap)
{
	struct uio *uio;
	struct vnode *vp;

	uio = ap->a_uio;
	vp = ap->a_vp;
	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);

	if (uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 ||
	    (ap->a_ioflag & IO_DIRECT) != 0)
		return (EJUSTRETURN);
	return (vn_read_from_obj(vp, uio));
}

/* Global vfs data structures for ufs.
 */
/*
 * Operations vector for ufs vnodes.  Operations not listed here fall
 * through to default_vnodeops via vop_default.
 */
struct vop_vector ufs_vnodeops = {
	.vop_default =		&default_vnodeops,
	/*
	 * These four are wired to VOP_PANIC in this vector.
	 * NOTE(review): presumably the concrete filesystem layered on ufs
	 * supplies them in its own vector -- confirm.
	 */
	.vop_fsync =		VOP_PANIC,
	.vop_read =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_write =		VOP_PANIC,
	.vop_accessx =		ufs_accessx,
	.vop_bmap =		ufs_bmap,
	.vop_fplookup_vexec =	ufs_fplookup_vexec,
	.vop_fplookup_symlink =	VOP_EAGAIN,
	/* Lookups go through the name cache first (see vop_lookup below). */
	.vop_cachedlookup =	ufs_lookup,
	.vop_close =		ufs_close,
	.vop_create =		ufs_create,
	.vop_stat =		ufs_stat,
	.vop_getattr =		ufs_getattr,
	.vop_inactive =		ufs_inactive,
	.vop_ioctl =		ufs_ioctl,
	.vop_link =		ufs_link,
	.vop_lookup =		vfs_cache_lookup,
	.vop_mmapped =		ufs_mmapped,
	.vop_mkdir =		ufs_mkdir,
	.vop_mknod =		ufs_mknod,
	.vop_need_inactive =	ufs_need_inactive,
	.vop_open =		ufs_open,
	.vop_pathconf =		ufs_pathconf,
	.vop_poll =		vop_stdpoll,
	.vop_print =		ufs_print,
	.vop_read_pgcache =	ufs_read_pgcache,
	.vop_readdir =		ufs_readdir,
	.vop_readlink =		ufs_readlink,
	.vop_reclaim =		ufs_reclaim,
	.vop_remove =		ufs_remove,
	.vop_rename =		ufs_rename,
	.vop_rmdir =		ufs_rmdir,
	.vop_setattr =		ufs_setattr,
#ifdef MAC
	.vop_setlabel =		vop_stdsetlabel_ea,
#endif
	.vop_strategy =		ufs_strategy,
	.vop_symlink =		ufs_symlink,
	.vop_whiteout =		ufs_whiteout,
#ifdef UFS_EXTATTR
	.vop_getextattr =	ufs_getextattr,
	.vop_deleteextattr =	ufs_deleteextattr,
	.vop_setextattr =	ufs_setextattr,
#endif
#ifdef UFS_ACL
	.vop_getacl =		ufs_getacl,
	.vop_setacl =		ufs_setacl,
	.vop_aclcheck =		ufs_aclcheck,
#endif
};
VFS_VOP_VECTOR_REGISTER(ufs_vnodeops);

/*
 * Operations vector for fifos living on ufs.  Operations not listed here
 * fall through to fifo_specops via vop_default; close goes through
 * ufsfifo_close.
 */
struct vop_vector ufs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_fsync =		VOP_PANIC,
	.vop_accessx =		ufs_accessx,
	.vop_close =		ufsfifo_close,
	.vop_getattr =		ufs_getattr,
	.vop_inactive =		ufs_inactive,
	.vop_pathconf =		ufs_pathconf,
	.vop_print =		ufs_print,
	.vop_read =		VOP_PANIC,
	.vop_reclaim =		ufs_reclaim,
	.vop_setattr =		ufs_setattr,
#ifdef MAC
	.vop_setlabel =		vop_stdsetlabel_ea,
#endif
	.vop_write =		VOP_PANIC,
#ifdef UFS_EXTATTR
	.vop_getextattr =	ufs_getextattr,
	.vop_deleteextattr =	ufs_deleteextattr,
	.vop_setextattr =	ufs_setextattr,
#endif
#ifdef UFS_ACL
	.vop_getacl =		ufs_getacl,
	.vop_setacl =		ufs_setacl,
	.vop_aclcheck =		ufs_aclcheck,
#endif
};
VFS_VOP_VECTOR_REGISTER(ufs_fifoops);