1 /* 2 * (The copyright below applies to ufs_access()) 3 * 4 * Copyright (c) 1982, 1986, 1989, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 37 */ 38 39 #include "opt_quota.h" 40 #include "opt_suiddir.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/uio.h> 45 #include <sys/conf.h> 46 #include <sys/kernel.h> 47 #include <sys/fcntl.h> 48 #include <sys/stat.h> 49 #include <sys/mount.h> 50 #include <sys/unistd.h> 51 #include <sys/vnode.h> 52 #include <sys/file.h> /* XXX */ 53 #include <sys/proc.h> 54 #include <sys/caps.h> 55 #include <sys/jail.h> 56 #include <sys/sysctl.h> 57 #include <sys/sfbuf.h> 58 #include <vm/vm_extern.h> 59 #include <vm/vm_object.h> 60 #include <vm/vm_page2.h> 61 62 #ifdef LWBUF_IS_OPTIMAL 63 64 static int vm_read_shortcut_enable = 1; 65 SYSCTL_INT(_vm, OID_AUTO, read_shortcut_enable, CTLFLAG_RW, 66 &vm_read_shortcut_enable, 0, "Direct vm_object vop_read shortcut"); 67 68 #endif 69 70 /* 71 * vop_helper_access() 72 * 73 * Provide standard UNIX semanics for VOP_ACCESS, but without the quota 74 * code. This procedure was basically pulled out of UFS. 75 */ 76 int 77 vop_helper_access(struct vop_access_args *ap, uid_t ino_uid, gid_t ino_gid, 78 mode_t ino_mode, u_int32_t ino_flags) 79 { 80 struct vnode *vp = ap->a_vp; 81 struct ucred *cred = ap->a_cred; 82 mode_t mask, mode = ap->a_mode; 83 gid_t *gp; 84 int i; 85 uid_t proc_uid; 86 gid_t proc_gid; 87 88 if (ap->a_flags & AT_EACCESS) { 89 proc_uid = cred->cr_uid; 90 proc_gid = cred->cr_gid; 91 } else { 92 proc_uid = cred->cr_ruid; 93 proc_gid = cred->cr_rgid; 94 } 95 96 /* 97 * Disallow write attempts on read-only filesystems; 98 * unless the file is a socket, fifo, or a block or 99 * character device resident on the filesystem. 100 */ 101 if (mode & VWRITE) { 102 switch (vp->v_type) { 103 case VDIR: 104 case VLNK: 105 case VREG: 106 case VDATABASE: 107 if (vp->v_mount->mnt_flag & MNT_RDONLY) 108 return (EROFS); 109 break; 110 default: 111 break; 112 } 113 } 114 115 /* If immutable bit set, nobody gets to write it. */ 116 if ((mode & VWRITE) && (ino_flags & IMMUTABLE)) 117 return (EPERM); 118 119 /* Otherwise, user id 0 always gets access. */ 120 if (proc_uid == 0) 121 return (0); 122 123 mask = 0; 124 125 /* Otherwise, check the owner. */ 126 if (proc_uid == ino_uid) { 127 if (mode & VEXEC) 128 mask |= S_IXUSR; 129 if (mode & VREAD) 130 mask |= S_IRUSR; 131 if (mode & VWRITE) 132 mask |= S_IWUSR; 133 return ((ino_mode & mask) == mask ? 0 : EACCES); 134 } 135 136 /* 137 * Otherwise, check the groups. 138 * We must special-case the primary group to, if needed, check against 139 * the real gid and not the effective one. 140 */ 141 if (proc_gid == ino_gid) { 142 if (mode & VEXEC) 143 mask |= S_IXGRP; 144 if (mode & VREAD) 145 mask |= S_IRGRP; 146 if (mode & VWRITE) 147 mask |= S_IWGRP; 148 return ((ino_mode & mask) == mask ? 0 : EACCES); 149 } 150 for (i = 1, gp = &cred->cr_groups[1]; i < cred->cr_ngroups; i++, gp++) 151 if (ino_gid == *gp) { 152 if (mode & VEXEC) 153 mask |= S_IXGRP; 154 if (mode & VREAD) 155 mask |= S_IRGRP; 156 if (mode & VWRITE) 157 mask |= S_IWGRP; 158 return ((ino_mode & mask) == mask ? 0 : EACCES); 159 } 160 161 /* Otherwise, check everyone else. */ 162 if (mode & VEXEC) 163 mask |= S_IXOTH; 164 if (mode & VREAD) 165 mask |= S_IROTH; 166 if (mode & VWRITE) 167 mask |= S_IWOTH; 168 return ((ino_mode & mask) == mask ? 0 : EACCES); 169 } 170 171 int 172 vop_helper_setattr_flags(u_int32_t *ino_flags, u_int32_t vaflags, 173 uid_t uid, struct ucred *cred) 174 { 175 int error; 176 177 /* 178 * If uid doesn't match only a privileged user can change the flags 179 */ 180 if (cred->cr_uid != uid && 181 (error = caps_priv_check(cred, SYSCAP_NOVFS_SYSFLAGS))) 182 { 183 return(error); 184 } 185 if (cred->cr_uid == 0 && 186 (!jailed(cred) || PRISON_CAP_ISSET(cred->cr_prison->pr_caps, 187 PRISON_CAP_VFS_CHFLAGS))) { 188 if ((*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND)) && 189 securelevel > 0) 190 return (EPERM); 191 *ino_flags = vaflags; 192 } else { 193 if (*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND) || 194 (vaflags & UF_SETTABLE) != vaflags) 195 return (EPERM); 196 *ino_flags &= SF_SETTABLE; 197 *ino_flags |= vaflags & UF_SETTABLE; 198 } 199 return(0); 200 } 201 202 /* 203 * This helper function may be used by VFSs to implement UNIX initial 204 * ownership semantics when creating new objects inside directories. 205 */ 206 uid_t 207 vop_helper_create_uid(struct mount *mp, mode_t dmode, uid_t duid, 208 struct ucred *cred, mode_t *modep) 209 { 210 #ifdef SUIDDIR 211 if ((mp->mnt_flag & MNT_SUIDDIR) && (dmode & S_ISUID) && 212 duid != cred->cr_uid && duid) { 213 *modep &= ~07111; 214 return(duid); 215 } 216 #endif 217 return(cred->cr_uid); 218 } 219 220 /* 221 * This helper may be used by VFSs to implement unix chmod semantics. 222 */ 223 int 224 vop_helper_chmod(struct vnode *vp, mode_t new_mode, struct ucred *cred, 225 uid_t cur_uid, gid_t cur_gid, mode_t *cur_modep) 226 { 227 int error; 228 229 if (cred->cr_uid != cur_uid) { 230 error = caps_priv_check(cred, SYSCAP_NOVFS_CHMOD); 231 if (error) 232 return (error); 233 } 234 if (cred->cr_uid) { 235 if (vp->v_type != VDIR && (*cur_modep & S_ISTXT)) 236 return (EFTYPE); 237 if (!groupmember(cur_gid, cred) && (*cur_modep & S_ISGID)) 238 return (EPERM); 239 } 240 *cur_modep &= ~ALLPERMS; 241 *cur_modep |= new_mode & ALLPERMS; 242 return(0); 243 } 244 245 /* 246 * This helper may be used by VFSs to implement unix chown semantics. 247 */ 248 int 249 vop_helper_chown(struct vnode *vp, uid_t new_uid, gid_t new_gid, 250 struct ucred *cred, 251 uid_t *cur_uidp, gid_t *cur_gidp, mode_t *cur_modep) 252 { 253 gid_t ogid; 254 uid_t ouid; 255 int error; 256 257 if (new_uid == (uid_t)VNOVAL) 258 new_uid = *cur_uidp; 259 if (new_gid == (gid_t)VNOVAL) 260 new_gid = *cur_gidp; 261 262 /* 263 * If we don't own the file, are trying to change the owner 264 * of the file, or are not a member of the target group, 265 * the caller must be privileged or the call fails. 266 */ 267 if ((cred->cr_uid != *cur_uidp || new_uid != *cur_uidp || 268 (new_gid != *cur_gidp && !(cred->cr_gid == new_gid || 269 groupmember(new_gid, cred)))) && 270 (error = caps_priv_check(cred, SYSCAP_NOVFS_CHOWN))) 271 { 272 return (error); 273 } 274 ogid = *cur_gidp; 275 ouid = *cur_uidp; 276 /* XXX QUOTA CODE */ 277 *cur_uidp = new_uid; 278 *cur_gidp = new_gid; 279 /* XXX QUOTA CODE */ 280 281 /* 282 * DragonFly clears both SUID and SGID if either the owner or 283 * group is changed and root isn't doing it. If root is doing 284 * it we do not clear SUID/SGID. 285 */ 286 if (cred->cr_uid != 0 && (ouid != new_uid || ogid != new_gid)) 287 *cur_modep &= ~(S_ISUID | S_ISGID); 288 return(0); 289 } 290 291 #ifdef LWBUF_IS_OPTIMAL 292 293 /* 294 * A VFS can call this function to try to dispose of a read request 295 * directly from the VM system, pretty much bypassing almost all VFS 296 * overhead except for atime updates. 297 * 298 * If 0 is returned some or all of the uio was handled. The caller must 299 * check the uio and handle the remainder. 300 * 301 * The caller must fail on a non-zero error. 302 */ 303 int 304 vop_helper_read_shortcut(struct vop_read_args *ap) 305 { 306 struct vnode *vp; 307 struct uio *uio; 308 struct lwbuf *lwb; 309 struct lwbuf lwb_cache; 310 vm_object_t obj; 311 vm_page_t m; 312 int offset; 313 int n; 314 int error; 315 316 vp = ap->a_vp; 317 uio = ap->a_uio; 318 319 /* 320 * We can't short-cut if there is no VM object or this is a special 321 * UIO_NOCOPY read (typically from VOP_STRATEGY()). We also can't 322 * do this if we cannot extract the filesize from the vnode. 323 */ 324 if (vm_read_shortcut_enable == 0) 325 return(0); 326 if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY) 327 return(0); 328 if (vp->v_filesize == NOOFFSET) 329 return(0); 330 if (uio->uio_resid == 0) 331 return(0); 332 333 /* 334 * Iterate the uio on a page-by-page basis 335 * 336 * XXX can we leave the object held shared during the uiomove()? 337 */ 338 obj = vp->v_object; 339 vm_object_hold_shared(obj); 340 341 error = 0; 342 while (uio->uio_resid && error == 0) { 343 offset = (int)uio->uio_offset & PAGE_MASK; 344 n = PAGE_SIZE - offset; 345 if (n > uio->uio_resid) 346 n = uio->uio_resid; 347 if (vp->v_filesize < uio->uio_offset) 348 break; 349 if (uio->uio_offset + n > vp->v_filesize) 350 n = vp->v_filesize - uio->uio_offset; 351 if (n == 0) 352 break; /* hit EOF */ 353 354 m = vm_page_lookup_sbusy_try(obj, OFF_TO_IDX(uio->uio_offset), 355 0, PAGE_SIZE); 356 if (error || m == NULL) { 357 error = 0; 358 break; 359 } 360 if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { 361 vm_page_sbusy_drop(m); 362 break; 363 } 364 lwb = lwbuf_alloc(m, &lwb_cache); 365 366 /* 367 * Use a no-fault uiomove() to avoid deadlocking against 368 * our VM object (which could livelock on the same object 369 * due to shared-vs-exclusive), or deadlocking against 370 * our busied page. Returns EFAULT on any fault which 371 * winds up diving a vnode. 372 */ 373 error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset, 374 n, uio); 375 376 vm_page_flag_set(m, PG_REFERENCED); 377 lwbuf_free(lwb); 378 vm_page_sbusy_drop(m); 379 } 380 vm_object_drop(obj); 381 382 /* 383 * Ignore EFAULT since we used uiomove_nofault(), causes caller 384 * to fall-back to normal code for this case. 385 */ 386 if (error == EFAULT) 387 error = 0; 388 389 return (error); 390 } 391 392 #else 393 394 /* 395 * If lwbuf's aren't optimal then it's best to just use the buffer 396 * cache. 397 */ 398 int 399 vop_helper_read_shortcut(struct vop_read_args *ap) 400 { 401 return(0); 402 } 403 404 #endif 405