1 /* 2 * (The copyright below applies to ufs_access()) 3 * 4 * Copyright (c) 1982, 1986, 1989, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 37 * $DragonFly: src/sys/kern/vfs_helper.c,v 1.5 2008/05/25 18:34:46 dillon Exp $ 38 */ 39 40 #include "opt_quota.h" 41 #include "opt_suiddir.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/conf.h> 46 #include <sys/kernel.h> 47 #include <sys/fcntl.h> 48 #include <sys/stat.h> 49 #include <sys/mount.h> 50 #include <sys/unistd.h> 51 #include <sys/vnode.h> 52 #include <sys/file.h> /* XXX */ 53 #include <sys/proc.h> 54 #include <sys/priv.h> 55 #include <sys/jail.h> 56 #include <sys/sysctl.h> 57 #include <sys/sfbuf.h> 58 #include <vm/vm_extern.h> 59 #include <vm/vm_object.h> 60 61 #ifdef LWBUF_IS_OPTIMAL 62 63 static int vm_read_shortcut_enable = 1; 64 static long vm_read_shortcut_count; 65 static long vm_read_shortcut_failed; 66 SYSCTL_INT(_vm, OID_AUTO, read_shortcut_enable, CTLFLAG_RW, 67 &vm_read_shortcut_enable, 0, "Direct vm_object vop_read shortcut"); 68 SYSCTL_LONG(_vm, OID_AUTO, read_shortcut_count, CTLFLAG_RW, 69 &vm_read_shortcut_count, 0, "Statistics"); 70 SYSCTL_LONG(_vm, OID_AUTO, read_shortcut_failed, CTLFLAG_RW, 71 &vm_read_shortcut_failed, 0, "Statistics"); 72 73 #endif 74 75 /* 76 * vop_helper_access() 77 * 78 * Provide standard UNIX semanics for VOP_ACCESS, but without the quota 79 * code. This procedure was basically pulled out of UFS. 80 */ 81 int 82 vop_helper_access(struct vop_access_args *ap, uid_t ino_uid, gid_t ino_gid, 83 mode_t ino_mode, u_int32_t ino_flags) 84 { 85 struct vnode *vp = ap->a_vp; 86 struct ucred *cred = ap->a_cred; 87 mode_t mask, mode = ap->a_mode; 88 gid_t *gp; 89 int i; 90 uid_t proc_uid; 91 gid_t proc_gid; 92 93 if (ap->a_flags & AT_EACCESS) { 94 proc_uid = cred->cr_uid; 95 proc_gid = cred->cr_gid; 96 } else { 97 proc_uid = cred->cr_ruid; 98 proc_gid = cred->cr_rgid; 99 } 100 101 /* 102 * Disallow write attempts on read-only filesystems; 103 * unless the file is a socket, fifo, or a block or 104 * character device resident on the filesystem. 105 */ 106 if (mode & VWRITE) { 107 switch (vp->v_type) { 108 case VDIR: 109 case VLNK: 110 case VREG: 111 case VDATABASE: 112 if (vp->v_mount->mnt_flag & MNT_RDONLY) 113 return (EROFS); 114 break; 115 default: 116 break; 117 } 118 } 119 120 /* If immutable bit set, nobody gets to write it. */ 121 if ((mode & VWRITE) && (ino_flags & IMMUTABLE)) 122 return (EPERM); 123 124 /* Otherwise, user id 0 always gets access. */ 125 if (proc_uid == 0) 126 return (0); 127 128 mask = 0; 129 130 /* Otherwise, check the owner. */ 131 if (proc_uid == ino_uid) { 132 if (mode & VEXEC) 133 mask |= S_IXUSR; 134 if (mode & VREAD) 135 mask |= S_IRUSR; 136 if (mode & VWRITE) 137 mask |= S_IWUSR; 138 return ((ino_mode & mask) == mask ? 0 : EACCES); 139 } 140 141 /* 142 * Otherwise, check the groups. 143 * We must special-case the primary group to, if needed, check against 144 * the real gid and not the effective one. 145 */ 146 if (proc_gid == ino_gid) { 147 if (mode & VEXEC) 148 mask |= S_IXGRP; 149 if (mode & VREAD) 150 mask |= S_IRGRP; 151 if (mode & VWRITE) 152 mask |= S_IWGRP; 153 return ((ino_mode & mask) == mask ? 0 : EACCES); 154 } 155 for (i = 1, gp = &cred->cr_groups[1]; i < cred->cr_ngroups; i++, gp++) 156 if (ino_gid == *gp) { 157 if (mode & VEXEC) 158 mask |= S_IXGRP; 159 if (mode & VREAD) 160 mask |= S_IRGRP; 161 if (mode & VWRITE) 162 mask |= S_IWGRP; 163 return ((ino_mode & mask) == mask ? 0 : EACCES); 164 } 165 166 /* Otherwise, check everyone else. */ 167 if (mode & VEXEC) 168 mask |= S_IXOTH; 169 if (mode & VREAD) 170 mask |= S_IROTH; 171 if (mode & VWRITE) 172 mask |= S_IWOTH; 173 return ((ino_mode & mask) == mask ? 0 : EACCES); 174 } 175 176 int 177 vop_helper_setattr_flags(u_int32_t *ino_flags, u_int32_t vaflags, 178 uid_t uid, struct ucred *cred) 179 { 180 int error; 181 182 /* 183 * If uid doesn't match only a privileged user can change the flags 184 */ 185 if (cred->cr_uid != uid && 186 (error = priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0))) { 187 return(error); 188 } 189 if (cred->cr_uid == 0 && 190 (!jailed(cred)|| jail_chflags_allowed)) { 191 if ((*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND)) && 192 securelevel > 0) 193 return (EPERM); 194 *ino_flags = vaflags; 195 } else { 196 if (*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND) || 197 (vaflags & UF_SETTABLE) != vaflags) 198 return (EPERM); 199 *ino_flags &= SF_SETTABLE; 200 *ino_flags |= vaflags & UF_SETTABLE; 201 } 202 return(0); 203 } 204 205 /* 206 * This helper function may be used by VFSs to implement UNIX initial 207 * ownership semantics when creating new objects inside directories. 208 */ 209 uid_t 210 vop_helper_create_uid(struct mount *mp, mode_t dmode, uid_t duid, 211 struct ucred *cred, mode_t *modep) 212 { 213 #ifdef SUIDDIR 214 if ((mp->mnt_flag & MNT_SUIDDIR) && (dmode & S_ISUID) && 215 duid != cred->cr_uid && duid) { 216 *modep &= ~07111; 217 return(duid); 218 } 219 #endif 220 return(cred->cr_uid); 221 } 222 223 /* 224 * This helper may be used by VFSs to implement unix chmod semantics. 225 */ 226 int 227 vop_helper_chmod(struct vnode *vp, mode_t new_mode, struct ucred *cred, 228 uid_t cur_uid, gid_t cur_gid, mode_t *cur_modep) 229 { 230 int error; 231 232 if (cred->cr_uid != cur_uid) { 233 error = priv_check_cred(cred, PRIV_VFS_CHMOD, 0); 234 if (error) 235 return (error); 236 } 237 if (cred->cr_uid) { 238 if (vp->v_type != VDIR && (*cur_modep & S_ISTXT)) 239 return (EFTYPE); 240 if (!groupmember(cur_gid, cred) && (*cur_modep & S_ISGID)) 241 return (EPERM); 242 } 243 *cur_modep &= ~ALLPERMS; 244 *cur_modep |= new_mode & ALLPERMS; 245 return(0); 246 } 247 248 /* 249 * This helper may be used by VFSs to implement unix chown semantics. 250 */ 251 int 252 vop_helper_chown(struct vnode *vp, uid_t new_uid, gid_t new_gid, 253 struct ucred *cred, 254 uid_t *cur_uidp, gid_t *cur_gidp, mode_t *cur_modep) 255 { 256 gid_t ogid; 257 uid_t ouid; 258 int error; 259 260 if (new_uid == (uid_t)VNOVAL) 261 new_uid = *cur_uidp; 262 if (new_gid == (gid_t)VNOVAL) 263 new_gid = *cur_gidp; 264 265 /* 266 * If we don't own the file, are trying to change the owner 267 * of the file, or are not a member of the target group, 268 * the caller must be privileged or the call fails. 269 */ 270 if ((cred->cr_uid != *cur_uidp || new_uid != *cur_uidp || 271 (new_gid != *cur_gidp && !(cred->cr_gid == new_gid || 272 groupmember(new_gid, cred)))) && 273 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) { 274 return (error); 275 } 276 ogid = *cur_gidp; 277 ouid = *cur_uidp; 278 /* XXX QUOTA CODE */ 279 *cur_uidp = new_uid; 280 *cur_gidp = new_gid; 281 /* XXX QUOTA CODE */ 282 283 /* 284 * DragonFly clears both SUID and SGID if either the owner or 285 * group is changed and root isn't doing it. If root is doing 286 * it we do not clear SUID/SGID. 287 */ 288 if (cred->cr_uid != 0 && (ouid != new_uid || ogid != new_gid)) 289 *cur_modep &= ~(S_ISUID | S_ISGID); 290 return(0); 291 } 292 293 #ifdef LWBUF_IS_OPTIMAL 294 295 /* 296 * A VFS can call this function to try to dispose of a read request 297 * directly from the VM system, pretty much bypassing almost all VFS 298 * overhead except for atime updates. 299 * 300 * If 0 is returned some or all of the uio was handled. The caller must 301 * check the uio and handle the remainder. 302 * 303 * The caller must fail on a non-zero error. 304 */ 305 int 306 vop_helper_read_shortcut(struct vop_read_args *ap) 307 { 308 struct vnode *vp; 309 struct uio *uio; 310 struct lwbuf *lwb; 311 struct lwbuf lwb_cache; 312 vm_object_t obj; 313 vm_page_t m; 314 int offset; 315 int n; 316 int error; 317 318 vp = ap->a_vp; 319 uio = ap->a_uio; 320 321 /* 322 * We can't short-cut if there is no VM object or this is a special 323 * UIO_NOCOPY read (typically from VOP_STRATEGY()). We also can't 324 * do this if we cannot extract the filesize from the vnode. 325 */ 326 if (vm_read_shortcut_enable == 0) 327 return(0); 328 if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY) 329 return(0); 330 if (vp->v_filesize == NOOFFSET) 331 return(0); 332 if (uio->uio_resid == 0) 333 return(0); 334 335 /* 336 * Iterate the uio on a page-by-page basis 337 * 338 * XXX can we leave the object held shared during the uiomove()? 339 */ 340 ++vm_read_shortcut_count; 341 obj = vp->v_object; 342 vm_object_hold_shared(obj); 343 344 error = 0; 345 while (uio->uio_resid && error == 0) { 346 offset = (int)uio->uio_offset & PAGE_MASK; 347 n = PAGE_SIZE - offset; 348 if (n > uio->uio_resid) 349 n = uio->uio_resid; 350 if (vp->v_filesize < uio->uio_offset) 351 break; 352 if (uio->uio_offset + n > vp->v_filesize) 353 n = vp->v_filesize - uio->uio_offset; 354 if (n == 0) 355 break; /* hit EOF */ 356 357 m = vm_page_lookup_busy_try(obj, OFF_TO_IDX(uio->uio_offset), 358 FALSE, &error); 359 if (error || m == NULL) { 360 ++vm_read_shortcut_failed; 361 error = 0; 362 break; 363 } 364 if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { 365 ++vm_read_shortcut_failed; 366 vm_page_wakeup(m); 367 break; 368 } 369 lwb = lwbuf_alloc(m, &lwb_cache); 370 371 /* 372 * Use a no-fault uiomove() to avoid deadlocking against 373 * our VM object (which could livelock on the same object 374 * due to shared-vs-exclusive), or deadlocking against 375 * our busied page. Returns EFAULT on any fault which 376 * winds up diving a vnode. 377 */ 378 error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset, 379 n, uio); 380 381 vm_page_flag_set(m, PG_REFERENCED); 382 lwbuf_free(lwb); 383 vm_page_wakeup(m); 384 } 385 vm_object_drop(obj); 386 387 /* 388 * Ignore EFAULT since we used uiomove_nofault(), causes caller 389 * to fall-back to normal code for this case. 390 */ 391 if (error == EFAULT) 392 error = 0; 393 394 return (error); 395 } 396 397 #else 398 399 /* 400 * If lwbuf's aren't optimal then it's best to just use the buffer 401 * cache. 402 */ 403 int 404 vop_helper_read_shortcut(struct vop_read_args *ap) 405 { 406 return(0); 407 } 408 409 #endif 410