xref: /dragonfly/sys/kern/vfs_helper.c (revision dc71b7ab)
1 /*
2  * (The copyright below applies to ufs_access())
3  *
4  * Copyright (c) 1982, 1986, 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
37  * $DragonFly: src/sys/kern/vfs_helper.c,v 1.5 2008/05/25 18:34:46 dillon Exp $
38  */
39 
40 #include "opt_quota.h"
41 #include "opt_suiddir.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/conf.h>
46 #include <sys/kernel.h>
47 #include <sys/fcntl.h>
48 #include <sys/stat.h>
49 #include <sys/mount.h>
50 #include <sys/unistd.h>
51 #include <sys/vnode.h>
52 #include <sys/file.h>		/* XXX */
53 #include <sys/proc.h>
54 #include <sys/priv.h>
55 #include <sys/jail.h>
56 #include <sys/sysctl.h>
57 #include <sys/sfbuf.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_object.h>
60 
61 #ifdef LWBUF_IS_OPTIMAL
62 
63 static int vm_read_shortcut_enable = 1;
64 static long vm_read_shortcut_count;
65 static long vm_read_shortcut_failed;
66 SYSCTL_INT(_vm, OID_AUTO, read_shortcut_enable, CTLFLAG_RW,
67 	  &vm_read_shortcut_enable, 0, "Direct vm_object vop_read shortcut");
68 SYSCTL_LONG(_vm, OID_AUTO, read_shortcut_count, CTLFLAG_RW,
69 	  &vm_read_shortcut_count, 0, "Statistics");
70 SYSCTL_LONG(_vm, OID_AUTO, read_shortcut_failed, CTLFLAG_RW,
71 	  &vm_read_shortcut_failed, 0, "Statistics");
72 
73 #endif
74 
75 /*
76  * vop_helper_access()
77  *
78  *	Provide standard UNIX semanics for VOP_ACCESS, but without the quota
79  *	code.  This procedure was basically pulled out of UFS.
80  */
81 int
82 vop_helper_access(struct vop_access_args *ap, uid_t ino_uid, gid_t ino_gid,
83 		  mode_t ino_mode, u_int32_t ino_flags)
84 {
85 	struct vnode *vp = ap->a_vp;
86 	struct ucred *cred = ap->a_cred;
87 	mode_t mask, mode = ap->a_mode;
88 	gid_t *gp;
89 	int i;
90 	uid_t proc_uid;
91 	gid_t proc_gid;
92 
93 	if (ap->a_flags & AT_EACCESS) {
94 		proc_uid = cred->cr_uid;
95 		proc_gid = cred->cr_gid;
96 	} else {
97 		proc_uid = cred->cr_ruid;
98 		proc_gid = cred->cr_rgid;
99 	}
100 
101 	/*
102 	 * Disallow write attempts on read-only filesystems;
103 	 * unless the file is a socket, fifo, or a block or
104 	 * character device resident on the filesystem.
105 	 */
106 	if (mode & VWRITE) {
107 		switch (vp->v_type) {
108 		case VDIR:
109 		case VLNK:
110 		case VREG:
111 		case VDATABASE:
112 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
113 				return (EROFS);
114 			break;
115 		default:
116 			break;
117 		}
118 	}
119 
120 	/* If immutable bit set, nobody gets to write it. */
121 	if ((mode & VWRITE) && (ino_flags & IMMUTABLE))
122 		return (EPERM);
123 
124 	/* Otherwise, user id 0 always gets access. */
125 	if (proc_uid == 0)
126 		return (0);
127 
128 	mask = 0;
129 
130 	/* Otherwise, check the owner. */
131 	if (proc_uid == ino_uid) {
132 		if (mode & VEXEC)
133 			mask |= S_IXUSR;
134 		if (mode & VREAD)
135 			mask |= S_IRUSR;
136 		if (mode & VWRITE)
137 			mask |= S_IWUSR;
138 		return ((ino_mode & mask) == mask ? 0 : EACCES);
139 	}
140 
141 	/*
142 	 * Otherwise, check the groups.
143 	 * We must special-case the primary group to, if needed, check against
144 	 * the real gid and not the effective one.
145 	 */
146 	if (proc_gid == ino_gid) {
147 		if (mode & VEXEC)
148 			mask |= S_IXGRP;
149 		if (mode & VREAD)
150 			mask |= S_IRGRP;
151 		if (mode & VWRITE)
152 			mask |= S_IWGRP;
153 		return ((ino_mode & mask) == mask ? 0 : EACCES);
154 	}
155 	for (i = 1, gp = &cred->cr_groups[1]; i < cred->cr_ngroups; i++, gp++)
156 		if (ino_gid == *gp) {
157 			if (mode & VEXEC)
158 				mask |= S_IXGRP;
159 			if (mode & VREAD)
160 				mask |= S_IRGRP;
161 			if (mode & VWRITE)
162 				mask |= S_IWGRP;
163 			return ((ino_mode & mask) == mask ? 0 : EACCES);
164 		}
165 
166 	/* Otherwise, check everyone else. */
167 	if (mode & VEXEC)
168 		mask |= S_IXOTH;
169 	if (mode & VREAD)
170 		mask |= S_IROTH;
171 	if (mode & VWRITE)
172 		mask |= S_IWOTH;
173 	return ((ino_mode & mask) == mask ? 0 : EACCES);
174 }
175 
176 int
177 vop_helper_setattr_flags(u_int32_t *ino_flags, u_int32_t vaflags,
178 			 uid_t uid, struct ucred *cred)
179 {
180 	int error;
181 
182 	/*
183 	 * If uid doesn't match only a privileged user can change the flags
184 	 */
185 	if (cred->cr_uid != uid &&
186 	    (error = priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0))) {
187 		return(error);
188 	}
189 	if (cred->cr_uid == 0 &&
190 	    (!jailed(cred)|| jail_chflags_allowed)) {
191 		if ((*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND)) &&
192 		    securelevel > 0)
193 			return (EPERM);
194 		*ino_flags = vaflags;
195 	} else {
196 		if (*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND) ||
197 		    (vaflags & UF_SETTABLE) != vaflags)
198 			return (EPERM);
199 		*ino_flags &= SF_SETTABLE;
200 		*ino_flags |= vaflags & UF_SETTABLE;
201 	}
202 	return(0);
203 }
204 
205 /*
206  * This helper function may be used by VFSs to implement UNIX initial
207  * ownership semantics when creating new objects inside directories.
208  */
209 uid_t
210 vop_helper_create_uid(struct mount *mp, mode_t dmode, uid_t duid,
211 		      struct ucred *cred, mode_t *modep)
212 {
213 #ifdef SUIDDIR
214 	if ((mp->mnt_flag & MNT_SUIDDIR) && (dmode & S_ISUID) &&
215 	    duid != cred->cr_uid && duid) {
216 		*modep &= ~07111;
217 		return(duid);
218 	}
219 #endif
220 	return(cred->cr_uid);
221 }
222 
223 /*
224  * This helper may be used by VFSs to implement unix chmod semantics.
225  */
226 int
227 vop_helper_chmod(struct vnode *vp, mode_t new_mode, struct ucred *cred,
228 		 uid_t cur_uid, gid_t cur_gid, mode_t *cur_modep)
229 {
230 	int error;
231 
232 	if (cred->cr_uid != cur_uid) {
233 		error = priv_check_cred(cred, PRIV_VFS_CHMOD, 0);
234 		if (error)
235 			return (error);
236 	}
237 	if (cred->cr_uid) {
238 		if (vp->v_type != VDIR && (*cur_modep & S_ISTXT))
239 			return (EFTYPE);
240 		if (!groupmember(cur_gid, cred) && (*cur_modep & S_ISGID))
241 			return (EPERM);
242 	}
243 	*cur_modep &= ~ALLPERMS;
244 	*cur_modep |= new_mode & ALLPERMS;
245 	return(0);
246 }
247 
248 /*
249  * This helper may be used by VFSs to implement unix chown semantics.
250  */
251 int
252 vop_helper_chown(struct vnode *vp, uid_t new_uid, gid_t new_gid,
253 		 struct ucred *cred,
254 		 uid_t *cur_uidp, gid_t *cur_gidp, mode_t *cur_modep)
255 {
256 	gid_t ogid;
257 	uid_t ouid;
258 	int error;
259 
260 	if (new_uid == (uid_t)VNOVAL)
261 		new_uid = *cur_uidp;
262 	if (new_gid == (gid_t)VNOVAL)
263 		new_gid = *cur_gidp;
264 
265 	/*
266 	 * If we don't own the file, are trying to change the owner
267 	 * of the file, or are not a member of the target group,
268 	 * the caller must be privileged or the call fails.
269 	 */
270 	if ((cred->cr_uid != *cur_uidp || new_uid != *cur_uidp ||
271 	    (new_gid != *cur_gidp && !(cred->cr_gid == new_gid ||
272 	    groupmember(new_gid, cred)))) &&
273 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) {
274 		return (error);
275 	}
276 	ogid = *cur_gidp;
277 	ouid = *cur_uidp;
278 	/* XXX QUOTA CODE */
279 	*cur_uidp = new_uid;
280 	*cur_gidp = new_gid;
281 	/* XXX QUOTA CODE */
282 
283 	/*
284 	 * DragonFly clears both SUID and SGID if either the owner or
285 	 * group is changed and root isn't doing it.  If root is doing
286 	 * it we do not clear SUID/SGID.
287 	 */
288 	if (cred->cr_uid != 0 && (ouid != new_uid || ogid != new_gid))
289 		*cur_modep &= ~(S_ISUID | S_ISGID);
290 	return(0);
291 }
292 
293 #ifdef LWBUF_IS_OPTIMAL
294 
295 /*
296  * A VFS can call this function to try to dispose of a read request
297  * directly from the VM system, pretty much bypassing almost all VFS
298  * overhead except for atime updates.
299  *
300  * If 0 is returned some or all of the uio was handled.  The caller must
301  * check the uio and handle the remainder.
302  *
303  * The caller must fail on a non-zero error.
304  */
305 int
306 vop_helper_read_shortcut(struct vop_read_args *ap)
307 {
308 	struct vnode *vp;
309 	struct uio *uio;
310 	struct lwbuf *lwb;
311 	struct lwbuf lwb_cache;
312 	vm_object_t obj;
313 	vm_page_t m;
314 	int offset;
315 	int n;
316 	int error;
317 
318 	vp = ap->a_vp;
319 	uio = ap->a_uio;
320 
321 	/*
322 	 * We can't short-cut if there is no VM object or this is a special
323 	 * UIO_NOCOPY read (typically from VOP_STRATEGY()).  We also can't
324 	 * do this if we cannot extract the filesize from the vnode.
325 	 */
326 	if (vm_read_shortcut_enable == 0)
327 		return(0);
328 	if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY)
329 		return(0);
330 	if (vp->v_filesize == NOOFFSET)
331 		return(0);
332 	if (uio->uio_resid == 0)
333 		return(0);
334 
335 	/*
336 	 * Iterate the uio on a page-by-page basis
337 	 *
338 	 * XXX can we leave the object held shared during the uiomove()?
339 	 */
340 	++vm_read_shortcut_count;
341 	obj = vp->v_object;
342 	vm_object_hold_shared(obj);
343 
344 	error = 0;
345 	while (uio->uio_resid && error == 0) {
346 		offset = (int)uio->uio_offset & PAGE_MASK;
347 		n = PAGE_SIZE - offset;
348 		if (n > uio->uio_resid)
349 			n = uio->uio_resid;
350 		if (vp->v_filesize < uio->uio_offset)
351 			break;
352 		if (uio->uio_offset + n > vp->v_filesize)
353 			n = vp->v_filesize - uio->uio_offset;
354 		if (n == 0)
355 			break;	/* hit EOF */
356 
357 		m = vm_page_lookup_busy_try(obj, OFF_TO_IDX(uio->uio_offset),
358 					    FALSE, &error);
359 		if (error || m == NULL) {
360 			++vm_read_shortcut_failed;
361 			error = 0;
362 			break;
363 		}
364 		if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
365 			++vm_read_shortcut_failed;
366 			vm_page_wakeup(m);
367 			break;
368 		}
369 		lwb = lwbuf_alloc(m, &lwb_cache);
370 
371 		/*
372 		 * Use a no-fault uiomove() to avoid deadlocking against
373 		 * our VM object (which could livelock on the same object
374 		 * due to shared-vs-exclusive), or deadlocking against
375 		 * our busied page.  Returns EFAULT on any fault which
376 		 * winds up diving a vnode.
377 		 */
378 		error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset,
379 					n, uio);
380 
381 		vm_page_flag_set(m, PG_REFERENCED);
382 		lwbuf_free(lwb);
383 		vm_page_wakeup(m);
384 	}
385 	vm_object_drop(obj);
386 
387 	/*
388 	 * Ignore EFAULT since we used uiomove_nofault(), causes caller
389 	 * to fall-back to normal code for this case.
390 	 */
391 	if (error == EFAULT)
392 		error = 0;
393 
394 	return (error);
395 }
396 
397 #else
398 
/*
 * Fallback used when lwbufs are not optimal: do nothing and report
 * success so the caller services the whole request through its normal
 * (buffer cache) read path.  The uio is left untouched.
 */
int
vop_helper_read_shortcut(struct vop_read_args *ap)
{
	(void)ap;	/* intentionally unused */
	return (0);
}
408 
409 #endif
410