xref: /dragonfly/sys/kern/kern_caps.c (revision 60e242c5)
1 /*
2  * Copyright (c) 2023 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/acct.h>
37 #include <sys/caps.h>
38 #include <sys/systm.h>
39 #include <sys/sysmsg.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/proc.h>
43 #include <sys/malloc.h>
44 #include <sys/pioctl.h>
45 #include <sys/resourcevar.h>
46 #include <sys/jail.h>
47 #include <sys/lockf.h>
48 #include <sys/spinlock.h>
49 #include <sys/sysctl.h>
50 
51 #include <sys/spinlock2.h>
52 
53 __read_mostly static int caps_available = 1;
54 SYSCTL_INT(_kern, OID_AUTO, caps_available,
55 	   CTLFLAG_RW, &caps_available, 0 , "");
56 
57 /*
58  * Quick check for cap restriction in cred (no bounds checks),
59  * return cap flags.
60  */
61 static __inline
62 int
63 caps_check_cred(struct ucred *cred, int cap)
64 {
65 	__syscapelm_t elm;
66 
67 	cap &= ~__SYSCAP_XFLAGS;
68 	elm = cred->cr_caps.caps[__SYSCAP_INDEX(cap)];
69 
70 	return ((int)(elm >> __SYSCAP_SHIFT(cap)) & __SYSCAP_ALL);
71 }
72 
73 /*
74  * int syscap_get(int cap, void *data, size_t bytes);
75  */
76 int
77 sys_syscap_get(struct sysmsg *sysmsg, const struct syscap_get_args *uap)
78 {
79 	struct ucred *cred;
80 	int cap = uap->cap & ~__SYSCAP_XFLAGS;
81 	int res;
82 	int error;
83 
84 	if (cap < 0)
85 		return EINVAL;
86 	if (cap >= __SYSCAP_COUNT)
87 		return EOPNOTSUPP;
88 	if (uap->bytes && uap->bytes < sizeof(syscap_base_t))
89 		return EINVAL;
90 	error = 0;
91 
92 	/*
93 	 * Get capability restriction from parent pid
94 	 */
95 	if (uap->cap & __SYSCAP_INPARENT) {
96 		struct proc *pp;
97 
98 		pp = pfind(curproc->p_ppid);
99 		if (pp == NULL)
100 			return EINVAL;
101                 lwkt_gettoken_shared(&pp->p_token);	/* protect cred */
102 		cred = pp->p_ucred;
103 		crhold(cred);
104                 lwkt_reltoken(&pp->p_token);
105 		PRELE(pp);				/* from pfind */
106 	} else {
107 		cred = curthread->td_ucred;
108 	}
109 
110 	/*
111 	 * No resource data by default
112 	 */
113 	if (uap->data && uap->bytes) {
114 		syscap_base_t base;
115 
116 		base.res = SYSCAP_RESOURCE_EOF;
117 		base.len = sizeof(base);
118 		error = copyout(&base, uap->data, sizeof(base));
119 	}
120 
121 	/*
122 	 * Get resource bits
123 	 */
124 	if (error == 0) {
125 		res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
126 			    __SYSCAP_SHIFT(cap));
127 		res &= __SYSCAP_BITS_MASK;
128 		sysmsg->sysmsg_result = res;
129 	}
130 
131 	if (uap->cap & __SYSCAP_INPARENT)
132 		crfree(cred);
133 
134 	return error;
135 }
136 
137 /*
138  * int syscap_set(int cap, int flags, const void *data, size_t bytes)
139  */
140 int
141 sys_syscap_set(struct sysmsg *sysmsg, const struct syscap_set_args *uap)
142 {
143 	struct ucred *cred;
144 	struct proc *pp;
145 	int cap = uap->cap & ~__SYSCAP_XFLAGS;
146 	int res;
147 	int error;
148 	int flags = uap->flags;
149 	__syscapelm_t anymask;
150 
151 	if (cap < 0 || cap >= __SYSCAP_COUNT)
152 		return EINVAL;
153 	if (flags & ~__SYSCAP_BITS_MASK)
154 		return EINVAL;
155 	if (uap->data || uap->bytes)
156 		return EINVAL;
157 	error = 0;
158 
159 	/*
160 	 * Get capability restriction from parent pid.  We can only
161 	 * mess with the parent if it is running under the same userid
162 	 * and prison.
163 	 */
164 	if (uap->cap & __SYSCAP_INPARENT) {
165 		pp = pfind(curproc->p_ppid);
166 		if (pp == NULL)
167 			return EINVAL;
168 		if (pp->p_ucred->cr_uid != curproc->p_ucred->cr_uid ||
169 		    pp->p_ucred->cr_prison != curproc->p_ucred->cr_prison)
170 		{
171 			PRELE(pp);		/* from pfind */
172 			return EINVAL;
173 		}
174 	} else {
175 		pp = curproc;
176 	}
177 	lwkt_gettoken(&pp->p_token);		/* protect p_ucred */
178 	cred = pp->p_ucred;
179 
180 	/*
181 	 * Calculate normalized value for requested capability and check
182 	 * against the stored value.  If they do not match, wire-or to
183 	 * add the bits and set appropriate SYSCAP_ANY bits indicating
184 	 * deviation from the root syscaps.
185 	 */
186 	res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
187 		    __SYSCAP_SHIFT(cap));
188 	res &= __SYSCAP_BITS_MASK;
189 
190 	/*
191 	 * Handle resource data, if any
192 	 */
193 
194 	/*
195 	 * Set resource bits
196 	 */
197 	if (error == 0) {
198 		if (res != (res | flags)) {
199 			cred = cratom_proc(pp);
200 			anymask = (__syscapelm_t)flags <<
201 				  __SYSCAP_SHIFT(SYSCAP_ANY);
202 			atomic_set_64(&cred->cr_caps.caps[0], anymask);
203 			atomic_set_64(&cred->cr_caps.caps[ __SYSCAP_INDEX(cap)],
204 				      ((__syscapelm_t)uap->flags <<
205 				       __SYSCAP_SHIFT(cap)));
206 		}
207 		sysmsg->sysmsg_result = res | uap->flags;
208 	}
209 
210 	/*
211 	 * Cleanup
212 	 */
213 	lwkt_reltoken(&pp->p_token);
214 	if (uap->cap & __SYSCAP_INPARENT)
215 		PRELE(pp);			/* from pfind */
216 	return error;
217 }
218 
219 /*
220  * Adjust capabilities for exec after the point of no return.
221  *
222  * This function shifts the EXEC bits into the SELF bits and
223  * replicates the EXEC bits.
224  */
225 void
226 caps_exec(struct proc *p)
227 {
228 	struct ucred *cred;
229 	__syscapelm_t elm;
230 	int changed = 0;
231 	int i;
232 
233 	/*
234 	 * Dry-run caps inheritance, did anything change?
235 	 *
236 	 * caps inheritance basically shifts the EXEC bits into the SELF bits,
237 	 * and then replicates the EXEC bits.  We have to avoid shifting any
238 	 * 1's from the SELF bits into the adjacent EXEC bits that may have
239 	 * previously been 0.
240 	 */
241 	cred = p->p_ucred;
242 	for (i = 0; i < __SYSCAP_NUMELMS; ++i) {
243 		elm = cred->cr_caps.caps[i];
244 		elm = ((elm & __SYSCAP_EXECMASK) >> 1) |
245 		      (elm & __SYSCAP_EXECMASK);
246 		if (elm != cred->cr_caps.caps[i])
247 			changed = 1;
248 	}
249 
250 	/*
251 	 * Yes, setup a new ucred for the process
252 	 */
253 	if (changed) {
254 		cratom_proc(p);
255 		cred = p->p_ucred;
256 		for (i = 0; i < __SYSCAP_NUMELMS; ++i) {
257 			elm = cred->cr_caps.caps[i];
258 			elm = ((elm & __SYSCAP_EXECMASK) >> 1) |
259 			      (elm & __SYSCAP_EXECMASK);
260 			cred->cr_caps.caps[i] = elm;
261 		}
262 	}
263 }
264 
265 /*
266  * Return the raw flags for the requested capability.
267  */
268 int
269 caps_get(struct ucred *cred, int cap)
270 {
271 	int res;
272 
273 	cap &= ~__SYSCAP_XFLAGS;
274 	if (cap < 0 || cap >= __SYSCAP_COUNT)
275 		return 0;
276 	res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
277 		    __SYSCAP_SHIFT(cap));
278 	res &= __SYSCAP_BITS_MASK;
279 
280 	return res;
281 }
282 
283 /*
284  * Set capability restriction bits
285  */
286 void
287 caps_set_locked(struct proc *p, int cap, int flags)
288 {
289 	struct ucred *cred;
290 	__syscapelm_t elm;
291 
292 	cap &= ~__SYSCAP_XFLAGS;
293 	if (cap < 0 || cap >= __SYSCAP_COUNT)
294 		return;
295 
296 	cred = cratom_proc(p);
297 	elm = (__syscapelm_t)flags << __SYSCAP_SHIFT(SYSCAP_ANY);
298 	atomic_set_64(&cred->cr_caps.caps[0], elm);
299 	elm = (__syscapelm_t)flags << __SYSCAP_SHIFT(cap);
300 	atomic_set_64(&cred->cr_caps.caps[ __SYSCAP_INDEX(cap)], elm);
301 }
302 
303 /*
304  * Returns error code if restricted, 0 on success.
305  *
306  * These are more sophisticated versions of the baseline caps checks.
307  * cr_prison capabilities are also checked, and some capabilities may
308  * imply several tests.
309  */
310 int
311 caps_priv_check(struct ucred *cred, int cap)
312 {
313 	int res;
314 
315 	if (cred == NULL) {
316 		if (cap & __SYSCAP_NULLCRED)
317 			return 0;
318 		return EPERM;
319 	}
320 
321 	/*
322 	 * Uid must be 0 unless NOROOTTEST is requested.  If requested
323 	 * it means the caller is depending on e.g. /dev/blah perms.
324 	 */
325 	if (cred->cr_uid != 0 && (cap & __SYSCAP_NOROOTTEST) == 0)
326 		return EPERM;
327 
328 	res = caps_check_cred(cred, cap);
329 	if (cap & __SYSCAP_GROUP_MASK) {
330 		cap = (cap & __SYSCAP_GROUP_MASK) >> __SYSCAP_GROUP_SHIFT;
331 		res |= caps_check_cred(cred, cap);
332 	}
333 	if (res & __SYSCAP_SELF)
334 		return EPERM;
335 	return (prison_priv_check(cred, cap));
336 }
337 
338 int
339 caps_priv_check_td(thread_t td, int cap)
340 {
341 	struct ucred *cred;
342 
343 	if (td->td_lwp == NULL)			/* not user thread */
344 		return 0;
345 	cred = td->td_ucred;
346         if (cred == NULL)
347 		return (EPERM);
348 						/* must pass restrictions */
349 	if (caps_check_cred(cred, cap) & __SYSCAP_SELF)
350 		return EPERM;
351 	return (prison_priv_check(cred, cap));
352 }
353 
354 int
355 caps_priv_check_self(int cap)
356 {
357 	return (caps_priv_check_td(curthread, cap));
358 }
359