1 /*
2 * Copyright (c) 2023 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/param.h>
36 #include <sys/acct.h>
37 #include <sys/caps.h>
38 #include <sys/systm.h>
39 #include <sys/sysmsg.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/proc.h>
43 #include <sys/malloc.h>
44 #include <sys/pioctl.h>
45 #include <sys/resourcevar.h>
46 #include <sys/jail.h>
47 #include <sys/lockf.h>
48 #include <sys/spinlock.h>
49 #include <sys/sysctl.h>
50
51 #include <sys/spinlock2.h>
52
53 __read_mostly static int caps_available = 1;
54 SYSCTL_INT(_kern, OID_AUTO, caps_available,
55 CTLFLAG_RW, &caps_available, 0 , "");
56
57 /*
58 * Quick check for cap restriction in cred (no bounds checks),
59 * return cap flags.
60 */
61 static __inline
62 int
caps_check_cred(struct ucred * cred,int cap)63 caps_check_cred(struct ucred *cred, int cap)
64 {
65 __syscapelm_t elm;
66
67 cap &= ~__SYSCAP_XFLAGS;
68 elm = cred->cr_caps.caps[__SYSCAP_INDEX(cap)];
69
70 return ((int)(elm >> __SYSCAP_SHIFT(cap)) & __SYSCAP_ALL);
71 }
72
73 /*
74 * int syscap_get(int cap, void *data, size_t bytes);
75 */
76 int
sys_syscap_get(struct sysmsg * sysmsg,const struct syscap_get_args * uap)77 sys_syscap_get(struct sysmsg *sysmsg, const struct syscap_get_args *uap)
78 {
79 struct ucred *cred;
80 int cap = uap->cap & ~__SYSCAP_XFLAGS;
81 int res;
82 int error;
83
84 if (cap < 0)
85 return EINVAL;
86 if (cap >= __SYSCAP_COUNT)
87 return EOPNOTSUPP;
88 if (uap->bytes && uap->bytes < sizeof(syscap_base_t))
89 return EINVAL;
90 error = 0;
91
92 /*
93 * Get capability restriction from parent pid
94 */
95 if (uap->cap & __SYSCAP_INPARENT) {
96 struct proc *pp;
97
98 pp = pfind(curproc->p_ppid);
99 if (pp == NULL)
100 return EINVAL;
101 lwkt_gettoken_shared(&pp->p_token); /* protect cred */
102 cred = pp->p_ucred;
103 crhold(cred);
104 lwkt_reltoken(&pp->p_token);
105 PRELE(pp); /* from pfind */
106 } else {
107 cred = curthread->td_ucred;
108 }
109
110 /*
111 * No resource data by default
112 */
113 if (uap->data && uap->bytes) {
114 syscap_base_t base;
115
116 base.res = SYSCAP_RESOURCE_EOF;
117 base.len = sizeof(base);
118 error = copyout(&base, uap->data, sizeof(base));
119 }
120
121 /*
122 * Get resource bits
123 */
124 if (error == 0) {
125 res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
126 __SYSCAP_SHIFT(cap));
127 res &= __SYSCAP_BITS_MASK;
128 sysmsg->sysmsg_result = res;
129 }
130
131 if (uap->cap & __SYSCAP_INPARENT)
132 crfree(cred);
133
134 return error;
135 }
136
137 /*
138 * int syscap_set(int cap, int flags, const void *data, size_t bytes)
139 */
140 int
sys_syscap_set(struct sysmsg * sysmsg,const struct syscap_set_args * uap)141 sys_syscap_set(struct sysmsg *sysmsg, const struct syscap_set_args *uap)
142 {
143 struct ucred *cred;
144 struct proc *pp;
145 int cap = uap->cap & ~__SYSCAP_XFLAGS;
146 int res;
147 int error;
148 int flags = uap->flags;
149 __syscapelm_t anymask;
150
151 if (cap < 0 || cap >= __SYSCAP_COUNT)
152 return EINVAL;
153 if (flags & ~__SYSCAP_BITS_MASK)
154 return EINVAL;
155 if (uap->data || uap->bytes)
156 return EINVAL;
157 error = 0;
158
159 /*
160 * Get capability restriction from parent pid. We can only
161 * mess with the parent if it is running under the same userid
162 * and prison.
163 */
164 if (uap->cap & __SYSCAP_INPARENT) {
165 pp = pfind(curproc->p_ppid);
166 if (pp == NULL)
167 return EINVAL;
168 if (pp->p_ucred->cr_uid != curproc->p_ucred->cr_uid ||
169 pp->p_ucred->cr_prison != curproc->p_ucred->cr_prison)
170 {
171 PRELE(pp); /* from pfind */
172 return EINVAL;
173 }
174 } else {
175 pp = curproc;
176 }
177 lwkt_gettoken(&pp->p_token); /* protect p_ucred */
178 cred = pp->p_ucred;
179
180 /*
181 * Calculate normalized value for requested capability and check
182 * against the stored value. If they do not match, wire-or to
183 * add the bits and set appropriate SYSCAP_ANY bits indicating
184 * deviation from the root syscaps.
185 */
186 res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
187 __SYSCAP_SHIFT(cap));
188 res &= __SYSCAP_BITS_MASK;
189
190 /*
191 * Handle resource data, if any
192 */
193
194 /*
195 * Set resource bits
196 */
197 if (error == 0) {
198 if (res != (res | flags)) {
199 cred = cratom_proc(pp);
200 anymask = (__syscapelm_t)flags <<
201 __SYSCAP_SHIFT(SYSCAP_ANY);
202 atomic_set_64(&cred->cr_caps.caps[0], anymask);
203 atomic_set_64(&cred->cr_caps.caps[ __SYSCAP_INDEX(cap)],
204 ((__syscapelm_t)uap->flags <<
205 __SYSCAP_SHIFT(cap)));
206 }
207 sysmsg->sysmsg_result = res | uap->flags;
208 }
209
210 /*
211 * Cleanup
212 */
213 lwkt_reltoken(&pp->p_token);
214 if (uap->cap & __SYSCAP_INPARENT)
215 PRELE(pp); /* from pfind */
216 return error;
217 }
218
219 /*
220 * Adjust capabilities for exec after the point of no return.
221 *
222 * This function shifts the EXEC bits into the SELF bits and
223 * replicates the EXEC bits.
224 */
225 void
caps_exec(struct proc * p)226 caps_exec(struct proc *p)
227 {
228 struct ucred *cred;
229 __syscapelm_t elm;
230 int changed = 0;
231 int i;
232
233 /*
234 * Dry-run caps inheritance, did anything change?
235 *
236 * caps inheritance basically shifts the EXEC bits into the SELF bits,
237 * and then replicates the EXEC bits. We have to avoid shifting any
238 * 1's from the SELF bits into the adjacent EXEC bits that may have
239 * previously been 0.
240 */
241 cred = p->p_ucred;
242 for (i = 0; i < __SYSCAP_NUMELMS; ++i) {
243 elm = cred->cr_caps.caps[i];
244 elm = ((elm & __SYSCAP_EXECMASK) >> 1) |
245 (elm & __SYSCAP_EXECMASK);
246 if (elm != cred->cr_caps.caps[i])
247 changed = 1;
248 }
249
250 /*
251 * Yes, setup a new ucred for the process
252 */
253 if (changed) {
254 cratom_proc(p);
255 cred = p->p_ucred;
256 for (i = 0; i < __SYSCAP_NUMELMS; ++i) {
257 elm = cred->cr_caps.caps[i];
258 elm = ((elm & __SYSCAP_EXECMASK) >> 1) |
259 (elm & __SYSCAP_EXECMASK);
260 cred->cr_caps.caps[i] = elm;
261 }
262 }
263 }
264
265 /*
266 * Return the raw flags for the requested capability.
267 */
268 int
caps_get(struct ucred * cred,int cap)269 caps_get(struct ucred *cred, int cap)
270 {
271 int res;
272
273 cap &= ~__SYSCAP_XFLAGS;
274 if (cap < 0 || cap >= __SYSCAP_COUNT)
275 return 0;
276 res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
277 __SYSCAP_SHIFT(cap));
278 res &= __SYSCAP_BITS_MASK;
279
280 return res;
281 }
282
283 /*
284 * Set capability restriction bits
285 */
286 void
caps_set_locked(struct proc * p,int cap,int flags)287 caps_set_locked(struct proc *p, int cap, int flags)
288 {
289 struct ucred *cred;
290 __syscapelm_t elm;
291
292 cap &= ~__SYSCAP_XFLAGS;
293 if (cap < 0 || cap >= __SYSCAP_COUNT)
294 return;
295
296 cred = cratom_proc(p);
297 elm = (__syscapelm_t)flags << __SYSCAP_SHIFT(SYSCAP_ANY);
298 atomic_set_64(&cred->cr_caps.caps[0], elm);
299 elm = (__syscapelm_t)flags << __SYSCAP_SHIFT(cap);
300 atomic_set_64(&cred->cr_caps.caps[ __SYSCAP_INDEX(cap)], elm);
301 }
302
303 /*
304 * Returns error code if restricted, 0 on success.
305 *
306 * These are more sophisticated versions of the baseline caps checks.
307 * cr_prison capabilities are also checked, and some capabilities may
308 * imply several tests.
309 */
310 int
caps_priv_check(struct ucred * cred,int cap)311 caps_priv_check(struct ucred *cred, int cap)
312 {
313 int res;
314
315 if (cred == NULL) {
316 if (cap & __SYSCAP_NULLCRED)
317 return 0;
318 return EPERM;
319 }
320
321 /*
322 * Uid must be 0 unless NOROOTTEST is requested. If requested
323 * it means the caller is depending on e.g. /dev/blah perms.
324 */
325 if (cred->cr_uid != 0 && (cap & __SYSCAP_NOROOTTEST) == 0)
326 return EPERM;
327
328 res = caps_check_cred(cred, cap);
329 if (cap & __SYSCAP_GROUP_MASK) {
330 cap = (cap & __SYSCAP_GROUP_MASK) >> __SYSCAP_GROUP_SHIFT;
331 res |= caps_check_cred(cred, cap);
332 }
333 if (res & __SYSCAP_SELF)
334 return EPERM;
335 return (prison_priv_check(cred, cap));
336 }
337
338 int
caps_priv_check_td(thread_t td,int cap)339 caps_priv_check_td(thread_t td, int cap)
340 {
341 struct ucred *cred;
342
343 if (td->td_lwp == NULL) /* not user thread */
344 return 0;
345 cred = td->td_ucred;
346 if (cred == NULL)
347 return (EPERM);
348 /* must pass restrictions */
349 if (caps_check_cred(cred, cap) & __SYSCAP_SELF)
350 return EPERM;
351 return (prison_priv_check(cred, cap));
352 }
353
354 int
caps_priv_check_self(int cap)355 caps_priv_check_self(int cap)
356 {
357 return (caps_priv_check_td(curthread, cap));
358 }
359