xref: /freebsd/sys/compat/linux/linux_mib.c (revision 2a58b312)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/mount.h>
36 #include <sys/jail.h>
37 #include <sys/proc.h>
38 #include <sys/sx.h>
39 
40 #include <compat/linux/linux_mib.h>
41 #include <compat/linux/linux_misc.h>
42 
43 struct linux_prison {
44 	char	pr_osname[LINUX_MAX_UTSNAME];
45 	char	pr_osrelease[LINUX_MAX_UTSNAME];
46 	int	pr_oss_version;
47 	int	pr_osrel;
48 };
49 
50 static struct linux_prison lprison0 = {
51 	.pr_osname =		"Linux",
52 	.pr_osrelease =		LINUX_VERSION_STR,
53 	.pr_oss_version =	0x030600,
54 	.pr_osrel =		LINUX_VERSION_CODE
55 };
56 
57 static unsigned linux_osd_jail_slot;
58 
59 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
60     "Linux mode");
61 
62 int linux_debug = 3;
63 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
64     &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");
65 
66 int linux_default_openfiles = 1024;
67 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
68     &linux_default_openfiles, 0,
69     "Default soft openfiles resource limit, or -1 for unlimited");
70 
71 int linux_default_stacksize = 8 * 1024 * 1024;
72 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
73     &linux_default_stacksize, 0,
74     "Default soft stack size resource limit, or -1 for unlimited");
75 
76 int linux_dummy_rlimits = 0;
77 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
78     &linux_dummy_rlimits, 0,
79     "Return dummy values for unsupported Linux-specific rlimits");
80 
81 int linux_ignore_ip_recverr = 1;
82 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
83     &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");
84 
85 int linux_preserve_vstatus = 1;
86 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
87     &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");
88 
89 bool linux_map_sched_prio = true;
90 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
91     &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
92     "(not POSIX compliant)");
93 
94 int linux_use_emul_path = 1;
95 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
96     &linux_use_emul_path, 0, "Use linux.compat.emul_path");
97 
98 static bool linux_setid_allowed = true;
99 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
100     &linux_setid_allowed, 0,
101     "Allow setuid/setgid on execve of Linux binary");
102 
103 int
104 linux_setid_allowed_query(struct thread *td __unused,
105     struct image_params *imgp __unused)
106 {
107 	return (linux_setid_allowed);
108 }
109 
/*
 * Forward declarations of the setters called by the sysctl handlers; the
 * definitions appear later in this file.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
113 
114 static int
115 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
116 {
117 	char osname[LINUX_MAX_UTSNAME];
118 	int error;
119 
120 	linux_get_osname(req->td, osname);
121 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
122 	if (error != 0 || req->newptr == NULL)
123 		return (error);
124 	error = linux_set_osname(req->td, osname);
125 
126 	return (error);
127 }
128 
129 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
130 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
131 	    0, 0, linux_sysctl_osname, "A",
132 	    "Linux kernel OS name");
133 
134 static int
135 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
136 {
137 	char osrelease[LINUX_MAX_UTSNAME];
138 	int error;
139 
140 	linux_get_osrelease(req->td, osrelease);
141 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
142 	if (error != 0 || req->newptr == NULL)
143 		return (error);
144 	error = linux_set_osrelease(req->td, osrelease);
145 
146 	return (error);
147 }
148 
149 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
150 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
151 	    0, 0, linux_sysctl_osrelease, "A",
152 	    "Linux kernel OS release");
153 
154 static int
155 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
156 {
157 	int oss_version;
158 	int error;
159 
160 	oss_version = linux_get_oss_version(req->td);
161 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
162 	if (error != 0 || req->newptr == NULL)
163 		return (error);
164 	error = linux_set_oss_version(req->td, oss_version);
165 
166 	return (error);
167 }
168 
169 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
170 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
171 	    0, 0, linux_sysctl_oss_version, "I",
172 	    "Linux OSS version");
173 
/*
 * Map the osrelease into integer.
 *
 * osrelease must look like "major.minor.patch", optionally followed by a
 * '-' and at least one further character.  On success 0 is returned and,
 * when osrel is not NULL, the LINUX_KERNVER() encoding of the version is
 * stored through it.  EINVAL is returned, and *osrel is left untouched,
 * when the string is malformed, a component is negative or too large to
 * encode, or the version is older than 1.0.0.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	long v0, v1, v2;
	int v;

	eosrelease = osrelease + strlen(osrelease);
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/*
	 * strtol() accepts a sign and returns a long, so validate each
	 * component before it is narrowed to int and packed.  Negative
	 * values must not be shifted, and an oversized major or minor would
	 * overflow or spill into the neighbouring field.
	 * NOTE(review): the limits assume LINUX_KERNVER() packs the version
	 * as (major << 16) + (minor << 8) + patch -- confirm in linux_mib.h.
	 */
	if (v0 < 0 || v0 > 0x7fff || v1 < 0 || v1 > 0xff || v2 < 0)
		return (EINVAL);
	/*
	 * Patch levels above 255 exist in practice.  Cap them the way
	 * Linux's KERNEL_VERSION() does instead of letting them carry into
	 * the minor field.
	 */
	if (v2 > 0xff)
		v2 = 0xff;

	v = LINUX_KERNVER((int)v0, (int)v1, (int)v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
207 
208 /*
209  * Find a prison with Linux info.
210  * Return the Linux info and the (locked) prison.
211  */
212 static struct linux_prison *
213 linux_find_prison(struct prison *spr, struct prison **prp)
214 {
215 	struct prison *pr;
216 	struct linux_prison *lpr;
217 
218 	for (pr = spr;; pr = pr->pr_parent) {
219 		mtx_lock(&pr->pr_mtx);
220 		lpr = (pr == &prison0)
221 		    ? &lprison0
222 		    : osd_jail_get(pr, linux_osd_jail_slot);
223 		if (lpr != NULL)
224 			break;
225 		mtx_unlock(&pr->pr_mtx);
226 	}
227 	*prp = pr;
228 
229 	return (lpr);
230 }
231 
232 /*
233  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
234  * the Linux info and lock the prison.
235  */
236 static void
237 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
238 {
239 	struct prison *ppr;
240 	struct linux_prison *lpr, *nlpr;
241 	void **rsv;
242 
243 	/* If this prison already has Linux info, return that. */
244 	lpr = linux_find_prison(pr, &ppr);
245 	if (ppr == pr)
246 		goto done;
247 	/*
248 	 * Allocate a new info record.  Then check again, in case something
249 	 * changed during the allocation.
250 	 */
251 	mtx_unlock(&ppr->pr_mtx);
252 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
253 	rsv = osd_reserve(linux_osd_jail_slot);
254 	lpr = linux_find_prison(pr, &ppr);
255 	if (ppr == pr) {
256 		free(nlpr, M_PRISON);
257 		osd_free_reserved(rsv);
258 		goto done;
259 	}
260 	/* Inherit the initial values from the ancestor. */
261 	mtx_lock(&pr->pr_mtx);
262 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
263 	bcopy(lpr, nlpr, sizeof(*lpr));
264 	lpr = nlpr;
265 	mtx_unlock(&ppr->pr_mtx);
266  done:
267 	if (lprp != NULL)
268 		*lprp = lpr;
269 	else
270 		mtx_unlock(&pr->pr_mtx);
271 }
272 
273 /*
274  * Jail OSD methods for Linux prison data.
275  */
276 static int
277 linux_prison_create(void *obj, void *data)
278 {
279 	struct prison *pr = obj;
280 	struct vfsoptlist *opts = data;
281 	int jsys;
282 
283 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
284 	    jsys == JAIL_SYS_INHERIT)
285 		return (0);
286 	/*
287 	 * Inherit a prison's initial values from its parent
288 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
289 	 */
290 	linux_alloc_prison(pr, NULL);
291 	return (0);
292 }
293 
294 static int
295 linux_prison_check(void *obj __unused, void *data)
296 {
297 	struct vfsoptlist *opts = data;
298 	char *osname, *osrelease;
299 	int error, jsys, len, oss_version;
300 
301 	/* Check that the parameters are correct. */
302 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
303 	if (error != ENOENT) {
304 		if (error != 0)
305 			return (error);
306 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
307 			return (EINVAL);
308 	}
309 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
310 	if (error != ENOENT) {
311 		if (error != 0)
312 			return (error);
313 		if (len == 0 || osname[len - 1] != '\0')
314 			return (EINVAL);
315 		if (len > LINUX_MAX_UTSNAME) {
316 			vfs_opterror(opts, "linux.osname too long");
317 			return (ENAMETOOLONG);
318 		}
319 	}
320 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
321 	if (error != ENOENT) {
322 		if (error != 0)
323 			return (error);
324 		if (len == 0 || osrelease[len - 1] != '\0')
325 			return (EINVAL);
326 		if (len > LINUX_MAX_UTSNAME) {
327 			vfs_opterror(opts, "linux.osrelease too long");
328 			return (ENAMETOOLONG);
329 		}
330 		error = linux_map_osrel(osrelease, NULL);
331 		if (error != 0) {
332 			vfs_opterror(opts, "linux.osrelease format error");
333 			return (error);
334 		}
335 	}
336 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
337 	    sizeof(oss_version));
338 
339 	if (error == ENOENT)
340 		error = 0;
341 	return (error);
342 }
343 
344 static int
345 linux_prison_set(void *obj, void *data)
346 {
347 	struct linux_prison *lpr;
348 	struct prison *pr = obj;
349 	struct vfsoptlist *opts = data;
350 	char *osname, *osrelease;
351 	int error, gotversion, jsys, len, oss_version;
352 
353 	/* Set the parameters, which should be correct. */
354 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
355 	if (error == ENOENT)
356 		jsys = -1;
357 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
358 	if (error == ENOENT)
359 		osname = NULL;
360 	else
361 		jsys = JAIL_SYS_NEW;
362 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
363 	if (error == ENOENT)
364 		osrelease = NULL;
365 	else
366 		jsys = JAIL_SYS_NEW;
367 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
368 	    sizeof(oss_version));
369 	if (error == ENOENT)
370 		gotversion = 0;
371 	else {
372 		gotversion = 1;
373 		jsys = JAIL_SYS_NEW;
374 	}
375 	switch (jsys) {
376 	case JAIL_SYS_INHERIT:
377 		/* "linux=inherit": inherit the parent's Linux info. */
378 		mtx_lock(&pr->pr_mtx);
379 		osd_jail_del(pr, linux_osd_jail_slot);
380 		mtx_unlock(&pr->pr_mtx);
381 		break;
382 	case JAIL_SYS_NEW:
383 		/*
384 		 * "linux=new" or "linux.*":
385 		 * the prison gets its own Linux info.
386 		 */
387 		linux_alloc_prison(pr, &lpr);
388 		if (osrelease) {
389 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
390 			strlcpy(lpr->pr_osrelease, osrelease,
391 			    LINUX_MAX_UTSNAME);
392 		}
393 		if (osname)
394 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
395 		if (gotversion)
396 			lpr->pr_oss_version = oss_version;
397 		mtx_unlock(&pr->pr_mtx);
398 	}
399 
400 	return (0);
401 }
402 
403 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
404 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
405     "Jail Linux kernel OS name");
406 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
407     "Jail Linux kernel OS release");
408 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
409     "I", "Jail Linux OSS version");
410 
411 static int
412 linux_prison_get(void *obj, void *data)
413 {
414 	struct linux_prison *lpr;
415 	struct prison *ppr;
416 	struct prison *pr = obj;
417 	struct vfsoptlist *opts = data;
418 	int error, i;
419 
420 	static int version0;
421 
422 	/* See if this prison is the one with the Linux info. */
423 	lpr = linux_find_prison(pr, &ppr);
424 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
425 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
426 	if (error != 0 && error != ENOENT)
427 		goto done;
428 	if (i) {
429 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
430 		if (error != 0 && error != ENOENT)
431 			goto done;
432 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
433 		if (error != 0 && error != ENOENT)
434 			goto done;
435 		error = vfs_setopt(opts, "linux.oss_version",
436 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
437 		if (error != 0 && error != ENOENT)
438 			goto done;
439 	} else {
440 		/*
441 		 * If this prison is inheriting its Linux info, report
442 		 * empty/zero parameters.
443 		 */
444 		error = vfs_setopts(opts, "linux.osname", "");
445 		if (error != 0 && error != ENOENT)
446 			goto done;
447 		error = vfs_setopts(opts, "linux.osrelease", "");
448 		if (error != 0 && error != ENOENT)
449 			goto done;
450 		error = vfs_setopt(opts, "linux.oss_version", &version0,
451 		    sizeof(lpr->pr_oss_version));
452 		if (error != 0 && error != ENOENT)
453 			goto done;
454 	}
455 	error = 0;
456 
457  done:
458 	mtx_unlock(&ppr->pr_mtx);
459 
460 	return (error);
461 }
462 
463 static void
464 linux_prison_destructor(void *data)
465 {
466 
467 	free(data, M_PRISON);
468 }
469 
470 void
471 linux_osd_jail_register(void)
472 {
473 	struct prison *pr;
474 	osd_method_t methods[PR_MAXMETHOD] = {
475 	    [PR_METHOD_CREATE] =	linux_prison_create,
476 	    [PR_METHOD_GET] =		linux_prison_get,
477 	    [PR_METHOD_SET] =		linux_prison_set,
478 	    [PR_METHOD_CHECK] =		linux_prison_check
479 	};
480 
481 	linux_osd_jail_slot =
482 	    osd_jail_register(linux_prison_destructor, methods);
483 	/* Copy the system Linux info to any current prisons. */
484 	sx_slock(&allprison_lock);
485 	TAILQ_FOREACH(pr, &allprison, pr_list)
486 		linux_alloc_prison(pr, NULL);
487 	sx_sunlock(&allprison_lock);
488 }
489 
490 void
491 linux_osd_jail_deregister(void)
492 {
493 
494 	osd_jail_deregister(linux_osd_jail_slot);
495 }
496 
497 void
498 linux_get_osname(struct thread *td, char *dst)
499 {
500 	struct prison *pr;
501 	struct linux_prison *lpr;
502 
503 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
504 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
505 	mtx_unlock(&pr->pr_mtx);
506 }
507 
508 static int
509 linux_set_osname(struct thread *td, char *osname)
510 {
511 	struct prison *pr;
512 	struct linux_prison *lpr;
513 
514 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
515 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
516 	mtx_unlock(&pr->pr_mtx);
517 
518 	return (0);
519 }
520 
521 void
522 linux_get_osrelease(struct thread *td, char *dst)
523 {
524 	struct prison *pr;
525 	struct linux_prison *lpr;
526 
527 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
528 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
529 	mtx_unlock(&pr->pr_mtx);
530 }
531 
532 int
533 linux_kernver(struct thread *td)
534 {
535 	struct prison *pr;
536 	struct linux_prison *lpr;
537 	int osrel;
538 
539 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
540 	osrel = lpr->pr_osrel;
541 	mtx_unlock(&pr->pr_mtx);
542 
543 	return (osrel);
544 }
545 
546 static int
547 linux_set_osrelease(struct thread *td, char *osrelease)
548 {
549 	struct prison *pr;
550 	struct linux_prison *lpr;
551 	int error;
552 
553 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
554 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
555 	if (error == 0)
556 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
557 	mtx_unlock(&pr->pr_mtx);
558 
559 	return (error);
560 }
561 
562 int
563 linux_get_oss_version(struct thread *td)
564 {
565 	struct prison *pr;
566 	struct linux_prison *lpr;
567 	int version;
568 
569 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
570 	version = lpr->pr_oss_version;
571 	mtx_unlock(&pr->pr_mtx);
572 
573 	return (version);
574 }
575 
576 static int
577 linux_set_oss_version(struct thread *td, int oss_version)
578 {
579 	struct prison *pr;
580 	struct linux_prison *lpr;
581 
582 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
583 	lpr->pr_oss_version = oss_version;
584 	mtx_unlock(&pr->pr_mtx);
585 
586 	return (0);
587 }
588