xref: /freebsd/sys/compat/linux/linux_mib.c (revision e17f5b1d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/sdt.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 #include <sys/proc.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/jail.h>
41 #include <sys/lock.h>
42 #include <sys/sx.h>
43 
44 #include <compat/linux/linux_mib.h>
45 #include <compat/linux/linux_misc.h>
46 
/*
 * Linux identity information attached to a prison.  prison0 uses the
 * static lprison0 record; other prisons that have their own copy keep it
 * in the jail OSD slot linux_osd_jail_slot.
 */
struct linux_prison {
	/* Linux kernel OS name. */
	char	pr_osname[LINUX_MAX_UTSNAME];
	/* Linux kernel OS release string. */
	char	pr_osrelease[LINUX_MAX_UTSNAME];
	/* Linux OSS version. */
	int	pr_oss_version;
	/* pr_osrelease as mapped to an integer by linux_map_osrel(). */
	int	pr_osrel;
};
53 
/*
 * Linux info for prison0.  linux_find_prison() returns this record when
 * its walk up the prison hierarchy reaches prison0.
 */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};
60 
/*
 * Jail OSD slot under which a prison's struct linux_prison is stored.
 * Assigned by linux_osd_jail_register().
 */
static unsigned linux_osd_jail_slot;
62 
/* Root of the compat.linux sysctl tree. */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

/* compat.linux.debug: nonzero enables linux(4) warnings (default 1). */
int linux_debug = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

/* compat.linux.default_openfiles: default soft limit; -1 is unlimited. */
int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

/* compat.linux.ignore_ip_recverr (default 1). */
int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

/* compat.linux.preserve_vstatus (default 0). */
int linux_preserve_vstatus = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/*
 * compat.linux.map_sched_prio (default true).  Declared CTLFLAG_RDTUN,
 * unlike the CTLFLAG_RWTUN knobs above.
 */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
    &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
    "(not POSIX compliant)");
87 
/*
 * Forward declarations: the setters are defined near the end of the file
 * but are called from the sysctl handlers below.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
91 
92 static int
93 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
94 {
95 	char osname[LINUX_MAX_UTSNAME];
96 	int error;
97 
98 	linux_get_osname(req->td, osname);
99 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
100 	if (error != 0 || req->newptr == NULL)
101 		return (error);
102 	error = linux_set_osname(req->td, osname);
103 
104 	return (error);
105 }
106 
107 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
108 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
109 	    0, 0, linux_sysctl_osname, "A",
110 	    "Linux kernel OS name");
111 
112 static int
113 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
114 {
115 	char osrelease[LINUX_MAX_UTSNAME];
116 	int error;
117 
118 	linux_get_osrelease(req->td, osrelease);
119 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
120 	if (error != 0 || req->newptr == NULL)
121 		return (error);
122 	error = linux_set_osrelease(req->td, osrelease);
123 
124 	return (error);
125 }
126 
127 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
128 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
129 	    0, 0, linux_sysctl_osrelease, "A",
130 	    "Linux kernel OS release");
131 
132 static int
133 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
134 {
135 	int oss_version;
136 	int error;
137 
138 	oss_version = linux_get_oss_version(req->td);
139 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
140 	if (error != 0 || req->newptr == NULL)
141 		return (error);
142 	error = linux_set_oss_version(req->td, oss_version);
143 
144 	return (error);
145 }
146 
147 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
148 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
149 	    0, 0, linux_sysctl_oss_version, "I",
150 	    "Linux OSS version");
151 
/*
 * Parse one decimal component of a Linux release string.
 *
 * str must start with a digit: strtol() by itself also accepts leading
 * whitespace and a sign, neither of which belongs in a kernel version.
 * On success the value is stored in *valp and *endp points just past the
 * digits.  Returns 0, or EINVAL if str does not start with a digit or the
 * parsed value does not fit in an int.
 */
static int
linux_osrel_component(char *str, char **endp, int *valp)
{
	long val;

	if (*str < '0' || *str > '9')
		return (EINVAL);
	val = strtol(str, endp, 10);
	/* Refuse values that would be altered by the narrowing to int. */
	if (val < 0 || val != (int)val)
		return (EINVAL);
	*valp = (int)val;

	return (0);
}

/*
 * Map the osrelease into integer
 *
 * The string must have the form "major.minor.patch", optionally followed
 * by '-' and at least one more character.  The three numbers are combined
 * with LINUX_KERNVER() and the result is stored through osrel, unless osrel
 * is NULL, in which case the call only validates the format.
 *
 * Returns 0 on success, or EINVAL if the string is malformed or the
 * resulting version is below LINUX_KERNVER(1, 0, 0).
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int v0, v1, v2, v;

	eosrelease = osrelease + strlen(osrelease);

	/* Major: must be followed by '.' and at least one more character. */
	if (linux_osrel_component(osrelease, &sep, &v0) != 0 ||
	    sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;

	/* Minor: same rule as the major number. */
	if (linux_osrel_component(osrelease, &sep, &v1) != 0 ||
	    sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;

	/*
	 * Patch level: either ends the string, or is followed by '-' and a
	 * non-empty suffix.
	 */
	if (linux_osrel_component(osrelease, &sep, &v2) != 0 ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
185 
186 /*
187  * Find a prison with Linux info.
188  * Return the Linux info and the (locked) prison.
189  */
190 static struct linux_prison *
191 linux_find_prison(struct prison *spr, struct prison **prp)
192 {
193 	struct prison *pr;
194 	struct linux_prison *lpr;
195 
196 	for (pr = spr;; pr = pr->pr_parent) {
197 		mtx_lock(&pr->pr_mtx);
198 		lpr = (pr == &prison0)
199 		    ? &lprison0
200 		    : osd_jail_get(pr, linux_osd_jail_slot);
201 		if (lpr != NULL)
202 			break;
203 		mtx_unlock(&pr->pr_mtx);
204 	}
205 	*prp = pr;
206 
207 	return (lpr);
208 }
209 
210 /*
211  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
212  * the Linux info and lock the prison.
213  */
214 static void
215 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
216 {
217 	struct prison *ppr;
218 	struct linux_prison *lpr, *nlpr;
219 	void **rsv;
220 
221 	/* If this prison already has Linux info, return that. */
222 	lpr = linux_find_prison(pr, &ppr);
223 	if (ppr == pr)
224 		goto done;
225 	/*
226 	 * Allocate a new info record.  Then check again, in case something
227 	 * changed during the allocation.
228 	 */
229 	mtx_unlock(&ppr->pr_mtx);
230 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
231 	rsv = osd_reserve(linux_osd_jail_slot);
232 	lpr = linux_find_prison(pr, &ppr);
233 	if (ppr == pr) {
234 		free(nlpr, M_PRISON);
235 		osd_free_reserved(rsv);
236 		goto done;
237 	}
238 	/* Inherit the initial values from the ancestor. */
239 	mtx_lock(&pr->pr_mtx);
240 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
241 	bcopy(lpr, nlpr, sizeof(*lpr));
242 	lpr = nlpr;
243 	mtx_unlock(&ppr->pr_mtx);
244  done:
245 	if (lprp != NULL)
246 		*lprp = lpr;
247 	else
248 		mtx_unlock(&pr->pr_mtx);
249 }
250 
251 /*
252  * Jail OSD methods for Linux prison data.
253  */
254 static int
255 linux_prison_create(void *obj, void *data)
256 {
257 	struct prison *pr = obj;
258 	struct vfsoptlist *opts = data;
259 	int jsys;
260 
261 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
262 	    jsys == JAIL_SYS_INHERIT)
263 		return (0);
264 	/*
265 	 * Inherit a prison's initial values from its parent
266 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
267 	 */
268 	linux_alloc_prison(pr, NULL);
269 	return (0);
270 }
271 
272 static int
273 linux_prison_check(void *obj __unused, void *data)
274 {
275 	struct vfsoptlist *opts = data;
276 	char *osname, *osrelease;
277 	int error, jsys, len, oss_version;
278 
279 	/* Check that the parameters are correct. */
280 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
281 	if (error != ENOENT) {
282 		if (error != 0)
283 			return (error);
284 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
285 			return (EINVAL);
286 	}
287 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
288 	if (error != ENOENT) {
289 		if (error != 0)
290 			return (error);
291 		if (len == 0 || osname[len - 1] != '\0')
292 			return (EINVAL);
293 		if (len > LINUX_MAX_UTSNAME) {
294 			vfs_opterror(opts, "linux.osname too long");
295 			return (ENAMETOOLONG);
296 		}
297 	}
298 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
299 	if (error != ENOENT) {
300 		if (error != 0)
301 			return (error);
302 		if (len == 0 || osrelease[len - 1] != '\0')
303 			return (EINVAL);
304 		if (len > LINUX_MAX_UTSNAME) {
305 			vfs_opterror(opts, "linux.osrelease too long");
306 			return (ENAMETOOLONG);
307 		}
308 		error = linux_map_osrel(osrelease, NULL);
309 		if (error != 0) {
310 			vfs_opterror(opts, "linux.osrelease format error");
311 			return (error);
312 		}
313 	}
314 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
315 	    sizeof(oss_version));
316 
317 	if (error == ENOENT)
318 		error = 0;
319 	return (error);
320 }
321 
322 static int
323 linux_prison_set(void *obj, void *data)
324 {
325 	struct linux_prison *lpr;
326 	struct prison *pr = obj;
327 	struct vfsoptlist *opts = data;
328 	char *osname, *osrelease;
329 	int error, gotversion, jsys, len, oss_version;
330 
331 	/* Set the parameters, which should be correct. */
332 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
333 	if (error == ENOENT)
334 		jsys = -1;
335 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
336 	if (error == ENOENT)
337 		osname = NULL;
338 	else
339 		jsys = JAIL_SYS_NEW;
340 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
341 	if (error == ENOENT)
342 		osrelease = NULL;
343 	else
344 		jsys = JAIL_SYS_NEW;
345 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
346 	    sizeof(oss_version));
347 	if (error == ENOENT)
348 		gotversion = 0;
349 	else {
350 		gotversion = 1;
351 		jsys = JAIL_SYS_NEW;
352 	}
353 	switch (jsys) {
354 	case JAIL_SYS_INHERIT:
355 		/* "linux=inherit": inherit the parent's Linux info. */
356 		mtx_lock(&pr->pr_mtx);
357 		osd_jail_del(pr, linux_osd_jail_slot);
358 		mtx_unlock(&pr->pr_mtx);
359 		break;
360 	case JAIL_SYS_NEW:
361 		/*
362 		 * "linux=new" or "linux.*":
363 		 * the prison gets its own Linux info.
364 		 */
365 		linux_alloc_prison(pr, &lpr);
366 		if (osrelease) {
367 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
368 			strlcpy(lpr->pr_osrelease, osrelease,
369 			    LINUX_MAX_UTSNAME);
370 		}
371 		if (osname)
372 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
373 		if (gotversion)
374 			lpr->pr_oss_version = oss_version;
375 		mtx_unlock(&pr->pr_mtx);
376 	}
377 
378 	return (0);
379 }
380 
/*
 * Jail parameter declarations for "linux", "linux.osname",
 * "linux.osrelease" and "linux.oss_version".  The options of the same
 * names are handled by linux_prison_check(), linux_prison_set() and
 * linux_prison_get().
 */
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail Linux OSS version");
388 
389 static int
390 linux_prison_get(void *obj, void *data)
391 {
392 	struct linux_prison *lpr;
393 	struct prison *ppr;
394 	struct prison *pr = obj;
395 	struct vfsoptlist *opts = data;
396 	int error, i;
397 
398 	static int version0;
399 
400 	/* See if this prison is the one with the Linux info. */
401 	lpr = linux_find_prison(pr, &ppr);
402 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
403 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
404 	if (error != 0 && error != ENOENT)
405 		goto done;
406 	if (i) {
407 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
408 		if (error != 0 && error != ENOENT)
409 			goto done;
410 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
411 		if (error != 0 && error != ENOENT)
412 			goto done;
413 		error = vfs_setopt(opts, "linux.oss_version",
414 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
415 		if (error != 0 && error != ENOENT)
416 			goto done;
417 	} else {
418 		/*
419 		 * If this prison is inheriting its Linux info, report
420 		 * empty/zero parameters.
421 		 */
422 		error = vfs_setopts(opts, "linux.osname", "");
423 		if (error != 0 && error != ENOENT)
424 			goto done;
425 		error = vfs_setopts(opts, "linux.osrelease", "");
426 		if (error != 0 && error != ENOENT)
427 			goto done;
428 		error = vfs_setopt(opts, "linux.oss_version", &version0,
429 		    sizeof(lpr->pr_oss_version));
430 		if (error != 0 && error != ENOENT)
431 			goto done;
432 	}
433 	error = 0;
434 
435  done:
436 	mtx_unlock(&ppr->pr_mtx);
437 
438 	return (error);
439 }
440 
441 static void
442 linux_prison_destructor(void *data)
443 {
444 
445 	free(data, M_PRISON);
446 }
447 
448 void
449 linux_osd_jail_register(void)
450 {
451 	struct prison *pr;
452 	osd_method_t methods[PR_MAXMETHOD] = {
453 	    [PR_METHOD_CREATE] =	linux_prison_create,
454 	    [PR_METHOD_GET] =		linux_prison_get,
455 	    [PR_METHOD_SET] =		linux_prison_set,
456 	    [PR_METHOD_CHECK] =		linux_prison_check
457 	};
458 
459 	linux_osd_jail_slot =
460 	    osd_jail_register(linux_prison_destructor, methods);
461 	/* Copy the system Linux info to any current prisons. */
462 	sx_slock(&allprison_lock);
463 	TAILQ_FOREACH(pr, &allprison, pr_list)
464 		linux_alloc_prison(pr, NULL);
465 	sx_sunlock(&allprison_lock);
466 }
467 
468 void
469 linux_osd_jail_deregister(void)
470 {
471 
472 	osd_jail_deregister(linux_osd_jail_slot);
473 }
474 
475 void
476 linux_get_osname(struct thread *td, char *dst)
477 {
478 	struct prison *pr;
479 	struct linux_prison *lpr;
480 
481 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
482 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
483 	mtx_unlock(&pr->pr_mtx);
484 }
485 
486 static int
487 linux_set_osname(struct thread *td, char *osname)
488 {
489 	struct prison *pr;
490 	struct linux_prison *lpr;
491 
492 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
493 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
494 	mtx_unlock(&pr->pr_mtx);
495 
496 	return (0);
497 }
498 
499 void
500 linux_get_osrelease(struct thread *td, char *dst)
501 {
502 	struct prison *pr;
503 	struct linux_prison *lpr;
504 
505 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
506 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
507 	mtx_unlock(&pr->pr_mtx);
508 }
509 
510 int
511 linux_kernver(struct thread *td)
512 {
513 	struct prison *pr;
514 	struct linux_prison *lpr;
515 	int osrel;
516 
517 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
518 	osrel = lpr->pr_osrel;
519 	mtx_unlock(&pr->pr_mtx);
520 
521 	return (osrel);
522 }
523 
524 static int
525 linux_set_osrelease(struct thread *td, char *osrelease)
526 {
527 	struct prison *pr;
528 	struct linux_prison *lpr;
529 	int error;
530 
531 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
532 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
533 	if (error == 0)
534 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
535 	mtx_unlock(&pr->pr_mtx);
536 
537 	return (error);
538 }
539 
540 int
541 linux_get_oss_version(struct thread *td)
542 {
543 	struct prison *pr;
544 	struct linux_prison *lpr;
545 	int version;
546 
547 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
548 	version = lpr->pr_oss_version;
549 	mtx_unlock(&pr->pr_mtx);
550 
551 	return (version);
552 }
553 
554 static int
555 linux_set_oss_version(struct thread *td, int oss_version)
556 {
557 	struct prison *pr;
558 	struct linux_prison *lpr;
559 
560 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
561 	lpr->pr_oss_version = oss_version;
562 	mtx_unlock(&pr->pr_mtx);
563 
564 	return (0);
565 }
566