xref: /freebsd/sys/compat/linux/linux.c (revision d0b2dbfa)
1 /*-
2  * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include <sys/cdefs.h>
27 #include "opt_inet6.h"
28 
29 #include <sys/param.h>
30 #include <sys/conf.h>
31 #include <sys/ctype.h>
32 #include <sys/file.h>
33 #include <sys/filedesc.h>
34 #include <sys/jail.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/poll.h>
38 #include <sys/proc.h>
39 #include <sys/signalvar.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42 
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/if_dl.h>
46 #include <net/if_types.h>
47 #include <netlink/netlink.h>
48 
49 #include <sys/un.h>
50 #include <netinet/in.h>
51 
52 #include <compat/linux/linux.h>
53 #include <compat/linux/linux_common.h>
54 #include <compat/linux/linux_mib.h>
55 #include <compat/linux/linux_util.h>
56 
57 _Static_assert(LINUX_IFNAMSIZ == IFNAMSIZ, "Linux IFNAMSIZ");
58 _Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
59     "Linux struct sockaddr size");
60 _Static_assert(offsetof(struct sockaddr, sa_data) ==
61     offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
62 
63 static bool use_real_ifnames = false;
64 SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
65     &use_real_ifnames, 0,
66     "Use FreeBSD interface names instead of generating ethN aliases");
67 
68 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
69 	LINUX_SIGHUP,	/* SIGHUP */
70 	LINUX_SIGINT,	/* SIGINT */
71 	LINUX_SIGQUIT,	/* SIGQUIT */
72 	LINUX_SIGILL,	/* SIGILL */
73 	LINUX_SIGTRAP,	/* SIGTRAP */
74 	LINUX_SIGABRT,	/* SIGABRT */
75 	0,		/* SIGEMT */
76 	LINUX_SIGFPE,	/* SIGFPE */
77 	LINUX_SIGKILL,	/* SIGKILL */
78 	LINUX_SIGBUS,	/* SIGBUS */
79 	LINUX_SIGSEGV,	/* SIGSEGV */
80 	LINUX_SIGSYS,	/* SIGSYS */
81 	LINUX_SIGPIPE,	/* SIGPIPE */
82 	LINUX_SIGALRM,	/* SIGALRM */
83 	LINUX_SIGTERM,	/* SIGTERM */
84 	LINUX_SIGURG,	/* SIGURG */
85 	LINUX_SIGSTOP,	/* SIGSTOP */
86 	LINUX_SIGTSTP,	/* SIGTSTP */
87 	LINUX_SIGCONT,	/* SIGCONT */
88 	LINUX_SIGCHLD,	/* SIGCHLD */
89 	LINUX_SIGTTIN,	/* SIGTTIN */
90 	LINUX_SIGTTOU,	/* SIGTTOU */
91 	LINUX_SIGIO,	/* SIGIO */
92 	LINUX_SIGXCPU,	/* SIGXCPU */
93 	LINUX_SIGXFSZ,	/* SIGXFSZ */
94 	LINUX_SIGVTALRM,/* SIGVTALRM */
95 	LINUX_SIGPROF,	/* SIGPROF */
96 	LINUX_SIGWINCH,	/* SIGWINCH */
97 	0,		/* SIGINFO */
98 	LINUX_SIGUSR1,	/* SIGUSR1 */
99 	LINUX_SIGUSR2	/* SIGUSR2 */
100 };
101 
102 #define	LINUX_SIGPWREMU	(SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)
103 
104 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
105 	SIGHUP,		/* LINUX_SIGHUP */
106 	SIGINT,		/* LINUX_SIGINT */
107 	SIGQUIT,	/* LINUX_SIGQUIT */
108 	SIGILL,		/* LINUX_SIGILL */
109 	SIGTRAP,	/* LINUX_SIGTRAP */
110 	SIGABRT,	/* LINUX_SIGABRT */
111 	SIGBUS,		/* LINUX_SIGBUS */
112 	SIGFPE,		/* LINUX_SIGFPE */
113 	SIGKILL,	/* LINUX_SIGKILL */
114 	SIGUSR1,	/* LINUX_SIGUSR1 */
115 	SIGSEGV,	/* LINUX_SIGSEGV */
116 	SIGUSR2,	/* LINUX_SIGUSR2 */
117 	SIGPIPE,	/* LINUX_SIGPIPE */
118 	SIGALRM,	/* LINUX_SIGALRM */
119 	SIGTERM,	/* LINUX_SIGTERM */
120 	SIGBUS,		/* LINUX_SIGSTKFLT */
121 	SIGCHLD,	/* LINUX_SIGCHLD */
122 	SIGCONT,	/* LINUX_SIGCONT */
123 	SIGSTOP,	/* LINUX_SIGSTOP */
124 	SIGTSTP,	/* LINUX_SIGTSTP */
125 	SIGTTIN,	/* LINUX_SIGTTIN */
126 	SIGTTOU,	/* LINUX_SIGTTOU */
127 	SIGURG,		/* LINUX_SIGURG */
128 	SIGXCPU,	/* LINUX_SIGXCPU */
129 	SIGXFSZ,	/* LINUX_SIGXFSZ */
130 	SIGVTALRM,	/* LINUX_SIGVTALARM */
131 	SIGPROF,	/* LINUX_SIGPROF */
132 	SIGWINCH,	/* LINUX_SIGWINCH */
133 	SIGIO,		/* LINUX_SIGIO */
134 	/*
135 	 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
136 	 * to the first unused FreeBSD signal number. Since Linux supports
137 	 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
138 	 */
139 	LINUX_SIGPWREMU,/* LINUX_SIGPWR */
140 	SIGSYS		/* LINUX_SIGSYS */
141 };
142 
143 static struct cdev *dev_shm_cdev;
144 static struct cdevsw dev_shm_cdevsw = {
145      .d_version = D_VERSION,
146      .d_name    = "dev_shm",
147 };
148 
149 /*
150  * Map Linux RT signals to the FreeBSD RT signals.
151  */
152 static inline int
153 linux_to_bsd_rt_signal(int sig)
154 {
155 
156 	return (SIGRTMIN + sig - LINUX_SIGRTMIN);
157 }
158 
159 static inline int
160 bsd_to_linux_rt_signal(int sig)
161 {
162 
163 	return (sig - SIGRTMIN + LINUX_SIGRTMIN);
164 }
165 
166 int
167 linux_to_bsd_signal(int sig)
168 {
169 
170 	KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
171 
172 	if (sig < LINUX_SIGRTMIN)
173 		return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
174 
175 	return (linux_to_bsd_rt_signal(sig));
176 }
177 
178 int
179 bsd_to_linux_signal(int sig)
180 {
181 
182 	if (sig <= LINUX_SIGTBLSZ)
183 		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
184 	if (sig == LINUX_SIGPWREMU)
185 		return (LINUX_SIGPWR);
186 
187 	return (bsd_to_linux_rt_signal(sig));
188 }
189 
190 int
191 linux_to_bsd_sigaltstack(int lsa)
192 {
193 	int bsa = 0;
194 
195 	if (lsa & LINUX_SS_DISABLE)
196 		bsa |= SS_DISABLE;
197 	/*
198 	 * Linux ignores SS_ONSTACK flag for ss
199 	 * parameter while FreeBSD prohibits it.
200 	 */
201 	return (bsa);
202 }
203 
204 int
205 bsd_to_linux_sigaltstack(int bsa)
206 {
207 	int lsa = 0;
208 
209 	if (bsa & SS_DISABLE)
210 		lsa |= LINUX_SS_DISABLE;
211 	if (bsa & SS_ONSTACK)
212 		lsa |= LINUX_SS_ONSTACK;
213 	return (lsa);
214 }
215 
216 void
217 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
218 {
219 	int b, l;
220 
221 	SIGEMPTYSET(*bss);
222 	for (l = 1; l <= LINUX_SIGRTMAX; l++) {
223 		if (LINUX_SIGISMEMBER(*lss, l)) {
224 			b = linux_to_bsd_signal(l);
225 			if (b)
226 				SIGADDSET(*bss, b);
227 		}
228 	}
229 }
230 
231 void
232 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
233 {
234 	int b, l;
235 
236 	LINUX_SIGEMPTYSET(*lss);
237 	for (b = 1; b <= SIGRTMAX; b++) {
238 		if (SIGISMEMBER(*bss, b)) {
239 			l = bsd_to_linux_signal(b);
240 			if (l)
241 				LINUX_SIGADDSET(*lss, l);
242 		}
243 	}
244 }
245 
246 /*
247  * Translate a FreeBSD interface name to a Linux interface name
248  * by interface name, and return the number of bytes copied to lxname.
249  */
250 int
251 ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
252 {
253 	struct epoch_tracker et;
254 	struct ifnet *ifp;
255 	int ret;
256 
257 	ret = 0;
258 	CURVNET_SET(TD_TO_VNET(curthread));
259 	NET_EPOCH_ENTER(et);
260 	ifp = ifunit(bsdname);
261 	if (ifp != NULL)
262 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
263 	NET_EPOCH_EXIT(et);
264 	CURVNET_RESTORE();
265 	return (ret);
266 }
267 
268 /*
269  * Translate a FreeBSD interface name to a Linux interface name
270  * by interface index, and return the number of bytes copied to lxname.
271  */
272 int
273 ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
274 {
275 	struct epoch_tracker et;
276 	struct ifnet *ifp;
277 	int ret;
278 
279 	ret = 0;
280 	CURVNET_SET(TD_TO_VNET(curthread));
281 	NET_EPOCH_ENTER(et);
282 	ifp = ifnet_byindex(idx);
283 	if (ifp != NULL)
284 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
285 	NET_EPOCH_EXIT(et);
286 	CURVNET_RESTORE();
287 	return (ret);
288 }
289 
290 /*
291  * Translate a FreeBSD interface name to a Linux interface name,
292  * and return the number of bytes copied to lxname, 0 if interface
293  * not found, -1 on error.
294  */
295 struct ifname_bsd_to_linux_ifp_cb_s {
296 	struct ifnet	*ifp;
297 	int		ethno;
298 	char		*lxname;
299 	size_t		len;
300 };
301 
302 static int
303 ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
304 {
305 	struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
306 
307 	if (ifp == cbs->ifp)
308 		return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
309 	if (IFP_IS_ETH(ifp))
310 		cbs->ethno++;
311 	return (0);
312 }
313 
314 int
315 ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
316 {
317 	struct ifname_bsd_to_linux_ifp_cb_s arg = {
318 		.ifp = ifp,
319 		.ethno = 0,
320 		.lxname = lxname,
321 		.len = len,
322 	};
323 
324 	NET_EPOCH_ASSERT();
325 
326 	/*
327 	 * Linux loopback interface name is lo (not lo0),
328 	 * we translate lo to lo0, loX to loX.
329 	 */
330 	if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
331 		return (strlcpy(lxname, "lo", len));
332 
333 	/* Short-circuit non ethernet interfaces. */
334 	if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
335 		return (strlcpy(lxname, if_name(ifp), len));
336 
337  	/* Determine the (relative) unit number for ethernet interfaces. */
338 	return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
339 }
340 
341 /*
342  * Translate a Linux interface name to a FreeBSD interface name,
343  * and return the associated ifnet structure
344  * bsdname and lxname need to be least IFNAMSIZ bytes long, but
345  * can point to the same buffer.
346  */
347 struct ifname_linux_to_ifp_cb_s {
348 	bool		is_lo;
349 	bool		is_eth;
350 	int		ethno;
351 	int		unit;
352 	const char	*lxname;
353 	if_t		ifp;
354 };
355 
356 static int
357 ifname_linux_to_ifp_cb(if_t ifp, void *arg)
358 {
359 	struct ifname_linux_to_ifp_cb_s *cbs = arg;
360 
361 	NET_EPOCH_ASSERT();
362 
363 	/*
364 	 * Allow Linux programs to use FreeBSD names. Don't presume
365 	 * we never have an interface named "eth", so don't make
366 	 * the test optional based on is_eth.
367 	 */
368 	if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
369 		goto out;
370 	if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
371 		goto out;
372 	if (cbs->is_lo && IFP_IS_LOOP(ifp))
373 		goto out;
374 	if (IFP_IS_ETH(ifp))
375 		cbs->ethno++;
376 	return (0);
377 
378 out:
379 	cbs->ifp = ifp;
380 	return (1);
381 }
382 
383 struct ifnet *
384 ifname_linux_to_ifp(struct thread *td, const char *lxname)
385 {
386 	struct ifname_linux_to_ifp_cb_s arg = {
387 		.ethno = 0,
388 		.lxname = lxname,
389 		.ifp = NULL,
390 	};
391 	int len;
392 	char *ep;
393 
394 	NET_EPOCH_ASSERT();
395 
396 	for (len = 0; len < LINUX_IFNAMSIZ; ++len)
397 		if (!isalpha(lxname[len]) || lxname[len] == '\0')
398 			break;
399 	if (len == 0 || len == LINUX_IFNAMSIZ)
400 		return (NULL);
401 	/*
402 	 * Linux loopback interface name is lo (not lo0),
403 	 * we translate lo to lo0, loX to loX.
404 	 */
405 	arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
406 	arg.unit = (int)strtoul(lxname + len, &ep, 10);
407 	if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
408 	    arg.is_lo == 0)
409 		return (NULL);
410 	arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
411 
412 	if_foreach(ifname_linux_to_ifp_cb, &arg);
413 	return (arg.ifp);
414 }
415 
416 int
417 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
418 {
419 	struct epoch_tracker et;
420 	struct ifnet *ifp;
421 
422 	CURVNET_SET(TD_TO_VNET(td));
423 	NET_EPOCH_ENTER(et);
424 	ifp = ifname_linux_to_ifp(td, lxname);
425 	if (ifp != NULL && bsdname != NULL)
426 		strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
427 	NET_EPOCH_EXIT(et);
428 	CURVNET_RESTORE();
429 	return (ifp != NULL ? 0 : EINVAL);
430 }
431 
/*
 * Return the Linux interface flags corresponding to the combined
 * administrative and driver flags of ifp.  Must be called inside the
 * network epoch.
 */
unsigned short
linux_ifflags(struct ifnet *ifp)
{

	NET_EPOCH_ASSERT();

	/*
	 * Hand the flags over at full width: bsd_to_linux_ifflags() takes
	 * an int, so squeezing them through an unsigned short first would
	 * silently drop any flag above bit 15.
	 */
	return (bsd_to_linux_ifflags(if_getflags(ifp) | if_getdrvflags(ifp)));
}
442 
443 unsigned short
444 bsd_to_linux_ifflags(int fl)
445 {
446 	unsigned short flags = 0;
447 
448 	if (fl & IFF_UP)
449 		flags |= LINUX_IFF_UP;
450 	if (fl & IFF_BROADCAST)
451 		flags |= LINUX_IFF_BROADCAST;
452 	if (fl & IFF_DEBUG)
453 		flags |= LINUX_IFF_DEBUG;
454 	if (fl & IFF_LOOPBACK)
455 		flags |= LINUX_IFF_LOOPBACK;
456 	if (fl & IFF_POINTOPOINT)
457 		flags |= LINUX_IFF_POINTOPOINT;
458 	if (fl & IFF_DRV_RUNNING)
459 		flags |= LINUX_IFF_RUNNING;
460 	if (fl & IFF_NOARP)
461 		flags |= LINUX_IFF_NOARP;
462 	if (fl & IFF_PROMISC)
463 		flags |= LINUX_IFF_PROMISC;
464 	if (fl & IFF_ALLMULTI)
465 		flags |= LINUX_IFF_ALLMULTI;
466 	if (fl & IFF_MULTICAST)
467 		flags |= LINUX_IFF_MULTICAST;
468 	return (flags);
469 }
470 
471 static u_int
472 linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
473 {
474 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
475 	struct l_sockaddr *lsa = arg;
476 
477 	if (count > 0)
478 		return (0);
479 	if (sdl->sdl_type != IFT_ETHER)
480 		return (0);
481 	bzero(lsa, sizeof(*lsa));
482 	lsa->sa_family = LINUX_ARPHRD_ETHER;
483 	bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
484 	return (1);
485 }
486 
487 int
488 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
489 {
490 
491 	NET_EPOCH_ASSERT();
492 
493 	if (IFP_IS_LOOP(ifp)) {
494 		bzero(lsa, sizeof(*lsa));
495 		lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
496 		return (0);
497 	}
498 	if (!IFP_IS_ETH(ifp))
499 		return (ENOENT);
500 	if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
501 		return (0);
502 	return (ENOENT);
503 }
504 
505 int
506 linux_to_bsd_domain(int domain)
507 {
508 
509 	switch (domain) {
510 	case LINUX_AF_UNSPEC:
511 		return (AF_UNSPEC);
512 	case LINUX_AF_UNIX:
513 		return (AF_LOCAL);
514 	case LINUX_AF_INET:
515 		return (AF_INET);
516 	case LINUX_AF_INET6:
517 		return (AF_INET6);
518 	case LINUX_AF_AX25:
519 		return (AF_CCITT);
520 	case LINUX_AF_IPX:
521 		return (AF_IPX);
522 	case LINUX_AF_APPLETALK:
523 		return (AF_APPLETALK);
524 	case LINUX_AF_NETLINK:
525 		return (AF_NETLINK);
526 	}
527 	return (-1);
528 }
529 
530 int
531 bsd_to_linux_domain(int domain)
532 {
533 
534 	switch (domain) {
535 	case AF_UNSPEC:
536 		return (LINUX_AF_UNSPEC);
537 	case AF_LOCAL:
538 		return (LINUX_AF_UNIX);
539 	case AF_INET:
540 		return (LINUX_AF_INET);
541 	case AF_INET6:
542 		return (LINUX_AF_INET6);
543 	case AF_CCITT:
544 		return (LINUX_AF_AX25);
545 	case AF_IPX:
546 		return (LINUX_AF_IPX);
547 	case AF_APPLETALK:
548 		return (LINUX_AF_APPLETALK);
549 	case AF_NETLINK:
550 		return (LINUX_AF_NETLINK);
551 	}
552 	return (-1);
553 }
554 
555 /*
556  * Based on the fact that:
557  * 1. Native and Linux storage of struct sockaddr
558  * and struct sockaddr_in6 are equal.
559  * 2. On Linux sa_family is the first member of all struct sockaddr.
560  */
561 int
562 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
563     socklen_t len)
564 {
565 	struct l_sockaddr *kosa;
566 	int bdom;
567 
568 	*lsa = NULL;
569 	if (len < 2 || len > UCHAR_MAX)
570 		return (EINVAL);
571 	bdom = bsd_to_linux_domain(sa->sa_family);
572 	if (bdom == -1)
573 		return (EAFNOSUPPORT);
574 
575 	kosa = malloc(len, M_LINUX, M_WAITOK);
576 	bcopy(sa, kosa, len);
577 	kosa->sa_family = bdom;
578 	*lsa = kosa;
579 	return (0);
580 }
581 
582 int
583 linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
584     socklen_t *len)
585 {
586 	struct sockaddr *sa;
587 	struct l_sockaddr *kosa;
588 #ifdef INET6
589 	struct sockaddr_in6 *sin6;
590 	bool  oldv6size;
591 #endif
592 	char *name;
593 	int salen, bdom, error, hdrlen, namelen;
594 
595 	if (*len < 2 || *len > UCHAR_MAX)
596 		return (EINVAL);
597 
598 	salen = *len;
599 
600 #ifdef INET6
601 	oldv6size = false;
602 	/*
603 	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
604 	 * if it's a v4-mapped address, so reserve the proper space
605 	 * for it.
606 	 */
607 	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
608 		salen += sizeof(uint32_t);
609 		oldv6size = true;
610 	}
611 #endif
612 
613 	kosa = malloc(salen, M_SONAME, M_WAITOK);
614 
615 	if ((error = copyin(osa, kosa, *len)))
616 		goto out;
617 
618 	bdom = linux_to_bsd_domain(kosa->sa_family);
619 	if (bdom == -1) {
620 		error = EAFNOSUPPORT;
621 		goto out;
622 	}
623 
624 #ifdef INET6
625 	/*
626 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
627 	 * which lacks the scope id compared with RFC2553 one. If we detect
628 	 * the situation, reject the address and write a message to system log.
629 	 *
630 	 * Still accept addresses for which the scope id is not used.
631 	 */
632 	if (oldv6size) {
633 		if (bdom == AF_INET6) {
634 			sin6 = (struct sockaddr_in6 *)kosa;
635 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
636 			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
637 			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
638 			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
639 			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
640 			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
641 				sin6->sin6_scope_id = 0;
642 			} else {
643 				linux_msg(curthread,
644 				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
645 				error = EINVAL;
646 				goto out;
647 			}
648 		} else
649 			salen -= sizeof(uint32_t);
650 	}
651 #endif
652 	if (bdom == AF_INET) {
653 		if (salen < sizeof(struct sockaddr_in)) {
654 			error = EINVAL;
655 			goto out;
656 		}
657 		salen = sizeof(struct sockaddr_in);
658 	}
659 
660 	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
661 		hdrlen = offsetof(struct sockaddr_un, sun_path);
662 		name = ((struct sockaddr_un *)kosa)->sun_path;
663 		if (*name == '\0') {
664 			/*
665 			 * Linux abstract namespace starts with a NULL byte.
666 			 * XXX We do not support abstract namespace yet.
667 			 */
668 			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
669 		} else
670 			namelen = strnlen(name, salen - hdrlen);
671 		salen = hdrlen + namelen;
672 		if (salen > sizeof(struct sockaddr_un)) {
673 			error = ENAMETOOLONG;
674 			goto out;
675 		}
676 	}
677 
678 	if (bdom == AF_NETLINK) {
679 		if (salen < sizeof(struct sockaddr_nl)) {
680 			error = EINVAL;
681 			goto out;
682 		}
683 		salen = sizeof(struct sockaddr_nl);
684 	}
685 
686 	sa = (struct sockaddr *)kosa;
687 	sa->sa_family = bdom;
688 	sa->sa_len = salen;
689 
690 	*sap = sa;
691 	*len = salen;
692 	return (0);
693 
694 out:
695 	free(kosa, M_SONAME);
696 	return (error);
697 }
698 
699 void
700 linux_dev_shm_create(void)
701 {
702 	int error;
703 
704 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
705 	    &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
706 	if (error != 0) {
707 		printf("%s: failed to create device node, error %d\n",
708 		    __func__, error);
709 	}
710 }
711 
712 void
713 linux_dev_shm_destroy(void)
714 {
715 
716 	destroy_dev(dev_shm_cdev);
717 }
718 
719 int
720 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
721     size_t mapcnt, int no_value)
722 {
723 	int bsd_mask, bsd_value, linux_mask, linux_value;
724 	int linux_ret;
725 	size_t i;
726 	bool applied;
727 
728 	applied = false;
729 	linux_ret = 0;
730 	for (i = 0; i < mapcnt; ++i) {
731 		bsd_mask = bitmap[i].bsd_mask;
732 		bsd_value = bitmap[i].bsd_value;
733 		if (bsd_mask == 0)
734 			bsd_mask = bsd_value;
735 
736 		linux_mask = bitmap[i].linux_mask;
737 		linux_value = bitmap[i].linux_value;
738 		if (linux_mask == 0)
739 			linux_mask = linux_value;
740 
741 		/*
742 		 * If a mask larger than just the value is set, we explicitly
743 		 * want to make sure that only this bit we mapped within that
744 		 * mask is set.
745 		 */
746 		if ((value & bsd_mask) == bsd_value) {
747 			linux_ret = (linux_ret & ~linux_mask) | linux_value;
748 			applied = true;
749 		}
750 	}
751 
752 	if (!applied)
753 		return (no_value);
754 	return (linux_ret);
755 }
756 
757 int
758 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
759     size_t mapcnt, int no_value)
760 {
761 	int bsd_mask, bsd_value, linux_mask, linux_value;
762 	int bsd_ret;
763 	size_t i;
764 	bool applied;
765 
766 	applied = false;
767 	bsd_ret = 0;
768 	for (i = 0; i < mapcnt; ++i) {
769 		bsd_mask = bitmap[i].bsd_mask;
770 		bsd_value = bitmap[i].bsd_value;
771 		if (bsd_mask == 0)
772 			bsd_mask = bsd_value;
773 
774 		linux_mask = bitmap[i].linux_mask;
775 		linux_value = bitmap[i].linux_value;
776 		if (linux_mask == 0)
777 			linux_mask = linux_value;
778 
779 		/*
780 		 * If a mask larger than just the value is set, we explicitly
781 		 * want to make sure that only this bit we mapped within that
782 		 * mask is set.
783 		 */
784 		if ((value & linux_mask) == linux_value) {
785 			bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
786 			applied = true;
787 		}
788 	}
789 
790 	if (!applied)
791 		return (no_value);
792 	return (bsd_ret);
793 }
794 
795 void
796 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
797     short *bev)
798 {
799 	struct file *fp;
800 	int error;
801 	short bits = 0;
802 
803 	if (lev & LINUX_POLLIN)
804 		bits |= POLLIN;
805 	if (lev & LINUX_POLLPRI)
806 		bits |=	POLLPRI;
807 	if (lev & LINUX_POLLOUT)
808 		bits |= POLLOUT;
809 	if (lev & LINUX_POLLERR)
810 		bits |= POLLERR;
811 	if (lev & LINUX_POLLHUP)
812 		bits |= POLLHUP;
813 	if (lev & LINUX_POLLNVAL)
814 		bits |= POLLNVAL;
815 	if (lev & LINUX_POLLRDNORM)
816 		bits |= POLLRDNORM;
817 	if (lev & LINUX_POLLRDBAND)
818 		bits |= POLLRDBAND;
819 	if (lev & LINUX_POLLWRBAND)
820 		bits |= POLLWRBAND;
821 	if (lev & LINUX_POLLWRNORM)
822 		bits |= POLLWRNORM;
823 
824 	if (lev & LINUX_POLLRDHUP) {
825 		/*
826 		 * It seems that the Linux silencly ignores POLLRDHUP
827 		 * on non-socket file descriptors unlike FreeBSD, where
828 		 * events bits is more strictly checked (POLLSTANDARD).
829 		 */
830 		error = fget_unlocked(td, fd, &cap_no_rights, &fp);
831 		if (error == 0) {
832 			/*
833 			 * XXX. On FreeBSD POLLRDHUP applies only to
834 			 * stream sockets.
835 			 */
836 			if (fp->f_type == DTYPE_SOCKET)
837 				bits |= POLLRDHUP;
838 			fdrop(fp, td);
839 		}
840 	}
841 
842 	if (lev & LINUX_POLLMSG)
843 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
844 	if (lev & LINUX_POLLREMOVE)
845 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
846 
847 	*bev = bits;
848 }
849 
850 void
851 bsd_to_linux_poll_events(short bev, short *lev)
852 {
853 	short bits = 0;
854 
855 	if (bev & POLLIN)
856 		bits |= LINUX_POLLIN;
857 	if (bev & POLLPRI)
858 		bits |=	LINUX_POLLPRI;
859 	if (bev & (POLLOUT | POLLWRNORM))
860 		/*
861 		 * POLLWRNORM is equal to POLLOUT on FreeBSD,
862 		 * but not on Linux
863 		 */
864 		bits |= LINUX_POLLOUT;
865 	if (bev & POLLERR)
866 		bits |= LINUX_POLLERR;
867 	if (bev & POLLHUP)
868 		bits |= LINUX_POLLHUP;
869 	if (bev & POLLNVAL)
870 		bits |= LINUX_POLLNVAL;
871 	if (bev & POLLRDNORM)
872 		bits |= LINUX_POLLRDNORM;
873 	if (bev & POLLRDBAND)
874 		bits |= LINUX_POLLRDBAND;
875 	if (bev & POLLWRBAND)
876 		bits |= LINUX_POLLWRBAND;
877 	if (bev & POLLRDHUP)
878 		bits |= LINUX_POLLRDHUP;
879 
880 	*lev = bits;
881 }
882 
883 bool
884 linux_use_real_ifname(const struct ifnet *ifp)
885 {
886 
887 	return (use_real_ifnames);
888 }
889