xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision e28a4053)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_futex.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
73 
/* Declare version 1 of the "linux" kernel module. */
74 MODULE_VERSION(linux, 1);
75 
/* Define the M_LINUX malloc type (short name "linux"). */
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
77 
/*
 * The two characters "#!" read as one 16-bit value in host byte order.
 * exec_linux_imgact_try() compares the first short of the image header
 * against this to recognize shell scripts.
 */
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC      0x2123 /* #! */
80 #else
81 #define SHELLMAGIC      0x2321
82 #endif
83 
84 /*
85  * Allow the sendsig functions to use the ldebug() facility
86  * even though they are not syscalls themselves. Map them
87  * to syscall 0. This is slightly less bogus than using
88  * ldebug(sigreturn).
89  */
90 #define	LINUX_SYS_linux_rt_sendsig	0
91 #define	LINUX_SYS_linux_sendsig		0
92 
/*
 * Signal trampoline code and its size in bytes, defined outside this
 * file.  Both sysentvecs publish them as sv_sigcode / sv_szsigcode, and
 * linux_copyout_strings() copies the code onto the user stack.
 */
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
95 
/* Linux system call table, used as sv_table by both sysentvecs. */
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97 
/*
 * Linker sets of ioctl and device handlers.
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably walked by the module load/unload code - confirm.
 */
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100 
/* Forward declarations for the sysentvec callbacks defined in this file. */
101 static int	linux_fixup(register_t **stack_base,
102 		    struct image_params *iparams);
103 static int	elf_linux_fixup(register_t **stack_base,
104 		    struct image_params *iparams);
105 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
106 static void	exec_linux_setregs(struct thread *td,
107 		    struct image_params *imgp, u_long stack);
108 static register_t *linux_copyout_strings(struct image_params *imgp);
109 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
110 
/*
 * Platform name string and its size in bytes.  linux_copyout_strings()
 * copies linux_szplatform bytes of linux_platform to the user stack and
 * elf_linux_fixup() exports that address as LINUX_AT_PLATFORM.  Both are
 * assigned outside the part of the file visible here.
 */
111 static int linux_szplatform;
112 const char *linux_platform;
113 
/*
 * Event handler registration tags.
 * NOTE(review): not used in the visible part of the file; presumably set
 * when the module registers its exit/schedtail/exec hooks - confirm.
 */
114 static eventhandler_tag linux_exit_tag;
115 static eventhandler_tag linux_schedtail_tag;
116 static eventhandler_tag linux_exec_tag;
117 
118 /*
119  * Linux syscalls return negative errno's, we do positive and map them
 *
 * The table is indexed by the FreeBSD errno value (0 .. ELAST) and each
 * entry is the corresponding Linux errno, already negated.  It is
 * installed as sv_errtbl (with sv_errsize = ELAST + 1) in both
 * sysentvecs below.  Small values mostly map to themselves, but the two
 * numberings diverge in places: for example index 11 yields -35 while
 * index 35 yields -11.
 *
120  * Reference:
121  *   FreeBSD: src/sys/sys/errno.h
122  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
123  *            linux-2.6.17.8/include/asm-generic/errno.h
124  */
125 static int bsd_to_linux_errno[ELAST + 1] = {
126 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
127 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
128 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
129 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
130 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
131 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
132 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
133 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
134 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
135 	 -72, -67, -71
136 };
137 
/*
 * FreeBSD signal number -> Linux signal number.
 *
 * Installed as sv_sigtbl (sv_sigsize = LINUX_SIGTBLSZ) in both sysentvecs.
 * The sendsig routines below look a signal up as
 * sv_sigtbl[_SIG_IDX(sig)] after checking sig <= sv_sigsize.
 * Two slots (the 7th and the 29th) hold 0 rather than a LINUX_SIG*
 * constant; presumably those FreeBSD signals have no Linux counterpart -
 * TODO confirm against the FreeBSD signal numbering.
 */
138 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
139 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
140 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
141 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
142 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
143 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
144 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
145 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
146 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
147 };
148 
/*
 * Linux signal number -> FreeBSD signal number; the reverse of
 * bsd_to_linux_signal[].  Presumably indexed the same way (signal number
 * minus one); the table is not referenced in the part of the file visible
 * here - verify against its users.
 *
 * SIGBUS and SIGURG each appear twice, so in both cases two distinct
 * Linux signals are folded onto a single FreeBSD signal.
 */
149 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
150 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
151 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
152 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
153 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
154 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
155 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
156 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
157 	SIGIO, SIGURG, SIGSYS
158 };
159 
/*
 * FreeBSD trap type -> Linux trap number, as stored in the sc_trapno
 * field of the signal context built by the sendsig routines.
 *
 * The array index is the FreeBSD trap type (the T_* name is noted beside
 * each populated slot); slots with no Linux equivalent hold
 * LINUX_T_UNKNOWN.
 */
160 #define LINUX_T_UNKNOWN  255
161 static int _bsd_to_linux_trapcode[] = {
162 	LINUX_T_UNKNOWN,	/* 0 */
163 	6,			/* 1  T_PRIVINFLT */
164 	LINUX_T_UNKNOWN,	/* 2 */
165 	3,			/* 3  T_BPTFLT */
166 	LINUX_T_UNKNOWN,	/* 4 */
167 	LINUX_T_UNKNOWN,	/* 5 */
168 	16,			/* 6  T_ARITHTRAP */
169 	254,			/* 7  T_ASTFLT */
170 	LINUX_T_UNKNOWN,	/* 8 */
171 	13,			/* 9  T_PROTFLT */
172 	1,			/* 10 T_TRCTRAP */
173 	LINUX_T_UNKNOWN,	/* 11 */
174 	14,			/* 12 T_PAGEFLT */
175 	LINUX_T_UNKNOWN,	/* 13 */
176 	17,			/* 14 T_ALIGNFLT */
177 	LINUX_T_UNKNOWN,	/* 15 */
178 	LINUX_T_UNKNOWN,	/* 16 */
179 	LINUX_T_UNKNOWN,	/* 17 */
180 	0,			/* 18 T_DIVIDE */
181 	2,			/* 19 T_NMI */
182 	4,			/* 20 T_OFLOW */
183 	5,			/* 21 T_BOUND */
184 	7,			/* 22 T_DNA */
185 	8,			/* 23 T_DOUBLEFLT */
186 	9,			/* 24 T_FPOPFLT */
187 	10,			/* 25 T_TSSFLT */
188 	11,			/* 26 T_SEGNPFLT */
189 	12,			/* 27 T_STKFLT */
190 	18,			/* 28 T_MCHK */
191 	19,			/* 29 T_XMMFLT */
192 	15			/* 30 T_RESERVED */
193 };
/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[]: any index at or past
 * the end of the table yields LINUX_T_UNKNOWN.  The comparison is made
 * against a sizeof expression (an unsigned type), so a negative int
 * argument converts to a large unsigned value and also falls through to
 * LINUX_T_UNKNOWN.  The argument is evaluated twice, so it must be free
 * of side effects.
 */
194 #define bsd_to_linux_trapcode(code) \
195     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
196      _bsd_to_linux_trapcode[(code)]: \
197      LINUX_T_UNKNOWN)
198 
199 /*
200  * If FreeBSD & Linux have a difference of opinion about what a trap
201  * means, deal with it here.
202  *
203  * MPSAFE
204  */
205 static int
206 translate_traps(int signal, int trap_code)
207 {
208 	if (signal != SIGBUS)
209 		return signal;
210 	switch (trap_code) {
211 	case T_PROTFLT:
212 	case T_TSSFLT:
213 	case T_DOUBLEFLT:
214 	case T_PAGEFLT:
215 		return SIGSEGV;
216 	default:
217 		return signal;
218 	}
219 }
220 
221 static int
222 linux_fixup(register_t **stack_base, struct image_params *imgp)
223 {
224 	register_t *argv, *envp;
225 
226 	argv = *stack_base;
227 	envp = *stack_base + (imgp->args->argc + 1);
228 	(*stack_base)--;
229 	**stack_base = (intptr_t)(void *)envp;
230 	(*stack_base)--;
231 	**stack_base = (intptr_t)(void *)argv;
232 	(*stack_base)--;
233 	**stack_base = imgp->args->argc;
234 	return (0);
235 }
236 
237 static int
238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
239 {
240 	struct proc *p;
241 	Elf32_Auxargs *args;
242 	Elf32_Addr *uplatform;
243 	struct ps_strings *arginfo;
244 	register_t *pos;
245 
246 	KASSERT(curthread->td_proc == imgp->proc,
247 	    ("unsafe elf_linux_fixup(), should be curproc"));
248 
249 	p = imgp->proc;
250 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
251 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
252 	    linux_szplatform);
253 	args = (Elf32_Auxargs *)imgp->auxargs;
254 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
255 
256 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
257 
258 	/*
259 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
260 	 * as it has appeared in the 2.4.0-rc7 first time.
261 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
262 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
263 	 * is not present.
264 	 * Also see linux_times() implementation.
265 	 */
266 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
267 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
268 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
269 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
270 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
271 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
272 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
273 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
274 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
275 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
276 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
277 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
278 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
279 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
280 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
281 	if (args->execfd != -1)
282 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
283 	AUXARGS_ENTRY(pos, AT_NULL, 0);
284 
285 	free(imgp->auxargs, M_TEMP);
286 	imgp->auxargs = NULL;
287 
288 	(*stack_base)--;
289 	**stack_base = (register_t)imgp->args->argc;
290 	return (0);
291 }
292 
293 /*
294  * Copied from kern/kern_exec.c
 *
 * Lay out the top of the new image's user stack (sv_copyout_strings of
 * elf_linux_sysvec).  Working downward from the ps_strings structure,
 * the areas are: the signal trampoline (linux_szsigcode bytes), the
 * platform string (linux_szplatform bytes), SPARE_USRSPACE bytes of
 * slack, the argument and environment strings, and finally the argv/envp
 * pointer vectors, with extra room reserved for the ELF aux vector when
 * imgp->auxargs is set.
 *
 * Returns the user address of the argv vector, which becomes the initial
 * stack base.  The results of the copyout() and suword() calls are not
 * checked.
295  */
296 static register_t *
297 linux_copyout_strings(struct image_params *imgp)
298 {
299 	int argc, envc;
300 	char **vectp;
301 	char *stringp, *destp;
302 	register_t *stack_base;
303 	struct ps_strings *arginfo;
304 	struct proc *p;
305 
306 	/*
307 	 * Calculate string base and vector table pointers.
308 	 * Also deal with signal trampoline code for this exec type.
309 	 */
310 	p = imgp->proc;
311 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	/*
	 * ARG_MAX - stringspace is the number of bytes of argument and
	 * environment strings actually in use; it is rounded up to a
	 * multiple of the pointer size before being reserved.
	 */
312 	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
313 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
314 	    sizeof(char *));
315 
316 	/*
317 	 * install sigcode
318 	 */
319 	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
320 	    linux_szsigcode), linux_szsigcode);
321 
322 	/*
323 	 * install LINUX_PLATFORM
324 	 */
325 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
326 	    linux_szplatform), linux_szplatform);
327 
328 	/*
329 	 * If we have a valid auxargs ptr, prepare some room
330 	 * on the stack.
331 	 */
332 	if (imgp->auxargs) {
333 		/*
334 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
335 		 * lower compatibility.
336 		 */
		/* Keep a size already set; otherwise default to LINUX_AT_COUNT * 2. */
337 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
338 		    (LINUX_AT_COUNT * 2);
339 		/*
340 		 * The '+ 2' is for the null pointers at the end of each of
341 		 * the arg and env vector sets, and imgp->auxarg_size is room
342 		 * for argument of Runtime loader.
343 		 */
344 		vectp = (char **)(destp - (imgp->args->argc +
345 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
346 	} else {
347 		/*
348 		 * The '+ 2' is for the null pointers at the end of each of
349 		 * the arg and env vector sets
350 		 */
351 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
352 		    sizeof(char *));
353 	}
354 
355 	/*
356 	 * vectp also becomes our initial stack base
357 	 */
358 	stack_base = (register_t *)vectp;
359 
360 	stringp = imgp->args->begin_argv;
361 	argc = imgp->args->argc;
362 	envc = imgp->args->envc;
363 
364 	/*
365 	 * Copy out strings - arguments and environment.
366 	 */
367 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
368 
369 	/*
370 	 * Fill in "ps_strings" struct for ps, w, etc.
371 	 */
372 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
373 	suword(&arginfo->ps_nargvstr, argc);
374 
375 	/*
376 	 * Fill in argument portion of vector table.
377 	 */
	/*
	 * stringp walks the kernel copy of the strings while destp tracks
	 * the matching user address; each pass stores the user address of
	 * one string and then skips past its terminating NUL.
	 */
378 	for (; argc > 0; --argc) {
379 		suword(vectp++, (long)(intptr_t)destp);
380 		while (*stringp++ != 0)
381 			destp++;
382 		destp++;
383 	}
384 
385 	/* a null vector table pointer separates the argp's from the envp's */
386 	suword(vectp++, 0);
387 
388 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
389 	suword(&arginfo->ps_nenvstr, envc);
390 
391 	/*
392 	 * Fill in environment portion of vector table.
393 	 */
394 	for (; envc > 0; --envc) {
395 		suword(vectp++, (long)(intptr_t)destp);
396 		while (*stringp++ != 0)
397 			destp++;
398 		destp++;
399 	}
400 
401 	/* end of vector table is a null pointer */
402 	suword(vectp, 0);
403 
404 	return (stack_base);
405 }
406 
407 
408 
/*
 * User code and data segment selectors; the sendsig routines load them
 * into the trapframe segment registers before entering a handler.
 */
409 extern int _ucodesel, _udatasel;
/*
 * Offset added to the start of the signal trampoline to reach the entry
 * point used by linux_rt_sendsig(); presumably the size of the non-rt
 * part of the trampoline - confirm against the sigcode source.
 */
410 extern unsigned long linux_sznonrtsigcode;
411 
412 static void
413 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
414 {
415 	struct thread *td = curthread;
416 	struct proc *p = td->td_proc;
417 	struct sigacts *psp;
418 	struct trapframe *regs;
419 	struct l_rt_sigframe *fp, frame;
420 	int sig, code;
421 	int oonstack;
422 
423 	sig = ksi->ksi_signo;
424 	code = ksi->ksi_code;
425 	PROC_LOCK_ASSERT(p, MA_OWNED);
426 	psp = p->p_sigacts;
427 	mtx_assert(&psp->ps_mtx, MA_OWNED);
428 	regs = td->td_frame;
429 	oonstack = sigonstack(regs->tf_esp);
430 
431 #ifdef DEBUG
432 	if (ldebug(rt_sendsig))
433 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
434 		    catcher, sig, (void*)mask, code);
435 #endif
436 	/*
437 	 * Allocate space for the signal handler context.
438 	 */
439 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
440 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
441 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
442 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
443 	} else
444 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
445 	mtx_unlock(&psp->ps_mtx);
446 
447 	/*
448 	 * Build the argument list for the signal handler.
449 	 */
450 	if (p->p_sysent->sv_sigtbl)
451 		if (sig <= p->p_sysent->sv_sigsize)
452 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
453 
454 	bzero(&frame, sizeof(frame));
455 
456 	frame.sf_handler = catcher;
457 	frame.sf_sig = sig;
458 	frame.sf_siginfo = &fp->sf_si;
459 	frame.sf_ucontext = &fp->sf_sc;
460 
461 	/* Fill in POSIX parts */
462 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
463 
464 	/*
465 	 * Build the signal context to be used by sigreturn.
466 	 */
467 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
468 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
469 
470 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
471 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
472 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
473 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
474 	PROC_UNLOCK(p);
475 
476 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
477 
478 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
479 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
480 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
481 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
482 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
483 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
484 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
485 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
486 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
487 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
488 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
489 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
490 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
491 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
492 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
493 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
494 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
495 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
496 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
497 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
498 
499 #ifdef DEBUG
500 	if (ldebug(rt_sendsig))
501 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
502 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
503 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
504 #endif
505 
506 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
507 		/*
508 		 * Process has trashed its stack; give it an illegal
509 		 * instruction to halt it in its tracks.
510 		 */
511 #ifdef DEBUG
512 		if (ldebug(rt_sendsig))
513 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
514 			    fp, oonstack);
515 #endif
516 		PROC_LOCK(p);
517 		sigexit(td, SIGILL);
518 	}
519 
520 	/*
521 	 * Build context to run handler in.
522 	 */
523 	regs->tf_esp = (int)fp;
524 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
525 	    linux_sznonrtsigcode;
526 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
527 	regs->tf_cs = _ucodesel;
528 	regs->tf_ds = _udatasel;
529 	regs->tf_es = _udatasel;
530 	regs->tf_fs = _udatasel;
531 	regs->tf_ss = _udatasel;
532 	PROC_LOCK(p);
533 	mtx_lock(&psp->ps_mtx);
534 }
535 
536 
537 /*
538  * Send an interrupt to process.
539  *
540  * Stack is set up to allow sigcode stored
541  * in u. to call routine, followed by kcall
542  * to sigreturn routine below.  After sigreturn
543  * resets the signal mask, the stack, and the
544  * frame pointer, it returns to the user
545  * specified pc, psl.
 *
 * This is the sv_sendsig entry of both sysentvecs.  Handlers installed
 * with SA_SIGINFO are passed on to linux_rt_sendsig(); all others get an
 * l_sigframe built here.
 *
 *   catcher  user-space address of the handler, stored in the frame
 *   ksi      kernel signal information (number, code, trap number, address)
 *   mask     signal mask saved in the frame for sigreturn to restore
 *
 * Called with the process lock and p->p_sigacts->ps_mtx held (both are
 * asserted).  Both are released while the frame is built and copied out,
 * and taken again before returning.
546  */
547 static void
548 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
549 {
550 	struct thread *td = curthread;
551 	struct proc *p = td->td_proc;
552 	struct sigacts *psp;
553 	struct trapframe *regs;
554 	struct l_sigframe *fp, frame;
555 	l_sigset_t lmask;
556 	int sig, code;
557 	int oonstack, i;
558 
559 	PROC_LOCK_ASSERT(p, MA_OWNED);
560 	psp = p->p_sigacts;
561 	sig = ksi->ksi_signo;
	/* code is only consumed by the DEBUG printf below */
562 	code = ksi->ksi_code;
563 	mtx_assert(&psp->ps_mtx, MA_OWNED);
564 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
565 		/* Signal handler installed with SA_SIGINFO. */
566 		linux_rt_sendsig(catcher, ksi, mask);
567 		return;
568 	}
569 	regs = td->td_frame;
570 	oonstack = sigonstack(regs->tf_esp);
571 
572 #ifdef DEBUG
573 	if (ldebug(sendsig))
574 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
575 		    catcher, sig, (void*)mask, code);
576 #endif
577 
578 	/*
579 	 * Allocate space for the signal handler context.
	 * The frame goes at the top of the alternate signal stack when one
	 * is enabled, not already in use and requested for this signal;
	 * otherwise it is placed directly below the current user %esp.
580 	 */
581 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
582 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
583 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
584 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
585 	} else
586 		fp = (struct l_sigframe *)regs->tf_esp - 1;
587 	mtx_unlock(&psp->ps_mtx);
588 	PROC_UNLOCK(p);
589 
590 	/*
591 	 * Build the argument list for the signal handler.
	 * The signal number is translated to its Linux value when the ABI
	 * provides a table and the signal is within its range.
592 	 */
593 	if (p->p_sysent->sv_sigtbl)
594 		if (sig <= p->p_sysent->sv_sigsize)
595 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
596 
597 	bzero(&frame, sizeof(frame));
598 
599 	frame.sf_handler = catcher;
600 	frame.sf_sig = sig;
601 
602 	bsd_to_linux_sigset(mask, &lmask);
603 
604 	/*
605 	 * Build the signal context to be used by sigreturn.
606 	 */
607 	frame.sf_sc.sc_mask   = lmask.__bits[0];
608 	frame.sf_sc.sc_gs     = rgs();
609 	frame.sf_sc.sc_fs     = regs->tf_fs;
610 	frame.sf_sc.sc_es     = regs->tf_es;
611 	frame.sf_sc.sc_ds     = regs->tf_ds;
612 	frame.sf_sc.sc_edi    = regs->tf_edi;
613 	frame.sf_sc.sc_esi    = regs->tf_esi;
614 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
615 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
616 	frame.sf_sc.sc_edx    = regs->tf_edx;
617 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
618 	frame.sf_sc.sc_eax    = regs->tf_eax;
619 	frame.sf_sc.sc_eip    = regs->tf_eip;
620 	frame.sf_sc.sc_cs     = regs->tf_cs;
621 	frame.sf_sc.sc_eflags = regs->tf_eflags;
622 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
623 	frame.sf_sc.sc_ss     = regs->tf_ss;
624 	frame.sf_sc.sc_err    = regs->tf_err;
625 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
626 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
627 
	/* Mask words beyond the first are carried in sf_extramask[]. */
628 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
629 		frame.sf_extramask[i] = lmask.__bits[i+1];
630 
631 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
632 		/*
633 		 * Process has trashed its stack; give it an illegal
634 		 * instruction to halt it in its tracks.
635 		 */
636 		PROC_LOCK(p);
637 		sigexit(td, SIGILL);
638 	}
639 
640 	/*
641 	 * Build context to run handler in.
	 * Execution resumes at the start of the signal trampoline, which
	 * lies sv_szsigcode bytes below PS_STRINGS, with the stack pointer
	 * at the new frame and the user segment selectors loaded.
642 	 */
643 	regs->tf_esp = (int)fp;
644 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
645 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
646 	regs->tf_cs = _ucodesel;
647 	regs->tf_ds = _udatasel;
648 	regs->tf_es = _udatasel;
649 	regs->tf_fs = _udatasel;
650 	regs->tf_ss = _udatasel;
	/* Re-acquire the locks that were held on entry. */
651 	PROC_LOCK(p);
652 	mtx_lock(&psp->ps_mtx);
653 }
654 
655 /*
656  * System call to cleanup state after a signal
657  * has been taken.  Reset signal mask and
658  * stack state from context left by sendsig (above).
659  * Return to previous pc and psl as specified by
660  * context left by sendsig. Check carefully to
661  * make sure that the user has not modified the
662  * psl to gain improper privileges or to cause
663  * a machine fault.
 *
 *   td    the calling thread; its trapframe is overwritten on success
 *   args  args->sfp is the user address of the l_sigframe to restore
 *
 * Returns EFAULT if the frame cannot be read, EINVAL if the saved eflags
 * or %cs fail the checks below (a SIGBUS is also posted in the %cs case),
 * and EJUSTRETURN once the register state has been restored.
664  */
665 int
666 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
667 {
668 	struct l_sigframe frame;
669 	struct trapframe *regs;
670 	l_sigset_t lmask;
671 	sigset_t bmask;
672 	int eflags, i;
673 	ksiginfo_t ksi;
674 
675 	regs = td->td_frame;
676 
677 #ifdef DEBUG
678 	if (ldebug(sigreturn))
679 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
680 #endif
681 	/*
682 	 * The trampoline code hands us the sigframe.
683 	 * It is unsafe to keep track of it ourselves, in the event that a
684 	 * program jumps out of a signal handler.
685 	 */
686 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
687 		return (EFAULT);
688 
689 	/*
690 	 * Check for security violations.
	 * EFLAGS_SECURE() is true when the saved and current eflags differ
	 * only in bits covered by PSL_USERCHANGE.
691 	 */
692 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
693 	eflags = frame.sf_sc.sc_eflags;
694 	/*
695 	 * XXX do allow users to change the privileged flag PSL_RF.  The
696 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
697 	 * sometimes set it there too.  tf_eflags is kept in the signal
698 	 * context during signal handling and there is no other place
699 	 * to remember it, so the PSL_RF bit may be corrupted by the
700 	 * signal handler without us knowing.  Corruption of the PSL_RF
701 	 * bit at worst causes one more or one less debugger trap, so
702 	 * allowing it is fairly harmless.
703 	 */
704 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
705 		return(EINVAL);
706 
707 	/*
708 	 * Don't allow users to load a valid privileged %cs.  Let the
709 	 * hardware check for invalid selectors, excess privilege in
710 	 * other selectors, invalid %eip's and invalid %esp's.
711 	 */
712 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
713 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		/* Report the attempt as a protection fault at the current %eip. */
714 		ksiginfo_init_trap(&ksi);
715 		ksi.ksi_signo = SIGBUS;
716 		ksi.ksi_code = BUS_OBJERR;
717 		ksi.ksi_trapno = T_PROTFLT;
718 		ksi.ksi_addr = (void *)regs->tf_eip;
719 		trapsignal(td, &ksi);
720 		return(EINVAL);
721 	}
722 
	/*
	 * Reassemble the Linux signal mask from sc_mask plus the extra
	 * words, convert it and install it as the thread's signal mask.
	 */
723 	lmask.__bits[0] = frame.sf_sc.sc_mask;
724 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
725 		lmask.__bits[i+1] = frame.sf_extramask[i];
726 	linux_to_bsd_sigset(&lmask, &bmask);
727 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
728 
729 	/*
730 	 * Restore signal context.
731 	 */
732 	/* %gs was restored by the trampoline. */
733 	regs->tf_fs     = frame.sf_sc.sc_fs;
734 	regs->tf_es     = frame.sf_sc.sc_es;
735 	regs->tf_ds     = frame.sf_sc.sc_ds;
736 	regs->tf_edi    = frame.sf_sc.sc_edi;
737 	regs->tf_esi    = frame.sf_sc.sc_esi;
738 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
739 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
740 	regs->tf_edx    = frame.sf_sc.sc_edx;
741 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
742 	regs->tf_eax    = frame.sf_sc.sc_eax;
743 	regs->tf_eip    = frame.sf_sc.sc_eip;
744 	regs->tf_cs     = frame.sf_sc.sc_cs;
745 	regs->tf_eflags = eflags;
746 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
747 	regs->tf_ss     = frame.sf_sc.sc_ss;
748 
749 	return (EJUSTRETURN);
750 }
751 
752 /*
753  * System call to cleanup state after a signal
754  * has been taken.  Reset signal mask and
755  * stack state from context left by rt_sendsig (above).
756  * Return to previous pc and psl as specified by
757  * context left by sendsig. Check carefully to
758  * make sure that the user has not modified the
759  * psl to gain improper privileges or to cause
760  * a machine fault.
 *
 *   td    the calling thread; its trapframe is overwritten on success
 *   args  args->ucp is the user address of the l_ucontext to restore
 *
 * Returns EFAULT if the context cannot be read, EINVAL if the saved
 * eflags or %cs fail the checks below (a SIGBUS is also posted in the
 * %cs case), and EJUSTRETURN once the state has been restored.  Unlike
 * linux_sigreturn(), the alternate signal stack settings saved in the
 * context are re-applied as well.
761  */
762 int
763 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
764 {
765 	struct l_ucontext uc;
766 	struct l_sigcontext *context;
767 	sigset_t bmask;
768 	l_stack_t *lss;
769 	stack_t ss;
770 	struct trapframe *regs;
771 	int eflags;
772 	ksiginfo_t ksi;
773 
774 	regs = td->td_frame;
775 
776 #ifdef DEBUG
777 	if (ldebug(rt_sigreturn))
778 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
779 #endif
780 	/*
781 	 * The trampoline code hands us the ucontext.
782 	 * It is unsafe to keep track of it ourselves, in the event that a
783 	 * program jumps out of a signal handler.
784 	 */
785 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
786 		return (EFAULT);
787 
788 	context = &uc.uc_mcontext;
789 
790 	/*
791 	 * Check for security violations.
	 * EFLAGS_SECURE() is true when the saved and current eflags differ
	 * only in bits covered by PSL_USERCHANGE.
792 	 */
793 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
794 	eflags = context->sc_eflags;
795 	/*
796 	 * XXX do allow users to change the privileged flag PSL_RF.  The
797 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
798 	 * sometimes set it there too.  tf_eflags is kept in the signal
799 	 * context during signal handling and there is no other place
800 	 * to remember it, so the PSL_RF bit may be corrupted by the
801 	 * signal handler without us knowing.  Corruption of the PSL_RF
802 	 * bit at worst causes one more or one less debugger trap, so
803 	 * allowing it is fairly harmless.
804 	 */
805 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
806 		return(EINVAL);
807 
808 	/*
809 	 * Don't allow users to load a valid privileged %cs.  Let the
810 	 * hardware check for invalid selectors, excess privilege in
811 	 * other selectors, invalid %eip's and invalid %esp's.
812 	 */
813 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
814 	if (!CS_SECURE(context->sc_cs)) {
		/* Report the attempt as a protection fault at the current %eip. */
815 		ksiginfo_init_trap(&ksi);
816 		ksi.ksi_signo = SIGBUS;
817 		ksi.ksi_code = BUS_OBJERR;
818 		ksi.ksi_trapno = T_PROTFLT;
819 		ksi.ksi_addr = (void *)regs->tf_eip;
820 		trapsignal(td, &ksi);
821 		return(EINVAL);
822 	}
823 
	/* Convert the saved Linux mask and install it as the thread's mask. */
824 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
825 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
826 
827 	/*
828 	 * Restore signal context
829 	 */
830 	/* %gs was restored by the trampoline. */
831 	regs->tf_fs     = context->sc_fs;
832 	regs->tf_es     = context->sc_es;
833 	regs->tf_ds     = context->sc_ds;
834 	regs->tf_edi    = context->sc_edi;
835 	regs->tf_esi    = context->sc_esi;
836 	regs->tf_ebp    = context->sc_ebp;
837 	regs->tf_ebx    = context->sc_ebx;
838 	regs->tf_edx    = context->sc_edx;
839 	regs->tf_ecx    = context->sc_ecx;
840 	regs->tf_eax    = context->sc_eax;
841 	regs->tf_eip    = context->sc_eip;
842 	regs->tf_cs     = context->sc_cs;
843 	regs->tf_eflags = eflags;
844 	regs->tf_esp    = context->sc_esp_at_signal;
845 	regs->tf_ss     = context->sc_ss;
846 
847 	/*
848 	 * call sigaltstack & ignore results..
	 * The Linux stack description saved in the ucontext is converted
	 * to a native stack_t first.
849 	 */
850 	lss = &uc.uc_stack;
851 	ss.ss_sp = lss->ss_sp;
852 	ss.ss_size = lss->ss_size;
853 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
854 
855 #ifdef DEBUG
856 	if (ldebug(rt_sigreturn))
857 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
858 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
859 #endif
860 	(void)kern_sigaltstack(td, &ss, NULL);
861 
862 	return (EJUSTRETURN);
863 }
864 
865 static int
866 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
867 {
868 	struct proc *p;
869 	struct trapframe *frame;
870 
871 	p = td->td_proc;
872 	frame = td->td_frame;
873 
874 	sa->code = frame->tf_eax;
875 	sa->args[0] = frame->tf_ebx;
876 	sa->args[1] = frame->tf_ecx;
877 	sa->args[2] = frame->tf_edx;
878 	sa->args[3] = frame->tf_esi;
879 	sa->args[4] = frame->tf_edi;
880 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
881 
882 	if (sa->code >= p->p_sysent->sv_size)
883 		sa->callp = &p->p_sysent->sv_table[0];
884  	else
885  		sa->callp = &p->p_sysent->sv_table[sa->code];
886 	sa->narg = sa->callp->sy_narg;
887 
888 	td->td_retval[0] = 0;
889 	td->td_retval[1] = frame->tf_edx;
890 
891 	return (0);
892 }
893 
894 /*
895  * If a linux binary is exec'ing something, try this image activator
896  * first.  We override standard shell script execution in order to
897  * be able to modify the interpreter path.  We only do this if a linux
898  * binary is doing the exec, so we do not create an EXEC module for it.
899  */
900 static int	exec_linux_imgact_try(struct image_params *iparams);
901 
902 static int
903 exec_linux_imgact_try(struct image_params *imgp)
904 {
905     const char *head = (const char *)imgp->image_header;
906     char *rpath;
907     int error = -1;
908 
909     /*
910      * The interpreter for shell scripts run from a linux binary needs
911      * to be located in /compat/linux if possible in order to recursively
912      * maintain linux path emulation.
913      */
914     if (((const short *)head)[0] == SHELLMAGIC) {
915 	    /*
916 	     * Run our normal shell image activator.  If it succeeds attempt
917 	     * to use the alternate path for the interpreter.  If an alternate
918 	     * path is found, use our stringspace to store it.
919 	     */
920 	    if ((error = exec_shell_imgact(imgp)) == 0) {
921 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
922 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
923 		    if (rpath != NULL)
924 			    imgp->args->fname_buf =
925 				imgp->interpreter_name = rpath;
926 	    }
927     }
928     return (error);
929 }
930 
931 /*
932  * exec_setregs may initialize some registers differently than Linux
933  * does, thus potentially confusing Linux binaries. If necessary, we
934  * override the exec_setregs default(s) here.
935  */
936 static void
937 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
938 {
939 	struct pcb *pcb = td->td_pcb;
940 
941 	exec_setregs(td, imgp, stack);
942 
943 	/* Linux sets %gs to 0, we default to _udatasel */
944 	pcb->pcb_gs = 0;
945 	load_gs(0);
946 
947 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
948 }
949 
950 static void
951 linux_get_machine(const char **dst)
952 {
953 
954 	switch (cpu_class) {
955 	case CPUCLASS_686:
956 		*dst = "i686";
957 		break;
958 	case CPUCLASS_586:
959 		*dst = "i586";
960 		break;
961 	case CPUCLASS_486:
962 		*dst = "i486";
963 		break;
964 	default:
965 		*dst = "i386";
966 	}
967 }
968 
/*
 * System entry vector for Linux a.out binaries.
 *
 * It shares the system call table, the signal and errno translation
 * tables, the trap translation, the signal delivery routine and the
 * trampoline with the ELF vector that follows.  The a.out-specific parts
 * are the stack fixup (linux_fixup), the native exec_copyout_strings()
 * for laying out the argument strings, and the absence of a core dump
 * handler.
 */
969 struct sysentvec linux_sysvec = {
970 	.sv_size	= LINUX_SYS_MAXSYSCALL,
971 	.sv_table	= linux_sysent,
972 	.sv_mask	= 0,
973 	.sv_sigsize	= LINUX_SIGTBLSZ,
974 	.sv_sigtbl	= bsd_to_linux_signal,
975 	.sv_errsize	= ELAST + 1,
976 	.sv_errtbl	= bsd_to_linux_errno,
977 	.sv_transtrap	= translate_traps,
978 	.sv_fixup	= linux_fixup,
979 	.sv_sendsig	= linux_sendsig,
980 	.sv_sigcode	= linux_sigcode,
981 	.sv_szsigcode	= &linux_szsigcode,
982 	.sv_prepsyscall	= NULL,
983 	.sv_name	= "Linux a.out",
984 	.sv_coredump	= NULL,
985 	.sv_imgact_try	= exec_linux_imgact_try,
986 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
987 	.sv_pagesize	= PAGE_SIZE,
988 	.sv_minuser	= VM_MIN_ADDRESS,
989 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
990 	.sv_usrstack	= USRSTACK,
991 	.sv_psstrings	= PS_STRINGS,
992 	.sv_stackprot	= VM_PROT_ALL,
993 	.sv_copyout_strings = exec_copyout_strings,
994 	.sv_setregs	= exec_linux_setregs,
995 	.sv_fixlimit	= NULL,
996 	.sv_maxssiz	= NULL,
997 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
998 	.sv_set_syscall_retval = cpu_set_syscall_retval,
999 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1000 	.sv_syscallnames = NULL,
1001 };
1002 
1003 struct sysentvec elf_linux_sysvec = {
1004 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1005 	.sv_table	= linux_sysent,
1006 	.sv_mask	= 0,
1007 	.sv_sigsize	= LINUX_SIGTBLSZ,
1008 	.sv_sigtbl	= bsd_to_linux_signal,
1009 	.sv_errsize	= ELAST + 1,
1010 	.sv_errtbl	= bsd_to_linux_errno,
1011 	.sv_transtrap	= translate_traps,
1012 	.sv_fixup	= elf_linux_fixup,
1013 	.sv_sendsig	= linux_sendsig,
1014 	.sv_sigcode	= linux_sigcode,
1015 	.sv_szsigcode	= &linux_szsigcode,
1016 	.sv_prepsyscall	= NULL,
1017 	.sv_name	= "Linux ELF",
1018 	.sv_coredump	= elf32_coredump,
1019 	.sv_imgact_try	= exec_linux_imgact_try,
1020 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1021 	.sv_pagesize	= PAGE_SIZE,
1022 	.sv_minuser	= VM_MIN_ADDRESS,
1023 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1024 	.sv_usrstack	= USRSTACK,
1025 	.sv_psstrings	= PS_STRINGS,
1026 	.sv_stackprot	= VM_PROT_ALL,
1027 	.sv_copyout_strings = linux_copyout_strings,
1028 	.sv_setregs	= exec_linux_setregs,
1029 	.sv_fixlimit	= NULL,
1030 	.sv_maxssiz	= NULL,
1031 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32,
1032 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1033 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1034 	.sv_syscallnames = NULL,
1035 };
1036 
1037 static char GNU_ABI_VENDOR[] = "GNU";
1038 static int GNULINUX_ABI_DESC = 0;
1039 
1040 static boolean_t
1041 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1042 {
1043 	const Elf32_Word *desc;
1044 	uintptr_t p;
1045 
1046 	p = (uintptr_t)(note + 1);
1047 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1048 
1049 	desc = (const Elf32_Word *)p;
1050 	if (desc[0] != GNULINUX_ABI_DESC)
1051 		return (FALSE);
1052 
1053 	/*
1054 	 * For linux we encode osrel as follows (see linux_mib.c):
1055 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1056 	 */
1057 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1058 
1059 	return (TRUE);
1060 }
1061 
1062 static Elf_Brandnote linux_brandnote = {
1063 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1064 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1065 	.hdr.n_type	= 1,
1066 	.vendor		= GNU_ABI_VENDOR,
1067 	.flags		= BN_TRANSLATE_OSREL,
1068 	.trans_osrel	= linux_trans_osrel
1069 };
1070 
1071 static Elf32_Brandinfo linux_brand = {
1072 	.brand		= ELFOSABI_LINUX,
1073 	.machine	= EM_386,
1074 	.compat_3_brand	= "Linux",
1075 	.emul_path	= "/compat/linux",
1076 	.interp_path	= "/lib/ld-linux.so.1",
1077 	.sysvec		= &elf_linux_sysvec,
1078 	.interp_newpath	= NULL,
1079 	.brand_note	= &linux_brandnote,
1080 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1081 };
1082 
1083 static Elf32_Brandinfo linux_glibc2brand = {
1084 	.brand		= ELFOSABI_LINUX,
1085 	.machine	= EM_386,
1086 	.compat_3_brand	= "Linux",
1087 	.emul_path	= "/compat/linux",
1088 	.interp_path	= "/lib/ld-linux.so.2",
1089 	.sysvec		= &elf_linux_sysvec,
1090 	.interp_newpath	= NULL,
1091 	.brand_note	= &linux_brandnote,
1092 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1093 };
1094 
1095 Elf32_Brandinfo *linux_brandlist[] = {
1096 	&linux_brand,
1097 	&linux_glibc2brand,
1098 	NULL
1099 };
1100 
1101 static int
1102 linux_elf_modevent(module_t mod, int type, void *data)
1103 {
1104 	Elf32_Brandinfo **brandinfo;
1105 	int error;
1106 	struct linux_ioctl_handler **lihp;
1107 	struct linux_device_handler **ldhp;
1108 
1109 	error = 0;
1110 
1111 	switch(type) {
1112 	case MOD_LOAD:
1113 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1114 		     ++brandinfo)
1115 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1116 				error = EINVAL;
1117 		if (error == 0) {
1118 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1119 				linux_ioctl_register_handler(*lihp);
1120 			SET_FOREACH(ldhp, linux_device_handler_set)
1121 				linux_device_register_handler(*ldhp);
1122 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1123 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1124 			LIST_INIT(&futex_list);
1125 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1126 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1127 			      NULL, 1000);
1128 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1129 			      NULL, 1000);
1130 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1131 			      NULL, 1000);
1132 			linux_get_machine(&linux_platform);
1133 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1134 			    sizeof(char *));
1135 			linux_osd_jail_register();
1136 			stclohz = (stathz ? stathz : hz);
1137 			if (bootverbose)
1138 				printf("Linux ELF exec handler installed\n");
1139 		} else
1140 			printf("cannot insert Linux ELF brand handler\n");
1141 		break;
1142 	case MOD_UNLOAD:
1143 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1144 		     ++brandinfo)
1145 			if (elf32_brand_inuse(*brandinfo))
1146 				error = EBUSY;
1147 		if (error == 0) {
1148 			for (brandinfo = &linux_brandlist[0];
1149 			     *brandinfo != NULL; ++brandinfo)
1150 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1151 					error = EINVAL;
1152 		}
1153 		if (error == 0) {
1154 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1155 				linux_ioctl_unregister_handler(*lihp);
1156 			SET_FOREACH(ldhp, linux_device_handler_set)
1157 				linux_device_unregister_handler(*ldhp);
1158 			mtx_destroy(&emul_lock);
1159 			sx_destroy(&emul_shared_lock);
1160 			mtx_destroy(&futex_mtx);
1161 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1162 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1163 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1164 			linux_osd_jail_deregister();
1165 			if (bootverbose)
1166 				printf("Linux ELF exec handler removed\n");
1167 		} else
1168 			printf("Could not deinstall ELF interpreter entry\n");
1169 		break;
1170 	default:
1171 		return EOPNOTSUPP;
1172 	}
1173 	return error;
1174 }
1175 
1176 static moduledata_t linux_elf_mod = {
1177 	"linuxelf",
1178 	linux_elf_modevent,
1179 	0
1180 };
1181 
1182 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1183