1 /* $NetBSD: linux_machdep.c,v 1.162 2016/07/13 15:59:54 maxv Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.162 2016/07/13 15:59:54 maxv Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/buf.h>
46 #include <sys/reboot.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/file.h>
50 #include <sys/callout.h>
51 #include <sys/mbuf.h>
52 #include <sys/msgbuf.h>
53 #include <sys/mount.h>
54 #include <sys/vnode.h>
55 #include <sys/device.h>
56 #include <sys/syscallargs.h>
57 #include <sys/filedesc.h>
58 #include <sys/exec_elf.h>
59 #include <sys/disklabel.h>
60 #include <sys/ioctl.h>
61 #include <sys/wait.h>
62 #include <sys/kauth.h>
63 #include <sys/kmem.h>
64
65 #include <miscfs/specfs/specdev.h>
66
67 #include <compat/linux/common/linux_types.h>
68 #include <compat/linux/common/linux_signal.h>
69 #include <compat/linux/common/linux_util.h>
70 #include <compat/linux/common/linux_ioctl.h>
71 #include <compat/linux/common/linux_hdio.h>
72 #include <compat/linux/common/linux_exec.h>
73 #include <compat/linux/common/linux_machdep.h>
74 #include <compat/linux/common/linux_errno.h>
75
76 #include <compat/linux/linux_syscallargs.h>
77
78 #include <sys/cpu.h>
79 #include <machine/cpufunc.h>
80 #include <machine/psl.h>
81 #include <machine/reg.h>
82 #include <machine/segments.h>
83 #include <machine/specialreg.h>
84 #include <machine/sysarch.h>
85 #include <machine/vm86.h>
86 #include <machine/vmparam.h>
87
88 #include <x86/fpu.h>
89
90 /*
91 * To see whether wscons is configured (for virtual console ioctl calls).
92 */
93 #if defined(_KERNEL_OPT)
94 #include "wsdisplay.h"
95 #endif
96 #if (NWSDISPLAY > 0)
97 #include <dev/wscons/wsconsio.h>
98 #include <dev/wscons/wsdisplay_usl_io.h>
99 #if defined(_KERNEL_OPT)
100 #include "opt_xserver.h"
101 #endif
102 #endif
103
/*
 * Debug tracing.  DPRINTF takes one fully parenthesised printf-style
 * argument list, e.g. DPRINTF(("x = %d\n", x)).  With DEBUG_LINUX defined
 * the list is handed to uprintf; otherwise the macro expands to nothing.
 */
#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a)	uprintf a
#endif
109
110 extern struct disklist *x86_alldisks;
111
112 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
113 static void linux_save_ucontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
115 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
116 const sigset_t *, struct linux_sigcontext *);
117 static int linux_restore_sigcontext(struct lwp *,
118 struct linux_sigcontext *, register_t *);
119 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
120 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
121
122 extern char linux_sigcode[], linux_rt_sigcode[];
123
124 /*
125 * Deal with some i386-specific things in the Linux emulation code.
126 */
127
128 void
linux_setregs(struct lwp * l,struct exec_package * epp,vaddr_t stack)129 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
130 {
131 struct trapframe *tf;
132
133 #ifdef USER_LDT
134 pmap_ldt_cleanup(l);
135 #endif
136
137 fpu_save_area_clear(l, __Linux_NPXCW__);
138
139 tf = l->l_md.md_regs;
140 tf->tf_gs = 0;
141 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
142 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
143 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
144 tf->tf_edi = 0;
145 tf->tf_esi = 0;
146 tf->tf_ebp = 0;
147 tf->tf_ebx = l->l_proc->p_psstrp;
148 tf->tf_edx = 0;
149 tf->tf_ecx = 0;
150 tf->tf_eax = 0;
151 tf->tf_eip = epp->ep_entry;
152 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
153 tf->tf_eflags = PSL_USERSET;
154 tf->tf_esp = stack;
155 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
156 }
157
158 /*
159 * Send an interrupt to process.
160 *
161 * Stack is set up to allow sigcode stored
162 * in u. to call routine, followed by kcall
163 * to sigreturn routine below. After sigreturn
164 * resets the signal mask, the stack, and the
165 * frame pointer, it returns to the user
166 * specified pc, psl.
167 */
168
169 void
linux_sendsig(const ksiginfo_t * ksi,const sigset_t * mask)170 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
171 {
172 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
173 linux_rt_sendsig(ksi, mask);
174 else
175 linux_old_sendsig(ksi, mask);
176 }
177
178
179 static void
linux_save_ucontext(struct lwp * l,struct trapframe * tf,const sigset_t * mask,struct sigaltstack * sas,struct linux_ucontext * uc)180 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
181 {
182 uc->uc_flags = 0;
183 uc->uc_link = NULL;
184 native_to_linux_sigaltstack(&uc->uc_stack, sas);
185 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
186 native_to_linux_sigset(&uc->uc_sigmask, mask);
187 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
188 }
189
190 static void
linux_save_sigcontext(struct lwp * l,struct trapframe * tf,const sigset_t * mask,struct linux_sigcontext * sc)191 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
192 const sigset_t *mask, struct linux_sigcontext *sc)
193 {
194 struct pcb *pcb = lwp_getpcb(l);
195
196 /* Save register context. */
197 #ifdef VM86
198 if (tf->tf_eflags & PSL_VM) {
199 sc->sc_gs = tf->tf_vm86_gs;
200 sc->sc_fs = tf->tf_vm86_fs;
201 sc->sc_es = tf->tf_vm86_es;
202 sc->sc_ds = tf->tf_vm86_ds;
203 sc->sc_eflags = get_vflags(l);
204 } else
205 #endif
206 {
207 sc->sc_gs = tf->tf_gs;
208 sc->sc_fs = tf->tf_fs;
209 sc->sc_es = tf->tf_es;
210 sc->sc_ds = tf->tf_ds;
211 sc->sc_eflags = tf->tf_eflags;
212 }
213 sc->sc_edi = tf->tf_edi;
214 sc->sc_esi = tf->tf_esi;
215 sc->sc_esp = tf->tf_esp;
216 sc->sc_ebp = tf->tf_ebp;
217 sc->sc_ebx = tf->tf_ebx;
218 sc->sc_edx = tf->tf_edx;
219 sc->sc_ecx = tf->tf_ecx;
220 sc->sc_eax = tf->tf_eax;
221 sc->sc_eip = tf->tf_eip;
222 sc->sc_cs = tf->tf_cs;
223 sc->sc_esp_at_signal = tf->tf_esp;
224 sc->sc_ss = tf->tf_ss;
225 sc->sc_err = tf->tf_err;
226 sc->sc_trapno = tf->tf_trapno;
227 sc->sc_cr2 = pcb->pcb_cr2;
228 sc->sc_387 = NULL;
229
230 /* Save signal stack. */
231 /* Linux doesn't save the onstack flag in sigframe */
232
233 /* Save signal mask. */
234 native_to_linux_old_sigset(&sc->sc_mask, mask);
235 }
236
237 static void
linux_rt_sendsig(const ksiginfo_t * ksi,const sigset_t * mask)238 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
239 {
240 struct lwp *l = curlwp;
241 struct proc *p = l->l_proc;
242 struct trapframe *tf;
243 struct linux_rt_sigframe *fp, frame;
244 int onstack, error;
245 int sig = ksi->ksi_signo;
246 sig_t catcher = SIGACTION(p, sig).sa_handler;
247 struct sigaltstack *sas = &l->l_sigstk;
248
249 tf = l->l_md.md_regs;
250 /* Do we need to jump onto the signal stack? */
251 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
252 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
253
254
255 /* Allocate space for the signal handler context. */
256 if (onstack)
257 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
258 sas->ss_size);
259 else
260 fp = (struct linux_rt_sigframe *)tf->tf_esp;
261 fp--;
262
263 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
264 onstack, fp, sig, tf->tf_eip,
265 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
266
267 /* Build stack frame for signal trampoline. */
268 frame.sf_handler = catcher;
269 frame.sf_sig = native_to_linux_signo[sig];
270 frame.sf_sip = &fp->sf_si;
271 frame.sf_ucp = &fp->sf_uc;
272
273 /*
274 * XXX: the following code assumes that the constants for
275 * siginfo are the same between linux and NetBSD.
276 */
277 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
278
279 /* Save register context. */
280 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
281 sendsig_reset(l, sig);
282
283 mutex_exit(p->p_lock);
284 error = copyout(&frame, fp, sizeof(frame));
285 mutex_enter(p->p_lock);
286
287 if (error != 0) {
288 /*
289 * Process has trashed its stack; give it an illegal
290 * instruction to halt it in its tracks.
291 */
292 sigexit(l, SIGILL);
293 /* NOTREACHED */
294 }
295
296 /*
297 * Build context to run handler in.
298 */
299 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
300 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
301 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
302 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
303 (linux_rt_sigcode - linux_sigcode);
304 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
305 tf->tf_eflags &= ~PSL_CLEARSIG;
306 tf->tf_esp = (int)fp;
307 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
308
309 /* Remember that we're now on the signal stack. */
310 if (onstack)
311 sas->ss_flags |= SS_ONSTACK;
312 }
313
314 static void
linux_old_sendsig(const ksiginfo_t * ksi,const sigset_t * mask)315 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
316 {
317 struct lwp *l = curlwp;
318 struct proc *p = l->l_proc;
319 struct trapframe *tf;
320 struct linux_sigframe *fp, frame;
321 int onstack, error;
322 int sig = ksi->ksi_signo;
323 sig_t catcher = SIGACTION(p, sig).sa_handler;
324 struct sigaltstack *sas = &l->l_sigstk;
325
326 tf = l->l_md.md_regs;
327
328 /* Do we need to jump onto the signal stack? */
329 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
330 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
331
332 /* Allocate space for the signal handler context. */
333 if (onstack)
334 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
335 sas->ss_size);
336 else
337 fp = (struct linux_sigframe *)tf->tf_esp;
338 fp--;
339
340 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
341 onstack, fp, sig, tf->tf_eip,
342 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
343
344 /* Build stack frame for signal trampoline. */
345 frame.sf_handler = catcher;
346 frame.sf_sig = native_to_linux_signo[sig];
347
348 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
349 sendsig_reset(l, sig);
350
351 mutex_exit(p->p_lock);
352 error = copyout(&frame, fp, sizeof(frame));
353 mutex_enter(p->p_lock);
354
355 if (error != 0) {
356 /*
357 * Process has trashed its stack; give it an illegal
358 * instruction to halt it in its tracks.
359 */
360 sigexit(l, SIGILL);
361 /* NOTREACHED */
362 }
363
364 /*
365 * Build context to run handler in.
366 */
367 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
368 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
369 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
370 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
371 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
372 tf->tf_eflags &= ~PSL_CLEARSIG;
373 tf->tf_esp = (int)fp;
374 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
375
376 /* Remember that we're now on the signal stack. */
377 if (onstack)
378 sas->ss_flags |= SS_ONSTACK;
379 }
380
381 /*
382 * System call to cleanup state after a signal
383 * has been taken. Reset signal mask and
384 * stack state from context left by sendsig (above).
385 * Return to previous pc and psl as specified by
386 * context left by sendsig. Check carefully to
387 * make sure that the user has not modified the
388 * psl to gain improper privileges or to cause
389 * a machine fault.
390 */
391 int
linux_sys_rt_sigreturn(struct lwp * l,const struct linux_sys_rt_sigreturn_args * uap,register_t * retval)392 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
393 {
394 /* {
395 syscallarg(struct linux_ucontext *) ucp;
396 } */
397 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
398 int error;
399
400 /*
401 * The trampoline code hands us the context.
402 * It is unsafe to keep track of it ourselves, in the event that a
403 * program jumps out of a signal handler.
404 */
405 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
406 return error;
407
408 /* XXX XAX we can do better here by using more of the ucontext */
409 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
410 }
411
412 int
linux_sys_sigreturn(struct lwp * l,const struct linux_sys_sigreturn_args * uap,register_t * retval)413 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
414 {
415 /* {
416 syscallarg(struct linux_sigcontext *) scp;
417 } */
418 struct linux_sigcontext context, *scp = SCARG(uap, scp);
419 int error;
420
421 /*
422 * The trampoline code hands us the context.
423 * It is unsafe to keep track of it ourselves, in the event that a
424 * program jumps out of a signal handler.
425 */
426 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
427 return error;
428 return linux_restore_sigcontext(l, &context, retval);
429 }
430
431 static int
linux_restore_sigcontext(struct lwp * l,struct linux_sigcontext * scp,register_t * retval)432 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
433 register_t *retval)
434 {
435 struct proc *p = l->l_proc;
436 struct sigaltstack *sas = &l->l_sigstk;
437 struct trapframe *tf;
438 sigset_t mask;
439 ssize_t ss_gap;
440
441 /* Restore register context. */
442 tf = l->l_md.md_regs;
443 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
444
445 #ifdef VM86
446 if (scp->sc_eflags & PSL_VM) {
447 void syscall_vm86(struct trapframe *);
448
449 tf->tf_vm86_gs = scp->sc_gs;
450 tf->tf_vm86_fs = scp->sc_fs;
451 tf->tf_vm86_es = scp->sc_es;
452 tf->tf_vm86_ds = scp->sc_ds;
453 set_vflags(l, scp->sc_eflags);
454 p->p_md.md_syscall = syscall_vm86;
455 } else
456 #endif
457 {
458 /*
459 * Check for security violations. If we're returning to
460 * protected mode, the CPU will validate the segment registers
461 * automatically and generate a trap on violations. We handle
462 * the trap, rather than doing all of the checking here.
463 */
464 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
465 !USERMODE(scp->sc_cs, scp->sc_eflags))
466 return EINVAL;
467
468 tf->tf_gs = scp->sc_gs;
469 tf->tf_fs = scp->sc_fs;
470 tf->tf_es = scp->sc_es;
471 tf->tf_ds = scp->sc_ds;
472 #ifdef VM86
473 if (tf->tf_eflags & PSL_VM)
474 (*p->p_emul->e_syscall_intern)(p);
475 #endif
476 tf->tf_eflags = scp->sc_eflags;
477 }
478 tf->tf_edi = scp->sc_edi;
479 tf->tf_esi = scp->sc_esi;
480 tf->tf_ebp = scp->sc_ebp;
481 tf->tf_ebx = scp->sc_ebx;
482 tf->tf_edx = scp->sc_edx;
483 tf->tf_ecx = scp->sc_ecx;
484 tf->tf_eax = scp->sc_eax;
485 tf->tf_eip = scp->sc_eip;
486 tf->tf_cs = scp->sc_cs;
487 tf->tf_esp = scp->sc_esp_at_signal;
488 tf->tf_ss = scp->sc_ss;
489
490 /* Restore signal stack. */
491 /*
492 * Linux really does it this way; it doesn't have space in sigframe
493 * to save the onstack flag.
494 */
495 mutex_enter(p->p_lock);
496 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
497 if (ss_gap >= 0 && ss_gap < sas->ss_size)
498 sas->ss_flags |= SS_ONSTACK;
499 else
500 sas->ss_flags &= ~SS_ONSTACK;
501
502 /* Restore signal mask. */
503 linux_old_to_native_sigset(&mask, &scp->sc_mask);
504 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
505 mutex_exit(p->p_lock);
506
507 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
508 return EJUSTRETURN;
509 }
510
511 #ifdef USER_LDT
512
513 static int
linux_read_ldt(struct lwp * l,const struct linux_sys_modify_ldt_args * uap,register_t * retval)514 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
515 register_t *retval)
516 {
517 struct x86_get_ldt_args gl;
518 int error;
519 union descriptor *ldt_buf;
520 size_t sz;
521
522 /*
523 * I've checked the linux code - this function is asymetric with
524 * linux_write_ldt, and returns raw ldt entries.
525 * NB, the code I saw zerod the spare parts of the user buffer.
526 */
527
528 DPRINTF(("linux_read_ldt!"));
529
530 sz = 8192 * sizeof(*ldt_buf);
531 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
532 gl.start = 0;
533 gl.desc = NULL;
534 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
535 error = x86_get_ldt1(l, &gl, ldt_buf);
536 /* NB gl.num might have changed */
537 if (error == 0) {
538 *retval = gl.num * sizeof *ldt;
539 error = copyout(ldt_buf, SCARG(uap, ptr),
540 gl.num * sizeof *ldt_buf);
541 }
542 kmem_free(ldt_buf, sz);
543
544 return error;
545 }
546
547 struct linux_ldt_info {
548 u_int entry_number;
549 u_long base_addr;
550 u_int limit;
551 u_int seg_32bit:1;
552 u_int contents:2;
553 u_int read_exec_only:1;
554 u_int limit_in_pages:1;
555 u_int seg_not_present:1;
556 u_int useable:1;
557 };
558
559 static int
linux_write_ldt(struct lwp * l,const struct linux_sys_modify_ldt_args * uap,int oldmode)560 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
561 int oldmode)
562 {
563 struct linux_ldt_info ldt_info;
564 union descriptor d;
565 struct x86_set_ldt_args sl;
566 int error;
567
568 DPRINTF(("linux_write_ldt %d\n", oldmode));
569 if (SCARG(uap, bytecount) != sizeof(ldt_info))
570 return (EINVAL);
571 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
572 return error;
573 if (ldt_info.entry_number >= 8192)
574 return (EINVAL);
575 if (ldt_info.contents == 3) {
576 if (oldmode)
577 return (EINVAL);
578 if (ldt_info.seg_not_present)
579 return (EINVAL);
580 }
581
582 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
583 (oldmode || (ldt_info.contents == 0 &&
584 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
585 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
586 ldt_info.useable == 0))) {
587 /* this means you should zero the ldt */
588 (void)memset(&d, 0, sizeof(d));
589 } else {
590 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
591 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
592 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
593 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
594 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
595 (!ldt_info.read_exec_only << 1);
596 d.sd.sd_dpl = SEL_UPL;
597 d.sd.sd_p = !ldt_info.seg_not_present;
598 d.sd.sd_def32 = ldt_info.seg_32bit;
599 d.sd.sd_gran = ldt_info.limit_in_pages;
600 if (!oldmode)
601 d.sd.sd_xx = ldt_info.useable;
602 else
603 d.sd.sd_xx = 0;
604 }
605 sl.start = ldt_info.entry_number;
606 sl.desc = NULL;
607 sl.num = 1;
608
609 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
610 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
611
612 return x86_set_ldt1(l, &sl, &d);
613 }
614
615 #endif /* USER_LDT */
616
617 int
linux_sys_modify_ldt(struct lwp * l,const struct linux_sys_modify_ldt_args * uap,register_t * retval)618 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
619 {
620 /* {
621 syscallarg(int) func;
622 syscallarg(void *) ptr;
623 syscallarg(size_t) bytecount;
624 } */
625
626 switch (SCARG(uap, func)) {
627 #ifdef USER_LDT
628 case 0:
629 return linux_read_ldt(l, (const void *)uap, retval);
630 case 1:
631 return linux_write_ldt(l, (const void *)uap, 1);
632 case 2:
633 #ifdef notyet
634 return linux_read_default_ldt(l, (const void *)uap, retval);
635 #else
636 return (ENOSYS);
637 #endif
638 case 0x11:
639 return linux_write_ldt(l, (const void *)uap, 0);
640 #endif /* USER_LDT */
641
642 default:
643 return (ENOSYS);
644 }
645 }
646
647 /*
648 * XXX Pathetic hack to make svgalib work. This will fake the major
649 * device number of an opened VT so that svgalib likes it. grmbl.
650 * Should probably do it 'wrong the right way' and use a mapping
651 * array for all major device numbers, and map linux_mknod too.
652 */
653 dev_t
linux_fakedev(dev_t dev,int raw)654 linux_fakedev(dev_t dev, int raw)
655 {
656 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
657 const struct cdevsw *cd = cdevsw_lookup(dev);
658
659 if (raw) {
660 #if (NWSDISPLAY > 0)
661 extern const struct cdevsw wsdisplay_cdevsw;
662 if (cd == &wsdisplay_cdevsw)
663 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
664 #endif
665 }
666
667 if (cd == &ptc_cdevsw)
668 return makedev(LINUX_PTC_MAJOR, minor(dev));
669 if (cd == &pts_cdevsw)
670 return makedev(LINUX_PTS_MAJOR, minor(dev));
671
672 return dev;
673 }
674
#if (NWSDISPLAY > 0)
/*
 * Fixed keyboard translation tables returned by the KDGKBENT ioctl.
 * They are not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries; the comment at the start of a row is
 * the index of its first entry.
 */
#define NR_KEYS 128

static const u_short plain_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	/* 0x08 */ 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	/* 0x20 */ 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	/* 0x28 */ 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	/* 0x30 */ 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

static const u_short shift_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	/* 0x08 */ 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	/* 0x10 */ 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	/* 0x18 */ 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	/* 0x20 */ 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	/* 0x28 */ 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	/* 0x30 */ 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	/* 0x40 */ 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

static const u_short altgr_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	/* 0x08 */ 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	/* 0x20 */ 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	/* 0x28 */ 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	/* 0x30 */ 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	/* 0x40 */ 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	/* 0x48 */ 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	/* 0x50 */ 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	/* 0x58 */ 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

static const u_short ctrl_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	/* 0x08 */ 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	/* 0x10 */ 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	/* 0x18 */ 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	/* 0x20 */ 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	/* 0x28 */ 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	/* 0x30 */ 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/*
 * Tables selectable through kb_table, in index order.  Note that
 * altgr_map is listed twice (indices 2 and 3).
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
754
755 static struct biosdisk_info *
fd2biosinfo(struct proc * p,struct file * fp)756 fd2biosinfo(struct proc *p, struct file *fp)
757 {
758 struct vnode *vp;
759 const char *blkname;
760 char diskname[16];
761 int i;
762 struct nativedisk_info *nip;
763 struct disklist *dl = x86_alldisks;
764
765 if (dl == NULL)
766 return NULL;
767 if (fp->f_type != DTYPE_VNODE)
768 return NULL;
769 vp = (struct vnode *)fp->f_data;
770
771 if (vp->v_type != VBLK)
772 return NULL;
773
774 blkname = devsw_blk2name(major(vp->v_rdev));
775 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
776 (unsigned long long)DISKUNIT(vp->v_rdev));
777
778 for (i = 0; i < dl->dl_nnativedisks; i++) {
779 nip = &dl->dl_nativedisks[i];
780 if (strcmp(diskname, nip->ni_devname))
781 continue;
782 if (nip->ni_nmatches != 0)
783 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
784 }
785
786 return NULL;
787 }
788
789
790 /*
791 * We come here in a last attempt to satisfy a Linux ioctl() call
792 */
793 int
linux_machdepioctl(struct lwp * l,const struct linux_sys_ioctl_args * uap,register_t * retval)794 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
795 {
796 /* {
797 syscallarg(int) fd;
798 syscallarg(u_long) com;
799 syscallarg(void *) data;
800 } */
801 struct sys_ioctl_args bia;
802 u_long com;
803 int error, error1;
804 #if (NWSDISPLAY > 0)
805 struct vt_mode lvt;
806 struct kbentry kbe;
807 #endif
808 struct linux_hd_geometry hdg;
809 struct linux_hd_big_geometry hdg_big;
810 struct biosdisk_info *bip;
811 file_t *fp;
812 int fd;
813 struct disklabel label;
814 struct partinfo partp;
815 int (*ioctlf)(struct file *, u_long, void *);
816 u_long start, biostotal, realtotal;
817 u_char heads, sectors;
818 u_int cylinders;
819 struct ioctl_pt pt;
820
821 fd = SCARG(uap, fd);
822 SCARG(&bia, fd) = fd;
823 SCARG(&bia, data) = SCARG(uap, data);
824 com = SCARG(uap, com);
825
826 if ((fp = fd_getfile(fd)) == NULL)
827 return (EBADF);
828
829 switch (com) {
830 #if (NWSDISPLAY > 0)
831 case LINUX_KDGKBMODE:
832 com = KDGKBMODE;
833 break;
834 case LINUX_KDSKBMODE:
835 com = KDSKBMODE;
836 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
837 SCARG(&bia, data) = (void *)K_RAW;
838 break;
839 case LINUX_KIOCSOUND:
840 SCARG(&bia, data) =
841 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
842 /* fall through */
843 case LINUX_KDMKTONE:
844 com = KDMKTONE;
845 break;
846 case LINUX_KDSETMODE:
847 com = KDSETMODE;
848 break;
849 case LINUX_KDGETMODE:
850 /* KD_* values are equal to the wscons numbers */
851 com = WSDISPLAYIO_GMODE;
852 break;
853 case LINUX_KDENABIO:
854 com = KDENABIO;
855 break;
856 case LINUX_KDDISABIO:
857 com = KDDISABIO;
858 break;
859 case LINUX_KDGETLED:
860 com = KDGETLED;
861 break;
862 case LINUX_KDSETLED:
863 com = KDSETLED;
864 break;
865 case LINUX_VT_OPENQRY:
866 com = VT_OPENQRY;
867 break;
868 case LINUX_VT_GETMODE:
869 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
870 if (error != 0)
871 goto out;
872 lvt.relsig = native_to_linux_signo[lvt.relsig];
873 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
874 lvt.frsig = native_to_linux_signo[lvt.frsig];
875 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
876 goto out;
877 case LINUX_VT_SETMODE:
878 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
879 if (error != 0)
880 goto out;
881 lvt.relsig = linux_to_native_signo[lvt.relsig];
882 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
883 lvt.frsig = linux_to_native_signo[lvt.frsig];
884 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
885 goto out;
886 case LINUX_VT_DISALLOCATE:
887 /* XXX should use WSDISPLAYIO_DELSCREEN */
888 error = 0;
889 goto out;
890 case LINUX_VT_RELDISP:
891 com = VT_RELDISP;
892 break;
893 case LINUX_VT_ACTIVATE:
894 com = VT_ACTIVATE;
895 break;
896 case LINUX_VT_WAITACTIVE:
897 com = VT_WAITACTIVE;
898 break;
899 case LINUX_VT_GETSTATE:
900 com = VT_GETSTATE;
901 break;
902 case LINUX_KDGKBTYPE:
903 {
904 static const u_int8_t kb101 = KB_101;
905
906 /* This is what Linux does. */
907 error = copyout(&kb101, SCARG(uap, data), 1);
908 goto out;
909 }
910 case LINUX_KDGKBENT:
911 /*
912 * The Linux KDGKBENT ioctl is different from the
913 * SYSV original. So we handle it in machdep code.
914 * XXX We should use keyboard mapping information
915 * from wsdisplay, but this would be expensive.
916 */
917 if ((error = copyin(SCARG(uap, data), &kbe,
918 sizeof(struct kbentry))))
919 goto out;
920 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
921 || kbe.kb_index >= NR_KEYS) {
922 error = EINVAL;
923 goto out;
924 }
925 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
926 error = copyout(&kbe, SCARG(uap, data),
927 sizeof(struct kbentry));
928 goto out;
929 #endif
930 case LINUX_HDIO_GETGEO:
931 case LINUX_HDIO_GETGEO_BIG:
932 /*
933 * Try to mimic Linux behaviour: return the BIOS geometry
934 * if possible (extending its # of cylinders if it's beyond
935 * the 1023 limit), fall back to the MI geometry (i.e.
936 * the real geometry) if not found, by returning an
937 * error. See common/linux_hdio.c
938 */
939 bip = fd2biosinfo(curproc, fp);
940 ioctlf = fp->f_ops->fo_ioctl;
941 error = ioctlf(fp, DIOCGDINFO, (void *)&label);
942 error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp);
943 if (error != 0 && error1 != 0) {
944 error = error1;
945 goto out;
946 }
947 start = error1 != 0 ? partp.pi_offset : 0;
948 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
949 && bip->bi_cyl != 0) {
950 heads = bip->bi_head;
951 sectors = bip->bi_sec;
952 cylinders = bip->bi_cyl;
953 biostotal = heads * sectors * cylinders;
954 realtotal = label.d_ntracks * label.d_nsectors *
955 label.d_ncylinders;
956 if (realtotal > biostotal)
957 cylinders = realtotal / (heads * sectors);
958 } else {
959 heads = label.d_ntracks;
960 cylinders = label.d_ncylinders;
961 sectors = label.d_nsectors;
962 }
963 if (com == LINUX_HDIO_GETGEO) {
964 hdg.start = start;
965 hdg.heads = heads;
966 hdg.cylinders = cylinders;
967 hdg.sectors = sectors;
968 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
969 goto out;
970 } else {
971 hdg_big.start = start;
972 hdg_big.heads = heads;
973 hdg_big.cylinders = cylinders;
974 hdg_big.sectors = sectors;
975 error = copyout(&hdg_big, SCARG(uap, data),
976 sizeof hdg_big);
977 goto out;
978 }
979
980 default:
981 /*
982 * Unknown to us. If it's on a device, just pass it through
983 * using PTIOCLINUX, the device itself might be able to
984 * make some sense of it.
985 * XXX hack: if the function returns EJUSTRETURN,
986 * it has stuffed a sysctl return value in pt.data.
987 */
988 ioctlf = fp->f_ops->fo_ioctl;
989 pt.com = SCARG(uap, com);
990 pt.data = SCARG(uap, data);
991 error = ioctlf(fp, PTIOCLINUX, &pt);
992 if (error == EJUSTRETURN) {
993 retval[0] = (register_t)pt.data;
994 error = 0;
995 }
996
997 if (error == ENOTTY) {
998 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
999 com));
1000 }
1001 goto out;
1002 }
1003 SCARG(&bia, com) = com;
1004 error = sys_ioctl(curlwp, &bia, retval);
1005 out:
1006 fd_putfile(fd);
1007 return error;
1008 }
1009
1010 /*
1011 * Set I/O permissions for a process. Just set the maximum level
1012 * right away (ignoring the argument), otherwise we would have
1013 * to rely on I/O permission maps, which are not implemented.
1014 */
1015 int
linux_sys_iopl(struct lwp * l,const struct linux_sys_iopl_args * uap,register_t * retval)1016 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1017 {
1018 /* {
1019 syscallarg(int) level;
1020 } */
1021 struct trapframe *fp = l->l_md.md_regs;
1022
1023 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1024 NULL, NULL, NULL, NULL) != 0)
1025 return EPERM;
1026 fp->tf_eflags |= PSL_IOPL;
1027 *retval = 0;
1028 return 0;
1029 }
1030
1031 /*
1032 * See above. If a root process tries to set access to an I/O port,
1033 * just let it have the whole range.
1034 */
1035 int
linux_sys_ioperm(struct lwp * l,const struct linux_sys_ioperm_args * uap,register_t * retval)1036 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1037 {
1038 /* {
1039 syscallarg(unsigned int) lo;
1040 syscallarg(unsigned int) hi;
1041 syscallarg(int) val;
1042 } */
1043 struct trapframe *fp = l->l_md.md_regs;
1044
1045 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1046 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1047 NULL, NULL) != 0)
1048 return EPERM;
1049 if (SCARG(uap, val))
1050 fp->tf_eflags |= PSL_IOPL;
1051 *retval = 0;
1052 return 0;
1053 }
1054
1055 int
linux_usertrap(struct lwp * l,vaddr_t trapaddr,void * arg)1056 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1057 void *arg)
1058 {
1059 return 0;
1060 }
1061
1062 const char *
linux_get_uname_arch(void)1063 linux_get_uname_arch(void)
1064 {
1065 static char uname_arch[5] = "i386";
1066
1067 if (uname_arch[1] == '3')
1068 uname_arch[1] += cpu_class;
1069 return uname_arch;
1070 }
1071