xref: /qemu/linux-user/i386/cpu_loop.c (revision 336d354b)
1 /*
2  *  qemu user cpu loop
3  *
4  *  Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu-common.h"
22 #include "qemu.h"
23 #include "qemu/timer.h"
24 #include "user-internals.h"
25 #include "cpu_loop-common.h"
26 #include "signal-common.h"
27 #include "user-mmap.h"
28 
29 /***********************************************************/
30 /* CPUX86 core interface */
31 
32 uint64_t cpu_get_tsc(CPUX86State *env)
33 {
34     return cpu_get_host_ticks();
35 }
36 
/*
 * Pack addr, limit and flags into the two consecutive 32-bit words
 * that ptr points at, each stored via tswap32().
 *
 *   word 0: addr[15:0] in bits 31..16, limit[15:0] in bits 15..0
 *   word 1: addr[31:24] in bits 31..24, limit[19:16] in bits 19..16,
 *           addr[23:16] in bits 7..0, with flags OR-ed on top
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *words = ptr;
    unsigned int low, high;

    low = (limit & 0xffff) | (addr << 16);

    high = (addr & 0xff000000) | (limit & 0x000f0000) | ((addr >> 16) & 0xff);
    high |= flags;

    words[0] = tswap32(low);
    words[1] = tswap32(high);
}
49 
/*
 * Host-side pointer to the IDT storage.  It is assigned in
 * target_cpu_copy_regs() from g2h_untagged(env->idt.base) and is the
 * base that set_idt() indexes when filling in gates.
 */
static uint64_t *idt_table;
51 #ifdef TARGET_X86_64
/*
 * Fill in the four 32-bit words of a 64-bit mode gate at ptr.
 *
 *   word 0: sel in bits 31..16, addr[15:0] in bits 15..0
 *   word 1: addr[31:16] in bits 31..16, bit 15 set, dpl at bit 13,
 *           type at bit 8
 *   word 2: addr[63:32]
 *   word 3: zero
 *
 * The first three words are stored via tswap32().
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (sel << 16) | (addr & 0xffff);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(addr >> 32);
    gate[3] = 0;
}
64 /* only dpl matters as we do only user space emulation */
65 static void set_idt(int n, unsigned int dpl)
66 {
67     set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
68 }
69 #else
/*
 * Fill in the two 32-bit words of a gate at ptr.
 *
 *   word 0: sel in bits 31..16, addr[15:0] in bits 15..0
 *   word 1: addr[31:16] in bits 31..16, bit 15 set, dpl at bit 13,
 *           type at bit 8
 *
 * Both words are stored via tswap32().
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (sel << 16) | (addr & 0xffff);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
80 
81 /* only dpl matters as we do only user space emulation */
82 static void set_idt(int n, unsigned int dpl)
83 {
84     set_gate(idt_table + n, 0, dpl, 0, 0);
85 }
86 #endif
87 
88 #ifdef TARGET_X86_64
89 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
90 {
91     /*
92      * For all the vsyscalls, NULL means "don't write anything" not
93      * "write it at address 0".
94      */
95     if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
96         return true;
97     }
98 
99     env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
100     force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
101     return false;
102 }
103 
104 /*
105  * Since v3.1, the kernel traps and emulates the vsyscall page.
106  * Entry points other than the official generate SIGSEGV.
107  */
108 static void emulate_vsyscall(CPUX86State *env)
109 {
110     int syscall;
111     abi_ulong ret;
112     uint64_t caller;
113 
114     /*
115      * Validate the entry point.  We have already validated the page
116      * during translation to get here; now verify the offset.
117      */
118     switch (env->eip & ~TARGET_PAGE_MASK) {
119     case 0x000:
120         syscall = TARGET_NR_gettimeofday;
121         break;
122     case 0x400:
123         syscall = TARGET_NR_time;
124         break;
125     case 0x800:
126         syscall = TARGET_NR_getcpu;
127         break;
128     default:
129         goto sigsegv;
130     }
131 
132     /*
133      * Validate the return address.
134      * Note that the kernel treats this the same as an invalid entry point.
135      */
136     if (get_user_u64(caller, env->regs[R_ESP])) {
137         goto sigsegv;
138     }
139 
140     /*
141      * Validate the the pointer arguments.
142      */
143     switch (syscall) {
144     case TARGET_NR_gettimeofday:
145         if (!write_ok_or_segv(env, env->regs[R_EDI],
146                               sizeof(struct target_timeval)) ||
147             !write_ok_or_segv(env, env->regs[R_ESI],
148                               sizeof(struct target_timezone))) {
149             return;
150         }
151         break;
152     case TARGET_NR_time:
153         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
154             return;
155         }
156         break;
157     case TARGET_NR_getcpu:
158         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
159             !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
160             return;
161         }
162         break;
163     default:
164         g_assert_not_reached();
165     }
166 
167     /*
168      * Perform the syscall.  None of the vsyscalls should need restarting.
169      */
170     ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
171                      env->regs[R_EDX], env->regs[10], env->regs[8],
172                      env->regs[9], 0, 0);
173     g_assert(ret != -QEMU_ERESTARTSYS);
174     g_assert(ret != -QEMU_ESIGRETURN);
175     if (ret == -TARGET_EFAULT) {
176         goto sigsegv;
177     }
178     env->regs[R_EAX] = ret;
179 
180     /* Emulate a ret instruction to leave the vsyscall page.  */
181     env->eip = caller;
182     env->regs[R_ESP] += 8;
183     return;
184 
185  sigsegv:
186     force_sig(TARGET_SIGSEGV);
187 }
188 #endif
189 
190 static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
191 {
192 #ifndef TARGET_X86_64
193     if (env->eflags & VM_MASK) {
194         handle_vm86_trap(env, trapnr);
195         return true;
196     }
197 #endif
198     return false;
199 }
200 
201 void cpu_loop(CPUX86State *env)
202 {
203     CPUState *cs = env_cpu(env);
204     int trapnr;
205     abi_ulong pc;
206     abi_ulong ret;
207 
208     for(;;) {
209         cpu_exec_start(cs);
210         trapnr = cpu_exec(cs);
211         cpu_exec_end(cs);
212         process_queued_cpu_work(cs);
213 
214         switch(trapnr) {
215         case 0x80:
216             /* linux syscall from int $0x80 */
217             ret = do_syscall(env,
218                              env->regs[R_EAX],
219                              env->regs[R_EBX],
220                              env->regs[R_ECX],
221                              env->regs[R_EDX],
222                              env->regs[R_ESI],
223                              env->regs[R_EDI],
224                              env->regs[R_EBP],
225                              0, 0);
226             if (ret == -QEMU_ERESTARTSYS) {
227                 env->eip -= 2;
228             } else if (ret != -QEMU_ESIGRETURN) {
229                 env->regs[R_EAX] = ret;
230             }
231             break;
232 #ifndef TARGET_ABI32
233         case EXCP_SYSCALL:
234             /* linux syscall from syscall instruction */
235             ret = do_syscall(env,
236                              env->regs[R_EAX],
237                              env->regs[R_EDI],
238                              env->regs[R_ESI],
239                              env->regs[R_EDX],
240                              env->regs[10],
241                              env->regs[8],
242                              env->regs[9],
243                              0, 0);
244             if (ret == -QEMU_ERESTARTSYS) {
245                 env->eip -= 2;
246             } else if (ret != -QEMU_ESIGRETURN) {
247                 env->regs[R_EAX] = ret;
248             }
249             break;
250 #endif
251 #ifdef TARGET_X86_64
252         case EXCP_VSYSCALL:
253             emulate_vsyscall(env);
254             break;
255 #endif
256         case EXCP0B_NOSEG:
257         case EXCP0C_STACK:
258             force_sig(TARGET_SIGBUS);
259             break;
260         case EXCP0D_GPF:
261             /* XXX: potential problem if ABI32 */
262             if (maybe_handle_vm86_trap(env, trapnr)) {
263                 break;
264             }
265             force_sig(TARGET_SIGSEGV);
266             break;
267         case EXCP0E_PAGE:
268             force_sig_fault(TARGET_SIGSEGV,
269                             (env->error_code & PG_ERROR_P_MASK ?
270                              TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
271                             env->cr[2]);
272             break;
273         case EXCP00_DIVZ:
274             if (maybe_handle_vm86_trap(env, trapnr)) {
275                 break;
276             }
277             force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
278             break;
279         case EXCP01_DB:
280             if (maybe_handle_vm86_trap(env, trapnr)) {
281                 break;
282             }
283             force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
284             break;
285         case EXCP03_INT3:
286             if (maybe_handle_vm86_trap(env, trapnr)) {
287                 break;
288             }
289             force_sig(TARGET_SIGTRAP);
290             break;
291         case EXCP04_INTO:
292         case EXCP05_BOUND:
293             if (maybe_handle_vm86_trap(env, trapnr)) {
294                 break;
295             }
296             force_sig(TARGET_SIGSEGV);
297             break;
298         case EXCP06_ILLOP:
299             force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
300             break;
301         case EXCP_INTERRUPT:
302             /* just indicate that signals should be handled asap */
303             break;
304         case EXCP_DEBUG:
305             force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
306             break;
307         case EXCP_ATOMIC:
308             cpu_exec_step_atomic(cs);
309             break;
310         default:
311             pc = env->segs[R_CS].base + env->eip;
312             EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
313                       (long)pc, trapnr);
314             abort();
315         }
316         process_pending_signals(env);
317     }
318 }
319 
320 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
321 {
322     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
323     env->hflags |= HF_PE_MASK | HF_CPL_MASK;
324     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
325         env->cr[4] |= CR4_OSFXSR_MASK;
326         env->hflags |= HF_OSFXSR_MASK;
327     }
328 #ifndef TARGET_ABI32
329     /* enable 64 bit mode if possible */
330     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
331         fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
332         exit(EXIT_FAILURE);
333     }
334     env->cr[4] |= CR4_PAE_MASK;
335     env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
336     env->hflags |= HF_LMA_MASK;
337 #endif
338 
339     /* flags setup : we activate the IRQs by default as in user mode */
340     env->eflags |= IF_MASK;
341 
342     /* linux register setup */
343 #ifndef TARGET_ABI32
344     env->regs[R_EAX] = regs->rax;
345     env->regs[R_EBX] = regs->rbx;
346     env->regs[R_ECX] = regs->rcx;
347     env->regs[R_EDX] = regs->rdx;
348     env->regs[R_ESI] = regs->rsi;
349     env->regs[R_EDI] = regs->rdi;
350     env->regs[R_EBP] = regs->rbp;
351     env->regs[R_ESP] = regs->rsp;
352     env->eip = regs->rip;
353 #else
354     env->regs[R_EAX] = regs->eax;
355     env->regs[R_EBX] = regs->ebx;
356     env->regs[R_ECX] = regs->ecx;
357     env->regs[R_EDX] = regs->edx;
358     env->regs[R_ESI] = regs->esi;
359     env->regs[R_EDI] = regs->edi;
360     env->regs[R_EBP] = regs->ebp;
361     env->regs[R_ESP] = regs->esp;
362     env->eip = regs->eip;
363 #endif
364 
365     /* linux interrupt setup */
366 #ifndef TARGET_ABI32
367     env->idt.limit = 511;
368 #else
369     env->idt.limit = 255;
370 #endif
371     env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
372                                 PROT_READ|PROT_WRITE,
373                                 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
374     idt_table = g2h_untagged(env->idt.base);
375     set_idt(0, 0);
376     set_idt(1, 0);
377     set_idt(2, 0);
378     set_idt(3, 3);
379     set_idt(4, 3);
380     set_idt(5, 0);
381     set_idt(6, 0);
382     set_idt(7, 0);
383     set_idt(8, 0);
384     set_idt(9, 0);
385     set_idt(10, 0);
386     set_idt(11, 0);
387     set_idt(12, 0);
388     set_idt(13, 0);
389     set_idt(14, 0);
390     set_idt(15, 0);
391     set_idt(16, 0);
392     set_idt(17, 0);
393     set_idt(18, 0);
394     set_idt(19, 0);
395     set_idt(0x80, 3);
396 
397     /* linux segment setup */
398     {
399         uint64_t *gdt_table;
400         env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
401                                     PROT_READ|PROT_WRITE,
402                                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
403         env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
404         gdt_table = g2h_untagged(env->gdt.base);
405 #ifdef TARGET_ABI32
406         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
407                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
408                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
409 #else
410         /* 64 bit code segment */
411         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
412                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
413                  DESC_L_MASK |
414                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
415 #endif
416         write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
417                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
418                  (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
419     }
420     cpu_x86_load_seg(env, R_CS, __USER_CS);
421     cpu_x86_load_seg(env, R_SS, __USER_DS);
422 #ifdef TARGET_ABI32
423     cpu_x86_load_seg(env, R_DS, __USER_DS);
424     cpu_x86_load_seg(env, R_ES, __USER_DS);
425     cpu_x86_load_seg(env, R_FS, __USER_DS);
426     cpu_x86_load_seg(env, R_GS, __USER_DS);
427     /* This hack makes Wine work... */
428     env->segs[R_FS].selector = 0;
429 #else
430     cpu_x86_load_seg(env, R_DS, 0);
431     cpu_x86_load_seg(env, R_ES, 0);
432     cpu_x86_load_seg(env, R_FS, 0);
433     cpu_x86_load_seg(env, R_GS, 0);
434 #endif
435 }
436