/*
 * QEMU HAX support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * Copyright (c) 2011 Intel Corporation
 *  Written by:
 *  Jiang Yunhong<yunhong.jiang@intel.com>
 *  Xin Xiaohui<xiaohui.xin@intel.com>
 *  Zhang Xiantao<xiantao.zhang@intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * HAX common code shared by the Windows and Darwin hosts
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"

#include "qemu-common.h"
#include "hax-i386.h"
#include "sysemu/accel.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"
#include "hw/boards.h"

#define DEBUG_HAX 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/* Current HAX API version supported by QEMU */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel module API version required by QEMU */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

static bool hax_allowed;

struct hax_state hax_global;

static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);

int hax_enabled(void)
{
    return hax_allowed;
}

int valid_hax_tunnel_size(uint16_t size)
{
    return size >= sizeof(struct hax_tunnel);
}

hax_fd hax_vcpu_get_fd(CPUArchState *env)
{
    struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
    if (!vcpu) {
        return HAX_INVALID_FD;
    }
    return vcpu->fd;
}

static int hax_get_capability(struct hax_state *hax)
{
    int ret;
    struct hax_capabilityinfo capinfo, *cap = &capinfo;

    ret = hax_capability(hax, cap);
    if (ret) {
        return ret;
    }

    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
            DPRINTF("VT-x is not enabled; the HAX driver will not work.\n");
        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
            DPRINTF("NX is not enabled; the HAX driver will not work.\n");
        }
        return -ENXIO;
    }

    if (!(cap->winfo & HAX_CAP_UG)) {
        fprintf(stderr, "UG mode is not supported by the hardware.\n");
        return -ENOTSUP;
    }

    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);

    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
        if (cap->mem_quota < hax->mem_quota) {
            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
            return -ENOSPC;
        }
    }
    return 0;
}

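/*
 * Version negotiation is two-sided: the kernel module must be at least
 * hax_min_version for QEMU to use it, and QEMU's own hax_cur_version must
 * be at least the module's compat_version for the module to accept QEMU.
 */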
static int hax_version_support(struct hax_state *hax)
{
    int ret;
    struct hax_module_version version;

    ret = hax_mod_version(hax, &version);
    if (ret < 0) {
        return 0;
    }

    if (hax_min_version > version.cur_version) {
        fprintf(stderr, "Incompatible HAX module version %d, "
                "requires minimum version %d\n",
                version.cur_version, hax_min_version);
        return 0;
    }
    if (hax_cur_version < version.compat_version) {
        fprintf(stderr, "Incompatible QEMU HAX API version %x, "
                "requires minimum HAX API version %x\n",
                hax_cur_version, version.compat_version);
        return 0;
    }

    return 1;
}

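/*
 * vCPU setup is a three-step exchange with the kernel module: create the
 * vcpu inside the VM, open a per-vcpu fd, then set up the shared "tunnel"
 * through which exit information and I/O data are passed.
 */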
int hax_vcpu_create(int id)
{
    struct hax_vcpu_state *vcpu = NULL;
    int ret;

    if (!hax_global.vm) {
        fprintf(stderr, "Failed to create vcpu %x: vm is null\n", id);
        return -1;
    }

    if (hax_global.vm->vcpus[id]) {
        fprintf(stderr, "vcpu %x already allocated\n", id);
        return 0;
    }

    vcpu = g_new0(struct hax_vcpu_state, 1);

    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
    if (ret) {
        fprintf(stderr, "Failed to create vcpu %x\n", id);
        goto error;
    }

    vcpu->vcpu_id = id;
    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
    if (hax_invalid_fd(vcpu->fd)) {
        fprintf(stderr, "Failed to open the vcpu\n");
        ret = -ENODEV;
        goto error;
    }

    hax_global.vm->vcpus[id] = vcpu;

    ret = hax_host_setup_vcpu_channel(vcpu);
    if (ret) {
        fprintf(stderr, "Invalid hax tunnel size\n");
        ret = -EINVAL;
        goto error;
    }
    return 0;

  error:
    /* vcpu and tunnel will be closed automatically */
    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
        hax_close_fd(vcpu->fd);
    }

    hax_global.vm->vcpus[id] = NULL;
    g_free(vcpu);
    return -1;
}

int hax_vcpu_destroy(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    /* Check vcpu first: the vm check below dereferences it */
    if (!vcpu) {
        return 0;
    }

    if (!hax_global.vm) {
        fprintf(stderr, "Failed to destroy vcpu %x: vm is null\n",
                vcpu->vcpu_id);
        return -1;
    }

    /*
     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
     * 2. Closing the fd causes the hax module to clean up the vcpu
     */
    hax_close_fd(vcpu->fd);
    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
    g_free(vcpu);
    return 0;
}

int hax_init_vcpu(CPUState *cpu)
{
    int ret;

    ret = hax_vcpu_create(cpu->cpu_index);
    if (ret < 0) {
        fprintf(stderr, "Failed to create HAX vcpu\n");
        exit(-1);
    }

    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
    cpu->vcpu_dirty = true;
    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));

    return ret;
}

struct hax_vm *hax_vm_create(struct hax_state *hax)
{
    struct hax_vm *vm;
    int vm_id = 0, ret;

    if (hax_invalid_fd(hax->fd)) {
        return NULL;
    }

    if (hax->vm) {
        return hax->vm;
    }

    vm = g_new0(struct hax_vm, 1);

    ret = hax_host_create_vm(hax, &vm_id);
    if (ret) {
        fprintf(stderr, "Failed to create vm %x\n", ret);
        goto error;
    }
    vm->id = vm_id;
    vm->fd = hax_host_open_vm(hax, vm_id);
    if (hax_invalid_fd(vm->fd)) {
        fprintf(stderr, "Failed to open vm %d\n", vm_id);
        goto error;
    }

    hax->vm = vm;
    return vm;

  error:
    g_free(vm);
    hax->vm = NULL;
    return NULL;
}

int hax_vm_destroy(struct hax_vm *vm)
{
    int i;

    for (i = 0; i < HAX_MAX_VCPU; i++) {
        if (vm->vcpus[i]) {
            fprintf(stderr, "VCPUs must be destroyed before the VM\n");
            return -1;
        }
    }
    hax_close_fd(vm->fd);
    g_free(vm);
    hax_global.vm = NULL;
    return 0;
}

static void hax_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;

    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

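/*
 * Bring-up order: open the module, check capabilities and API versions,
 * create and open the VM, set up memory mapping, then report QEMU's
 * supported API range back to the kernel module.
 */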
static int hax_init(ram_addr_t ram_size)
{
    struct hax_state *hax = NULL;
    struct hax_qemu_version qversion;
    int ret;

    hax = &hax_global;

    memset(hax, 0, sizeof(struct hax_state));
    hax->mem_quota = ram_size;

    hax->fd = hax_mod_open();
    if (hax_invalid_fd(hax->fd)) {
        hax->fd = 0;
        ret = -ENODEV;
        goto error;
    }

    ret = hax_get_capability(hax);
    if (ret) {
        if (ret != -ENOSPC) {
            ret = -EINVAL;
        }
        goto error;
    }

    if (!hax_version_support(hax)) {
        ret = -EINVAL;
        goto error;
    }

    hax->vm = hax_vm_create(hax);
    if (!hax->vm) {
        fprintf(stderr, "Failed to create HAX VM\n");
        ret = -EINVAL;
        goto error;
    }

    hax_memory_init();

    qversion.cur_version = hax_cur_version;
    qversion.min_version = hax_min_version;
    hax_notify_qemu_version(hax->vm->fd, &qversion);
    cpu_interrupt_handler = hax_handle_interrupt;

    return ret;

  error:
    if (hax->vm) {
        hax_vm_destroy(hax->vm);
    }
    if (hax->fd) {
        hax_mod_close(hax);
    }

    return ret;
}

static int hax_accel_init(MachineState *ms)
{
    int ret = hax_init(ms->ram_size);

    if (ret && (ret != -ENOSPC)) {
        fprintf(stderr, "No accelerator found.\n");
    } else {
        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
                !ret ? "working" : "not working",
                !ret ? "fast virt" : "emulation");
    }
    return ret;
}

static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
{
    if (hft->direction < 2) {
        /* direction == 0: read from gpa into value; 1: write value to gpa */
        cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
                               hft->direction);
    } else {
        /*
         * HAX API v4 supports transferring data between two MMIO addresses,
         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
         *  hft->direction == 2: gpa ==> gpa2
         */
        uint64_t value;
        cpu_physical_memory_read(hft->gpa, &value, hft->size);
        cpu_physical_memory_write(hft->gpa2, &value, hft->size);
    }

    return 0;
}

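/*
 * A HAX_EXIT_IO exit describes one (possibly REP-prefixed) port access:
 * 'direction' selects IN vs OUT, 'count' the repetition count, and 'df'
 * mirrors the guest's direction flag. The data travels through the vcpu's
 * shared I/O buffer.
 */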
static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
                         int direction, int size, int count, void *buffer)
{
    uint8_t *ptr;
    int i;
    MemTxAttrs attrs = { 0 };

    /*
     * String I/O with the direction flag (DF) set proceeds from high to
     * low addresses, so start at the last element and walk backwards.
     */
    if (!df) {
        ptr = (uint8_t *) buffer;
    } else {
        ptr = buffer + size * count - size;
    }
    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size, direction == HAX_EXIT_IO_OUT);
        if (!df) {
            ptr += size;
        } else {
            ptr -= size;
        }
    }

    return 0;
}

static int hax_vcpu_interrupt(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    /*
     * Try to inject an interrupt if the guest can accept it.
     * Unlike KVM, the HAX kernel checks RFLAGS itself instead of
     * relying on QEMU to do so.
     */
    if (ht->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        int irq;

        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            hax_inject_interrupt(env, irq);
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        ht->request_interrupt_window = 1;
    } else {
        ht->request_interrupt_window = 0;
    }
    return 0;
}

void hax_raise_event(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return;
    }
    vcpu->tunnel->user_event_pending = 1;
}

/*
 * Ask the HAX kernel module to run the vCPU for us until:
 * 1. The guest crashes or shuts down
 * 2. QEMU's emulation is needed, e.g. the guest executes an MMIO
 *    instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to execute vcpu at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue the vcpu_run if the system call was interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id);
            abort();
        }
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                                ht->pio._direction,
                                ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* An HLT instruction with interrupts disabled means shutdown;
                 * see the halted-state comment above for why IF_MASK is set
                 * unconditionally here. */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* These exits are handled by the hax module itself; just resume */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}

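/*
 * The vcpu_dirty protocol mirrors the one used by KVM: vcpu_dirty == true
 * means QEMU's copy of the register state is authoritative and must be
 * written back to the kernel before the vcpu runs again; false means the
 * kernel's copy is authoritative.
 */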
static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_arch_get_registers(env);
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

int hax_smp_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
    int fatal;
    int ret;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = hax_vcpu_hax_exec(env);

        if (fatal) {
            fprintf(stderr, "Unsupported HAX vcpu return\n");
            abort();
        }
    }

    return ret;
}

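/*
 * Segment descriptor conversion between QEMU's SegmentCache, which packs
 * the attributes into a 'flags' word using the DESC_* masks, and HAX's
 * segment_desc_t, which keeps them as discrete bitfields.
 */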
static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    /* In virtual-8086 mode every segment is a present, DPL-3, 16-bit
     * read/write data segment; only selector, base and limit vary. */
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->operand_size = 0;
    lhs->desc = 1;
    lhs->long_mode = 0;
    lhs->granularity = 0;
    lhs->available = 0;
}

static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->operand_size << DESC_B_SHIFT)
        | (rhs->desc * DESC_S_MASK)
        | (rhs->long_mode << DESC_L_SHIFT)
        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
}

static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
    lhs->desc = (flags & DESC_S_MASK) != 0;
    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
    lhs->granularity = (flags & DESC_G_MASK) != 0;
    lhs->available = (flags & DESC_AVL_MASK) != 0;
}

/* Copy one register between HAX and QEMU: 'set' writes the QEMU value into
 * the HAX structure, otherwise the HAX value is read back into QEMU. */
static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
{
    target_ulong reg = *hax_reg;

    if (set) {
        *hax_reg = *qemu_reg;
    } else {
        *qemu_reg = reg;
    }
}

/* sregs has already been synchronized with the HAX kernel before this call */
static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    get_seg(&env->segs[R_CS], &sregs->_cs);
    get_seg(&env->segs[R_DS], &sregs->_ds);
    get_seg(&env->segs[R_ES], &sregs->_es);
    get_seg(&env->segs[R_FS], &sregs->_fs);
    get_seg(&env->segs[R_GS], &sregs->_gs);
    get_seg(&env->segs[R_SS], &sregs->_ss);

    get_seg(&env->tr, &sregs->_tr);
    get_seg(&env->ldt, &sregs->_ldt);
    env->idt.limit = sregs->_idt.limit;
    env->idt.base = sregs->_idt.base;
    env->gdt.limit = sregs->_gdt.limit;
    env->gdt.base = sregs->_gdt.base;
    return 0;
}

static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs->_cs, &env->segs[R_CS]);
        set_seg(&sregs->_ds, &env->segs[R_DS]);
        set_seg(&sregs->_es, &env->segs[R_ES]);
        set_seg(&sregs->_fs, &env->segs[R_FS]);
        set_seg(&sregs->_gs, &env->segs[R_GS]);
        set_seg(&sregs->_ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
                                  (sregs->_cs.selector & 3);
            sregs->_ss.dpl = sregs->_ss.selector & 3;
        }
    }

    set_seg(&sregs->_tr, &env->tr);
    set_seg(&sregs->_ldt, &env->ldt);
    sregs->_idt.limit = env->idt.limit;
    sregs->_idt.base = env->idt.base;
    sregs->_gdt.limit = env->gdt.limit;
    sregs->_gdt.base = env->gdt.base;
    return 0;
}

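/*
 * Bulk register synchronization; 'set' follows the hax_getput_reg()
 * convention: non-zero pushes QEMU state into the kernel, zero fetches
 * the kernel state into QEMU.
 */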
static int hax_sync_vcpu_register(CPUArchState *env, int set)
{
    struct vcpu_state_t regs;
    int ret;
    memset(&regs, 0, sizeof(struct vcpu_state_t));

    if (!set) {
        ret = hax_sync_vcpu_state(env, &regs, 0);
        if (ret < 0) {
            return -1;
        }
    }

    /* general-purpose registers */
    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    hax_getput_reg(&regs._r8, &env->regs[8], set);
    hax_getput_reg(&regs._r9, &env->regs[9], set);
    hax_getput_reg(&regs._r10, &env->regs[10], set);
    hax_getput_reg(&regs._r11, &env->regs[11], set);
    hax_getput_reg(&regs._r12, &env->regs[12], set);
    hax_getput_reg(&regs._r13, &env->regs[13], set);
    hax_getput_reg(&regs._r14, &env->regs[14], set);
    hax_getput_reg(&regs._r15, &env->regs[15], set);
#endif
    hax_getput_reg(&regs._rflags, &env->eflags, set);
    hax_getput_reg(&regs._rip, &env->eip, set);

    if (set) {
        regs._cr0 = env->cr[0];
        regs._cr2 = env->cr[2];
        regs._cr3 = env->cr[3];
        regs._cr4 = env->cr[4];
        hax_set_segments(env, &regs);
    } else {
        env->cr[0] = regs._cr0;
        env->cr[2] = regs._cr2;
        env->cr[3] = regs._cr3;
        env->cr[4] = regs._cr4;
        hax_get_segments(env, &regs);
    }

    if (set) {
        ret = hax_sync_vcpu_state(env, &regs, 1);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}

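/*
 * Only a fixed set of MSRs is synchronized: the SYSENTER trio, the TSC
 * and, for 64-bit targets, the syscall and kernel-GS-base MSRs. nr_msr is
 * the number of entries submitted; 'done' is how many the kernel handled.
 */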
static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
                              uint64_t value)
{
    item->entry = index;
    item->value = value;
}

static int hax_get_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    msrs[n++].entry = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].entry = MSR_EFER;
    msrs[n++].entry = MSR_STAR;
    msrs[n++].entry = MSR_LSTAR;
    msrs[n++].entry = MSR_CSTAR;
    msrs[n++].entry = MSR_FMASK;
    msrs[n++].entry = MSR_KERNELGSBASE;
#endif
    md.nr_msr = n;
    ret = hax_sync_msr(env, &md, 0);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < md.done; i++) {
        switch (msrs[i].entry) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].value;
            break;
        case MSR_IA32_TSC:
            env->tsc = msrs[i].value;
            break;
#ifdef TARGET_X86_64
        case MSR_EFER:
            env->efer = msrs[i].value;
            break;
        case MSR_STAR:
            env->star = msrs[i].value;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].value;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].value;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].value;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].value;
            break;
#endif
        }
    }

    return 0;
}

static int hax_set_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int n = 0;

    memset(&md, 0, sizeof(struct hax_msr_data));
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
#endif
    md.nr_msr = n;
    md.done = 0;

    return hax_sync_msr(env, &md, 1);
}

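/*
 * FPU/SSE state is exchanged in FXSAVE layout (struct fx_layout): control,
 * status and tag words, the x87/MMX register file in st_mm, and XMM0-7 in
 * mmx_1[] with XMM8-15 in mmx_2[].
 */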
static int hax_get_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i, ret;

    ret = hax_sync_fpu(env, &fpu, 0);
    if (ret < 0) {
        return ret;
    }

    /* FSW bits 11-13 hold the x87 top-of-stack pointer */
    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    /* The FXSAVE abridged tag word uses 1 for "valid"; QEMU's fptags use
     * 1 for "empty", hence the inversion. */
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftw >> i) & 1);
    }
    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
        if (CPU_NB_REGS > 8) {
            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
        }
    }
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int hax_set_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i;

    memset(&fpu, 0, sizeof(fpu));
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;

    for (i = 0; i < 8; ++i) {
        fpu.ftw |= (!env->fptags[i]) << i;
    }

    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < 8; i++) {
        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
        if (CPU_NB_REGS > 8) {
            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
        }
    }

    fpu.mxcsr = env->mxcsr;

    return hax_sync_fpu(env, &fpu, 1);
}

static int hax_arch_get_registers(CPUArchState *env)
{
    int ret;

    ret = hax_sync_vcpu_register(env, 0);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_fpu(env);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_msrs(env);
    if (ret < 0) {
        return ret;
    }

    x86_update_hflags(env);
    return 0;
}

static int hax_arch_set_registers(CPUArchState *env)
{
    int ret;

    ret = hax_sync_vcpu_register(env, 1);
    if (ret < 0) {
        fprintf(stderr, "Failed to sync vcpu registers\n");
        return ret;
    }
    ret = hax_set_fpu(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to sync FPU state\n");
        return ret;
    }
    ret = hax_set_msrs(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to sync MSRs\n");
        return ret;
    }

    return 0;
}

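/*
 * 'modified' non-zero means QEMU has changed the register state and it
 * must be pushed to the kernel; zero pulls the kernel state into QEMU.
 */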
static void hax_vcpu_sync_state(CPUArchState *env, int modified)
{
    if (hax_enabled()) {
        if (modified) {
            hax_arch_set_registers(env);
        } else {
            hax_arch_get_registers(env);
        }
    }
}

/*
 * Much simpler than KVM, at least in the first stage: we need not consider
 * device pass-through or the framebuffer, and we may even be able to drop
 * the BIOS entirely.
 */
int hax_sync_vcpus(void)
{
    if (hax_enabled()) {
        CPUState *cpu;

        cpu = first_cpu;
        if (!cpu) {
            return 0;
        }

        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
            int ret;

            ret = hax_arch_set_registers(cpu->env_ptr);
            if (ret < 0) {
                return ret;
            }
        }
    }

    return 0;
}

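/*
 * On system reset, clear any event state left in the per-vcpu tunnels so
 * stale user events or interrupt-injection state cannot leak into the new
 * boot.
 */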
void hax_reset_vcpu_state(void *opaque)
{
    CPUState *cpu;
    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
        cpu->hax_vcpu->tunnel->user_event_pending = 0;
        cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
    }
}

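/*
 * QOM boilerplate: register "hax" as an accelerator type so that
 * "-accel hax" (or "-machine accel=hax") selects hax_accel_init() during
 * machine initialization.
 */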
static void hax_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HAX";
    ac->init_machine = hax_accel_init;
    ac->allowed = &hax_allowed;
}

static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};

static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

type_init(hax_type_init);