1 /*
2  * QEMU HAX support
3  *
4  * Copyright IBM, Corp. 2008
5  *           Red Hat, Inc. 2008
6  *
7  * Authors:
8  *  Anthony Liguori   <aliguori@us.ibm.com>
9  *  Glauber Costa     <gcosta@redhat.com>
10  *
11  * Copyright (c) 2011 Intel Corporation
12  *  Written by:
13  *  Jiang Yunhong<yunhong.jiang@intel.com>
14  *  Xin Xiaohui<xiaohui.xin@intel.com>
15  *  Zhang Xiantao<xiantao.zhang@intel.com>
16  *
17  * This work is licensed under the terms of the GNU GPL, version 2 or later.
18  * See the COPYING file in the top-level directory.
19  *
20  */
21 
/*
 * HAX common code for both Windows and Darwin
 */
25 
26 #include "qemu/osdep.h"
27 #include "cpu.h"
28 #include "exec/address-spaces.h"
29 
30 #include "qemu-common.h"
31 #include "hax-i386.h"
32 #include "sysemu/accel.h"
33 #include "sysemu/sysemu.h"
34 #include "qemu/main-loop.h"
35 #include "hw/boards.h"
36 
37 #define DEBUG_HAX 0
38 
39 #define DPRINTF(fmt, ...) \
40     do { \
41         if (DEBUG_HAX) { \
42             fprintf(stdout, fmt, ## __VA_ARGS__); \
43         } \
44     } while (0)
45 
46 /* Current version */
47 const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
48 /* Minimum HAX kernel version */
49 const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */
50 
51 static bool hax_allowed;
52 
53 struct hax_state hax_global;
54 
55 static void hax_vcpu_sync_state(CPUArchState *env, int modified);
56 static int hax_arch_get_registers(CPUArchState *env);
57 
hax_enabled(void)58 int hax_enabled(void)
59 {
60     return hax_allowed;
61 }
62 
valid_hax_tunnel_size(uint16_t size)63 int valid_hax_tunnel_size(uint16_t size)
64 {
65     return size >= sizeof(struct hax_tunnel);
66 }
67 
hax_vcpu_get_fd(CPUArchState * env)68 hax_fd hax_vcpu_get_fd(CPUArchState *env)
69 {
70     struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu;
71     if (!vcpu) {
72         return HAX_INVALID_FD;
73     }
74     return vcpu->fd;
75 }
76 
hax_get_capability(struct hax_state * hax)77 static int hax_get_capability(struct hax_state *hax)
78 {
79     int ret;
80     struct hax_capabilityinfo capinfo, *cap = &capinfo;
81 
82     ret = hax_capability(hax, cap);
83     if (ret) {
84         return ret;
85     }
86 
87     if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
88         if (cap->winfo & HAX_CAP_FAILREASON_VT) {
89             DPRINTF
90                 ("VTX feature is not enabled, HAX driver will not work.\n");
91         } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
92             DPRINTF
93                 ("NX feature is not enabled, HAX driver will not work.\n");
94         }
95         return -ENXIO;
96 
97     }
98 
99     if (!(cap->winfo & HAX_CAP_UG)) {
100         fprintf(stderr, "UG mode is not supported by the hardware.\n");
101         return -ENOTSUP;
102     }
103 
104     hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
105 
106     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
107         if (cap->mem_quota < hax->mem_quota) {
108             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
109             return -ENOSPC;
110         }
111     }
112     return 0;
113 }
114 
hax_version_support(struct hax_state * hax)115 static int hax_version_support(struct hax_state *hax)
116 {
117     int ret;
118     struct hax_module_version version;
119 
120     ret = hax_mod_version(hax, &version);
121     if (ret < 0) {
122         return 0;
123     }
124 
125     if (hax_min_version > version.cur_version) {
126         fprintf(stderr, "Incompatible HAX module version %d,",
127                 version.cur_version);
128         fprintf(stderr, "requires minimum version %d\n", hax_min_version);
129         return 0;
130     }
131     if (hax_cur_version < version.compat_version) {
132         fprintf(stderr, "Incompatible QEMU HAX API version %x,",
133                 hax_cur_version);
134         fprintf(stderr, "requires minimum HAX API version %x\n",
135                 version.compat_version);
136         return 0;
137     }
138 
139     return 1;
140 }
141 
hax_vcpu_create(int id)142 int hax_vcpu_create(int id)
143 {
144     struct hax_vcpu_state *vcpu = NULL;
145     int ret;
146 
147     if (!hax_global.vm) {
148         fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
149         return -1;
150     }
151 
152     if (hax_global.vm->vcpus[id]) {
153         fprintf(stderr, "vcpu %x allocated already\n", id);
154         return 0;
155     }
156 
157     vcpu = g_malloc(sizeof(struct hax_vcpu_state));
158     if (!vcpu) {
159         fprintf(stderr, "Failed to alloc vcpu state\n");
160         return -ENOMEM;
161     }
162 
163     memset(vcpu, 0, sizeof(struct hax_vcpu_state));
164 
165     ret = hax_host_create_vcpu(hax_global.vm->fd, id);
166     if (ret) {
167         fprintf(stderr, "Failed to create vcpu %x\n", id);
168         goto error;
169     }
170 
171     vcpu->vcpu_id = id;
172     vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
173     if (hax_invalid_fd(vcpu->fd)) {
174         fprintf(stderr, "Failed to open the vcpu\n");
175         ret = -ENODEV;
176         goto error;
177     }
178 
179     hax_global.vm->vcpus[id] = vcpu;
180 
181     ret = hax_host_setup_vcpu_channel(vcpu);
182     if (ret) {
183         fprintf(stderr, "Invalid hax tunnel size\n");
184         ret = -EINVAL;
185         goto error;
186     }
187     return 0;
188 
189   error:
190     /* vcpu and tunnel will be closed automatically */
191     if (vcpu && !hax_invalid_fd(vcpu->fd)) {
192         hax_close_fd(vcpu->fd);
193     }
194 
195     hax_global.vm->vcpus[id] = NULL;
196     g_free(vcpu);
197     return -1;
198 }
199 
/*
 * Tear down the HAX vcpu attached to @cpu, if any.
 *
 * Returns -1 when there is no VM, 0 otherwise (including when @cpu has no
 * HAX vcpu).  After a successful destroy cpu->hax_vcpu is NULL.
 */
int hax_vcpu_destroy(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!hax_global.vm) {
        /*
         * vcpu may be NULL here, so report cpu->cpu_index, which is the id
         * hax_init_vcpu() used to create the vcpu, instead of reading it
         * through vcpu.
         */
        fprintf(stderr, "vcpu %x destroy failed, vm is null\n",
                cpu->cpu_index);
        return -1;
    }

    if (!vcpu) {
        return 0;
    }

    /*
     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
     * 2. Closing the fd causes the hax module to clean up the vcpu
     */
    hax_close_fd(vcpu->fd);
    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
    g_free(vcpu);
    /* Do not leave a dangling pointer for the "!cpu->hax_vcpu" checks */
    cpu->hax_vcpu = NULL;
    return 0;
}
222 
/*
 * Create the HAX vcpu for @cpu (indexed by cpu->cpu_index), attach it to
 * the CPU, mark the CPU state dirty and register hax_reset_vcpu_state()
 * as a reset handler.
 *
 * Exits the process if the vcpu cannot be created; otherwise returns the
 * value from hax_vcpu_create().
 */
int hax_init_vcpu(CPUState *cpu)
{
    const int rc = hax_vcpu_create(cpu->cpu_index);

    if (rc < 0) {
        fprintf(stderr, "Failed to create HAX vcpu\n");
        exit(-1);
    }

    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
    cpu->vcpu_dirty = true;
    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));

    return rc;
}
239 
hax_vm_create(struct hax_state * hax)240 struct hax_vm *hax_vm_create(struct hax_state *hax)
241 {
242     struct hax_vm *vm;
243     int vm_id = 0, ret;
244 
245     if (hax_invalid_fd(hax->fd)) {
246         return NULL;
247     }
248 
249     if (hax->vm) {
250         return hax->vm;
251     }
252 
253     vm = g_malloc(sizeof(struct hax_vm));
254     if (!vm) {
255         return NULL;
256     }
257     memset(vm, 0, sizeof(struct hax_vm));
258     ret = hax_host_create_vm(hax, &vm_id);
259     if (ret) {
260         fprintf(stderr, "Failed to create vm %x\n", ret);
261         goto error;
262     }
263     vm->id = vm_id;
264     vm->fd = hax_host_open_vm(hax, vm_id);
265     if (hax_invalid_fd(vm->fd)) {
266         fprintf(stderr, "Failed to open vm %d\n", vm_id);
267         goto error;
268     }
269 
270     hax->vm = vm;
271     return vm;
272 
273   error:
274     g_free(vm);
275     hax->vm = NULL;
276     return NULL;
277 }
278 
hax_vm_destroy(struct hax_vm * vm)279 int hax_vm_destroy(struct hax_vm *vm)
280 {
281     int i;
282 
283     for (i = 0; i < HAX_MAX_VCPU; i++)
284         if (vm->vcpus[i]) {
285             fprintf(stderr, "VCPU should be cleaned before vm clean\n");
286             return -1;
287         }
288     hax_close_fd(vm->fd);
289     g_free(vm);
290     hax_global.vm = NULL;
291     return 0;
292 }
293 
/*
 * Interrupt handler installed by hax_init(): record @mask in the CPU's
 * pending interrupt requests and kick the CPU unless the caller is
 * already running on it.
 */
static void hax_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;

    if (qemu_cpu_is_self(cpu)) {
        return;
    }
    qemu_cpu_kick(cpu);
}
302 
/*
 * Bring up the global HAX state: open the module, validate capabilities
 * and version, create the VM, initialise memory handling, tell the module
 * which API versions QEMU speaks and install the interrupt handler.
 *
 * @ram_size is stored as the memory quota checked against the module.
 *
 * Returns 0 on success.  On failure returns -ENODEV (module could not be
 * opened), -ENOSPC (memory quota exceeded) or -EINVAL (anything else),
 * after releasing whatever had been set up.
 */
static int hax_init(ram_addr_t ram_size)
{
    struct hax_state *hax = &hax_global;
    struct hax_qemu_version qversion;
    int ret;

    memset(hax, 0, sizeof(*hax));
    hax->mem_quota = ram_size;

    hax->fd = hax_mod_open();
    if (hax_invalid_fd(hax->fd)) {
        /* 0 is the "not open" marker tested by the error path below */
        hax->fd = 0;
        ret = -ENODEV;
        goto error;
    }

    ret = hax_get_capability(hax);
    if (ret) {
        /* Only -ENOSPC is passed through; everything else is -EINVAL */
        ret = (ret == -ENOSPC) ? -ENOSPC : -EINVAL;
        goto error;
    }

    if (!hax_version_support(hax)) {
        ret = -EINVAL;
        goto error;
    }

    hax->vm = hax_vm_create(hax);
    if (!hax->vm) {
        fprintf(stderr, "Failed to create HAX VM\n");
        ret = -EINVAL;
        goto error;
    }

    hax_memory_init();

    qversion.cur_version = hax_cur_version;
    qversion.min_version = hax_min_version;
    hax_notify_qemu_version(hax->vm->fd, &qversion);
    cpu_interrupt_handler = hax_handle_interrupt;

    /* ret is still the 0 returned by hax_get_capability() */
    return 0;

  error:
    if (hax->vm) {
        hax_vm_destroy(hax->vm);
    }
    if (hax->fd) {
        hax_mod_close(hax);
    }

    return ret;
}
360 
/*
 * AccelClass init_machine hook: initialise HAX with the machine's RAM
 * size and report the outcome.
 *
 * Any error other than -ENOSPC is reported as "No accelerator found." on
 * stderr; success and -ENOSPC print a status line on stdout.  Returns the
 * hax_init() result unchanged.
 */
static int hax_accel_init(MachineState *ms)
{
    const int ret = hax_init(ms->ram_size);
    const char *status;
    const char *mode;

    if (ret != 0 && ret != -ENOSPC) {
        fprintf(stderr, "No accelerator found.\n");
        return ret;
    }

    if (ret == 0) {
        status = "working";
        mode = "fast virt";
    } else {
        status = "not working";
        mode = "emulation";
    }
    fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
            status, mode);
    return ret;
}
374 
/*
 * Service a fast-MMIO exit described by @hft.
 *
 * direction < 2: transfer hft->size bytes between guest physical address
 * hft->gpa and hft->value, with hft->direction passed on as the write
 * flag.
 * Otherwise: HAX API v4 supports transferring data between two MMIO
 * addresses (instructions such as MOVS require this); hft->size bytes are
 * read from hft->gpa and written to hft->gpa2.
 *
 * Always returns 0.
 */
static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
{
    /* NOTE(review): assumes hft->size never exceeds sizeof(bounce) */
    uint64_t bounce;

    if (hft->direction >= 2) {
        /* gpa ==> gpa2, through a local bounce buffer */
        cpu_physical_memory_rw(hft->gpa, (uint8_t *) &bounce, hft->size, 0);
        cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &bounce, hft->size, 1);
        return 0;
    }

    cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
                           hft->direction);
    return 0;
}
393 
/*
 * Service a port I/O exit: perform @count accesses of @size bytes each on
 * I/O port @port, using consecutive elements of @buffer.
 *
 * When @df is zero the buffer is walked forwards from its first element;
 * otherwise it is walked backwards starting at its last element.  The
 * access is a write when @direction equals HAX_EXIT_IO_OUT.
 *
 * Always returns 0.
 */
static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
                         int direction, int size, int count, void *buffer)
{
    MemTxAttrs attrs = { 0 };
    const int step = df ? -size : size;
    uint8_t *cursor = (uint8_t *) buffer;
    int n;

    if (df) {
        /* Start at the last element */
        cursor += size * count - size;
    }

    for (n = 0; n < count; n++) {
        address_space_rw(&address_space_io, port, attrs,
                         cursor, size, direction == HAX_EXIT_IO_OUT);
        cursor += step;
    }

    return 0;
}
418 
/*
 * Prepare interrupt delivery before entering the guest.
 *
 * If the tunnel says the guest is ready for injection and a hard
 * interrupt is pending, fetch the vector from the PIC and inject it.
 * Then request an interrupt-window exit if a hard interrupt is still
 * pending, or clear the request otherwise.
 *
 * Always returns 0.
 */
static int hax_vcpu_interrupt(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    struct hax_tunnel *ht = cpu->hax_vcpu->tunnel;

    /*
     * Unlike KVM, the HAX kernel module checks eflags itself; QEMU only
     * looks at the readiness flag it publishes.
     */
    if (ht->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        int irq = cpu_get_pic_interrupt(env);

        if (irq >= 0) {
            hax_inject_interrupt(env, irq);
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        }
    }

    /*
     * An interrupt that could not be injected yet needs an interrupt
     * window exit, so that control returns to userspace as soon as the
     * guest can accept it.
     */
    ht->request_interrupt_window =
        (cpu->interrupt_request & CPU_INTERRUPT_HARD) ? 1 : 0;

    return 0;
}
451 
/*
 * Flag a pending user event in the tunnel of @cpu's HAX vcpu.  Does
 * nothing when the CPU has no HAX vcpu.
 */
void hax_raise_event(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (vcpu) {
        vcpu->tunnel->user_event_pending = 1;
    }
}
461 
462 /*
463  * Ask hax kernel module to run the CPU for us till:
464  * 1. Guest crash or shutdown
465  * 2. Need QEMU's emulation like guest execute MMIO instruction
466  * 3. Guest execute HLT
467  * 4. QEMU have Signal/event pending
468  * 5. An unknown VMX exit happens
469  */
hax_vcpu_hax_exec(CPUArchState * env)470 static int hax_vcpu_hax_exec(CPUArchState *env)
471 {
472     int ret = 0;
473     CPUState *cpu = ENV_GET_CPU(env);
474     X86CPU *x86_cpu = X86_CPU(cpu);
475     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
476     struct hax_tunnel *ht = vcpu->tunnel;
477 
478     if (!hax_enabled()) {
479         DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
480         return 0;
481     }
482 
483     cpu->halted = 0;
484 
485     if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
486         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
487         apic_poll_irq(x86_cpu->apic_state);
488     }
489 
490     if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
491         DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
492                 cpu->cpu_index);
493         do_cpu_init(x86_cpu);
494         hax_vcpu_sync_state(env, 1);
495     }
496 
497     if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
498         DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
499                 cpu->cpu_index);
500         hax_vcpu_sync_state(env, 0);
501         do_cpu_sipi(x86_cpu);
502         hax_vcpu_sync_state(env, 1);
503     }
504 
505     do {
506         int hax_ret;
507 
508         if (cpu->exit_request) {
509             ret = 1;
510             break;
511         }
512 
513         hax_vcpu_interrupt(env);
514 
515         qemu_mutex_unlock_iothread();
516         cpu_exec_start(cpu);
517         hax_ret = hax_vcpu_run(vcpu);
518         cpu_exec_end(cpu);
519         qemu_mutex_lock_iothread();
520 
521         /* Simply continue the vcpu_run if system call interrupted */
522         if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
523             DPRINTF("io window interrupted\n");
524             continue;
525         }
526 
527         if (hax_ret < 0) {
528             fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
529             abort();
530         }
531         switch (ht->_exit_status) {
532         case HAX_EXIT_IO:
533             ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
534                             ht->pio._direction,
535                             ht->pio._size, ht->pio._count, vcpu->iobuf);
536             break;
537         case HAX_EXIT_FAST_MMIO:
538             ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
539             break;
540         /* Guest state changed, currently only for shutdown */
541         case HAX_EXIT_STATECHANGE:
542             fprintf(stdout, "VCPU shutdown request\n");
543             qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
544             hax_vcpu_sync_state(env, 0);
545             ret = 1;
546             break;
547         case HAX_EXIT_UNKNOWN_VMEXIT:
548             fprintf(stderr, "Unknown VMX exit %x from guest\n",
549                     ht->_exit_reason);
550             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
551             hax_vcpu_sync_state(env, 0);
552             cpu_dump_state(cpu, stderr, fprintf, 0);
553             ret = -1;
554             break;
555         case HAX_EXIT_HLT:
556             if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
557                 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
558                 /* hlt instruction with interrupt disabled is shutdown */
559                 env->eflags |= IF_MASK;
560                 cpu->halted = 1;
561                 cpu->exception_index = EXCP_HLT;
562                 ret = 1;
563             }
564             break;
565         /* these situations will continue to hax module */
566         case HAX_EXIT_INTERRUPT:
567         case HAX_EXIT_PAUSED:
568             break;
569         case HAX_EXIT_MMIO:
570             /* Should not happen on UG system */
571             fprintf(stderr, "HAX: unsupported MMIO emulation\n");
572             ret = -1;
573             break;
574         case HAX_EXIT_REAL:
575             /* Should not happen on UG system */
576             fprintf(stderr, "HAX: unimplemented real mode emulation\n");
577             ret = -1;
578             break;
579         default:
580             fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
581             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
582             hax_vcpu_sync_state(env, 0);
583             cpu_dump_state(cpu, stderr, fprintf, 0);
584             ret = 1;
585             break;
586         }
587     } while (!ret);
588 
589     if (cpu->exit_request) {
590         cpu->exit_request = 0;
591         cpu->exception_index = EXCP_INTERRUPT;
592     }
593     return ret < 0;
594 }
595 
/*
 * run_on_cpu() worker: fetch the registers from HAX into QEMU's CPU state
 * and mark that state dirty.  @arg is unused.
 */
static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    hax_arch_get_registers(cpu->env_ptr);
    cpu->vcpu_dirty = true;
}
603 
/*
 * Make QEMU's copy of @cpu's state current.  Nothing is done when the
 * state is already marked dirty; otherwise the fetch is run on the vcpu
 * via run_on_cpu().
 */
void hax_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->vcpu_dirty) {
        return;
    }
    run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
610 
/*
 * run_on_cpu() worker used after reset: push QEMU's CPU state to HAX and
 * clear the dirty flag.  @arg is unused.
 */
static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    hax_vcpu_sync_state(cpu->env_ptr, 1);
    cpu->vcpu_dirty = false;
}
619 
/* After a reset, push @cpu's state to HAX via run_on_cpu(). */
void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
624 
/*
 * run_on_cpu() worker used after init: push QEMU's CPU state to HAX and
 * clear the dirty flag.  @arg is unused.
 */
static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    hax_vcpu_sync_state(cpu->env_ptr, 1);
    cpu->vcpu_dirty = false;
}
632 
/* After init, push @cpu's state to HAX via run_on_cpu(). */
void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
637 
/*
 * run_on_cpu() worker used before loadvm: mark QEMU's CPU state dirty.
 * @arg is unused.
 */
static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
642 
/* Before loadvm, mark @cpu's state dirty via run_on_cpu(). */
void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
647 
/*
 * Run @cpu under HAX until an exception index of EXCP_INTERRUPT or above
 * is pending.  That index is returned and cpu->exception_index is reset
 * to -1.  Aborts if hax_vcpu_hax_exec() reports a fatal exit.
 */
int hax_smp_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);

    for (;;) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            int pending = cpu->exception_index;

            cpu->exception_index = -1;
            return pending;
        }

        if (hax_vcpu_hax_exec(env)) {
            fprintf(stderr, "Unsupported HAX vcpu return\n");
            abort();
        }
    }
}
671 
set_v8086_seg(struct segment_desc_t * lhs,const SegmentCache * rhs)672 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
673 {
674     memset(lhs, 0, sizeof(struct segment_desc_t));
675     lhs->selector = rhs->selector;
676     lhs->base = rhs->base;
677     lhs->limit = rhs->limit;
678     lhs->type = 3;
679     lhs->present = 1;
680     lhs->dpl = 3;
681     lhs->operand_size = 0;
682     lhs->desc = 1;
683     lhs->long_mode = 0;
684     lhs->granularity = 0;
685     lhs->available = 0;
686 }
687 
/*
 * Convert HAX segment descriptor @rhs into QEMU segment cache @lhs:
 * selector, base and limit are copied and the attribute fields are packed
 * into lhs->flags using the DESC_* shifts and masks.
 */
static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;

    /* Multi-bit fields and the fields placed by shift */
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->operand_size << DESC_B_SHIFT)
        | (rhs->long_mode << DESC_L_SHIFT);

    /* Fields placed by multiplying with their mask */
    lhs->flags |= (rhs->present * DESC_P_MASK)
        | (rhs->desc * DESC_S_MASK)
        | (rhs->granularity * DESC_G_MASK)
        | (rhs->available * DESC_AVL_MASK);
}
701 
set_seg(struct segment_desc_t * lhs,const SegmentCache * rhs)702 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
703 {
704     unsigned flags = rhs->flags;
705 
706     memset(lhs, 0, sizeof(struct segment_desc_t));
707     lhs->selector = rhs->selector;
708     lhs->base = rhs->base;
709     lhs->limit = rhs->limit;
710     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
711     lhs->present = (flags & DESC_P_MASK) != 0;
712     lhs->dpl = rhs->selector & 3;
713     lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
714     lhs->desc = (flags & DESC_S_MASK) != 0;
715     lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
716     lhs->granularity = (flags & DESC_G_MASK) != 0;
717     lhs->available = (flags & DESC_AVL_MASK) != 0;
718 }
719 
/*
 * Copy one register between the HAX state and the QEMU state.
 * @set non-zero: QEMU -> HAX (*hax_reg = *qemu_reg).
 * @set zero:     HAX -> QEMU, with the 64-bit HAX value converted to
 *                target_ulong.
 */
static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
{
    if (set) {
        *hax_reg = *qemu_reg;
        return;
    }
    *qemu_reg = (target_ulong) *hax_reg;
}
730 
731 /* The sregs has been synced with HAX kernel already before this call */
hax_get_segments(CPUArchState * env,struct vcpu_state_t * sregs)732 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
733 {
734     get_seg(&env->segs[R_CS], &sregs->_cs);
735     get_seg(&env->segs[R_DS], &sregs->_ds);
736     get_seg(&env->segs[R_ES], &sregs->_es);
737     get_seg(&env->segs[R_FS], &sregs->_fs);
738     get_seg(&env->segs[R_GS], &sregs->_gs);
739     get_seg(&env->segs[R_SS], &sregs->_ss);
740 
741     get_seg(&env->tr, &sregs->_tr);
742     get_seg(&env->ldt, &sregs->_ldt);
743     env->idt.limit = sregs->_idt.limit;
744     env->idt.base = sregs->_idt.base;
745     env->gdt.limit = sregs->_gdt.limit;
746     env->gdt.base = sregs->_gdt.base;
747     return 0;
748 }
749 
/*
 * Fill the segment registers, TR, LDT, IDT and GDT of @sregs from @env.
 *
 * The six segment registers are converted with set_v8086_seg() when
 * VM_MASK is set in eflags and with set_seg() otherwise.  In the latter
 * case, when CR0.PE is set, the RPL of SS is forced to that of CS and the
 * SS dpl follows.  Always returns 0.
 */
static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    const int v8086 = (env->eflags & VM_MASK) != 0;
    void (*convert)(struct segment_desc_t *, const SegmentCache *) =
        v8086 ? set_v8086_seg : set_seg;

    convert(&sregs->_cs, &env->segs[R_CS]);
    convert(&sregs->_ds, &env->segs[R_DS]);
    convert(&sregs->_es, &env->segs[R_ES]);
    convert(&sregs->_fs, &env->segs[R_FS]);
    convert(&sregs->_gs, &env->segs[R_GS]);
    convert(&sregs->_ss, &env->segs[R_SS]);

    if (!v8086 && (env->cr[0] & CR0_PE_MASK)) {
        /* force ss cpl to cs cpl */
        sregs->_ss.selector = (sregs->_ss.selector & ~3) |
                              (sregs->_cs.selector & 3);
        sregs->_ss.dpl = sregs->_ss.selector & 3;
    }

    set_seg(&sregs->_tr, &env->tr);
    set_seg(&sregs->_ldt, &env->ldt);

    /* Descriptor tables: only base and limit are transferred */
    sregs->_gdt.base = env->gdt.base;
    sregs->_gdt.limit = env->gdt.limit;
    sregs->_idt.base = env->idt.base;
    sregs->_idt.limit = env->idt.limit;

    return 0;
}
783 
/*
 * Transfer the general registers, rflags, rip, CR0/2/3/4 and the segment
 * state between @env and the HAX vcpu.
 *
 * @set zero:     read the state from HAX and copy it into @env.
 * @set non-zero: build the state from @env and write it to HAX.
 *
 * Returns 0 on success, -1 if the HAX state transfer fails.
 */
static int hax_sync_vcpu_register(CPUArchState *env, int set)
{
    struct vcpu_state_t regs;

    memset(&regs, 0, sizeof(regs));

    /* When reading, fetch the HAX state before unpacking it */
    if (!set && hax_sync_vcpu_state(env, &regs, 0) < 0) {
        return -1;
    }

    /* generic register */
    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    hax_getput_reg(&regs._r8, &env->regs[8], set);
    hax_getput_reg(&regs._r9, &env->regs[9], set);
    hax_getput_reg(&regs._r10, &env->regs[10], set);
    hax_getput_reg(&regs._r11, &env->regs[11], set);
    hax_getput_reg(&regs._r12, &env->regs[12], set);
    hax_getput_reg(&regs._r13, &env->regs[13], set);
    hax_getput_reg(&regs._r14, &env->regs[14], set);
    hax_getput_reg(&regs._r15, &env->regs[15], set);
#endif
    hax_getput_reg(&regs._rflags, &env->eflags, set);
    hax_getput_reg(&regs._rip, &env->eip, set);

    if (!set) {
        env->cr[0] = regs._cr0;
        env->cr[2] = regs._cr2;
        env->cr[3] = regs._cr3;
        env->cr[4] = regs._cr4;
        hax_get_segments(env, &regs);
        return 0;
    }

    regs._cr0 = env->cr[0];
    regs._cr2 = env->cr[2];
    regs._cr3 = env->cr[3];
    regs._cr4 = env->cr[4];
    hax_set_segments(env, &regs);

    /* When writing, push the assembled state to HAX last */
    if (hax_sync_vcpu_state(env, &regs, 1) < 0) {
        return -1;
    }
    return 0;
}
841 
hax_msr_entry_set(struct vmx_msr * item,uint32_t index,uint64_t value)842 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
843                               uint64_t value)
844 {
845     item->entry = index;
846     item->value = value;
847 }
848 
/*
 * Read the SYSENTER MSRs and the TSC (plus EFER, STAR, LSTAR, CSTAR,
 * FMASK and KERNELGSBASE on 64-bit targets) from the HAX vcpu into @env.
 *
 * Only the first md.done entries reported back by hax_sync_msr() are
 * copied.  Returns 0 on success or the negative hax_sync_msr() result.
 */
static int hax_get_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int ret, i, n;

    /*
     * Zero the request, as hax_set_msrs() does, so that neither md.done
     * nor the unused entries are passed on as uninitialised stack data.
     */
    memset(&md, 0, sizeof(md));

    n = 0;
    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    msrs[n++].entry = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].entry = MSR_EFER;
    msrs[n++].entry = MSR_STAR;
    msrs[n++].entry = MSR_LSTAR;
    msrs[n++].entry = MSR_CSTAR;
    msrs[n++].entry = MSR_FMASK;
    msrs[n++].entry = MSR_KERNELGSBASE;
#endif
    md.nr_msr = n;
    ret = hax_sync_msr(env, &md, 0);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < md.done; i++) {
        switch (msrs[i].entry) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].value;
            break;
        case MSR_IA32_TSC:
            env->tsc = msrs[i].value;
            break;
#ifdef TARGET_X86_64
        case MSR_EFER:
            env->efer = msrs[i].value;
            break;
        case MSR_STAR:
            env->star = msrs[i].value;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].value;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].value;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].value;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].value;
            break;
#endif
        default:
            /* An entry that was not requested: ignore it */
            break;
        }
    }

    return 0;
}
913 
/*
 * Write the SYSENTER MSRs and the TSC (plus EFER, STAR, LSTAR, CSTAR,
 * FMASK and KERNELGSBASE on 64-bit targets) from @env to the HAX vcpu.
 * Returns the hax_sync_msr() result.
 */
static int hax_set_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *next;

    memset(&md, 0, sizeof(md));
    next = md.entries;

    hax_msr_entry_set(next++, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    hax_msr_entry_set(next++, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    hax_msr_entry_set(next++, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    hax_msr_entry_set(next++, MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    hax_msr_entry_set(next++, MSR_EFER, env->efer);
    hax_msr_entry_set(next++, MSR_STAR, env->star);
    hax_msr_entry_set(next++, MSR_LSTAR, env->lstar);
    hax_msr_entry_set(next++, MSR_CSTAR, env->cstar);
    hax_msr_entry_set(next++, MSR_FMASK, env->fmask);
    hax_msr_entry_set(next++, MSR_KERNELGSBASE, env->kernelgsbase);
#endif
    /* Number of entries filled in above */
    md.nr_msr = next - md.entries;
    md.done = 0;

    return hax_sync_msr(env, &md, 1);
}
939 
/*
 * Read the FPU/SSE state from the HAX vcpu into @env: status and control
 * words, top-of-stack, tag bits, the x87 register file, the XMM registers
 * and MXCSR.  Returns 0 on success or the negative hax_sync_fpu() result.
 */
static int hax_get_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int ret = hax_sync_fpu(env, &fpu, 0);
    int i;

    if (ret < 0) {
        return ret;
    }

    env->fpuc = fpu.fcw;
    env->fpus = fpu.fsw;
    /* The top-of-stack index lives in bits 11..13 of the status word */
    env->fpstt = (fpu.fsw >> 11) & 7;
    env->mxcsr = fpu.mxcsr;
    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        /* QEMU's tag is the inverse of bit i of ftw */
        env->fptags[i] = !((fpu.ftw >> i) & 1);

        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
        if (CPU_NB_REGS > 8) {
            /* The upper eight XMM registers come from mmx_2 */
            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
        }
    }

    return 0;
}
970 
/*
 * Write the FPU/SSE state of @env to the HAX vcpu: status and control
 * words (with the top-of-stack folded into the status word), tag bits,
 * the x87 register file, the XMM registers and MXCSR.  Returns the
 * hax_sync_fpu() result.
 */
static int hax_set_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i;

    memset(&fpu, 0, sizeof(fpu));

    fpu.fcw = env->fpuc;
    /* Replace bits 11..13 of the status word with the top-of-stack index */
    fpu.fsw = (env->fpus & ~(7 << 11)) | ((env->fpstt & 7) << 11);
    fpu.mxcsr = env->mxcsr;
    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        /* Bit i of ftw is the inverse of QEMU's tag */
        fpu.ftw |= (!env->fptags[i]) << i;

        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
        if (CPU_NB_REGS > 8) {
            /* The upper eight XMM registers go to mmx_2 */
            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
        }
    }

    return hax_sync_fpu(env, &fpu, 1);
}
999 
/*
 * Pull the complete vcpu state from HAX into @env: registers, then FPU,
 * then MSRs, and finally recompute hflags.  Stops at the first failing
 * step and returns its negative result; returns 0 on success.
 */
static int hax_arch_get_registers(CPUArchState *env)
{
    int ret = hax_sync_vcpu_register(env, 0);

    if (ret >= 0) {
        ret = hax_get_fpu(env);
    }
    if (ret >= 0) {
        ret = hax_get_msrs(env);
    }
    if (ret < 0) {
        return ret;
    }

    x86_update_hflags(env);
    return 0;
}
1022 
/*
 * Push the complete vcpu state from @env to HAX: registers, then FPU,
 * then MSRs.  Stops at the first failing step, reports it on stderr and
 * returns its negative result; returns 0 on success.
 */
static int hax_arch_set_registers(CPUArchState *env)
{
    int ret = hax_sync_vcpu_register(env, 1);

    if (ret < 0) {
        fprintf(stderr, "Failed to sync vcpu reg\n");
        return ret;
    }

    ret = hax_set_fpu(env);
    if (ret < 0) {
        fprintf(stderr, "FPU failed\n");
        return ret;
    }

    ret = hax_set_msrs(env);
    if (ret < 0) {
        fprintf(stderr, "MSR failed\n");
        return ret;
    }

    return 0;
}
1045 
/*
 * Synchronise the vcpu state between QEMU and HAX; a no-op when HAX is
 * not enabled.
 * @modified non-zero: QEMU's state was modified, push it to HAX.
 * @modified zero:     pull the state from HAX into QEMU.
 * Errors from the transfer are not reported to the caller.
 */
static void hax_vcpu_sync_state(CPUArchState *env, int modified)
{
    if (!hax_enabled()) {
        return;
    }

    if (modified) {
        hax_arch_set_registers(env);
        return;
    }
    hax_arch_get_registers(env);
}
1056 
1057 /*
1058  * much simpler than kvm, at least in first stage because:
1059  * We don't need consider the device pass-through, we don't need
1060  * consider the framebuffer, and we may even remove the bios at all
1061  */
hax_sync_vcpus(void)1062 int hax_sync_vcpus(void)
1063 {
1064     if (hax_enabled()) {
1065         CPUState *cpu;
1066 
1067         cpu = first_cpu;
1068         if (!cpu) {
1069             return 0;
1070         }
1071 
1072         for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1073             int ret;
1074 
1075             ret = hax_arch_set_registers(cpu->env_ptr);
1076             if (ret < 0) {
1077                 return ret;
1078             }
1079         }
1080     }
1081 
1082     return 0;
1083 }
1084 
hax_reset_vcpu_state(void * opaque)1085 void hax_reset_vcpu_state(void *opaque)
1086 {
1087     CPUState *cpu;
1088     for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1089         cpu->hax_vcpu->tunnel->user_event_pending = 0;
1090         cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1091     }
1092 }
1093 
/*
 * Class initialiser for the HAX accelerator type: set the accelerator
 * name, the machine-init hook and the "allowed" flag storage.  @data is
 * unused.
 */
static void hax_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *accel = ACCEL_CLASS(oc);

    accel->allowed = &hax_allowed;
    accel->init_machine = hax_accel_init;
    accel->name = "HAX";
}
1101 
1102 static const TypeInfo hax_accel_type = {
1103     .name = ACCEL_CLASS_NAME("hax"),
1104     .parent = TYPE_ACCEL,
1105     .class_init = hax_accel_class_init,
1106 };
1107 
hax_type_init(void)1108 static void hax_type_init(void)
1109 {
1110     type_register_static(&hax_accel_type);
1111 }
1112 
1113 type_init(hax_type_init);
1114