xref: /qemu/target/i386/hvf/hvf.c (revision d0fb9657)
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "sysemu/runstate.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>
#include <sys/sysctl.h>

#include "hw/i386/apic_internal.h"
#include "qemu/main-loop.h"
#include "qemu/accel.h"
#include "target/i386/cpu.h"

#include "hvf-accel-ops.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
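/*
 * Return the first slot whose mapped range intersects [start, start + size).
 * Two half-open intervals [a, a + s) and [b, b + t) overlap iff
 * a < b + t && b < a + s, which is exactly the test below; slots with
 * size 0 are free and never match.
 */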
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            (start + size) > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];

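/*
 * Propagate one QEMU memory slot to Hypervisor.framework. A previously
 * mapped slot whose size changed is unmapped first; a slot with size 0
 * is only removed. Otherwise hv_vm_map() installs the host virtual range
 * backing the slot at its guest physical address with the given flags.
 */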
static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
    struct mac_slot *macslot;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;
    bool writeable = !area->readonly && !area->rom_device;
    hv_memory_flags_t flags;

    if (!memory_region_is_ram(area)) {
        if (writeable) {
            return;
        } else if (!memory_region_is_romd(area)) {
            /*
             * If the memory device is not in romd_mode, then we actually want
             * to remove the hvf memory slot so all accesses will trap.
             */
            add = false;
        }
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region is already registered; nothing to do. */
        }
    }

    /* Region needs to be reset. Set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem, 0)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    if (area->readonly ||
        (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
        flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
    } else {
        flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem, flags)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

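/*
 * The APIC keeps the task priority class in bits 7:4 of its TPR register,
 * while cpu_get_apic_tpr() and VMCS_TPR_THRESHOLD deal in the 4-bit class
 * value; hence the << 4 / >> 4 conversions below. The threshold is chosen
 * so that the guest only takes a TPR-below-threshold exit when lowering
 * TPR could make the highest pending interrupt (IRR) deliverable.
 */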
void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

static void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

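/*
 * Perform port I/O on behalf of the guest. direction doubles as the
 * is_write argument of address_space_rw(): 0 reads from the device into
 * buffer (IN), 1 writes buffer out to the device (OUT). count > 1 is
 * used for string I/O.
 */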
void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                  int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (!cpu->vcpu_dirty) {
        hvf_get_registers(cpu);
        cpu->vcpu_dirty = true;
    }
}

void hvf_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
                                             run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

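/*
 * Decide whether an EPT violation needs MMIO instruction emulation.
 * Returns true for data accesses that miss RAM (MMIO or unmapped space);
 * returns false for faults resolved here, notably the write protection
 * installed for dirty logging, which is handled by marking the page
 * dirty and re-opening the slot for writes.
 */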
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    if (!slot) {
        return true;
    }
    if (!memory_region_is_ram(slot->region) &&
        !(read && memory_region_is_romd(slot->region))) {
        return true;
    }
    return false;
}

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking the region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

void hvf_vcpu_destroy(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    g_free(env->hvf_mmio_buf);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

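/*
 * Query the host TSC frequency via the machdep.tsc.frequency sysctl and
 * cache it in kHz; together with the APIC bus frequency below it feeds
 * the vmware-cpuid-freq leaves (0x40000010).
 */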
static void init_tsc_freq(CPUX86State *env)
{
    size_t length;
    uint64_t tsc_freq;

    if (env->tsc_khz != 0) {
        return;
    }

    length = sizeof(uint64_t);
    if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
        return;
    }
    env->tsc_khz = tsc_freq / 1000;  /* Hz to kHz */
}

static void init_apic_bus_freq(CPUX86State *env)
{
    size_t length;
    uint64_t bus_freq;

    if (env->apic_bus_freq != 0) {
        return;
    }

    length = sizeof(uint64_t);
    if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
        return;
    }
    env->apic_bus_freq = bus_freq;
}

static inline bool tsc_is_known(CPUX86State *env)
{
    return env->tsc_khz != 0;
}

static inline bool apic_bus_freq_is_known(CPUX86State *env)
{
    return env->apic_bus_freq != 0;
}

int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_mmio_buf = g_new(char, 4096);

    if (x86cpu->vmware_cpuid_freq) {
        init_tsc_freq(env);
        init_apic_bus_freq(env);

        if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
            error_report("vmware-cpuid-freq: feature couldn't be enabled");
        }
    }

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = true;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

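    /*
     * cap2ctrl() (per its definition in vmx.h) clamps the requested
     * control bits to what the host reports: in each capability word the
     * low 32 bits carry the must-be-one settings and the high 32 bits
     * the allowed-one settings, i.e.
     * cap2ctrl(cap, ctrl) == (ctrl | (cap & 0xffffffff)) & (cap >> 32).
     */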
    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

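/*
 * Record the state of an event (interrupt, NMI or exception) whose
 * delivery was interrupted by this VM exit, as described by the IDT
 * vectoring info field, so that hvf_inject_interrupts() can replay it on
 * the next entry; also mirror the guest's NMI/STI/MOV-SS blocking state
 * into env->hflags/hflags2.
 */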
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    env->ins_len = 0;
    env->has_error_code = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                              uint32_t *eax, uint32_t *ebx,
                              uint32_t *ecx, uint32_t *edx)
{
    /*
     * A wrapper that extends cpu_x86_cpuid with the 0x40000000 and
     * 0x40000010 leaves; leaves 0x40000001-0x4000000F are filled with
     * zeros. This provides vmware-cpuid-freq support to hvf.
     *
     * Note: leaf 0x40000000 does not expose HVF itself; the hypervisor
     * signature is left empty.
     */

    if (index < 0x40000000 || index > 0x40000010 ||
        !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {

        cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
        return;
    }

    switch (index) {
    case 0x40000000:
        *eax = 0x40000010;    /* Max available cpuid leaf */
        *ebx = 0;             /* Leave signature empty */
        *ecx = 0;
        *edx = 0;
        break;
    case 0x40000010:
        *eax = env->tsc_khz;
        *ebx = env->apic_bus_freq / 1000; /* Hz to kHz */
        *ecx = 0;
        *edx = 0;
        break;
    default:
        *eax = 0;
        *ebx = 0;
        *ecx = 0;
        *edx = 0;
        break;
    }
}
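
/*
 * Worked example with hypothetical host values: for env->tsc_khz ==
 * 2904000 and env->apic_bus_freq == 100000000, a guest CPUID with
 * EAX=0x40000010 returns EAX = 2904000 (TSC rate in kHz) and
 * EBX = 100000 (APIC bus rate in kHz), the layout VMware-aware guests
 * expect.
 */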

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (env->eflags & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check whether this is an MMIO or an unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, 1);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
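            /*
             * Exit qualification layout for I/O instructions (Intel SDM):
             * bits 2:0 hold the access size minus one, bit 3 the
             * direction (1 = IN), bit 4 the string flag, bit 5 the REP
             * prefix, and bits 31:16 the port number.
             */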
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                env->eip += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (rax == 1) {
                /* CPUID1.ecx.OSXSAVE needs to know CR4 */
                env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4);
            }
            hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
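            /*
             * Exit qualification layout for CR accesses (Intel SDM):
             * bits 3:0 identify the control register and bits 11:8 the
             * general-purpose register involved.
             */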
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_nr = EXCP0D_GPF;
            env->exception_injected = 1;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);