xref: /qemu/target/i386/hvf/hvf.c (revision d7a84021)
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "sysemu/runstate.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>
#include <sys/sysctl.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "qemu/main-loop.h"
#include "qemu/accel.h"
#include "target/i386/cpu.h"

#include "hvf-accel-ops.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            (start + size) > slot->start) {
            return slot;
        }
    }
    return NULL;
}
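
/*
 * Illustrative example (added commentary, not part of the original
 * source): slots are treated as half-open intervals [start, start + size).
 * A slot covering [0x1000, 0x3000) matches hvf_find_overlap_slot(0x2000,
 * 0x2000) because 0x2000 < 0x3000 and 0x2000 + 0x2000 > 0x1000. Freed
 * slots (size == 0) are skipped and never match.
 */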

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];

static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
    struct mac_slot *macslot;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}
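
/*
 * Added commentary: resizing is implemented as unmap-then-map. When a
 * slot's size changes, the stale host mapping is torn down with
 * hv_vm_unmap() before hv_vm_map() establishes the new range, so the
 * hypervisor never holds two conflicting mappings for the same
 * guest-physical range.
 */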

void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;
    bool writeable = !area->readonly && !area->rom_device;
    hv_memory_flags_t flags;

    if (!memory_region_is_ram(area)) {
        if (writeable) {
            return;
        } else if (!memory_region_is_romd(area)) {
            /*
             * If the memory device is not in romd_mode, then we actually want
             * to remove the hvf memory slot so all accesses will trap.
             */
            add = false;
        }
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region was re-registered; nothing to do. */
        }
    }

    /* Region needs to be reset. Set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem, 0)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    if (area->readonly ||
        (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
        flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
    } else {
        flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem, flags)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}
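
/*
 * Added commentary: HV_X86_TPR holds the task priority shifted into
 * bits 7:4, hence the "<< 4" above. VMCS_TPR_THRESHOLD takes the 4-bit
 * priority; setting it to the lower of the current TPR and the highest
 * pending interrupt's priority appears intended to force a
 * TPR-below-threshold VM exit as soon as the guest lowers its TPR far
 * enough for that interrupt to become deliverable.
 */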

static void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size, direction);
        ptr += size;
    }
}

static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (!cpu->vcpu_dirty) {
        hvf_get_registers(cpu);
        cpu->vcpu_dirty = true;
    }
}

void hvf_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
                                             run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
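
/*
 * Added commentary on the vcpu_dirty protocol: vcpu_dirty == true means
 * QEMU's CPUX86State copy is authoritative and must be pushed to the
 * hypervisor (hvf_put_registers) before the next hv_vcpu_run();
 * vcpu_dirty == false means the in-kernel state is authoritative and
 * must be fetched (hvf_get_registers) before QEMU may inspect it. The
 * helpers above run on the vCPU thread via run_on_cpu() so they never
 * race with a vCPU that is currently executing.
 */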

static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    if (!slot) {
        return true;
    }
    if (!memory_region_is_ram(slot->region) &&
        !(read && memory_region_is_romd(slot->region))) {
        return true;
    }
    return false;
}
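
/*
 * Added commentary: the function above returns true when the EPT
 * violation needs the instruction emulator (the address is unmapped,
 * i.e. MMIO, or the slot is not plain RAM and the access cannot be
 * satisfied as a ROMD read), and false when the fault is spurious or
 * was a dirty-logging write already resolved by re-opening the slot
 * for writes.
 */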

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}
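
/*
 * Added commentary: dirty-page tracking works by write-protecting the
 * slot (HV_MEMORY_READ only). The first guest write then faults as an
 * EPT violation; ept_emulation_fault() marks the location dirty and
 * restores write access, so subsequent writes to the slot run at full
 * speed until tracking is re-armed (e.g. by hvf_log_sync below).
 */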

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

void hvf_vcpu_destroy(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    g_free(env->hvf_mmio_buf);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

static void init_tsc_freq(CPUX86State *env)
{
    size_t length;
    uint64_t tsc_freq;

    if (env->tsc_khz != 0) {
        return;
    }

    length = sizeof(uint64_t);
    if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
        return;
    }
    env->tsc_khz = tsc_freq / 1000;  /* Hz to kHz */
}
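
/*
 * Added note: "machdep.tsc.frequency" reports the host TSC rate in Hz
 * and can be inspected from a shell with, e.g.,
 * `sysctl machdep.tsc.frequency`. If the sysctl is unavailable,
 * env->tsc_khz is simply left at 0 and the vmware-cpuid-freq feature
 * is reported as unavailable in hvf_init_vcpu().
 */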

static void init_apic_bus_freq(CPUX86State *env)
{
    size_t length;
    uint64_t bus_freq;

    if (env->apic_bus_freq != 0) {
        return;
    }

    length = sizeof(uint64_t);
    if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
        return;
    }
    env->apic_bus_freq = bus_freq;
}

static inline bool tsc_is_known(CPUX86State *env)
{
    return env->tsc_khz != 0;
}

static inline bool apic_bus_freq_is_known(CPUX86State *env)
{
    return env->apic_bus_freq != 0;
}

int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_mmio_buf = g_new(char, 4096);

    if (x86cpu->vmware_cpuid_freq) {
        init_tsc_freq(env);
        init_apic_bus_freq(env);

        if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
            error_report("vmware-cpuid-freq: feature couldn't be enabled");
        }
    }

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = true;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

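    /*
     * Added commentary: each HV_VMX_CAP_* value read above encodes the
     * VMX allowed-0/allowed-1 settings for one control word. cap2ctrl()
     * (see vmx.h) clamps the control bits requested below to what the
     * host CPU actually supports, so these wvmcs() writes never enable
     * a feature the hardware cannot provide.
     */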
    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

static void hvf_store_events(CPUState *cpu, uint32_t ins_len,
                             uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    env->ins_len = 0;
    env->has_error_code = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                              uint32_t *eax, uint32_t *ebx,
                              uint32_t *ecx, uint32_t *edx)
{
    /*
     * A wrapper that extends cpu_x86_cpuid with the 0x40000000 and
     * 0x40000010 leaves; leaves 0x40000001-0x4000000F are filled with
     * zeros. This provides vmware-cpuid-freq support to HVF.
     *
     * Note: leaf 0x40000000 does not expose HVF, leaving the hypervisor
     * signature empty.
     */

    if (index < 0x40000000 || index > 0x40000010 ||
        !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {

        cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
        return;
    }

    switch (index) {
    case 0x40000000:
        *eax = 0x40000010;    /* Max available cpuid leaf */
        *ebx = 0;             /* Leave signature empty */
        *ecx = 0;
        *edx = 0;
        break;
    case 0x40000010:
        *eax = env->tsc_khz;
        *ebx = env->apic_bus_freq / 1000; /* Hz to kHz */
        *ecx = 0;
        *edx = 0;
        break;
    default:
        *eax = 0;
        *ebx = 0;
        *ecx = 0;
        *edx = 0;
        break;
    }
}
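
/*
 * Illustrative example (added commentary, not from the original source):
 * with a 2.4 GHz TSC and a 100 MHz APIC bus, a guest reading CPUID leaf
 * 0x40000010 would see EAX = 2400000 (TSC in kHz) and EBX = 100000
 * (bus frequency in kHz), matching the VMware frequency interface.
 */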

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (env->eflags & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check whether this is an MMIO or an unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, 1);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                env->eip += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (rax == 1) {
                /* CPUID1.ecx.OSXSAVE needs to know CR4 */
                env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4);
            }
            hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_nr = EXCP0D_GPF;
            env->exception_injected = 1;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}
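
/*
 * Added commentary: hvf_vcpu_exec() keeps looping on VM exits while
 * ret == 0. It leaves the loop with EXCP_HLT (guest halted with nothing
 * pending), EXCP_INTERRUPT (an event needs servicing in the main loop
 * with the BQL held), or the bare 1 returned for EXIT_REASON_TPR.
 * Unhandled exit reasons are only logged; the loop then re-enters the
 * guest.
 */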

bool hvf_allowed;

static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}
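
/*
 * Added usage note: this initializer runs when QEMU is started with the
 * HVF accelerator selected, e.g. `qemu-system-x86_64 -accel hvf`. The
 * fixed pool of 32 memory slots mirrors the size of the mac_slots[]
 * array above.
 */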

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);