1 /*
2  * QEMU Windows Hypervisor Platform accelerator (WHPX)
3  *
4  * Copyright Microsoft Corp. 2017
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include "cpu.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "sysemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "qemu/error-report.h"
23 #include "qapi/error.h"
24 #include "migration/blocker.h"
25 #include "whp-dispatch.h"
26 
27 #include "whpx-cpus.h"
28 
29 #include <WinHvPlatform.h>
30 #include <WinHvEmulation.h>
31 
/*
 * Frequency constant of 200,000,000 (i.e. 200 MHz if the unit is Hz).
 * NOTE(review): not referenced in this part of the file; presumably the
 * APIC bus frequency reported to the guest - confirm against its users.
 */
#define HYPERV_APIC_BUS_FREQUENCY      (200000000ULL)
33 
/* Accelerator-wide state; a single instance (whpx_global) is used below. */
struct whpx_state {
    /*
     * NOTE(review): not used in the visible part of the file; presumably a
     * guest memory limit - confirm against the initialization code.
     */
    uint64_t mem_quota;
    /* Partition handle passed to the WHv* calls made for every vCPU. */
    WHV_PARTITION_HANDLE partition;
};
38 
/*
 * Registers transferred in a single batch by whpx_set_registers() and
 * whpx_get_registers().
 *
 * Both functions walk this table positionally and assert on the expected
 * name (or index) at each step, so the order of the entries here must stay
 * in step with the order in which those functions fill and read the values
 * array. Entries that are commented out are not synchronized.
 */
static const WHV_REGISTER_NAME whpx_register_names[] = {

    /* X64 General purpose registers */
    WHvX64RegisterRax,
    WHvX64RegisterRcx,
    WHvX64RegisterRdx,
    WHvX64RegisterRbx,
    WHvX64RegisterRsp,
    WHvX64RegisterRbp,
    WHvX64RegisterRsi,
    WHvX64RegisterRdi,
    WHvX64RegisterR8,
    WHvX64RegisterR9,
    WHvX64RegisterR10,
    WHvX64RegisterR11,
    WHvX64RegisterR12,
    WHvX64RegisterR13,
    WHvX64RegisterR14,
    WHvX64RegisterR15,
    WHvX64RegisterRip,
    WHvX64RegisterRflags,

    /* X64 Segment registers */
    WHvX64RegisterEs,
    WHvX64RegisterCs,
    WHvX64RegisterSs,
    WHvX64RegisterDs,
    WHvX64RegisterFs,
    WHvX64RegisterGs,
    WHvX64RegisterLdtr,
    WHvX64RegisterTr,

    /* X64 Table registers */
    WHvX64RegisterIdtr,
    WHvX64RegisterGdtr,

    /* X64 Control Registers */
    WHvX64RegisterCr0,
    WHvX64RegisterCr2,
    WHvX64RegisterCr3,
    WHvX64RegisterCr4,
    WHvX64RegisterCr8,

    /* X64 Debug Registers */
    /*
     * WHvX64RegisterDr0,
     * WHvX64RegisterDr1,
     * WHvX64RegisterDr2,
     * WHvX64RegisterDr3,
     * WHvX64RegisterDr6,
     * WHvX64RegisterDr7,
     */

    /* X64 Floating Point and Vector Registers */
    WHvX64RegisterXmm0,
    WHvX64RegisterXmm1,
    WHvX64RegisterXmm2,
    WHvX64RegisterXmm3,
    WHvX64RegisterXmm4,
    WHvX64RegisterXmm5,
    WHvX64RegisterXmm6,
    WHvX64RegisterXmm7,
    WHvX64RegisterXmm8,
    WHvX64RegisterXmm9,
    WHvX64RegisterXmm10,
    WHvX64RegisterXmm11,
    WHvX64RegisterXmm12,
    WHvX64RegisterXmm13,
    WHvX64RegisterXmm14,
    WHvX64RegisterXmm15,
    WHvX64RegisterFpMmx0,
    WHvX64RegisterFpMmx1,
    WHvX64RegisterFpMmx2,
    WHvX64RegisterFpMmx3,
    WHvX64RegisterFpMmx4,
    WHvX64RegisterFpMmx5,
    WHvX64RegisterFpMmx6,
    WHvX64RegisterFpMmx7,
    WHvX64RegisterFpControlStatus,
    WHvX64RegisterXmmControlStatus,

    /* X64 MSRs */
    WHvX64RegisterEfer,
#ifdef TARGET_X86_64
    WHvX64RegisterKernelGsBase,
#endif
    WHvX64RegisterApicBase,
    /* WHvX64RegisterPat, */
    WHvX64RegisterSysenterCs,
    WHvX64RegisterSysenterEip,
    WHvX64RegisterSysenterEsp,
    WHvX64RegisterStar,
#ifdef TARGET_X86_64
    WHvX64RegisterLstar,
    WHvX64RegisterCstar,
    WHvX64RegisterSfmask,
#endif

    /* Interrupt / Event Registers */
    /*
     * WHvRegisterPendingInterruption,
     * WHvRegisterInterruptState,
     * WHvRegisterPendingEvent0,
     * WHvRegisterPendingEvent1
     * WHvX64RegisterDeliverabilityNotifications,
     */
};
146 
/*
 * Value buffer for one batched register transfer: values[i] holds the value
 * of the register named by whpx_register_names[i].
 */
struct whpx_register_set {
    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
};
150 
/* Per-vCPU accelerator state, retrieved with get_whpx_vcpu(). */
struct whpx_vcpu {
    /* Emulator handle passed to WHvEmulatorTryMmioEmulation/TryIoEmulation. */
    WHV_EMULATOR_HANDLE emulator;
    /* Set once an interrupt deliverability notification has been requested. */
    bool window_registered;
    /*
     * Cleared when an NMI is queued for injection; otherwise refreshed after
     * each exit as "not in an interrupt shadow".
     */
    bool interruptable;
    /* Last TPR value exchanged with the hypervisor (CR8). */
    uint64_t tpr;
    /* Last APIC base value exchanged with the hypervisor. */
    uint64_t apic_base;
    /* InterruptionPending flag taken from the most recent exit context. */
    bool interruption_pending;

    /* Must be the last field as it may have a tail */
    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
162 
/*
 * NOTE(review): the three statics below are not used in the visible part of
 * the file; presumably they track whether the accelerator is enabled and
 * whether the WinHvPlatform/WinHvEmulation libraries have been loaded -
 * confirm against the initialization code.
 */
static bool whpx_allowed;
static bool whp_dispatch_initialized;
static HMODULE hWinHvPlatform, hWinHvEmulation;

/* The single accelerator state instance used by every function below. */
struct whpx_state whpx_global;
/* Table of WHv* entry points; all hypervisor calls go through it. */
struct WHPDispatch whp_dispatch;
169 
170 
171 /*
172  * VP support
173  */
174 
/*
 * Return the WHPX per-vCPU state of @cpu.
 *
 * The state is kept in the CPU's hax_vcpu slot and is cast back to its real
 * type here.
 */
static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
{
    void *opaque = cpu->hax_vcpu;

    return (struct whpx_vcpu *)opaque;
}
179 
whpx_seg_q2h(const SegmentCache * qs,int v86,int r86)180 static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
181                                              int r86)
182 {
183     WHV_X64_SEGMENT_REGISTER hs;
184     unsigned flags = qs->flags;
185 
186     hs.Base = qs->base;
187     hs.Limit = qs->limit;
188     hs.Selector = qs->selector;
189 
190     if (v86) {
191         hs.Attributes = 0;
192         hs.SegmentType = 3;
193         hs.Present = 1;
194         hs.DescriptorPrivilegeLevel = 3;
195         hs.NonSystemSegment = 1;
196 
197     } else {
198         hs.Attributes = (flags >> DESC_TYPE_SHIFT);
199 
200         if (r86) {
201             /* hs.Base &= 0xfffff; */
202         }
203     }
204 
205     return hs;
206 }
207 
whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER * hs)208 static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
209 {
210     SegmentCache qs;
211 
212     qs.base = hs->Base;
213     qs.limit = hs->Limit;
214     qs.selector = hs->Selector;
215 
216     qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
217 
218     return qs;
219 }
220 
/*
 * Write env->tsc into the virtual processor's TSC register.
 *
 * Returns 0 on success, -1 if the register write failed (the failure is
 * also reported). A failure to suspend the partition beforehand only
 * produces a warning.
 */
static int whpx_set_tsc(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    WHV_REGISTER_NAME name = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE value;
    HRESULT hr;

    /*
     * Suspending the partition before the write reduces the TSC variance
     * across vCPUs; the partition resumes automatically when the first vCPU
     * runs again. The entry point is optional, and failing to suspend is not
     * fatal: it only makes variance between vCPUs more likely, which some
     * guests handle fine.
     */
    if (whp_dispatch.WHvSuspendPartitionTime) {
        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
        if (FAILED(hr)) {
            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
        }
    }

    value.Reg64 = env->tsc;
    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &name, 1, &value);
    if (!FAILED(hr)) {
        return 0;
    }

    error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
    return -1;
}
257 
/*
 * Copy QEMU's CPU state (env) into the hypervisor's virtual processor.
 *
 * @cpu:   vCPU to update; it must be stopped or be the caller's own vCPU.
 * @level: when >= WHPX_SET_RESET_STATE the TSC is written as well.
 *
 * The values array is filled positionally, in the order of
 * whpx_register_names[]; the asserts check that the walk stays in step with
 * that table. A failed register write is reported but not returned to the
 * caller.
 */
static void whpx_set_registers(CPUState *cpu, int level)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    /* Number of XMM slots (Xmm0..Xmm15) in whpx_register_names[]. */
    const int xmm_slots = 16;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int xmm_count;
    int v86, r86;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /*
     * Following MSRs have side effects on the guest or are too heavy for
     * runtime. Limit them to full state update.
     */
    if (level >= WHPX_SET_RESET_STATE) {
        whpx_set_tsc(cpu);
    }

    memset(&vcxt, 0, sizeof(struct whpx_register_set));

    v86 = (env->eflags & VM_MASK);
    r86 = !(env->cr[0] & CR0_PE_MASK);

    /* Refresh the cached APIC values; they are written out below. */
    vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    vcxt.values[idx++].Reg64 = env->eip;

    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    vcxt.values[idx++].Reg64 = env->eflags;

    /* Translate 6+4 segment registers. HV and QEMU order matches  */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    }

    assert(idx == WHvX64RegisterLdtr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);

    assert(idx == WHvX64RegisterTr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);

    assert(idx == WHvX64RegisterIdtr);
    vcxt.values[idx].Table.Base = env->idt.base;
    vcxt.values[idx].Table.Limit = env->idt.limit;
    idx += 1;

    assert(idx == WHvX64RegisterGdtr);
    vcxt.values[idx].Table.Base = env->gdt.base;
    vcxt.values[idx].Table.Limit = env->gdt.limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    vcxt.values[idx++].Reg64 = env->cr[0];
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    vcxt.values[idx++].Reg64 = env->cr[2];
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    vcxt.values[idx++].Reg64 = env->cr[3];
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    vcxt.values[idx++].Reg64 = env->cr[4];
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    vcxt.values[idx++].Reg64 = vcpu->tpr;

    /* 8 Debug Registers - Skipped */

    /*
     * 16 XMM registers.
     *
     * env->xmm_regs may hold more entries than the 16 XMM slots of the
     * register table. Never copy more than those 16: extra iterations would
     * spill into the FP/MSR slots that follow, and since the FP loop below
     * only writes Low64, stale XMM data would be left in the upper half of
     * the FP values.
     */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + xmm_slots;
    xmm_count = sizeof(env->xmm_regs) / sizeof(env->xmm_regs[0]);
    if (xmm_count > xmm_slots) {
        xmm_count = xmm_slots;
    }
    for (i = 0; i < xmm_count; i += 1, idx += 1) {
        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
        /* vcxt.values[idx].Fp.AsUINT128.High64 =
               env->fpregs[i].mmx.MMX_Q(1);
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    /* Bits 11-13 of the status word carry the stack top (fpstt). */
    vcxt.values[idx].FpControlStatus.FpStatus =
        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    /* One tag bit per register: set when QEMU's fptags[] entry is zero. */
    vcxt.values[idx].FpControlStatus.FpTag = 0;
    for (i = 0; i < 8; ++i) {
        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    }
    vcxt.values[idx].FpControlStatus.Reserved = 0;
    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    vcxt.values[idx++].Reg64 = env->efer;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    vcxt.values[idx++].Reg64 = env->kernelgsbase;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    vcxt.values[idx++].Reg64 = vcpu->apic_base;

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    vcxt.values[idx++].Reg64 = env->star;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    vcxt.values[idx++].Reg64 = env->lstar;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    vcxt.values[idx++].Reg64 = env->cstar;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    vcxt.values[idx++].Reg64 = env->fmask;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* Every slot of the table must have been visited exactly once. */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
                     hr);
    }
}
427 
/*
 * Read the virtual processor's TSC register into env->tsc.
 *
 * Returns 0 on success. On failure the error is reported, env->tsc is left
 * untouched and -1 is returned.
 */
static int whpx_get_tsc(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    WHV_REGISTER_NAME name = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE value;
    HRESULT hr;

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &name, 1, &value);
    if (!FAILED(hr)) {
        env->tsc = value.Reg64;
        return 0;
    }

    error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
    return -1;
}
446 
/*
 * Copy the hypervisor's virtual processor state into QEMU's CPU state (env).
 *
 * @cpu: vCPU to read; it must be stopped or be the caller's own vCPU.
 *
 * The values array is read positionally, in the order of
 * whpx_register_names[]; the asserts check that the walk stays in step with
 * that table. If the batched register read fails, the error is reported and
 * env is left as it was (apart from the TSC handling that precedes the
 * read), because the value buffer holds no valid data in that case.
 */
static void whpx_get_registers(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    /* Number of XMM slots (Xmm0..Xmm15) in whpx_register_names[]. */
    const int xmm_slots = 16;
    uint64_t tpr, apic_base;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int xmm_count;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* Only treat the TSC as valid (cached) while the VM is not running. */
    if (!env->tsc_valid) {
        whpx_get_tsc(cpu);
        env->tsc_valid = !runstate_is_running();
    }

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                     hr);
        /* vcxt is uninitialized here; do not copy garbage into env. */
        return;
    }

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        env->regs[idx] = vcxt.values[idx].Reg64;
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    env->eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    env->eflags = vcxt.values[idx++].Reg64;

    /* Translate 6+4 segment registers. HV and QEMU order matches  */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    }

    assert(idx == WHvX64RegisterLdtr);
    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterTr);
    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterIdtr);
    env->idt.base = vcxt.values[idx].Table.Base;
    env->idt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;
    assert(idx == WHvX64RegisterGdtr);
    env->gdt.base = vcxt.values[idx].Table.Base;
    env->gdt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    env->cr[0] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    env->cr[2] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    env->cr[3] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    env->cr[4] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    tpr = vcxt.values[idx++].Reg64;
    /* Only push the TPR to the APIC model when it actually changed. */
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }

    /* 8 Debug Registers - Skipped */

    /*
     * 16 XMM registers.
     *
     * env->xmm_regs may hold more entries than the 16 XMM slots of the
     * register table. Never read more than those 16: extra iterations would
     * overwrite the additional xmm_regs entries with the FP and MSR values
     * stored in the slots that follow.
     */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + xmm_slots;
    xmm_count = sizeof(env->xmm_regs) / sizeof(env->xmm_regs[0]);
    if (xmm_count > xmm_slots) {
        xmm_count = xmm_slots;
    }
    for (i = 0; i < xmm_count; i += 1, idx += 1) {
        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
        /* env->fpregs[i].mmx.MMX_Q(1) =
               vcxt.values[idx].Fp.AsUINT128.High64;
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    /* Bits 11-13 of the status word carry the stack top (fpstt). */
    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    /* QEMU's fptags[] entry is the inverse of the hypervisor's tag bit. */
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    }
    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    env->efer = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    env->kernelgsbase = vcxt.values[idx++].Reg64;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    apic_base = vcxt.values[idx++].Reg64;
    /* Only push the APIC base to the APIC model when it actually changed. */
    if (apic_base != vcpu->apic_base) {
        vcpu->apic_base = apic_base;
        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    }

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    env->sysenter_cs = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    env->sysenter_eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    env->sysenter_esp = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    env->star = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    env->lstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    env->cstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    env->fmask = vcxt.values[idx++].Reg64;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* Every slot of the table must have been visited exactly once. */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));
}
604 
whpx_emu_ioport_callback(void * ctx,WHV_EMULATOR_IO_ACCESS_INFO * IoAccess)605 static HRESULT CALLBACK whpx_emu_ioport_callback(
606     void *ctx,
607     WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
608 {
609     MemTxAttrs attrs = { 0 };
610     address_space_rw(&address_space_io, IoAccess->Port, attrs,
611                      &IoAccess->Data, IoAccess->AccessSize,
612                      IoAccess->Direction);
613     return S_OK;
614 }
615 
whpx_emu_mmio_callback(void * ctx,WHV_EMULATOR_MEMORY_ACCESS_INFO * ma)616 static HRESULT CALLBACK whpx_emu_mmio_callback(
617     void *ctx,
618     WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
619 {
620     cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
621                            ma->Direction);
622     return S_OK;
623 }
624 
whpx_emu_getreg_callback(void * ctx,const WHV_REGISTER_NAME * RegisterNames,UINT32 RegisterCount,WHV_REGISTER_VALUE * RegisterValues)625 static HRESULT CALLBACK whpx_emu_getreg_callback(
626     void *ctx,
627     const WHV_REGISTER_NAME *RegisterNames,
628     UINT32 RegisterCount,
629     WHV_REGISTER_VALUE *RegisterValues)
630 {
631     HRESULT hr;
632     struct whpx_state *whpx = &whpx_global;
633     CPUState *cpu = (CPUState *)ctx;
634 
635     hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
636         whpx->partition, cpu->cpu_index,
637         RegisterNames, RegisterCount,
638         RegisterValues);
639     if (FAILED(hr)) {
640         error_report("WHPX: Failed to get virtual processor registers,"
641                      " hr=%08lx", hr);
642     }
643 
644     return hr;
645 }
646 
whpx_emu_setreg_callback(void * ctx,const WHV_REGISTER_NAME * RegisterNames,UINT32 RegisterCount,const WHV_REGISTER_VALUE * RegisterValues)647 static HRESULT CALLBACK whpx_emu_setreg_callback(
648     void *ctx,
649     const WHV_REGISTER_NAME *RegisterNames,
650     UINT32 RegisterCount,
651     const WHV_REGISTER_VALUE *RegisterValues)
652 {
653     HRESULT hr;
654     struct whpx_state *whpx = &whpx_global;
655     CPUState *cpu = (CPUState *)ctx;
656 
657     hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
658         whpx->partition, cpu->cpu_index,
659         RegisterNames, RegisterCount,
660         RegisterValues);
661     if (FAILED(hr)) {
662         error_report("WHPX: Failed to set virtual processor registers,"
663                      " hr=%08lx", hr);
664     }
665 
666     /*
667      * The emulator just successfully wrote the register state. We clear the
668      * dirty state so we avoid the double write on resume of the VP.
669      */
670     cpu->vcpu_dirty = false;
671 
672     return hr;
673 }
674 
whpx_emu_translate_callback(void * ctx,WHV_GUEST_VIRTUAL_ADDRESS Gva,WHV_TRANSLATE_GVA_FLAGS TranslateFlags,WHV_TRANSLATE_GVA_RESULT_CODE * TranslationResult,WHV_GUEST_PHYSICAL_ADDRESS * Gpa)675 static HRESULT CALLBACK whpx_emu_translate_callback(
676     void *ctx,
677     WHV_GUEST_VIRTUAL_ADDRESS Gva,
678     WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
679     WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
680     WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
681 {
682     HRESULT hr;
683     struct whpx_state *whpx = &whpx_global;
684     CPUState *cpu = (CPUState *)ctx;
685     WHV_TRANSLATE_GVA_RESULT res;
686 
687     hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
688                                       Gva, TranslateFlags, &res, Gpa);
689     if (FAILED(hr)) {
690         error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
691     } else {
692         *TranslationResult = res.ResultCode;
693     }
694 
695     return hr;
696 }
697 
/*
 * Callback table that binds the instruction emulator's port I/O, memory,
 * register access and address translation hooks to the functions above.
 * NOTE(review): the emulator is created outside the visible part of the
 * file; presumably this table is passed at creation time - confirm.
 */
static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
};
706 
/*
 * Run the instruction emulator on a memory access exit.
 *
 * @cpu: vCPU that took the exit; also handed to the emulator as the
 *       callback context.
 * @ctx: memory access context of the exit.
 *
 * Returns 0 when the access was emulated, -1 (after reporting the error)
 * when the emulator call failed or the emulation was not successful.
 */
static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS status;
    HRESULT hr;

    hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
        vcpu->emulator, cpu,
        &vcpu->exit_ctx.VpContext, ctx,
        &status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
        return -1;
    }

    if (status.EmulationSuccessful) {
        return 0;
    }

    error_report("WHPX: Failed to emulate MMIO access with"
                 " EmulatorReturnStatus: %u", status.AsUINT32);
    return -1;
}
730 
/*
 * Run the instruction emulator on a port I/O exit.
 *
 * @cpu: vCPU that took the exit; also handed to the emulator as the
 *       callback context.
 * @ctx: port I/O access context of the exit.
 *
 * Returns 0 when the access was emulated, -1 (after reporting the error)
 * when the emulator call failed or the emulation was not successful.
 */
static int whpx_handle_portio(CPUState *cpu,
                              WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS status;
    HRESULT hr;

    hr = whp_dispatch.WHvEmulatorTryIoEmulation(
        vcpu->emulator, cpu,
        &vcpu->exit_ctx.VpContext, ctx,
        &status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
        return -1;
    }

    if (status.EmulationSuccessful) {
        return 0;
    }

    error_report("WHPX: Failed to emulate PortIO access with"
                 " EmulatorReturnStatus: %u", status.AsUINT32);
    return -1;
}
755 
/*
 * Handle a halt exit.
 *
 * The vCPU is marked halted (and EXCP_HLT recorded) unless there is work
 * that should wake it right away: a hard interrupt while IF is set, or an
 * NMI. The check runs under the iothread lock.
 *
 * Returns 1 when the vCPU was halted, 0 otherwise.
 */
static int whpx_handle_halt(CPUState *cpu)
{
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    bool irq_deliverable;
    bool nmi_pending;
    int halted = 0;

    qemu_mutex_lock_iothread();

    irq_deliverable = (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                      (env->eflags & IF_MASK);
    nmi_pending = (cpu->interrupt_request & CPU_INTERRUPT_NMI) != 0;

    if (!irq_deliverable && !nmi_pending) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        halted = 1;
    }

    qemu_mutex_unlock_iothread();

    return halted;
}
773 
/*
 * Prepare interrupt-related register state for @cpu; called from the run
 * loop in whpx_vcpu_run().
 *
 * Under the iothread lock this collects up to three register updates - a
 * pending interruption (NMI or hard interrupt), a changed TPR (CR8) and a
 * request for an interrupt deliverability notification - and, after the
 * lock is dropped, writes them to the hypervisor in a single call. It may
 * also set cpu->exit_request so that the vCPU leaves its inner loop.
 * A failed register write is reported but not returned to the caller.
 */
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    /* At most three registers are queued below, one per update kind. */
    UINT32 reg_count = 0;
    WHV_REGISTER_VALUE reg_values[3];
    WHV_REGISTER_NAME reg_names[3];

    memset(&new_int, 0, sizeof(new_int));
    memset(reg_values, 0, sizeof(reg_values));

    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interruption_pending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            /* Blocks the hard interrupt path below for this pass. */
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            new_int.InterruptionVector = 2;
        }
        /* An SMI request is only acknowledged here; nothing is injected. */
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!vcpu->interruption_pending &&
        vcpu->interruptable && (env->eflags & IF_MASK)) {
        /* An NMI queued above clears interruptable, so none can be set. */
        assert(!new_int.InterruptionPending);
        if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            irq = cpu_get_pic_interrupt(env);
            /* A negative result means there is no vector to inject. */
            if (irq >= 0) {
                new_int.InterruptionType = WHvX64PendingInterrupt;
                new_int.InterruptionPending = 1;
                new_int.InterruptionVector = irq;
            }
        }
    }

    /* Setup interrupt state if new one was prepared */
    if (new_int.InterruptionPending) {
        reg_values[reg_count].PendingInterruption = new_int;
        reg_names[reg_count] = WHvRegisterPendingInterruption;
        reg_count += 1;
    }

    /* Sync the TPR to the CR8 if it was modified during the intercept */
    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /*
     * Update the state of the interrupt delivery notification: request one
     * if a hard interrupt is still pending and none has been requested yet.
     */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        reg_values[reg_count].DeliverabilityNotifications.InterruptNotification
            = 1;
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();

    /* Write everything queued above in one call, outside the lock. */
    if (reg_count) {
        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
            whpx->partition, cpu->cpu_index,
            reg_names, reg_count, reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }

    return;
}
878 
/*
 * Pick up the state reported in the vCPU's exit context.
 *
 * Copies RFLAGS into env, forwards a changed CR8 to the APIC as the new TPR
 * (under the iothread lock) and refreshes the cached "interruption pending"
 * and "interruptable" flags from the execution state.
 */
static void whpx_vcpu_post_run(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    uint64_t exit_tpr;

    env->eflags = vcpu->exit_ctx.VpContext.Rflags;

    exit_tpr = vcpu->exit_ctx.VpContext.Cr8;
    if (exit_tpr != vcpu->tpr) {
        vcpu->tpr = exit_tpr;
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
        qemu_mutex_unlock_iothread();
    }

    vcpu->interruption_pending =
        vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;

    /* Interrupts cannot be injected while in an interrupt shadow. */
    vcpu->interruptable =
        !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
}
903 
/*
 * Handle interrupt requests that must be processed before the vCPU runs:
 * INIT (outside SMM), APIC polling, wake-up from halt, SIPI and TPR access
 * reports, in that order.
 *
 * cpu->interrupt_request is re-read for every step because the handlers
 * called here may change it.
 */
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    bool wake_up;

    /* INIT is not processed while the CPU is in SMM. */
    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        if (!(env->hflags & HF_SMM_MASK)) {
            whpx_cpu_synchronize_state(cpu);
            do_cpu_init(x86_cpu);
            vcpu->interruptable = true;
        }
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* A deliverable hard interrupt or an NMI ends the halted state. */
    wake_up = (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
              (env->eflags & IF_MASK);
    if (!wake_up) {
        wake_up = (cpu->interrupt_request & CPU_INTERRUPT_NMI) != 0;
    }
    if (wake_up) {
        cpu->halted = false;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        whpx_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }
}
942 
/*
 * Run the virtual processor and service its exits until one of them has to
 * be handled by the caller.
 *
 * Pending asynchronous events are processed first; if the CPU is halted
 * afterwards, EXCP_HLT is recorded in cpu->exception_index and the function
 * returns 0 without entering the hypervisor.  Otherwise the iothread lock is
 * released for the duration of the run loop and taken again before
 * returning.
 *
 * The loop keeps running while the per-exit status is 0.  A positive status
 * leaves the loop normally; a negative one (failed WHvRunVirtualProcessor()
 * call or negative handler result) is reported to the caller.
 *
 * Returns non-zero if the loop ended with a negative status, 0 otherwise.
 */
static int whpx_vcpu_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    int ret;

    whpx_vcpu_process_async_events(cpu);
    if (cpu->halted) {
        cpu->exception_index = EXCP_HLT;
        qatomic_set(&cpu->exit_request, false);
        return 0;
    }

    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);

    do {
        /* Push QEMU-side register changes before entering the guest. */
        if (cpu->vcpu_dirty) {
            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
            cpu->vcpu_dirty = false;
        }

        whpx_vcpu_pre_run(cpu);

        /* An exit was requested: cancel the upcoming run. */
        if (qatomic_read(&cpu->exit_request)) {
            whpx_vcpu_kick(cpu);
        }

        hr = whp_dispatch.WHvRunVirtualProcessor(
            whpx->partition, cpu->cpu_index,
            &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));

        if (FAILED(hr)) {
            error_report("WHPX: Failed to exec a virtual processor,"
                         " hr=%08lx", hr);
            ret = -1;
            break;
        }

        whpx_vcpu_post_run(cpu);

        switch (vcpu->exit_ctx.ExitReason) {
        case WHvRunVpExitReasonMemoryAccess:
            ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
            break;

        case WHvRunVpExitReasonX64IoPortAccess:
            ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
            break;

        case WHvRunVpExitReasonX64InterruptWindow:
            /* The notification fired; pre_run may register it again. */
            vcpu->window_registered = 0;
            ret = 0;
            break;

        case WHvRunVpExitReasonX64Halt:
            ret = whpx_handle_halt(cpu);
            break;

        case WHvRunVpExitReasonCanceled:
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;

        case WHvRunVpExitReasonX64MsrAccess: {
            WHV_REGISTER_VALUE reg_values[3] = {0};
            WHV_REGISTER_NAME reg_names[3];
            UINT32 reg_count;

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRdx;

            /* Skip over the faulting RDMSR/WRMSR instruction. */
            reg_values[0].Reg64 =
                vcpu->exit_ctx.VpContext.Rip +
                vcpu->exit_ctx.VpContext.InstructionLength;

            /*
             * For all unsupported MSR access we:
             *     ignore writes
             *     return 0 on read.
             *
             * A write therefore only updates RIP, while a read also stores
             * the zeroed RAX/RDX values.
             */
            reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
                        1 : 3;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition,
                cpu->cpu_index,
                reg_names, reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set MsrAccess state "
                             " registers, hr=%08lx", hr);
            }
            ret = 0;
            break;
        }
        case WHvRunVpExitReasonX64Cpuid: {
            WHV_REGISTER_VALUE reg_values[5];
            WHV_REGISTER_NAME reg_names[5];
            UINT32 reg_count = 5;
            UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
            uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;

            memset(reg_values, 0, sizeof(reg_values));

            rip = vcpu->exit_ctx.VpContext.Rip +
                  vcpu->exit_ctx.VpContext.InstructionLength;
            cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;

            /*
             * Ideally, these should be supplied to the hypervisor during VCPU
             * initialization and it should be able to satisfy this request.
             * But, currently, WHPX doesn't support setting CPUID values in the
             * hypervisor once the partition has been setup, which is too late
             * since VCPUs are realized later. For now, use the values from
             * QEMU to satisfy these requests, until WHPX adds support for
             * being able to set these values in the hypervisor at runtime.
             *
             * Use properly typed 32-bit temporaries rather than writing
             * through casted pointers into the 64-bit locals.
             */
            cpu_x86_cpuid(env, cpuid_fn, 0, &eax, &ebx, &ecx, &edx);
            rax = eax;
            rbx = ebx;
            rcx = ecx;
            rdx = edx;

            switch (cpuid_fn) {
            case 0x40000000:
                /* Expose the vmware cpu frequency cpuid leaf */
                rax = 0x40000010;
                rbx = rcx = rdx = 0;
                break;

            case 0x40000010:
                rax = env->tsc_khz;
                rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
                rcx = rdx = 0;
                break;

            case 0x80000001:
                /* Remove any support of OSVW */
                rcx &= ~CPUID_EXT3_OSVW;
                break;
            }

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRcx;
            reg_names[3] = WHvX64RegisterRdx;
            reg_names[4] = WHvX64RegisterRbx;

            reg_values[0].Reg64 = rip;
            reg_values[1].Reg64 = rax;
            reg_values[2].Reg64 = rcx;
            reg_values[3].Reg64 = rdx;
            reg_values[4].Reg64 = rbx;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition, cpu->cpu_index,
                reg_names,
                reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set CpuidAccess state registers,"
                             " hr=%08lx", hr);
            }
            ret = 0;
            break;
        }
        case WHvRunVpExitReasonNone:
        case WHvRunVpExitReasonUnrecoverableException:
        case WHvRunVpExitReasonInvalidVpRegisterValue:
        case WHvRunVpExitReasonUnsupportedFeature:
        case WHvRunVpExitReasonException:
        default:
            error_report("WHPX: Unexpected VP exit code %d",
                         vcpu->exit_ctx.ExitReason);
            whpx_get_registers(cpu);
            qemu_mutex_lock_iothread();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            qemu_mutex_unlock_iothread();
            /*
             * Give the loop condition a defined value.  Previously 'ret' was
             * left untouched here, i.e. uninitialized if this was the first
             * exit and 0 otherwise.  Keep the "continue looping" behaviour;
             * NOTE(review): this presumably relies on the panic path
             * requesting a CPU exit so the next run is cancelled - confirm.
             */
            ret = 0;
            break;
        }

    } while (!ret);

    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
    current_cpu = cpu;

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}
1136 
/*
 * run_on_cpu() worker: fetch the registers from the hypervisor unless the
 * QEMU-side copy is already marked dirty, then mark it dirty.  @arg is
 * unused.
 */
static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (cpu->vcpu_dirty) {
        /* QEMU already holds the authoritative copy; nothing to fetch. */
        return;
    }

    whpx_get_registers(cpu);
    cpu->vcpu_dirty = true;
}
1144 
/*
 * run_on_cpu() worker: write the register state to the hypervisor using the
 * WHPX_SET_RESET_STATE level and clear the dirty flag.  @arg is unused.
 */
static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
    cpu->vcpu_dirty = false;
}
1151 
/*
 * run_on_cpu() worker: write the register state to the hypervisor using the
 * WHPX_SET_FULL_STATE level and clear the dirty flag.  @arg is unused.
 */
static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
    cpu->vcpu_dirty = false;
}
1158 
/*
 * run_on_cpu() worker: mark the QEMU-side register copy dirty so that it is
 * written to the hypervisor before the vCPU runs again (see the vcpu_dirty
 * check in whpx_vcpu_run()).  @arg is unused.
 */
static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
1164 
1165 /*
1166  * CPU support.
1167  */
1168 
/*
 * Make sure QEMU's copy of the vCPU registers is current.  If it is already
 * marked dirty nothing is done; otherwise the fetch is dispatched to the
 * vCPU through run_on_cpu().
 */
void whpx_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
1175 
/*
 * Dispatch do_whpx_cpu_synchronize_post_reset() to @cpu through
 * run_on_cpu(), writing the reset-level register state to the hypervisor.
 */
void whpx_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
1180 
/*
 * Dispatch do_whpx_cpu_synchronize_post_init() to @cpu through
 * run_on_cpu(), writing the full register state to the hypervisor.
 */
void whpx_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
1185 
/*
 * Dispatch do_whpx_cpu_synchronize_pre_loadvm() to @cpu through
 * run_on_cpu(), marking QEMU's register copy as dirty.
 */
void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
1190 
1191 /*
1192  * Vcpu support.
1193  */
1194 
/*
 * Migration blocker shared by all vCPUs; created on the first call to
 * whpx_init_vcpu() and NULL until then.
 */
static Error *whpx_migration_blocker;
1196 
/*
 * VM run-state change handler.  @opaque is the CPUX86State registered in
 * whpx_init_vcpu(); whenever the VM starts running its tsc_valid flag is
 * cleared.  @state is not inspected.
 */
static void whpx_cpu_update_state(void *opaque, int running, RunState state)
{
    CPUX86State *x86_env = opaque;

    if (!running) {
        return;
    }

    x86_env->tsc_valid = false;
}
1205 
/*
 * Create and initialise the WHPX state backing @cpu.
 *
 * On the first call a global migration blocker is installed.  Then the
 * per-vCPU context is allocated, an instruction emulator and the virtual
 * processor are created, the TSC and APIC bus frequencies are queried, and
 * (if the vmware-cpuid-freq option is set and a TSC frequency is known) the
 * partition's CPUID exit list is extended.  Finally the context is stored
 * in cpu->hax_vcpu and a VM run-state handler is registered.
 *
 * Returns 0 on success or a negative errno value on failure; on failure
 * every resource created by this call is released again.
 */
int whpx_init_vcpu(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = NULL;
    Error *local_error = NULL;
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    UINT64 freq = 0;
    int ret;

    /*
     * Add migration blockers for all unsupported features of the
     * Windows Hypervisor Platform
     */
    if (whpx_migration_blocker == NULL) {
        error_setg(&whpx_migration_blocker,
               "State blocked due to non-migratable CPUID feature support,"
               " dirty memory tracking support, and XSAVE/XRSTOR support");

        (void)migrate_add_blocker(whpx_migration_blocker, &local_error);
        if (local_error) {
            error_report_err(local_error);
            migrate_del_blocker(whpx_migration_blocker);
            error_free(whpx_migration_blocker);
            /* Do not leave a dangling pointer for the next caller. */
            whpx_migration_blocker = NULL;
            ret = -EINVAL;
            goto error;
        }
    }

    /* g_malloc0() aborts on allocation failure, so no NULL check is needed. */
    vcpu = g_malloc0(sizeof(*vcpu));

    hr = whp_dispatch.WHvEmulatorCreateEmulator(
        &whpx_emu_callbacks,
        &vcpu->emulator);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to setup instruction completion support,"
                     " hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvCreateVirtualProcessor(
        whpx->partition, cpu->cpu_index, 0);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to create a virtual processor,"
                     " hr=%08lx", hr);
        ret = -EINVAL;
        goto error_destroy_emulator;
    }

    /*
     * vcpu's TSC frequency is either specified by user, or use the value
     * provided by Hyper-V if the former is not present. In the latter case, we
     * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
     * frequency can be migrated later via this field.
     */
    if (!env->tsc_khz) {
        hr = whp_dispatch.WHvGetCapability(
            WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
                NULL);
        /* An unknown capability is silently tolerated. */
        if (hr != WHV_E_UNKNOWN_CAPABILITY) {
            if (FAILED(hr)) {
                warn_report("WHPX: Failed to query tsc frequency,"
                            " hr=0x%08lx", hr);
            } else {
                env->tsc_khz = freq / 1000; /* Hz to KHz */
            }
        }
    }

    /* Start from the default bus frequency; override it if reported. */
    env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
    if (hr != WHV_E_UNKNOWN_CAPABILITY) {
        if (FAILED(hr)) {
            warn_report("WHPX: Failed to query apic bus frequency"
                        " hr=0x%08lx", hr);
        } else {
            env->apic_bus_freq = freq;
        }
    }

    /*
     * If the vmware cpuid frequency leaf option is set, and we have a valid
     * tsc value, trap the corresponding cpuid's.
     */
    if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
        UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};

        hr = whp_dispatch.WHvSetPartitionProperty(
                whpx->partition,
                WHvPartitionPropertyCodeCpuidExitList,
                cpuidExitList,
                RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));

        if (FAILED(hr)) {
            error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                        hr);
            ret = -EINVAL;
            goto error_delete_vp;
        }
    }

    vcpu->interruptable = true;
    cpu->vcpu_dirty = true;
    cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);

    return 0;

    /* Unwind in reverse order of creation. */
error_delete_vp:
    whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
error_destroy_emulator:
    whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
error:
    g_free(vcpu);

    return ret;
}
1326 
/*
 * Keep running the vCPU until an exception index of at least
 * EXCP_INTERRUPT is pending, then consume it (resetting the field to -1)
 * and return it.  A fatal result from whpx_vcpu_run() aborts the process.
 */
int whpx_vcpu_exec(CPUState *cpu)
{
    int pending;

    while (cpu->exception_index < EXCP_INTERRUPT) {
        if (whpx_vcpu_run(cpu)) {
            error_report("WHPX: Failed to exec a virtual processor");
            abort();
        }
    }

    pending = cpu->exception_index;
    cpu->exception_index = -1;

    return pending;
}
1349 
/*
 * Tear down the WHPX state of @cpu: delete the virtual processor, destroy
 * its instruction emulator and free the per-vCPU context stored in
 * cpu->hax_vcpu.  The pointer is cleared afterwards so that no stale
 * reference to the freed context remains.
 */
void whpx_destroy_vcpu(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);

    whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
    whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
    g_free(cpu->hax_vcpu);
    cpu->hax_vcpu = NULL;
}
1360 
/*
 * Ask the hypervisor to cancel the current (or next) run of @cpu's virtual
 * processor.  The result of the call is not checked.
 */
void whpx_vcpu_kick(CPUState *cpu)
{
    WHV_PARTITION_HANDLE partition = whpx_global.partition;

    whp_dispatch.WHvCancelRunVirtualProcessor(partition, cpu->cpu_index, 0);
}
1367 
1368 /*
1369  * Memory support.
1370  */
1371 
/*
 * Map (@add != 0) or unmap (@add == 0) a guest-physical range in the
 * partition.
 *
 * @start_pa: guest-physical start address of the range
 * @size:     length of the range in bytes
 * @host_va:  host address backing the range (used for mapping only)
 * @rom:      when non-zero the range is mapped without write permission
 * @name:     region name, used in the error message only
 *
 * Failures are reported but not propagated to the caller.
 */
static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
                                void *host_va, int add, int rom,
                                const char *name)
{
    struct whpx_state *whpx = &whpx_global;
    HRESULT hr;

    if (!add) {
        hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
                                           start_pa,
                                           size);
    } else {
        /* Everything is readable and executable; only RAM is writable. */
        WHV_MAP_GPA_RANGE_FLAGS flags =
            WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute;

        if (!rom) {
            flags |= WHvMapGpaRangeFlagWrite;
        }

        hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
                                         host_va,
                                         start_pa,
                                         size,
                                         flags);
    }

    if (!FAILED(hr)) {
        return;
    }

    error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
                 " Host:%p, hr=%08lx",
                 (add ? "MAP" : "UNMAP"), name,
                 (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
}
1411 
/*
 * Map (@add != 0) or unmap (@add == 0) the RAM backing @section.
 *
 * Non-RAM sections are ignored.  The range is trimmed to host page
 * granularity first: the start is rounded up to the next page boundary and
 * the length is rounded down to a whole number of pages.  Nothing is done
 * if no full page remains.
 */
static void whpx_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *region = section->mr;
    hwaddr gpa = section->offset_within_address_space;
    ram_addr_t len = int128_get64(section->size);
    unsigned int skip;
    uint64_t hva;

    if (!memory_region_is_ram(region)) {
        return;
    }

    /* Bytes to skip so that the start lands on a host page boundary. */
    skip = (qemu_real_host_page_size - (gpa & ~qemu_real_host_page_mask))
           & ~qemu_real_host_page_mask;
    if (skip > len) {
        return;
    }

    gpa += skip;
    len = (len - skip) & qemu_real_host_page_mask;
    if (len == 0 || (gpa & ~qemu_real_host_page_mask) != 0) {
        return;
    }

    /* Host address corresponding to the adjusted guest-physical start. */
    hva = (uintptr_t)memory_region_get_ram_ptr(region)
        + section->offset_within_region + skip;

    whpx_update_mapping(gpa, len, (void *)(uintptr_t)hva, add,
                        memory_region_is_rom(region), region->name);
}
1442 
/*
 * MemoryListener .region_add hook: take a reference on the section's memory
 * region, then map the section into the partition.  @listener is unused.
 */
static void whpx_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    MemoryRegion *region = section->mr;

    memory_region_ref(region);
    whpx_process_section(section, 1);
}
1449 
/*
 * MemoryListener .region_del hook: unmap the section from the partition,
 * then drop the reference on its memory region.  @listener is unused.
 */
static void whpx_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    const int unmap = 0;

    whpx_process_section(section, unmap);
    memory_region_unref(section->mr);
}
1456 
/* MemoryListener .begin hook: intentionally empty. */
static void whpx_transaction_begin(MemoryListener *listener)
{
}
1460 
/* MemoryListener .commit hook: intentionally empty. */
static void whpx_transaction_commit(MemoryListener *listener)
{
}
1464 
/*
 * MemoryListener .log_sync hook.  For RAM sections the range
 * [0, section size) of the memory region is marked dirty; other sections
 * are ignored.  @listener is unused.
 */
static void whpx_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    MemoryRegion *region = section->mr;

    if (memory_region_is_ram(region)) {
        memory_region_set_dirty(region, 0, int128_get64(section->size));
    }
}
1476 
/*
 * Memory listener wiring the whpx_* hooks defined in this file into the
 * memory API; registered by whpx_memory_init().
 */
static MemoryListener whpx_memory_listener = {
    .begin = whpx_transaction_begin,
    .commit = whpx_transaction_commit,
    .region_add = whpx_region_add,
    .region_del = whpx_region_del,
    .log_sync = whpx_log_sync,
    .priority = 10,
};
1485 
/* Register the WHPX memory listener on the system memory address space. */
static void whpx_memory_init(void)
{
    memory_listener_register(&whpx_memory_listener, &address_space_memory);
}
1490 
1491 /*
1492  * Load the functions from the given library, using the given handle. If a
1493  * handle is provided, it is used, otherwise the library is opened. The
1494  * handle will be updated on return with the opened one.
1495  */
/*
 * Resolve one group of WHP entry points into whp_dispatch.
 *
 * @handle:        in/out module handle.  A non-NULL value is used as is;
 *                 otherwise the DLL matching @function_list is loaded.
 * @function_list: which group of functions to resolve.
 *
 * Functions of the default groups are mandatory; a missing one is an error.
 * Functions of the supplemental group are optional and may be left NULL.
 *
 * Returns true on success, with *handle set to the module in use.  On
 * failure the module is released and *handle is reset to NULL, so the
 * caller never keeps a handle to a library that has been freed here.
 */
static bool load_whp_dispatch_fns(HMODULE *handle,
    WHPFunctionList function_list)
{
    HMODULE hLib = *handle;

    #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
    #define WINHV_EMULATION_DLL "WinHvEmulation.dll"

    /* Resolve an optional entry point; NULL is acceptable. */
    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name);

    /* Resolve a mandatory entry point; bail out if it is missing. */
    #define WHP_LOAD_FIELD(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name); \
        if (!whp_dispatch.function_name) { \
            error_report("Could not load function %s", #function_name); \
            goto error; \
        }

    /* Load the library unless the caller already supplied a handle. */
    #define WHP_LOAD_LIB(lib_name, handle_lib) \
    if (!handle_lib) { \
        handle_lib = LoadLibrary(lib_name); \
        if (!handle_lib) { \
            error_report("Could not load library %s.", lib_name); \
            goto error; \
        } \
    }

    switch (function_list) {
    case WINHV_PLATFORM_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_EMULATION_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
        break;
    }

    *handle = hLib;
    return true;

error:
    if (hLib) {
        FreeLibrary(hLib);
    }
    /* The module (if any) is gone; make sure the caller sees that. */
    *handle = NULL;

    return false;
}

/* The helper macros are private to load_whp_dispatch_fns(). */
#undef WHP_LOAD_LIB
#undef WHP_LOAD_FIELD
#undef WHP_LOAD_FIELD_OPTIONAL
#undef WINHV_EMULATION_DLL
#undef WINHV_PLATFORM_DLL
1551 
1552 /*
1553  * Partition support
1554  */
1555 
/*
 * AccelClass init_machine hook: bring up the WHPX partition for @ms.
 *
 * Loads the WHP entry points, verifies that a hypervisor is present,
 * creates the partition, configures the processor count, the extended
 * MSR/CPUID exits and the CPUID exit list, finalises the partition, and
 * then registers the memory listener and the CPU accelerator ops.
 *
 * Returns 0 on success or a negative errno value; on failure a partition
 * that was already created is deleted again.
 */
static int whpx_accel_init(MachineState *ms)
{
    struct whpx_state *whpx;
    int ret;
    HRESULT hr;
    WHV_CAPABILITY whpx_cap;
    UINT32 whpx_cap_size;
    WHV_PARTITION_PROPERTY prop;
    UINT32 cpuidExitList[] = {1, 0x80000001};

    whpx = &whpx_global;

    if (!init_whp_dispatch()) {
        ret = -ENOSYS;
        goto error;
    }

    memset(whpx, 0, sizeof(struct whpx_state));
    whpx->mem_quota = ms->ram_size;

    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeHypervisorPresent, &whpx_cap,
        sizeof(whpx_cap), &whpx_cap_size);
    if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
        error_report("WHPX: No accelerator found, hr=%08lx", hr);
        ret = -ENOSPC;
        goto error;
    }

    hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to create partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    /* One virtual processor per configured guest CPU. */
    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ProcessorCount = ms->smp.cpus;
    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeProcessorCount,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));

    if (FAILED(hr)) {
        /* Report the value that was actually requested (smp.cpus). */
        error_report("WHPX: Failed to set partition core count to %u,"
                     " hr=%08lx", ms->smp.cpus, hr);
        ret = -EINVAL;
        goto error;
    }

    /* Request exits for MSR accesses and CPUID so QEMU can handle them. */
    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ExtendedVmExits.X64MsrExit = 1;
    prop.ExtendedVmExits.X64CpuidExit = 1;
    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeExtendedVmExits,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));

    if (FAILED(hr)) {
        error_report("WHPX: Failed to enable partition extended X64MsrExit and"
                     " X64CpuidExit hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    /* CPUID leaves that should cause an exit. */
    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeCpuidExitList,
        cpuidExitList,
        RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                     hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvSetupPartition(whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    whpx_memory_init();

    cpus_register_accel(&whpx_cpus);

    printf("Windows Hypervisor Platform accelerator is operational\n");
    return 0;

error:

    if (NULL != whpx->partition) {
        whp_dispatch.WHvDeletePartition(whpx->partition);
        whpx->partition = NULL;
    }

    return ret;
}
1659 
/* Return the current value of the whpx_allowed flag. */
int whpx_enabled(void)
{
    return whpx_allowed;
}
1664 
/*
 * QOM class initialiser for the WHPX accelerator: sets the accelerator
 * name, the machine init hook and the pointer to the "allowed" flag.
 * @data is unused.
 */
static void whpx_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *accel = ACCEL_CLASS(oc);

    accel->allowed = &whpx_allowed;
    accel->init_machine = whpx_accel_init;
    accel->name = "WHPX";
}
1672 
/* QOM type description of the WHPX accelerator, derived from TYPE_ACCEL. */
static const TypeInfo whpx_accel_type = {
    .name = ACCEL_CLASS_NAME("whpx"),
    .parent = TYPE_ACCEL,
    .class_init = whpx_accel_class_init,
};
1678 
/* Register the WHPX accelerator type with QOM. */
static void whpx_type_init(void)
{
    type_register_static(&whpx_accel_type);
}
1683 
init_whp_dispatch(void)1684 bool init_whp_dispatch(void)
1685 {
1686     if (whp_dispatch_initialized) {
1687         return true;
1688     }
1689 
1690     if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
1691         goto error;
1692     }
1693 
1694     if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
1695         goto error;
1696     }
1697 
1698     assert(load_whp_dispatch_fns(&hWinHvPlatform,
1699         WINHV_PLATFORM_FNS_SUPPLEMENTAL));
1700     whp_dispatch_initialized = true;
1701 
1702     return true;
1703 error:
1704     if (hWinHvPlatform) {
1705         FreeLibrary(hWinHvPlatform);
1706     }
1707 
1708     if (hWinHvEmulation) {
1709         FreeLibrary(hWinHvEmulation);
1710     }
1711 
1712     return false;
1713 }
1714 
/* Hook whpx_type_init() into QEMU's type-registration init sequence. */
type_init(whpx_type_init);
1716