1 /*
2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
3 *
4 * Copyright Microsoft Corp. 2017
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "cpu.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "sysemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "qemu/error-report.h"
23 #include "qapi/error.h"
24 #include "migration/blocker.h"
25 #include "whp-dispatch.h"
26
27 #include "whpx-cpus.h"
28
29 #include <WinHvPlatform.h>
30 #include <WinHvEmulation.h>
31
32 #define HYPERV_APIC_BUS_FREQUENCY (200000000ULL)
33
/* Global accelerator state: the WHPX partition handle plus its memory quota */
struct whpx_state {
    uint64_t mem_quota;
    WHV_PARTITION_HANDLE partition;
};
38
/*
 * Registers synchronized between QEMU and the hypervisor on a full
 * get/set of the vCPU state.
 *
 * NOTE: the order of this table is load-bearing.  whpx_set_registers()
 * and whpx_get_registers() walk it with a running index and assert the
 * expected register name at each step; the first 16 entries must also
 * match QEMU's general-purpose register numbering.
 */
static const WHV_REGISTER_NAME whpx_register_names[] = {

    /* X64 General purpose registers */
    WHvX64RegisterRax,
    WHvX64RegisterRcx,
    WHvX64RegisterRdx,
    WHvX64RegisterRbx,
    WHvX64RegisterRsp,
    WHvX64RegisterRbp,
    WHvX64RegisterRsi,
    WHvX64RegisterRdi,
    WHvX64RegisterR8,
    WHvX64RegisterR9,
    WHvX64RegisterR10,
    WHvX64RegisterR11,
    WHvX64RegisterR12,
    WHvX64RegisterR13,
    WHvX64RegisterR14,
    WHvX64RegisterR15,
    WHvX64RegisterRip,
    WHvX64RegisterRflags,

    /* X64 Segment registers */
    WHvX64RegisterEs,
    WHvX64RegisterCs,
    WHvX64RegisterSs,
    WHvX64RegisterDs,
    WHvX64RegisterFs,
    WHvX64RegisterGs,
    WHvX64RegisterLdtr,
    WHvX64RegisterTr,

    /* X64 Table registers */
    WHvX64RegisterIdtr,
    WHvX64RegisterGdtr,

    /* X64 Control Registers */
    WHvX64RegisterCr0,
    WHvX64RegisterCr2,
    WHvX64RegisterCr3,
    WHvX64RegisterCr4,
    WHvX64RegisterCr8,

    /* X64 Debug Registers */
    /*
     * WHvX64RegisterDr0,
     * WHvX64RegisterDr1,
     * WHvX64RegisterDr2,
     * WHvX64RegisterDr3,
     * WHvX64RegisterDr6,
     * WHvX64RegisterDr7,
     */

    /* X64 Floating Point and Vector Registers */
    WHvX64RegisterXmm0,
    WHvX64RegisterXmm1,
    WHvX64RegisterXmm2,
    WHvX64RegisterXmm3,
    WHvX64RegisterXmm4,
    WHvX64RegisterXmm5,
    WHvX64RegisterXmm6,
    WHvX64RegisterXmm7,
    WHvX64RegisterXmm8,
    WHvX64RegisterXmm9,
    WHvX64RegisterXmm10,
    WHvX64RegisterXmm11,
    WHvX64RegisterXmm12,
    WHvX64RegisterXmm13,
    WHvX64RegisterXmm14,
    WHvX64RegisterXmm15,
    WHvX64RegisterFpMmx0,
    WHvX64RegisterFpMmx1,
    WHvX64RegisterFpMmx2,
    WHvX64RegisterFpMmx3,
    WHvX64RegisterFpMmx4,
    WHvX64RegisterFpMmx5,
    WHvX64RegisterFpMmx6,
    WHvX64RegisterFpMmx7,
    WHvX64RegisterFpControlStatus,
    WHvX64RegisterXmmControlStatus,

    /* X64 MSRs */
    WHvX64RegisterEfer,
#ifdef TARGET_X86_64
    WHvX64RegisterKernelGsBase,
#endif
    WHvX64RegisterApicBase,
    /* WHvX64RegisterPat, */
    WHvX64RegisterSysenterCs,
    WHvX64RegisterSysenterEip,
    WHvX64RegisterSysenterEsp,
    WHvX64RegisterStar,
#ifdef TARGET_X86_64
    WHvX64RegisterLstar,
    WHvX64RegisterCstar,
    WHvX64RegisterSfmask,
#endif

    /* Interrupt / Event Registers */
    /*
     * WHvRegisterPendingInterruption,
     * WHvRegisterInterruptState,
     * WHvRegisterPendingEvent0,
     * WHvRegisterPendingEvent1
     * WHvX64RegisterDeliverabilityNotifications,
     */
};
146
/*
 * Value buffer sized to match whpx_register_names[] one-to-one; used for
 * bulk WHvGet/SetVirtualProcessorRegisters calls.
 */
struct whpx_register_set {
    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
};
150
/* Per-vCPU accelerator state, reached through CPUState::hax_vcpu */
struct whpx_vcpu {
    WHV_EMULATOR_HANDLE emulator;
    /* True while an interrupt-window notification is registered with WHPX */
    bool window_registered;
    /* False while in an interrupt shadow (mirrors the last exit context) */
    bool interruptable;
    uint64_t tpr;
    uint64_t apic_base;
    /* Copy of ExecutionState.InterruptionPending from the last exit */
    bool interruption_pending;

    /* Must be the last field as it may have a tail */
    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
162
/* True once the WHPX accelerator has been successfully enabled */
static bool whpx_allowed;
/*
 * True once whp_dispatch has been populated with the WinHv entry points
 * (presumably by the DLL-loading code elsewhere in this file — confirm).
 */
static bool whp_dispatch_initialized;
/* Module handles for the dynamically loaded WinHv platform/emulation DLLs */
static HMODULE hWinHvPlatform, hWinHvEmulation;

struct whpx_state whpx_global;
struct WHPDispatch whp_dispatch;
169
170
171 /*
172 * VP support
173 */
174
/* Fetch the WHPX per-vCPU state stashed in the generic CPUState slot. */
static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = (struct whpx_vcpu *)cpu->hax_vcpu;

    return vcpu;
}
179
/*
 * Convert a QEMU segment descriptor cache entry to the WHPX segment
 * register layout.
 *
 * @qs:  source QEMU segment cache.
 * @v86: non-zero when the guest is in virtual-8086 mode; attributes are
 *       then forced to a present, ring-3, non-system segment.
 * @r86: non-zero when the guest is in real mode (CR0.PE clear).
 *
 * Returns the translated WHV_X64_SEGMENT_REGISTER by value.
 */
static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
                                             int r86)
{
    WHV_X64_SEGMENT_REGISTER hs;
    unsigned flags = qs->flags;

    hs.Base = qs->base;
    hs.Limit = qs->limit;
    hs.Selector = qs->selector;

    if (v86) {
        /* Clear the packed attribute word first, then set the bit-fields */
        hs.Attributes = 0;
        hs.SegmentType = 3;
        hs.Present = 1;
        hs.DescriptorPrivilegeLevel = 3;
        hs.NonSystemSegment = 1;

    } else {
        /* QEMU stores the descriptor attribute bits above DESC_TYPE_SHIFT */
        hs.Attributes = (flags >> DESC_TYPE_SHIFT);

        if (r86) {
            /* hs.Base &= 0xfffff; */
        }
    }

    return hs;
}
207
whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER * hs)208 static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
209 {
210 SegmentCache qs;
211
212 qs.base = hs->Base;
213 qs.limit = hs->Limit;
214 qs.selector = hs->Selector;
215
216 qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
217
218 return qs;
219 }
220
/*
 * Write the QEMU-side TSC value into the hypervisor for this vCPU.
 * Returns 0 on success, -1 if the register write failed.
 */
static int whpx_set_tsc(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    WHV_REGISTER_NAME name = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE value;
    HRESULT hr;

    /*
     * Suspend the partition prior to setting the TSC to reduce the variance
     * in TSC across vCPUs. When the first vCPU runs post suspend, the
     * partition is automatically resumed.
     */
    if (whp_dispatch.WHvSuspendPartitionTime) {
        /*
         * A failed suspend is not fatal: it merely raises the chance of
         * TSC skew between vCPUs, which many guest OSes tolerate.
         */
        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
        if (FAILED(hr)) {
            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
        }
    }

    value.Reg64 = env->tsc;
    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &name, 1, &value);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
        return -1;
    }

    return 0;
}
257
/*
 * Push the QEMU CPU state into the hypervisor's virtual processor.
 *
 * @cpu:   vCPU whose state is written out; must be stopped or current.
 * @level: WHPX_SET_* level; MSRs with side effects (currently the TSC)
 *         are only written at WHPX_SET_RESET_STATE or above.
 *
 * Values are marshalled into a whpx_register_set whose slots correspond
 * one-to-one with whpx_register_names[]; the asserts below verify that
 * correspondence as the running index advances.
 */
static void whpx_set_registers(CPUState *cpu, int level)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int v86, r86;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /*
     * Following MSRs have side effects on the guest or are too heavy for
     * runtime. Limit them to full state update.
     */
    if (level >= WHPX_SET_RESET_STATE) {
        whpx_set_tsc(cpu);
    }

    memset(&vcxt, 0, sizeof(struct whpx_register_set));

    /* Segment translation needs the virtual-8086 and real-mode flags */
    v86 = (env->eflags & VM_MASK);
    r86 = !(env->cr[0] & CR0_PE_MASK);

    vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    vcxt.values[idx++].Reg64 = env->eip;

    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    vcxt.values[idx++].Reg64 = env->eflags;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    }

    assert(idx == WHvX64RegisterLdtr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);

    assert(idx == WHvX64RegisterTr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);

    assert(idx == WHvX64RegisterIdtr);
    vcxt.values[idx].Table.Base = env->idt.base;
    vcxt.values[idx].Table.Limit = env->idt.limit;
    idx += 1;

    assert(idx == WHvX64RegisterGdtr);
    vcxt.values[idx].Table.Base = env->gdt.base;
    vcxt.values[idx].Table.Limit = env->gdt.limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    vcxt.values[idx++].Reg64 = env->cr[0];
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    vcxt.values[idx++].Reg64 = env->cr[2];
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    vcxt.values[idx++].Reg64 = env->cr[3];
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    vcxt.values[idx++].Reg64 = env->cr[4];
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    /* CR8 carries the task-priority register value */
    vcxt.values[idx++].Reg64 = vcpu->tpr;

    /* 8 Debug Registers - Skipped */

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
        /* vcxt.values[idx].Fp.AsUINT128.High64 =
           env->fpregs[i].mmx.MMX_Q(1);
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    /* QEMU keeps the x87 stack-top separate; fold it back into FSW[13:11] */
    vcxt.values[idx].FpControlStatus.FpStatus =
        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    vcxt.values[idx].FpControlStatus.FpTag = 0;
    for (i = 0; i < 8; ++i) {
        /* QEMU fptags are inverted relative to the abridged FTW bits */
        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    }
    vcxt.values[idx].FpControlStatus.Reserved = 0;
    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    vcxt.values[idx++].Reg64 = env->efer;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    vcxt.values[idx++].Reg64 = env->kernelgsbase;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    vcxt.values[idx++].Reg64 = vcpu->apic_base;

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    vcxt.values[idx++].Reg64 = env->star;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    vcxt.values[idx++].Reg64 = env->lstar;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    vcxt.values[idx++].Reg64 = env->cstar;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    vcxt.values[idx++].Reg64 = env->fmask;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* Every slot of the register set must have been filled exactly once */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
                     hr);
    }

    return;
}
427
/*
 * Read the guest TSC from the hypervisor into env->tsc.
 * Returns 0 on success, -1 if the register read failed.
 */
static int whpx_get_tsc(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    WHV_REGISTER_NAME name = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE value;
    HRESULT hr;

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &name, 1, &value);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
        return -1;
    }

    env->tsc = value.Reg64;
    return 0;
}
446
/*
 * Pull the vCPU state out of the hypervisor into the QEMU CPU state.
 *
 * Mirror image of whpx_set_registers(): a single bulk register read is
 * unmarshalled by walking whpx_register_names[] with a running index,
 * with asserts checking the slot correspondence at each step.  APIC TPR
 * and base are written back through the APIC only when they changed.
 */
static void whpx_get_registers(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    uint64_t tpr, apic_base;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* Refresh the cached TSC; it stays valid while the VM is not running */
    if (!env->tsc_valid) {
        whpx_get_tsc(cpu);
        env->tsc_valid = !runstate_is_running();
    }

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                     hr);
    }

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        env->regs[idx] = vcxt.values[idx].Reg64;
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    env->eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    env->eflags = vcxt.values[idx++].Reg64;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    }

    assert(idx == WHvX64RegisterLdtr);
    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterTr);
    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterIdtr);
    env->idt.base = vcxt.values[idx].Table.Base;
    env->idt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;
    assert(idx == WHvX64RegisterGdtr);
    env->gdt.base = vcxt.values[idx].Table.Base;
    env->gdt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    env->cr[0] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    env->cr[2] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    env->cr[3] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    env->cr[4] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    /* CR8 holds the TPR; propagate to the APIC only on change */
    tpr = vcxt.values[idx++].Reg64;
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }

    /* 8 Debug Registers - Skipped */

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
        /* env->fpregs[i].mmx.MMX_Q(1) =
           vcxt.values[idx].Fp.AsUINT128.High64;
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    /* Split FSW back into QEMU's separate stack-top and status fields */
    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    for (i = 0; i < 8; ++i) {
        /* QEMU fptags are inverted relative to the abridged FTW bits */
        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    }
    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    env->efer = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    env->kernelgsbase = vcxt.values[idx++].Reg64;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    apic_base = vcxt.values[idx++].Reg64;
    if (apic_base != vcpu->apic_base) {
        vcpu->apic_base = apic_base;
        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    }

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    env->sysenter_cs = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    env->sysenter_eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    env->sysenter_esp = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    env->star = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    env->lstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    env->cstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    env->fmask = vcxt.values[idx++].Reg64;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* Every slot of the register set must have been consumed exactly once */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    return;
}
604
whpx_emu_ioport_callback(void * ctx,WHV_EMULATOR_IO_ACCESS_INFO * IoAccess)605 static HRESULT CALLBACK whpx_emu_ioport_callback(
606 void *ctx,
607 WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
608 {
609 MemTxAttrs attrs = { 0 };
610 address_space_rw(&address_space_io, IoAccess->Port, attrs,
611 &IoAccess->Data, IoAccess->AccessSize,
612 IoAccess->Direction);
613 return S_OK;
614 }
615
whpx_emu_mmio_callback(void * ctx,WHV_EMULATOR_MEMORY_ACCESS_INFO * ma)616 static HRESULT CALLBACK whpx_emu_mmio_callback(
617 void *ctx,
618 WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
619 {
620 cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
621 ma->Direction);
622 return S_OK;
623 }
624
whpx_emu_getreg_callback(void * ctx,const WHV_REGISTER_NAME * RegisterNames,UINT32 RegisterCount,WHV_REGISTER_VALUE * RegisterValues)625 static HRESULT CALLBACK whpx_emu_getreg_callback(
626 void *ctx,
627 const WHV_REGISTER_NAME *RegisterNames,
628 UINT32 RegisterCount,
629 WHV_REGISTER_VALUE *RegisterValues)
630 {
631 HRESULT hr;
632 struct whpx_state *whpx = &whpx_global;
633 CPUState *cpu = (CPUState *)ctx;
634
635 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
636 whpx->partition, cpu->cpu_index,
637 RegisterNames, RegisterCount,
638 RegisterValues);
639 if (FAILED(hr)) {
640 error_report("WHPX: Failed to get virtual processor registers,"
641 " hr=%08lx", hr);
642 }
643
644 return hr;
645 }
646
whpx_emu_setreg_callback(void * ctx,const WHV_REGISTER_NAME * RegisterNames,UINT32 RegisterCount,const WHV_REGISTER_VALUE * RegisterValues)647 static HRESULT CALLBACK whpx_emu_setreg_callback(
648 void *ctx,
649 const WHV_REGISTER_NAME *RegisterNames,
650 UINT32 RegisterCount,
651 const WHV_REGISTER_VALUE *RegisterValues)
652 {
653 HRESULT hr;
654 struct whpx_state *whpx = &whpx_global;
655 CPUState *cpu = (CPUState *)ctx;
656
657 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
658 whpx->partition, cpu->cpu_index,
659 RegisterNames, RegisterCount,
660 RegisterValues);
661 if (FAILED(hr)) {
662 error_report("WHPX: Failed to set virtual processor registers,"
663 " hr=%08lx", hr);
664 }
665
666 /*
667 * The emulator just successfully wrote the register state. We clear the
668 * dirty state so we avoid the double write on resume of the VP.
669 */
670 cpu->vcpu_dirty = false;
671
672 return hr;
673 }
674
whpx_emu_translate_callback(void * ctx,WHV_GUEST_VIRTUAL_ADDRESS Gva,WHV_TRANSLATE_GVA_FLAGS TranslateFlags,WHV_TRANSLATE_GVA_RESULT_CODE * TranslationResult,WHV_GUEST_PHYSICAL_ADDRESS * Gpa)675 static HRESULT CALLBACK whpx_emu_translate_callback(
676 void *ctx,
677 WHV_GUEST_VIRTUAL_ADDRESS Gva,
678 WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
679 WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
680 WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
681 {
682 HRESULT hr;
683 struct whpx_state *whpx = &whpx_global;
684 CPUState *cpu = (CPUState *)ctx;
685 WHV_TRANSLATE_GVA_RESULT res;
686
687 hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
688 Gva, TranslateFlags, &res, Gpa);
689 if (FAILED(hr)) {
690 error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
691 } else {
692 *TranslationResult = res.ResultCode;
693 }
694
695 return hr;
696 }
697
/* Dispatch table handed to the WHPX instruction emulator at creation */
static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
};
706
/*
 * Handle a memory-access VM exit by running the faulting instruction
 * through the WHPX emulator.  Returns 0 on success, -1 on failure.
 */
static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS status;
    HRESULT hr;

    hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
        vcpu->emulator, cpu, &vcpu->exit_ctx.VpContext, ctx, &status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
        return -1;
    }

    if (!status.EmulationSuccessful) {
        error_report("WHPX: Failed to emulate MMIO access with"
                     " EmulatorReturnStatus: %u", status.AsUINT32);
        return -1;
    }

    return 0;
}
730
/*
 * Handle a port-I/O VM exit by running the faulting instruction through
 * the WHPX emulator.  Returns 0 on success, -1 on failure.
 */
static int whpx_handle_portio(CPUState *cpu,
                              WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS status;
    HRESULT hr;

    hr = whp_dispatch.WHvEmulatorTryIoEmulation(
        vcpu->emulator, cpu, &vcpu->exit_ctx.VpContext, ctx, &status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
        return -1;
    }

    if (!status.EmulationSuccessful) {
        error_report("WHPX: Failed to emulate PortIO access with"
                     " EmulatorReturnStatus: %u", status.AsUINT32);
        return -1;
    }

    return 0;
}
755
/*
 * Handle a HLT VM exit.  If no interrupt can currently be delivered,
 * park the vCPU in the halted state and return 1 so the run loop exits;
 * otherwise return 0 and let execution continue.
 */
static int whpx_handle_halt(CPUState *cpu)
{
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    bool wake_pending;
    int ret = 0;

    qemu_mutex_lock_iothread();
    /* A maskable interrupt with IF set, or an NMI, ends the halt */
    wake_pending = ((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                    (env->eflags & IF_MASK)) ||
                   (cpu->interrupt_request & CPU_INTERRUPT_NMI);
    if (!wake_pending) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }
    qemu_mutex_unlock_iothread();

    return ret;
}
773
/*
 * Prepare the vCPU for the next WHvRunVirtualProcessor call: inject a
 * pending NMI or external interrupt, sync the TPR into CR8, and request
 * an interrupt-window notification when an interrupt cannot yet be
 * delivered.  interrupt_request is inspected and cleared under the
 * iothread lock; the actual register writes happen after it is dropped.
 */
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    UINT32 reg_count = 0;
    /* At most three registers are written: pending int, CR8, notifications */
    WHV_REGISTER_VALUE reg_values[3];
    WHV_REGISTER_NAME reg_names[3];

    memset(&new_int, 0, sizeof(new_int));
    memset(reg_values, 0, sizeof(reg_values));

    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interruption_pending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            new_int.InterruptionVector = 2;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            /* SMI is acknowledged (flag cleared) but not delivered */
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!vcpu->interruption_pending &&
        vcpu->interruptable && (env->eflags & IF_MASK)) {
        assert(!new_int.InterruptionPending);
        if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            irq = cpu_get_pic_interrupt(env);
            if (irq >= 0) {
                new_int.InterruptionType = WHvX64PendingInterrupt;
                new_int.InterruptionPending = 1;
                new_int.InterruptionVector = irq;
            }
        }
    }

    /* Setup interrupt state if new one was prepared */
    if (new_int.InterruptionPending) {
        reg_values[reg_count].PendingInterruption = new_int;
        reg_names[reg_count] = WHvRegisterPendingInterruption;
        reg_count += 1;
    }

    /* Sync the TPR to the CR8 if was modified during the intercept */
    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /* Update the state of the interrupt delivery notification */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        reg_values[reg_count].DeliverabilityNotifications.InterruptNotification
            = 1;
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();

    /* Commit the accumulated register updates in one hypercall */
    if (reg_count) {
        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
            whpx->partition, cpu->cpu_index,
            reg_names, reg_count, reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }

    return;
}
878
/*
 * Refresh QEMU-side state from the exit context after a vCPU run:
 * RFLAGS, the TPR (propagated to the APIC only on change), and the
 * interruption-pending / interrupt-shadow flags used by the next
 * pre-run pass.
 */
static void whpx_vcpu_post_run(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    uint64_t new_tpr = vcpu->exit_ctx.VpContext.Cr8;

    env->eflags = vcpu->exit_ctx.VpContext.Rflags;

    if (new_tpr != vcpu->tpr) {
        vcpu->tpr = new_tpr;
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
        qemu_mutex_unlock_iothread();
    }

    vcpu->interruption_pending =
        vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;
    vcpu->interruptable =
        !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
}
903
/*
 * Process interrupt_request bits that must be handled outside the
 * hypervisor run loop: INIT, APIC poll, un-halting, SIPI, and TPR
 * access reporting.  Called before each whpx_vcpu_run() iteration.
 */
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);

    /* INIT resets the CPU unless it is in SMM */
    if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
        !(env->hflags & HF_SMM_MASK)) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        vcpu->interruptable = true;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* A deliverable interrupt or an NMI wakes a halted CPU */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }

    /* Report a pending TPR access back through the APIC */
    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        whpx_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return;
}
942
whpx_vcpu_run(CPUState * cpu)943 static int whpx_vcpu_run(CPUState *cpu)
944 {
945 HRESULT hr;
946 struct whpx_state *whpx = &whpx_global;
947 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
948 int ret;
949
950 whpx_vcpu_process_async_events(cpu);
951 if (cpu->halted) {
952 cpu->exception_index = EXCP_HLT;
953 qatomic_set(&cpu->exit_request, false);
954 return 0;
955 }
956
957 qemu_mutex_unlock_iothread();
958 cpu_exec_start(cpu);
959
960 do {
961 if (cpu->vcpu_dirty) {
962 whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
963 cpu->vcpu_dirty = false;
964 }
965
966 whpx_vcpu_pre_run(cpu);
967
968 if (qatomic_read(&cpu->exit_request)) {
969 whpx_vcpu_kick(cpu);
970 }
971
972 hr = whp_dispatch.WHvRunVirtualProcessor(
973 whpx->partition, cpu->cpu_index,
974 &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
975
976 if (FAILED(hr)) {
977 error_report("WHPX: Failed to exec a virtual processor,"
978 " hr=%08lx", hr);
979 ret = -1;
980 break;
981 }
982
983 whpx_vcpu_post_run(cpu);
984
985 switch (vcpu->exit_ctx.ExitReason) {
986 case WHvRunVpExitReasonMemoryAccess:
987 ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
988 break;
989
990 case WHvRunVpExitReasonX64IoPortAccess:
991 ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
992 break;
993
994 case WHvRunVpExitReasonX64InterruptWindow:
995 vcpu->window_registered = 0;
996 ret = 0;
997 break;
998
999 case WHvRunVpExitReasonX64Halt:
1000 ret = whpx_handle_halt(cpu);
1001 break;
1002
1003 case WHvRunVpExitReasonCanceled:
1004 cpu->exception_index = EXCP_INTERRUPT;
1005 ret = 1;
1006 break;
1007
1008 case WHvRunVpExitReasonX64MsrAccess: {
1009 WHV_REGISTER_VALUE reg_values[3] = {0};
1010 WHV_REGISTER_NAME reg_names[3];
1011 UINT32 reg_count;
1012
1013 reg_names[0] = WHvX64RegisterRip;
1014 reg_names[1] = WHvX64RegisterRax;
1015 reg_names[2] = WHvX64RegisterRdx;
1016
1017 reg_values[0].Reg64 =
1018 vcpu->exit_ctx.VpContext.Rip +
1019 vcpu->exit_ctx.VpContext.InstructionLength;
1020
1021 /*
1022 * For all unsupported MSR access we:
1023 * ignore writes
1024 * return 0 on read.
1025 */
1026 reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
1027 1 : 3;
1028
1029 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1030 whpx->partition,
1031 cpu->cpu_index,
1032 reg_names, reg_count,
1033 reg_values);
1034
1035 if (FAILED(hr)) {
1036 error_report("WHPX: Failed to set MsrAccess state "
1037 " registers, hr=%08lx", hr);
1038 }
1039 ret = 0;
1040 break;
1041 }
1042 case WHvRunVpExitReasonX64Cpuid: {
1043 WHV_REGISTER_VALUE reg_values[5];
1044 WHV_REGISTER_NAME reg_names[5];
1045 UINT32 reg_count = 5;
1046 UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
1047 X86CPU *x86_cpu = X86_CPU(cpu);
1048 CPUX86State *env = &x86_cpu->env;
1049
1050 memset(reg_values, 0, sizeof(reg_values));
1051
1052 rip = vcpu->exit_ctx.VpContext.Rip +
1053 vcpu->exit_ctx.VpContext.InstructionLength;
1054 cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
1055
1056 /*
1057 * Ideally, these should be supplied to the hypervisor during VCPU
1058 * initialization and it should be able to satisfy this request.
1059 * But, currently, WHPX doesn't support setting CPUID values in the
1060 * hypervisor once the partition has been setup, which is too late
1061 * since VCPUs are realized later. For now, use the values from
1062 * QEMU to satisfy these requests, until WHPX adds support for
1063 * being able to set these values in the hypervisor at runtime.
1064 */
1065 cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
1066 (UINT32 *)&rcx, (UINT32 *)&rdx);
1067 switch (cpuid_fn) {
1068 case 0x40000000:
1069 /* Expose the vmware cpu frequency cpuid leaf */
1070 rax = 0x40000010;
1071 rbx = rcx = rdx = 0;
1072 break;
1073
1074 case 0x40000010:
1075 rax = env->tsc_khz;
1076 rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
1077 rcx = rdx = 0;
1078 break;
1079
1080 case 0x80000001:
1081 /* Remove any support of OSVW */
1082 rcx &= ~CPUID_EXT3_OSVW;
1083 break;
1084 }
1085
1086 reg_names[0] = WHvX64RegisterRip;
1087 reg_names[1] = WHvX64RegisterRax;
1088 reg_names[2] = WHvX64RegisterRcx;
1089 reg_names[3] = WHvX64RegisterRdx;
1090 reg_names[4] = WHvX64RegisterRbx;
1091
1092 reg_values[0].Reg64 = rip;
1093 reg_values[1].Reg64 = rax;
1094 reg_values[2].Reg64 = rcx;
1095 reg_values[3].Reg64 = rdx;
1096 reg_values[4].Reg64 = rbx;
1097
1098 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1099 whpx->partition, cpu->cpu_index,
1100 reg_names,
1101 reg_count,
1102 reg_values);
1103
1104 if (FAILED(hr)) {
1105 error_report("WHPX: Failed to set CpuidAccess state registers,"
1106 " hr=%08lx", hr);
1107 }
1108 ret = 0;
1109 break;
1110 }
1111 case WHvRunVpExitReasonNone:
1112 case WHvRunVpExitReasonUnrecoverableException:
1113 case WHvRunVpExitReasonInvalidVpRegisterValue:
1114 case WHvRunVpExitReasonUnsupportedFeature:
1115 case WHvRunVpExitReasonException:
1116 default:
1117 error_report("WHPX: Unexpected VP exit code %d",
1118 vcpu->exit_ctx.ExitReason);
1119 whpx_get_registers(cpu);
1120 qemu_mutex_lock_iothread();
1121 qemu_system_guest_panicked(cpu_get_crash_info(cpu));
1122 qemu_mutex_unlock_iothread();
1123 break;
1124 }
1125
1126 } while (!ret);
1127
1128 cpu_exec_end(cpu);
1129 qemu_mutex_lock_iothread();
1130 current_cpu = cpu;
1131
1132 qatomic_set(&cpu->exit_request, false);
1133
1134 return ret < 0;
1135 }
1136
/*
 * Fetch the vCPU register state from WHPX into QEMU's CPUState unless the
 * cached copy is already marked dirty (i.e. already current in QEMU).
 * Executed on the vCPU thread via run_on_cpu().
 */
static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    whpx_get_registers(cpu);
    cpu->vcpu_dirty = true;
}
1144
/*
 * Push the reset-state register set to the hypervisor and mark QEMU's
 * cached copy as clean.  Executed on the vCPU thread via run_on_cpu().
 */
static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
    cpu->vcpu_dirty = false;
}
1151
/*
 * Push the full register state to the hypervisor after vCPU init and mark
 * QEMU's cached copy as clean.  Executed on the vCPU thread via run_on_cpu().
 */
static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
    cpu->vcpu_dirty = false;
}
1158
/*
 * Before loading a VM snapshot, mark QEMU's register copy as dirty so the
 * incoming state is treated as authoritative over what WHPX currently holds.
 */
static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
1164
1165 /*
1166 * CPU support.
1167 */
1168
/*
 * Ensure QEMU's copy of the vCPU registers is current before it is
 * inspected or modified.  No-op when the cached state is already dirty.
 */
void whpx_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
1175
/* Write the post-reset register state to WHPX on the vCPU thread. */
void whpx_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
1180
/* Write the full post-init register state to WHPX on the vCPU thread. */
void whpx_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
1185
/* Mark QEMU's register copy dirty before loading a VM snapshot. */
void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
1190
1191 /*
1192 * Vcpu support.
1193 */
1194
1195 static Error *whpx_migration_blocker;
1196
/*
 * VM run-state change handler: invalidate the cached TSC value whenever
 * the machine transitions back into a running state.
 */
static void whpx_cpu_update_state(void *opaque, int running, RunState state)
{
    CPUX86State *env = opaque;

    if (!running) {
        return;
    }

    env->tsc_valid = false;
}
1205
/*
 * Create and initialize the WHPX virtual processor backing @cpu.
 *
 * Registers a one-time migration blocker, allocates the per-vcpu state,
 * creates the instruction emulator and the hypervisor virtual processor,
 * queries TSC/APIC-bus frequencies from Hyper-V, and optionally traps the
 * vmware cpuid frequency leaves.
 *
 * Returns 0 on success, negative errno on failure.  On failure every
 * partially-created resource is released (the original code leaked the
 * emulator and the virtual processor when setting CpuidExitList failed).
 */
int whpx_init_vcpu(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = NULL;
    Error *local_error = NULL;
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    UINT64 freq = 0;
    int ret;

    /* Add migration blockers for all unsupported features of the
     * Windows Hypervisor Platform
     */
    if (whpx_migration_blocker == NULL) {
        error_setg(&whpx_migration_blocker,
               "State blocked due to non-migratable CPUID feature support,"
               "dirty memory tracking support, and XSAVE/XRSTOR support");

        (void)migrate_add_blocker(whpx_migration_blocker, &local_error);
        if (local_error) {
            error_report_err(local_error);
            migrate_del_blocker(whpx_migration_blocker);
            error_free(whpx_migration_blocker);
            /* Reset the pointer so a later retry does not reuse freed memory. */
            whpx_migration_blocker = NULL;
            ret = -EINVAL;
            goto error;
        }
    }

    /* g_malloc0() aborts on allocation failure; no NULL check is needed. */
    vcpu = g_malloc0(sizeof(struct whpx_vcpu));

    hr = whp_dispatch.WHvEmulatorCreateEmulator(
        &whpx_emu_callbacks,
        &vcpu->emulator);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to setup instruction completion support,"
                     " hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvCreateVirtualProcessor(
        whpx->partition, cpu->cpu_index, 0);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to create a virtual processor,"
                     " hr=%08lx", hr);
        ret = -EINVAL;
        goto error_destroy_emulator;
    }

    /*
     * vcpu's TSC frequency is either specified by user, or use the value
     * provided by Hyper-V if the former is not present. In the latter case, we
     * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
     * frequency can be migrated later via this field.
     */
    if (!env->tsc_khz) {
        hr = whp_dispatch.WHvGetCapability(
            WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
                NULL);
        if (hr != WHV_E_UNKNOWN_CAPABILITY) {
            if (FAILED(hr)) {
                printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
            } else {
                env->tsc_khz = freq / 1000; /* Hz to KHz */
            }
        }
    }

    env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
    if (hr != WHV_E_UNKNOWN_CAPABILITY) {
        if (FAILED(hr)) {
            printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
        } else {
            env->apic_bus_freq = freq;
        }
    }

    /*
     * If the vmware cpuid frequency leaf option is set, and we have a valid
     * tsc value, trap the corresponding cpuid's.
     */
    if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
        UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};

        hr = whp_dispatch.WHvSetPartitionProperty(
                whpx->partition,
                WHvPartitionPropertyCodeCpuidExitList,
                cpuidExitList,
                RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));

        if (FAILED(hr)) {
            error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                         hr);
            ret = -EINVAL;
            goto error_delete_vcpu;
        }
    }

    vcpu->interruptable = true;
    cpu->vcpu_dirty = true;
    cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);

    return 0;

    /* Unwind in reverse order of acquisition. */
error_delete_vcpu:
    whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
error_destroy_emulator:
    whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
error:
    g_free(vcpu);

    return ret;
}
1326
/*
 * Run the vCPU until an exception/interrupt is pending.  Returns the
 * pending exception index (and clears it).  A fatal failure inside
 * whpx_vcpu_run() aborts the process.
 */
int whpx_vcpu_exec(CPUState *cpu)
{
    int ret;

    while (cpu->exception_index < EXCP_INTERRUPT) {
        if (whpx_vcpu_run(cpu)) {
            error_report("WHPX: Failed to exec a virtual processor");
            abort();
        }
    }

    ret = cpu->exception_index;
    cpu->exception_index = -1;

    return ret;
}
1349
/* Tear down the WHPX virtual processor, emulator, and state backing @cpu. */
void whpx_destroy_vcpu(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *wvcpu = get_whpx_vcpu(cpu);

    whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
    whp_dispatch.WHvEmulatorDestroyEmulator(wvcpu->emulator);
    g_free(cpu->hax_vcpu);
}
1360
/* Force the vCPU out of WHvRunVirtualProcessor() by cancelling its run. */
void whpx_vcpu_kick(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    whp_dispatch.WHvCancelRunVirtualProcessor(
        whpx->partition, cpu->cpu_index, 0);
}
1367
1368 /*
1369 * Memory support.
1370 */
1371
/*
 * Map (@add != 0) or unmap a guest-physical range in the WHPX partition.
 * Mapped ranges are readable and executable; writable unless @rom.
 * Failures are reported but not propagated to the caller.
 */
static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
                                void *host_va, int add, int rom,
                                const char *name)
{
    struct whpx_state *whpx = &whpx_global;
    HRESULT hr;

    if (!add) {
        hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition, start_pa, size);
    } else {
        hr = whp_dispatch.WHvMapGpaRange(whpx->partition, host_va, start_pa,
                                         size,
                                         (WHvMapGpaRangeFlagRead |
                                          WHvMapGpaRangeFlagExecute |
                                          (rom ? 0 : WHvMapGpaRangeFlagWrite)));
    }

    if (FAILED(hr)) {
        error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
                     " Host:%p, hr=%08lx",
                     (add ? "MAP" : "UNMAP"), name,
                     (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
    }
}
1411
/*
 * Map or unmap (@add) the RAM covered by @section in the WHPX partition.
 * The range is trimmed to whole host pages; sections that do not contain
 * at least one aligned page are silently skipped.
 */
static void whpx_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;

    /* Only RAM-backed regions can be mapped into the partition. */
    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Bytes needed to round start_pa up to the next host-page boundary. */
    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
    delta &= ~qemu_real_host_page_mask;
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    /* Round the remaining size down to a whole number of host pages. */
    size &= qemu_real_host_page_mask;
    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
        return;
    }

    /* Host virtual address corresponding to the aligned start_pa. */
    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
            + section->offset_within_region + delta;

    whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
                        memory_region_is_rom(mr), mr->name);
}
1442
/*
 * MemoryListener hook: map a newly added section into the partition,
 * taking a reference on the region for the lifetime of the mapping.
 */
static void whpx_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    whpx_process_section(section, 1);
}
1449
/*
 * MemoryListener hook: unmap a removed section from the partition and
 * drop the reference taken in whpx_region_add().
 */
static void whpx_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    whpx_process_section(section, 0);
    memory_region_unref(section->mr);
}
1456
/*
 * No-op: mappings are applied immediately in region_add/region_del, so
 * there is nothing to do at transaction boundaries.
 */
static void whpx_transaction_begin(MemoryListener *listener)
{
}
1460
/* No-op: see whpx_transaction_begin(). */
static void whpx_transaction_commit(MemoryListener *listener)
{
}
1464
/*
 * Dirty-log sync hook.  WHPX exposes no dirty-page tracking here, so
 * conservatively mark the entire RAM section dirty on every sync.
 */
static void whpx_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (memory_region_is_ram(mr)) {
        memory_region_set_dirty(mr, 0, int128_get64(section->size));
    }
}
1476
/*
 * MemoryListener that mirrors QEMU RAM sections into the WHPX partition's
 * guest-physical address space.
 */
static MemoryListener whpx_memory_listener = {
    .begin = whpx_transaction_begin,
    .commit = whpx_transaction_commit,
    .region_add = whpx_region_add,
    .region_del = whpx_region_del,
    .log_sync = whpx_log_sync,
    .priority = 10,
};
1485
/* Register the WHPX memory listener on the system memory address space. */
static void whpx_memory_init(void)
{
    memory_listener_register(&whpx_memory_listener, &address_space_memory);
}
1490
1491 /*
1492 * Load the functions from the given library, using the given handle. If a
1493 * handle is provided, it is used, otherwise the library is opened. The
1494 * handle will be updated on return with the opened one.
1495 */
static bool load_whp_dispatch_fns(HMODULE *handle,
                                  WHPFunctionList function_list)
{
    HMODULE hLib = *handle;

#define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
#define WINHV_EMULATION_DLL "WinHvEmulation.dll"
/* Resolve one symbol; absence is tolerated (the pointer stays NULL). */
#define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
    whp_dispatch.function_name = \
        (function_name ## _t)GetProcAddress(hLib, #function_name); \

/* Resolve one symbol; absence is a hard error. */
#define WHP_LOAD_FIELD(return_type, function_name, signature) \
    whp_dispatch.function_name = \
        (function_name ## _t)GetProcAddress(hLib, #function_name); \
    if (!whp_dispatch.function_name) { \
        error_report("Could not load function %s", #function_name); \
        goto error; \
    } \

/* Open lib_name unless the caller already holds an open handle for it. */
#define WHP_LOAD_LIB(lib_name, handle_lib) \
    if (!handle_lib) { \
        handle_lib = LoadLibrary(lib_name); \
        if (!handle_lib) { \
            error_report("Could not load library %s.", lib_name); \
            goto error; \
        } \
    } \

    switch (function_list) {
    case WINHV_PLATFORM_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_EMULATION_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
        break;
    }

    *handle = hLib;
    return true;

error:
    /*
     * NOTE(review): if *handle was non-NULL on entry, this frees a handle
     * the caller still holds without clearing *handle -- confirm callers
     * never reuse the handle after a failure here.
     */
    if (hLib) {
        FreeLibrary(hLib);
    }

    return false;
}
1551
1552 /*
1553 * Partition support
1554 */
1555
/*
 * Machine-init entry point for the WHPX accelerator: verify the hypervisor
 * is present, create the partition, configure it (vCPU count, extended
 * MSR/CPUID exits, CPUID exit list), set it up, and register the memory
 * listener and vCPU thread ops.
 *
 * Returns 0 on success, negative errno on failure; a partially-created
 * partition is deleted on the error path.
 */
static int whpx_accel_init(MachineState *ms)
{
    struct whpx_state *whpx;
    int ret;
    HRESULT hr;
    WHV_CAPABILITY whpx_cap;
    UINT32 whpx_cap_size;
    WHV_PARTITION_PROPERTY prop;
    UINT32 cpuidExitList[] = {1, 0x80000001};

    whpx = &whpx_global;

    if (!init_whp_dispatch()) {
        ret = -ENOSYS;
        goto error;
    }

    memset(whpx, 0, sizeof(struct whpx_state));
    whpx->mem_quota = ms->ram_size;

    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeHypervisorPresent, &whpx_cap,
        sizeof(whpx_cap), &whpx_cap_size);
    if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
        error_report("WHPX: No accelerator found, hr=%08lx", hr);
        ret = -ENOSPC;
        goto error;
    }

    hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to create partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ProcessorCount = ms->smp.cpus;
    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeProcessorCount,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));

    if (FAILED(hr)) {
        /* Report the value actually requested (ms->smp.cpus, not .cores). */
        error_report("WHPX: Failed to set partition core count to %d,"
                     " hr=%08lx", ms->smp.cpus, hr);
        ret = -EINVAL;
        goto error;
    }

    /* Request VM exits for MSR and CPUID accesses so QEMU can emulate them. */
    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ExtendedVmExits.X64MsrExit = 1;
    prop.ExtendedVmExits.X64CpuidExit = 1;
    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeExtendedVmExits,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));

    if (FAILED(hr)) {
        error_report("WHPX: Failed to enable partition extended X64MsrExit and"
                     " X64CpuidExit hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeCpuidExitList,
        cpuidExitList,
        RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                     hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvSetupPartition(whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    whpx_memory_init();

    cpus_register_accel(&whpx_cpus);

    printf("Windows Hypervisor Platform accelerator is operational\n");
    return 0;

error:

    if (NULL != whpx->partition) {
        whp_dispatch.WHvDeletePartition(whpx->partition);
        whpx->partition = NULL;
    }

    return ret;
}
1659
/* Returns non-zero when the WHPX accelerator has been selected. */
int whpx_enabled(void)
{
    return whpx_allowed;
}
1664
/* AccelClass init: name the accelerator and hook up its entry points. */
static void whpx_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "WHPX";
    ac->init_machine = whpx_accel_init;
    ac->allowed = &whpx_allowed;
}
1672
/* QOM type descriptor for the "whpx" accelerator. */
static const TypeInfo whpx_accel_type = {
    .name = ACCEL_CLASS_NAME("whpx"),
    .parent = TYPE_ACCEL,
    .class_init = whpx_accel_class_init,
};
1678
/* Register the accelerator type with QOM (invoked via type_init below). */
static void whpx_type_init(void)
{
    type_register_static(&whpx_accel_type);
}
1683
init_whp_dispatch(void)1684 bool init_whp_dispatch(void)
1685 {
1686 if (whp_dispatch_initialized) {
1687 return true;
1688 }
1689
1690 if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
1691 goto error;
1692 }
1693
1694 if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
1695 goto error;
1696 }
1697
1698 assert(load_whp_dispatch_fns(&hWinHvPlatform,
1699 WINHV_PLATFORM_FNS_SUPPLEMENTAL));
1700 whp_dispatch_initialized = true;
1701
1702 return true;
1703 error:
1704 if (hWinHvPlatform) {
1705 FreeLibrary(hWinHvPlatform);
1706 }
1707
1708 if (hWinHvEmulation) {
1709 FreeLibrary(hWinHvEmulation);
1710 }
1711
1712 return false;
1713 }
1714
1715 type_init(whpx_type_init);
1716