1 /*
2 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
3 *
4 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9
10 #include "qemu/osdep.h"
11 #include "cpu.h"
12 #include "exec/address-spaces.h"
13 #include "exec/ioport.h"
14 #include "qemu-common.h"
15 #include "qemu/accel.h"
16 #include "sysemu/nvmm.h"
17 #include "sysemu/cpus.h"
18 #include "sysemu/runstate.h"
19 #include "qemu/main-loop.h"
20 #include "qemu/error-report.h"
21 #include "qapi/error.h"
22 #include "qemu/queue.h"
23 #include "migration/blocker.h"
24 #include "strings.h"
25
26 #include "nvmm-accel-ops.h"
27
28 #include <nvmm.h>
29
30 struct qemu_vcpu {
31 struct nvmm_vcpu vcpu;
32 uint8_t tpr;
33 bool stop;
34
35 /* Window-exiting for INTs/NMIs. */
36 bool int_window_exit;
37 bool nmi_window_exit;
38
39 /* The guest is in an interrupt shadow (POP SS, etc). */
40 bool int_shadow;
41 };
42
43 struct qemu_machine {
44 struct nvmm_capability cap;
45 struct nvmm_machine mach;
46 };
47
48 /* -------------------------------------------------------------------------- */
49
50 static bool nvmm_allowed;
51 static struct qemu_machine qemu_mach;
52
53 static struct qemu_vcpu *
54 get_qemu_vcpu(CPUState *cpu)
55 {
56 return (struct qemu_vcpu *)cpu->hax_vcpu;
57 }
58
59 static struct nvmm_machine *
60 get_nvmm_mach(void)
61 {
62 return &qemu_mach.mach;
63 }
64
65 /* -------------------------------------------------------------------------- */
66
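/*
 * Translate a QEMU SegmentCache into the NVMM segment layout. The
 * __SHIFTOUT()/__SHIFTIN() helpers come from NetBSD's <sys/cdefs.h>:
 * roughly, __SHIFTOUT(x, mask) extracts the bit field selected by mask and
 * shifts it down to bit 0, and __SHIFTIN() is the inverse. For example,
 * __SHIFTOUT(attrib, DESC_DPL_MASK) yields the 2-bit DPL value.
 */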
67 static void
68 nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
69 {
70 uint32_t attrib = qseg->flags;
71
72 nseg->selector = qseg->selector;
73 nseg->limit = qseg->limit;
74 nseg->base = qseg->base;
75 nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
76 nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
77 nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
78 nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
79 nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
80 nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
81 nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
82 nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
83 }
84
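/*
 * Copy the QEMU-side CPU state (CPUX86State) into the NVMM state area and
 * push it to the kernel with nvmm_vcpu_setstate(). Typically called when
 * cpu->vcpu_dirty is set, before re-entering the guest.
 */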
85 static void
86 nvmm_set_registers(CPUState *cpu)
87 {
88 struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
89 struct nvmm_machine *mach = get_nvmm_mach();
90 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
91 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
92 struct nvmm_x64_state *state = vcpu->state;
93 uint64_t bitmap;
94 size_t i;
95 int ret;
96
97 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
98
99 /* GPRs. */
100 state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
101 state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
102 state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
103 state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
104 state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
105 state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
106 state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
107 state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
108 #ifdef TARGET_X86_64
109 state->gprs[NVMM_X64_GPR_R8] = env->regs[R_R8];
110 state->gprs[NVMM_X64_GPR_R9] = env->regs[R_R9];
111 state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
112 state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
113 state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
114 state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
115 state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
116 state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
117 #endif
118
119 /* RIP and RFLAGS. */
120 state->gprs[NVMM_X64_GPR_RIP] = env->eip;
121 state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;
122
123 /* Segments. */
124 nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
125 nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
126 nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
127 nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
128 nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
129 nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);
130
131 /* Special segments. */
132 nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
133 nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
134 nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
135 nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);
136
137 /* Control registers. */
138 state->crs[NVMM_X64_CR_CR0] = env->cr[0];
139 state->crs[NVMM_X64_CR_CR2] = env->cr[2];
140 state->crs[NVMM_X64_CR_CR3] = env->cr[3];
141 state->crs[NVMM_X64_CR_CR4] = env->cr[4];
142 state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
143 state->crs[NVMM_X64_CR_XCR0] = env->xcr0;
144
145 /* Debug registers. */
146 state->drs[NVMM_X64_DR_DR0] = env->dr[0];
147 state->drs[NVMM_X64_DR_DR1] = env->dr[1];
148 state->drs[NVMM_X64_DR_DR2] = env->dr[2];
149 state->drs[NVMM_X64_DR_DR3] = env->dr[3];
150 state->drs[NVMM_X64_DR_DR6] = env->dr[6];
151 state->drs[NVMM_X64_DR_DR7] = env->dr[7];
152
153 /* FPU. */
154 state->fpu.fx_cw = env->fpuc;
155 state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
156 state->fpu.fx_tw = 0;
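/*
 * env->fptags[] uses 1 for "empty"; the FXSAVE abridged tag word uses
 * 1 for "valid", hence the negation below.
 */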
157 for (i = 0; i < 8; i++) {
158 state->fpu.fx_tw |= (!env->fptags[i]) << i;
159 }
160 state->fpu.fx_opcode = env->fpop;
161 state->fpu.fx_ip.fa_64 = env->fpip;
162 state->fpu.fx_dp.fa_64 = env->fpdp;
163 state->fpu.fx_mxcsr = env->mxcsr;
164 state->fpu.fx_mxcsr_mask = 0x0000FFFF;
165 assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
166 memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
167 for (i = 0; i < CPU_NB_REGS; i++) {
168 memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
169 &env->xmm_regs[i].ZMM_Q(0), 8);
170 memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
171 &env->xmm_regs[i].ZMM_Q(1), 8);
172 }
173
174 /* MSRs. */
175 state->msrs[NVMM_X64_MSR_EFER] = env->efer;
176 state->msrs[NVMM_X64_MSR_STAR] = env->star;
177 #ifdef TARGET_X86_64
178 state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
179 state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
180 state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
181 state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
182 #endif
183 state->msrs[NVMM_X64_MSR_SYSENTER_CS] = env->sysenter_cs;
184 state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
185 state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
186 state->msrs[NVMM_X64_MSR_PAT] = env->pat;
187 state->msrs[NVMM_X64_MSR_TSC] = env->tsc;
188
189 bitmap =
190 NVMM_X64_STATE_SEGS |
191 NVMM_X64_STATE_GPRS |
192 NVMM_X64_STATE_CRS |
193 NVMM_X64_STATE_DRS |
194 NVMM_X64_STATE_MSRS |
195 NVMM_X64_STATE_FPU;
196
197 ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
198 if (ret == -1) {
199 error_report("NVMM: Failed to set virtual processor context,"
200 " error=%d", errno);
201 }
202 }
203
204 static void
205 nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
206 {
207 qseg->selector = nseg->selector;
208 qseg->limit = nseg->limit;
209 qseg->base = nseg->base;
210
211 qseg->flags =
212 __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
213 __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
214 __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
215 __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
216 __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
217 __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
218 __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
219 __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
220 }
221
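/*
 * Pull the full VCPU state out of the kernel with nvmm_vcpu_getstate() and
 * copy it back into the QEMU-side CPUX86State; the inverse of
 * nvmm_set_registers().
 */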
222 static void
223 nvmm_get_registers(CPUState *cpu)
224 {
225 struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
226 struct nvmm_machine *mach = get_nvmm_mach();
227 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
228 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
229 X86CPU *x86_cpu = X86_CPU(cpu);
230 struct nvmm_x64_state *state = vcpu->state;
231 uint64_t bitmap, tpr;
232 size_t i;
233 int ret;
234
235 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
236
237 bitmap =
238 NVMM_X64_STATE_SEGS |
239 NVMM_X64_STATE_GPRS |
240 NVMM_X64_STATE_CRS |
241 NVMM_X64_STATE_DRS |
242 NVMM_X64_STATE_MSRS |
243 NVMM_X64_STATE_FPU;
244
245 ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
246 if (ret == -1) {
247 error_report("NVMM: Failed to get virtual processor context,"
248 " error=%d", errno);
249 }
250
251 /* GPRs. */
252 env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
253 env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
254 env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
255 env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
256 env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
257 env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
258 env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
259 env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
260 #ifdef TARGET_X86_64
261 env->regs[R_R8] = state->gprs[NVMM_X64_GPR_R8];
262 env->regs[R_R9] = state->gprs[NVMM_X64_GPR_R9];
263 env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
264 env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
265 env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
266 env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
267 env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
268 env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
269 #endif
270
271 /* RIP and RFLAGS. */
272 env->eip = state->gprs[NVMM_X64_GPR_RIP];
273 env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];
274
275 /* Segments. */
276 nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
277 nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
278 nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
279 nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
280 nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
281 nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);
282
283 /* Special segments. */
284 nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
285 nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
286 nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
287 nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);
288
289 /* Control registers. */
290 env->cr[0] = state->crs[NVMM_X64_CR_CR0];
291 env->cr[2] = state->crs[NVMM_X64_CR_CR2];
292 env->cr[3] = state->crs[NVMM_X64_CR_CR3];
293 env->cr[4] = state->crs[NVMM_X64_CR_CR4];
294 tpr = state->crs[NVMM_X64_CR_CR8];
295 if (tpr != qcpu->tpr) {
296 qcpu->tpr = tpr;
297 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
298 }
299 env->xcr0 = state->crs[NVMM_X64_CR_XCR0];
300
301 /* Debug registers. */
302 env->dr[0] = state->drs[NVMM_X64_DR_DR0];
303 env->dr[1] = state->drs[NVMM_X64_DR_DR1];
304 env->dr[2] = state->drs[NVMM_X64_DR_DR2];
305 env->dr[3] = state->drs[NVMM_X64_DR_DR3];
306 env->dr[6] = state->drs[NVMM_X64_DR_DR6];
307 env->dr[7] = state->drs[NVMM_X64_DR_DR7];
308
309 /* FPU. */
310 env->fpuc = state->fpu.fx_cw;
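/*
 * Bits 11..13 of the FXSAVE status word hold the x87 stack TOP; QEMU
 * tracks it separately in env->fpstt.
 */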
311 env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
312 env->fpus = state->fpu.fx_sw & ~0x3800;
313 for (i = 0; i < 8; i++) {
314 env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
315 }
316 env->fpop = state->fpu.fx_opcode;
317 env->fpip = state->fpu.fx_ip.fa_64;
318 env->fpdp = state->fpu.fx_dp.fa_64;
319 env->mxcsr = state->fpu.fx_mxcsr;
320 assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
321 memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
322 for (i = 0; i < CPU_NB_REGS; i++) {
323 memcpy(&env->xmm_regs[i].ZMM_Q(0),
324 &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
325 memcpy(&env->xmm_regs[i].ZMM_Q(1),
326 &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
327 }
328
329 /* MSRs. */
330 env->efer = state->msrs[NVMM_X64_MSR_EFER];
331 env->star = state->msrs[NVMM_X64_MSR_STAR];
332 #ifdef TARGET_X86_64
333 env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
334 env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
335 env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
336 env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
337 #endif
338 env->sysenter_cs = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
339 env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
340 env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
341 env->pat = state->msrs[NVMM_X64_MSR_PAT];
342 env->tsc = state->msrs[NVMM_X64_MSR_TSC];
343
344 x86_update_hflags(env);
345 }
346
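/*
 * Check whether a maskable interrupt can be injected right now. If the
 * guest is in an interrupt shadow or has IF clear, request an interrupt
 * window exit instead, so that we get kicked as soon as injection becomes
 * possible.
 */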
347 static bool
348 nvmm_can_take_int(CPUState *cpu)
349 {
350 struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
351 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
352 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
353 struct nvmm_machine *mach = get_nvmm_mach();
354
355 if (qcpu->int_window_exit) {
356 return false;
357 }
358
359 if (qcpu->int_shadow || !(env->eflags & IF_MASK)) {
360 struct nvmm_x64_state *state = vcpu->state;
361
362 /* Exit on interrupt window. */
363 nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
364 state->intr.int_window_exiting = 1;
365 nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);
366
367 return false;
368 }
369
370 return true;
371 }
372
373 static bool
374 nvmm_can_take_nmi(CPUState *cpu)
375 {
376 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
377
378 /*
379 * Contrary to INTs, NMIs always schedule an exit when they are
380 * completed. Therefore, if window-exiting is enabled, it means
381 * NMIs are blocked.
382 */
383 if (qcpu->nmi_window_exit) {
384 return false;
385 }
386
387 return true;
388 }
389
390 /*
391 * Called before the VCPU is run. We inject events generated by the I/O
392 * thread, and synchronize the guest TPR.
393 */
394 static void
395 nvmm_vcpu_pre_run(CPUState *cpu)
396 {
397 struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
398 struct nvmm_machine *mach = get_nvmm_mach();
399 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
400 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
401 X86CPU *x86_cpu = X86_CPU(cpu);
402 struct nvmm_x64_state *state = vcpu->state;
403 struct nvmm_vcpu_event *event = vcpu->event;
404 bool has_event = false;
405 bool sync_tpr = false;
406 uint8_t tpr;
407 int ret;
408
409 qemu_mutex_lock_iothread();
410
411 tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
412 if (tpr != qcpu->tpr) {
413 qcpu->tpr = tpr;
414 sync_tpr = true;
415 }
416
417 /*
418 * Force the VCPU out of its inner loop to process any INIT requests
419 * or commit pending TPR access.
420 */
421 if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
422 cpu->exit_request = 1;
423 }
424
425 if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
426 if (nvmm_can_take_nmi(cpu)) {
427 cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
428 event->type = NVMM_VCPU_EVENT_INTR;
429 event->vector = 2;
430 has_event = true;
431 }
432 }
433
434 if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
435 if (nvmm_can_take_int(cpu)) {
436 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
437 event->type = NVMM_VCPU_EVENT_INTR;
438 event->vector = cpu_get_pic_interrupt(env);
439 has_event = true;
440 }
441 }
442
443 /* Don't want SMIs. */
444 if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
445 cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
446 }
447
448 if (sync_tpr) {
449 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
450 if (ret == -1) {
451 error_report("NVMM: Failed to get CPU state,"
452 " error=%d", errno);
453 }
454
455 state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
456
457 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
458 if (ret == -1) {
459 error_report("NVMM: Failed to set CPU state,"
460 " error=%d", errno);
461 }
462 }
463
464 if (has_event) {
465 ret = nvmm_vcpu_inject(mach, vcpu);
466 if (ret == -1) {
467 error_report("NVMM: Failed to inject event,"
468 " error=%d", errno);
469 }
470 }
471
472 qemu_mutex_unlock_iothread();
473 }
474
475 /*
476 * Called after the VCPU ran. We synchronize the host view of the TPR and
477 * RFLAGS.
478 */
479 static void
480 nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
481 {
482 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
483 struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
484 X86CPU *x86_cpu = X86_CPU(cpu);
485 uint64_t tpr;
486
487 env->eflags = exit->exitstate.rflags;
488 qcpu->int_shadow = exit->exitstate.int_shadow;
489 qcpu->int_window_exit = exit->exitstate.int_window_exiting;
490 qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;
491
492 tpr = exit->exitstate.cr8;
493 if (qcpu->tpr != tpr) {
494 qcpu->tpr = tpr;
495 qemu_mutex_lock_iothread();
496 cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
497 qemu_mutex_unlock_iothread();
498 }
499 }
500
501 /* -------------------------------------------------------------------------- */
502
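/*
 * Assist callbacks, invoked by libnvmm from nvmm_assist_io() and
 * nvmm_assist_mem() to perform the emulated accesses against QEMU's
 * address spaces.
 */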
503 static void
504 nvmm_io_callback(struct nvmm_io *io)
505 {
506 MemTxAttrs attrs = { 0 };
507 int ret;
508
509 ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
510 io->size, !io->in);
511 if (ret != MEMTX_OK) {
512 error_report("NVMM: I/O Transaction Failed "
513 "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
514 io->port, io->size);
515 }
516
517 /* Needed, otherwise infinite loop. */
518 current_cpu->vcpu_dirty = false;
519 }
520
521 static void
522 nvmm_mem_callback(struct nvmm_mem *mem)
523 {
524 cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
525
526 /* Needed, otherwise infinite loop. */
527 current_cpu->vcpu_dirty = false;
528 }
529
530 static struct nvmm_assist_callbacks nvmm_callbacks = {
531 .io = nvmm_io_callback,
532 .mem = nvmm_mem_callback
533 };
534
535 /* -------------------------------------------------------------------------- */
536
537 static int
538 nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
539 {
540 int ret;
541
542 ret = nvmm_assist_mem(mach, vcpu);
543 if (ret == -1) {
544 error_report("NVMM: Mem Assist Failed [gpa=%p]",
545 (void *)vcpu->exit->u.mem.gpa);
546 }
547
548 return ret;
549 }
550
551 static int
552 nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
553 {
554 int ret;
555
556 ret = nvmm_assist_io(mach, vcpu);
557 if (ret == -1) {
558 error_report("NVMM: I/O Assist Failed [port=%d]",
559 (int)vcpu->exit->u.io.port);
560 }
561
562 return ret;
563 }
564
565 static int
566 nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
567 struct nvmm_vcpu_exit *exit)
568 {
569 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
570 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
571 X86CPU *x86_cpu = X86_CPU(cpu);
572 struct nvmm_x64_state *state = vcpu->state;
573 uint64_t val;
574 int ret;
575
576 switch (exit->u.rdmsr.msr) {
577 case MSR_IA32_APICBASE:
578 val = cpu_get_apic_base(x86_cpu->apic_state);
579 break;
580 case MSR_MTRRcap:
581 case MSR_MTRRdefType:
582 case MSR_MCG_CAP:
583 case MSR_MCG_STATUS:
584 val = 0;
585 break;
586 default: /* More MSRs to add? */
587 val = 0;
588 error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
589 exit->u.rdmsr.msr);
590 break;
591 }
592
593 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
594 if (ret == -1) {
595 return -1;
596 }
597
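/* RDMSR returns the value in EDX:EAX; npc points past the instruction. */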
598 state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
599 state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
600 state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;
601
602 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
603 if (ret == -1) {
604 return -1;
605 }
606
607 return 0;
608 }
609
610 static int
611 nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
612 struct nvmm_vcpu_exit *exit)
613 {
614 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
615 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
616 X86CPU *x86_cpu = X86_CPU(cpu);
617 struct nvmm_x64_state *state = vcpu->state;
618 uint64_t val;
619 int ret;
620
621 val = exit->u.wrmsr.val;
622
623 switch (exit->u.wrmsr.msr) {
624 case MSR_IA32_APICBASE:
625 cpu_set_apic_base(x86_cpu->apic_state, val);
626 break;
627 case MSR_MTRRdefType:
628 case MSR_MCG_STATUS:
629 break;
630 default: /* More MSRs to add? */
631 error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
632 exit->u.wrmsr.msr, val);
633 break;
634 }
635
636 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
637 if (ret == -1) {
638 return -1;
639 }
640
641 state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;
642
643 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
644 if (ret == -1) {
645 return -1;
646 }
647
648 return 0;
649 }
650
651 static int
652 nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
653 struct nvmm_vcpu_exit *exit)
654 {
655 struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
656 int ret = 0;
657
658 qemu_mutex_lock_iothread();
659
660 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
661 (env->eflags & IF_MASK)) &&
662 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
663 cpu->exception_index = EXCP_HLT;
664 cpu->halted = true;
665 ret = 1;
666 }
667
668 qemu_mutex_unlock_iothread();
669
670 return ret;
671 }
672
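/*
 * Inject a #UD (vector 6) into the guest; used for MONITOR/MWAIT, which
 * this accelerator does not support.
 */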
673 static int
674 nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
675 {
676 struct nvmm_vcpu_event *event = vcpu->event;
677
678 event->type = NVMM_VCPU_EVENT_EXCP;
679 event->vector = 6;
680 event->u.excp.error = 0;
681
682 return nvmm_vcpu_inject(mach, vcpu);
683 }
684
685 static int
686 nvmm_vcpu_loop(CPUState *cpu)
687 {
688 struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
689 struct nvmm_machine *mach = get_nvmm_mach();
690 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
691 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
692 X86CPU *x86_cpu = X86_CPU(cpu);
693 struct nvmm_vcpu_exit *exit = vcpu->exit;
694 int ret;
695
696 /*
697 * Some asynchronous events must be handled outside of the inner
698 * VCPU loop. They are handled here.
699 */
700 if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
701 nvmm_cpu_synchronize_state(cpu);
702 do_cpu_init(x86_cpu);
703 /* set int/nmi windows back to the reset state */
704 }
705 if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
706 cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
707 apic_poll_irq(x86_cpu->apic_state);
708 }
709 if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
710 (env->eflags & IF_MASK)) ||
711 (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
712 cpu->halted = false;
713 }
714 if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
715 nvmm_cpu_synchronize_state(cpu);
716 do_cpu_sipi(x86_cpu);
717 }
718 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
719 cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
720 nvmm_cpu_synchronize_state(cpu);
721 apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
722 env->tpr_access_type);
723 }
724
725 if (cpu->halted) {
726 cpu->exception_index = EXCP_HLT;
727 qatomic_set(&cpu->exit_request, false);
728 return 0;
729 }
730
731 qemu_mutex_unlock_iothread();
732 cpu_exec_start(cpu);
733
734 /*
735 * Inner VCPU loop.
736 */
737 do {
738 if (cpu->vcpu_dirty) {
739 nvmm_set_registers(cpu);
740 cpu->vcpu_dirty = false;
741 }
742
743 if (qcpu->stop) {
744 cpu->exception_index = EXCP_INTERRUPT;
745 qcpu->stop = false;
746 ret = 1;
747 break;
748 }
749
750 nvmm_vcpu_pre_run(cpu);
751
752 if (qatomic_read(&cpu->exit_request)) {
753 nvmm_vcpu_stop(vcpu);
754 }
755
756 /* Read exit_request before the kernel reads the immediate exit flag */
757 smp_rmb();
758 ret = nvmm_vcpu_run(mach, vcpu);
759 if (ret == -1) {
760 error_report("NVMM: Failed to exec a virtual processor,"
761 " error=%d", errno);
762 break;
763 }
764
765 nvmm_vcpu_post_run(cpu, exit);
766
767 switch (exit->reason) {
768 case NVMM_VCPU_EXIT_NONE:
769 break;
770 case NVMM_VCPU_EXIT_STOPPED:
771 /*
772 * The kernel cleared the immediate exit flag; cpu->exit_request
773 * must be cleared after
774 */
775 smp_wmb();
776 qcpu->stop = true;
777 break;
778 case NVMM_VCPU_EXIT_MEMORY:
779 ret = nvmm_handle_mem(mach, vcpu);
780 break;
781 case NVMM_VCPU_EXIT_IO:
782 ret = nvmm_handle_io(mach, vcpu);
783 break;
784 case NVMM_VCPU_EXIT_INT_READY:
785 case NVMM_VCPU_EXIT_NMI_READY:
786 case NVMM_VCPU_EXIT_TPR_CHANGED:
787 break;
788 case NVMM_VCPU_EXIT_HALTED:
789 ret = nvmm_handle_halted(mach, cpu, exit);
790 break;
791 case NVMM_VCPU_EXIT_SHUTDOWN:
792 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
793 cpu->exception_index = EXCP_INTERRUPT;
794 ret = 1;
795 break;
796 case NVMM_VCPU_EXIT_RDMSR:
797 ret = nvmm_handle_rdmsr(mach, cpu, exit);
798 break;
799 case NVMM_VCPU_EXIT_WRMSR:
800 ret = nvmm_handle_wrmsr(mach, cpu, exit);
801 break;
802 case NVMM_VCPU_EXIT_MONITOR:
803 case NVMM_VCPU_EXIT_MWAIT:
804 ret = nvmm_inject_ud(mach, vcpu);
805 break;
806 default:
807 error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
808 exit->reason, exit->u.inv.hwcode);
809 nvmm_get_registers(cpu);
810 qemu_mutex_lock_iothread();
811 qemu_system_guest_panicked(cpu_get_crash_info(cpu));
812 qemu_mutex_unlock_iothread();
813 ret = -1;
814 break;
815 }
816 } while (ret == 0);
817
818 cpu_exec_end(cpu);
819 qemu_mutex_lock_iothread();
820
821 qatomic_set(&cpu->exit_request, false);
822
823 return ret < 0;
824 }
825
826 /* -------------------------------------------------------------------------- */
827
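/*
 * State synchronization helpers. cpu->vcpu_dirty tracks which side owns
 * the register state: when set, the QEMU copy is authoritative and is
 * written back with nvmm_set_registers() before the next run.
 */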
828 static void
829 do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
830 {
831 nvmm_get_registers(cpu);
832 cpu->vcpu_dirty = true;
833 }
834
835 static void
836 do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
837 {
838 nvmm_set_registers(cpu);
839 cpu->vcpu_dirty = false;
840 }
841
842 static void
843 do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
844 {
845 nvmm_set_registers(cpu);
846 cpu->vcpu_dirty = false;
847 }
848
849 static void
850 do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
851 {
852 cpu->vcpu_dirty = true;
853 }
854
855 void nvmm_cpu_synchronize_state(CPUState *cpu)
856 {
857 if (!cpu->vcpu_dirty) {
858 run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
859 }
860 }
861
862 void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
863 {
864 run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
865 }
866
867 void nvmm_cpu_synchronize_post_init(CPUState *cpu)
868 {
869 run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
870 }
871
872 void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
873 {
874 run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
875 }
876
877 /* -------------------------------------------------------------------------- */
878
879 static Error *nvmm_migration_blocker;
880
881 /*
882 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
883 * and another thread signaling the vCPU thread to exit.
884 */
885
886 static void
887 nvmm_ipi_signal(int sigcpu)
888 {
889 if (current_cpu) {
890 struct qemu_vcpu *qcpu = get_qemu_vcpu(current_cpu);
891 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
892 nvmm_vcpu_stop(vcpu);
893 }
894 }
895
896 static void
897 nvmm_init_cpu_signals(void)
898 {
899 struct sigaction sigact;
900 sigset_t set;
901
902 /* Install the IPI handler. */
903 memset(&sigact, 0, sizeof(sigact));
904 sigact.sa_handler = nvmm_ipi_signal;
905 sigaction(SIG_IPI, &sigact, NULL);
906
907 /* Allow IPIs on the current thread. */
908 sigprocmask(SIG_BLOCK, NULL, &set);
909 sigdelset(&set, SIG_IPI);
910 pthread_sigmask(SIG_SETMASK, &set, NULL);
911 }
912
913 int
914 nvmm_init_vcpu(CPUState *cpu)
915 {
916 struct nvmm_machine *mach = get_nvmm_mach();
917 struct nvmm_vcpu_conf_cpuid cpuid;
918 struct nvmm_vcpu_conf_tpr tpr;
919 Error *local_error = NULL;
920 struct qemu_vcpu *qcpu;
921 int ret, err;
922
923 nvmm_init_cpu_signals();
924
925 if (nvmm_migration_blocker == NULL) {
926 error_setg(&nvmm_migration_blocker,
927 "NVMM: Migration not supported");
928
929 if (migrate_add_blocker(nvmm_migration_blocker, &local_error) < 0) {
930 error_report_err(local_error);
931 error_free(nvmm_migration_blocker);
932 return -EINVAL;
933 }
934 }
935
936 qcpu = g_malloc0(sizeof(*qcpu));
937 if (qcpu == NULL) {
938 error_report("NVMM: Failed to allocate VCPU context.");
939 return -ENOMEM;
940 }
941
942 ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
943 if (ret == -1) {
944 err = errno;
945 error_report("NVMM: Failed to create a virtual processor,"
946 " error=%d", err);
947 g_free(qcpu);
948 return -err;
949 }
950
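/*
 * Force the MCE, MCA and MTRR bits in CPUID leaf 0x00000001 EDX, so the
 * guest sees these features as present.
 */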
951 memset(&cpuid, 0, sizeof(cpuid));
952 cpuid.mask = 1;
953 cpuid.leaf = 0x00000001;
954 cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
955 ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
956 &cpuid);
957 if (ret == -1) {
958 err = errno;
959 error_report("NVMM: Failed to configure a virtual processor,"
960 " error=%d", err);
961 g_free(qcpu);
962 return -err;
963 }
964
965 ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
966 &nvmm_callbacks);
967 if (ret == -1) {
968 err = errno;
969 error_report("NVMM: Failed to configure a virtual processor,"
970 " error=%d", err);
971 g_free(qcpu);
972 return -err;
973 }
974
975 if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
976 memset(&tpr, 0, sizeof(tpr));
977 tpr.exit_changed = 1;
978 ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
979 if (ret == -1) {
980 err = errno;
981 error_report("NVMM: Failed to configure a virtual processor,"
982 " error=%d", err);
983 g_free(qcpu);
984 return -err;
985 }
986 }
987
988 cpu->vcpu_dirty = true;
989 cpu->hax_vcpu = (struct hax_vcpu_state *)qcpu;
990
991 return 0;
992 }
993
994 int
995 nvmm_vcpu_exec(CPUState *cpu)
996 {
997 int ret, fatal;
998
999 while (1) {
1000 if (cpu->exception_index >= EXCP_INTERRUPT) {
1001 ret = cpu->exception_index;
1002 cpu->exception_index = -1;
1003 break;
1004 }
1005
1006 fatal = nvmm_vcpu_loop(cpu);
1007
1008 if (fatal) {
1009 error_report("NVMM: Failed to execute a VCPU.");
1010 abort();
1011 }
1012 }
1013
1014 return ret;
1015 }
1016
1017 void
1018 nvmm_destroy_vcpu(CPUState *cpu)
1019 {
1020 struct nvmm_machine *mach = get_nvmm_mach();
1021 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
1022
1023 nvmm_vcpu_destroy(mach, &qcpu->vcpu);
1024 g_free(cpu->hax_vcpu);
1025 }
1026
1027 /* -------------------------------------------------------------------------- */
1028
1029 static void
1030 nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
1031 bool add, bool rom, const char *name)
1032 {
1033 struct nvmm_machine *mach = get_nvmm_mach();
1034 int ret, prot;
1035
1036 if (add) {
1037 prot = PROT_READ | PROT_EXEC;
1038 if (!rom) {
1039 prot |= PROT_WRITE;
1040 }
1041 ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
1042 } else {
1043 ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
1044 }
1045
1046 if (ret == -1) {
1047 error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
1048 "Size:%p bytes, HostVA:%p, error=%d",
1049 (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
1050 (void *)size, (void *)hva, errno);
1051 }
1052 }
1053
1054 static void
1055 nvmm_process_section(MemoryRegionSection *section, int add)
1056 {
1057 MemoryRegion *mr = section->mr;
1058 hwaddr start_pa = section->offset_within_address_space;
1059 ram_addr_t size = int128_get64(section->size);
1060 unsigned int delta;
1061 uintptr_t hva;
1062
1063 if (!memory_region_is_ram(mr)) {
1064 return;
1065 }
1066
1067 /* Adjust start_pa and size so that they are page-aligned. */
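/*
 * For example, with 4 KiB host pages a section starting at GPA 0x1234 is
 * trimmed to start at 0x2000, and its size is rounded down to a multiple
 * of the page size; sub-page leftovers are simply skipped.
 */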
1068 delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
1069 delta &= ~qemu_real_host_page_mask;
1070 if (delta > size) {
1071 return;
1072 }
1073 start_pa += delta;
1074 size -= delta;
1075 size &= qemu_real_host_page_mask;
1076 if (!size || (start_pa & ~qemu_real_host_page_mask)) {
1077 return;
1078 }
1079
1080 hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
1081 section->offset_within_region + delta;
1082
1083 nvmm_update_mapping(start_pa, size, hva, add,
1084 memory_region_is_rom(mr), mr->name);
1085 }
1086
1087 static void
1088 nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
1089 {
1090 memory_region_ref(section->mr);
1091 nvmm_process_section(section, 1);
1092 }
1093
1094 static void
1095 nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
1096 {
1097 nvmm_process_section(section, 0);
1098 memory_region_unref(section->mr);
1099 }
1100
1101 static void
1102 nvmm_transaction_begin(MemoryListener *listener)
1103 {
1104 /* nothing */
1105 }
1106
1107 static void
1108 nvmm_transaction_commit(MemoryListener *listener)
1109 {
1110 /* nothing */
1111 }
1112
1113 static void
1114 nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
1115 {
1116 MemoryRegion *mr = section->mr;
1117
1118 if (!memory_region_is_ram(mr)) {
1119 return;
1120 }
1121
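/*
 * There is no dirty-page bitmap to query here, so conservatively mark the
 * whole section dirty on every sync.
 */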
1122 memory_region_set_dirty(mr, 0, int128_get64(section->size));
1123 }
1124
1125 static MemoryListener nvmm_memory_listener = {
1126 .begin = nvmm_transaction_begin,
1127 .commit = nvmm_transaction_commit,
1128 .region_add = nvmm_region_add,
1129 .region_del = nvmm_region_del,
1130 .log_sync = nvmm_log_sync,
1131 .priority = 10,
1132 };
1133
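/*
 * RAM block notifier: each new RAM block's host virtual range is
 * registered with nvmm_hva_map(), so that portions of it can later be
 * mapped into the guest physical space with nvmm_gpa_map().
 */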
1134 static void
1135 nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
1136 size_t max_size)
1137 {
1138 struct nvmm_machine *mach = get_nvmm_mach();
1139 uintptr_t hva = (uintptr_t)host;
1140 int ret;
1141
1142 ret = nvmm_hva_map(mach, hva, max_size);
1143
1144 if (ret == -1) {
1145 error_report("NVMM: Failed to map HVA, HostVA:%p "
1146 "Size:%p bytes, error=%d",
1147 (void *)hva, (void *)size, errno);
1148 }
1149 }
1150
1151 static struct RAMBlockNotifier nvmm_ram_notifier = {
1152 .ram_block_added = nvmm_ram_block_added
1153 };
1154
1155 /* -------------------------------------------------------------------------- */
1156
1157 static int
1158 nvmm_accel_init(MachineState *ms)
1159 {
1160 int ret, err;
1161
1162 ret = nvmm_init();
1163 if (ret == -1) {
1164 err = errno;
1165 error_report("NVMM: Initialization failed, error=%d", errno);
1166 return -err;
1167 }
1168
1169 ret = nvmm_capability(&qemu_mach.cap);
1170 if (ret == -1) {
1171 err = errno;
1172 error_report("NVMM: Unable to fetch capability, error=%d", errno);
1173 return -err;
1174 }
1175 if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
1176 error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
1177 return -EPROGMISMATCH;
1178 }
1179 if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
1180 error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
1181 return -EPROGMISMATCH;
1182 }
1183
1184 ret = nvmm_machine_create(&qemu_mach.mach);
1185 if (ret == -1) {
1186 err = errno;
1187 error_report("NVMM: Machine creation failed, error=%d", errno);
1188 return -err;
1189 }
1190
1191 memory_listener_register(&nvmm_memory_listener, &address_space_memory);
1192 ram_block_notifier_add(&nvmm_ram_notifier);
1193
1194 printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
1195 return 0;
1196 }
1197
1198 int
1199 nvmm_enabled(void)
1200 {
1201 return nvmm_allowed;
1202 }
1203
1204 static void
1205 nvmm_accel_class_init(ObjectClass *oc, void *data)
1206 {
1207 AccelClass *ac = ACCEL_CLASS(oc);
1208 ac->name = "NVMM";
1209 ac->init_machine = nvmm_accel_init;
1210 ac->allowed = &nvmm_allowed;
1211 }
1212
1213 static const TypeInfo nvmm_accel_type = {
1214 .name = ACCEL_CLASS_NAME("nvmm"),
1215 .parent = TYPE_ACCEL,
1216 .class_init = nvmm_accel_class_init,
1217 };
1218
1219 static void
1220 nvmm_type_init(void)
1221 {
1222 type_register_static(&nvmm_accel_type);
1223 }
1224
1225 type_init(nvmm_type_init);
1226