/* xref: /qemu/target/i386/nvmm/nvmm-all.c (revision 61653b4a) */
/*
 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
 *
 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
#include "qemu/accel.h"
#include "sysemu/nvmm.h"
#include "sysemu/cpus.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/queue.h"
#include "migration/blocker.h"
#include "strings.h"

#include "nvmm-accel-ops.h"

#include <nvmm.h>

struct AccelCPUState {
    struct nvmm_vcpu vcpu;
    uint8_t tpr;
    bool stop;
    bool dirty;

    /* Window-exiting for INTs/NMIs. */
    bool int_window_exit;
    bool nmi_window_exit;

    /* The guest is in an interrupt shadow (POP SS, etc). */
    bool int_shadow;
};

struct qemu_machine {
    struct nvmm_capability cap;
    struct nvmm_machine mach;
};

/* -------------------------------------------------------------------------- */

static bool nvmm_allowed;
static struct qemu_machine qemu_mach;

static struct nvmm_machine *
get_nvmm_mach(void)
{
    return &qemu_mach.mach;
}

/* -------------------------------------------------------------------------- */

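/*
 * Convert a QEMU segment descriptor cache into NVMM's segment layout.
 * SegmentCache.flags keeps the attribute bits in the same positions as
 * the hardware segment descriptor, so each field is extracted with
 * NetBSD's __SHIFTOUT() macro and the matching DESC_*_MASK.
 */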
static void
nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
{
    uint32_t attrib = qseg->flags;

    nseg->selector = qseg->selector;
    nseg->limit = qseg->limit;
    nseg->base = qseg->base;
    nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
    nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
    nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
    nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
    nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
    nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
    nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
    nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
}

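/*
 * Push the QEMU vCPU state (GPRs, segments, control/debug registers,
 * FPU and MSRs) into the NVMM state area and commit it to the kernel
 * with nvmm_vcpu_setstate().
 */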
static void
nvmm_set_registers(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* GPRs. */
    state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
    state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
    state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
    state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
    state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
    state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
    state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
    state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
#ifdef TARGET_X86_64
    state->gprs[NVMM_X64_GPR_R8]  = env->regs[R_R8];
    state->gprs[NVMM_X64_GPR_R9]  = env->regs[R_R9];
    state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
    state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
    state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
    state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
    state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
    state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
#endif

    /* RIP and RFLAGS. */
    state->gprs[NVMM_X64_GPR_RIP] = env->eip;
    state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;

    /* Segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);

    /* Special segments. */
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
    nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);

    /* Control registers. */
    state->crs[NVMM_X64_CR_CR0] = env->cr[0];
    state->crs[NVMM_X64_CR_CR2] = env->cr[2];
    state->crs[NVMM_X64_CR_CR3] = env->cr[3];
    state->crs[NVMM_X64_CR_CR4] = env->cr[4];
    state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
    state->crs[NVMM_X64_CR_XCR0] = env->xcr0;

    /* Debug registers. */
    state->drs[NVMM_X64_DR_DR0] = env->dr[0];
    state->drs[NVMM_X64_DR_DR1] = env->dr[1];
    state->drs[NVMM_X64_DR_DR2] = env->dr[2];
    state->drs[NVMM_X64_DR_DR3] = env->dr[3];
    state->drs[NVMM_X64_DR_DR6] = env->dr[6];
    state->drs[NVMM_X64_DR_DR7] = env->dr[7];

    /* FPU. */
    state->fpu.fx_cw = env->fpuc;
    state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
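    /*
     * fx_sw above is the x87 status word with the TOP field (bits 11-13)
     * rebuilt from env->fpstt.  fx_tw below is the abridged FXSAVE tag
     * word, in which a set bit marks a valid register; QEMU's fptags use
     * the opposite convention (1 = empty), hence the inversion.
     */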
    state->fpu.fx_tw = 0;
    for (i = 0; i < 8; i++) {
        state->fpu.fx_tw |= (!env->fptags[i]) << i;
    }
    state->fpu.fx_opcode = env->fpop;
    state->fpu.fx_ip.fa_64 = env->fpip;
    state->fpu.fx_dp.fa_64 = env->fpdp;
    state->fpu.fx_mxcsr = env->mxcsr;
    state->fpu.fx_mxcsr_mask = 0x0000FFFF;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
            &env->xmm_regs[i].ZMM_Q(0), 8);
        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
            &env->xmm_regs[i].ZMM_Q(1), 8);
    }

    /* MSRs. */
    state->msrs[NVMM_X64_MSR_EFER] = env->efer;
    state->msrs[NVMM_X64_MSR_STAR] = env->star;
#ifdef TARGET_X86_64
    state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
    state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
    state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
    state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
#endif
    state->msrs[NVMM_X64_MSR_SYSENTER_CS]  = env->sysenter_cs;
    state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
    state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
    state->msrs[NVMM_X64_MSR_PAT] = env->pat;
    state->msrs[NVMM_X64_MSR_TSC] = env->tsc;

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS  |
        NVMM_X64_STATE_DRS  |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to set virtual processor context,"
            " error=%d", errno);
    }
}

static void
nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
{
    qseg->selector = nseg->selector;
    qseg->limit = nseg->limit;
    qseg->base = nseg->base;

    qseg->flags =
        __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
        __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
}

static void
nvmm_get_registers(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t bitmap, tpr;
    size_t i;
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    bitmap =
        NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_GPRS |
        NVMM_X64_STATE_CRS  |
        NVMM_X64_STATE_DRS  |
        NVMM_X64_STATE_MSRS |
        NVMM_X64_STATE_FPU;

    ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
    if (ret == -1) {
        error_report("NVMM: Failed to get virtual processor context,"
            " error=%d", errno);
    }

    /* GPRs. */
    env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
    env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
    env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
    env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
    env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
    env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
    env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
    env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
#ifdef TARGET_X86_64
    env->regs[R_R8]  = state->gprs[NVMM_X64_GPR_R8];
    env->regs[R_R9]  = state->gprs[NVMM_X64_GPR_R9];
    env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
    env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
    env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
    env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
    env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
    env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
#endif

    /* RIP and RFLAGS. */
    env->eip = state->gprs[NVMM_X64_GPR_RIP];
    env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];

    /* Segments. */
    nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
    nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
    nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
    nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
    nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
    nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);

    /* Special segments. */
    nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
    nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
    nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
    nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);

    /* Control registers. */
    env->cr[0] = state->crs[NVMM_X64_CR_CR0];
    env->cr[2] = state->crs[NVMM_X64_CR_CR2];
    env->cr[3] = state->crs[NVMM_X64_CR_CR3];
    env->cr[4] = state->crs[NVMM_X64_CR_CR4];
    tpr = state->crs[NVMM_X64_CR_CR8];
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }
    env->xcr0 = state->crs[NVMM_X64_CR_XCR0];

    /* Debug registers. */
    env->dr[0] = state->drs[NVMM_X64_DR_DR0];
    env->dr[1] = state->drs[NVMM_X64_DR_DR1];
    env->dr[2] = state->drs[NVMM_X64_DR_DR2];
    env->dr[3] = state->drs[NVMM_X64_DR_DR3];
    env->dr[6] = state->drs[NVMM_X64_DR_DR6];
    env->dr[7] = state->drs[NVMM_X64_DR_DR7];

    /* FPU. */
    env->fpuc = state->fpu.fx_cw;
    env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
    env->fpus = state->fpu.fx_sw & ~0x3800;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
    }
    env->fpop = state->fpu.fx_opcode;
    env->fpip = state->fpu.fx_ip.fa_64;
    env->fpdp = state->fpu.fx_dp.fa_64;
    env->mxcsr = state->fpu.fx_mxcsr;
    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
    memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
    for (i = 0; i < CPU_NB_REGS; i++) {
        memcpy(&env->xmm_regs[i].ZMM_Q(0),
            &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
        memcpy(&env->xmm_regs[i].ZMM_Q(1),
            &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
    }

    /* MSRs. */
    env->efer = state->msrs[NVMM_X64_MSR_EFER];
    env->star = state->msrs[NVMM_X64_MSR_STAR];
#ifdef TARGET_X86_64
    env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
    env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
    env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
    env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
#endif
    env->sysenter_cs  = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
    env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
    env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
    env->pat = state->msrs[NVMM_X64_MSR_PAT];
    env->tsc = state->msrs[NVMM_X64_MSR_TSC];

    x86_update_hflags(env);
}

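/*
 * Check whether the vCPU can accept an external interrupt right now.
 * If it cannot (interrupt shadow, or IF clear), request an
 * interrupt-window exit so that NVMM kicks us back out as soon as
 * injection becomes possible.
 */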
static bool
nvmm_can_take_int(CPUState *cpu)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    struct nvmm_machine *mach = get_nvmm_mach();

    if (qcpu->int_window_exit) {
        return false;
    }

    if (qcpu->int_shadow || !(cpu_env(cpu)->eflags & IF_MASK)) {
        struct nvmm_x64_state *state = vcpu->state;

        /* Exit on interrupt window. */
        nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
        state->intr.int_window_exiting = 1;
        nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);

        return false;
    }

    return true;
}

static bool
nvmm_can_take_nmi(CPUState *cpu)
{
    AccelCPUState *qcpu = cpu->accel;

    /*
     * Contrary to INTs, NMIs always schedule an exit when they are
     * completed. Therefore, if window-exiting is enabled, it means
     * NMIs are blocked.
     */
    if (qcpu->nmi_window_exit) {
        return false;
    }

    return true;
}

/*
 * Called before the VCPU is run. We inject events generated by the I/O
 * thread, and synchronize the guest TPR.
 */
static void
nvmm_vcpu_pre_run(CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    struct nvmm_vcpu_event *event = vcpu->event;
    bool has_event = false;
    bool sync_tpr = false;
    uint8_t tpr;
    int ret;

    bql_lock();

    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != qcpu->tpr) {
        qcpu->tpr = tpr;
        sync_tpr = true;
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests
     * or commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        cpu->exit_request = 1;
    }

    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        if (nvmm_can_take_nmi(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = 2;
            has_event = true;
        }
    }

    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        if (nvmm_can_take_int(cpu)) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            event->type = NVMM_VCPU_EVENT_INTR;
            event->vector = cpu_get_pic_interrupt(env);
            has_event = true;
        }
    }

    /* Don't want SMIs. */
    if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
    }

    if (sync_tpr) {
        ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to get CPU state,"
                " error=%d", errno);
        }

        state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;

        ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
        if (ret == -1) {
            error_report("NVMM: Failed to set CPU state,"
                " error=%d", errno);
        }
    }

    if (has_event) {
        ret = nvmm_vcpu_inject(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to inject event,"
                " error=%d", errno);
        }
    }

    bql_unlock();
}

/*
 * Called after the VCPU ran. We synchronize the host view of the TPR and
 * RFLAGS.
 */
static void
nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    uint64_t tpr;

    env->eflags = exit->exitstate.rflags;
    qcpu->int_shadow = exit->exitstate.int_shadow;
    qcpu->int_window_exit = exit->exitstate.int_window_exiting;
    qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;

    tpr = exit->exitstate.cr8;
    if (qcpu->tpr != tpr) {
        qcpu->tpr = tpr;
        bql_lock();
        cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
        bql_unlock();
    }
}

/* -------------------------------------------------------------------------- */

static void
nvmm_io_callback(struct nvmm_io *io)
{
    MemTxAttrs attrs = { 0 };
    int ret;

    ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
        io->size, !io->in);
    if (ret != MEMTX_OK) {
        error_report("NVMM: I/O Transaction Failed "
            "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
            io->port, io->size);
    }

    /* Needed, otherwise infinite loop. */
    current_cpu->accel->dirty = false;
}

static void
nvmm_mem_callback(struct nvmm_mem *mem)
{
    cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);

    /* Needed, otherwise infinite loop. */
    current_cpu->accel->dirty = false;
}

static struct nvmm_assist_callbacks nvmm_callbacks = {
    .io = nvmm_io_callback,
    .mem = nvmm_mem_callback
};

/* -------------------------------------------------------------------------- */

static int
nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    int ret;

    ret = nvmm_assist_mem(mach, vcpu);
    if (ret == -1) {
        error_report("NVMM: Mem Assist Failed [gpa=%p]",
            (void *)vcpu->exit->u.mem.gpa);
    }

    return ret;
}

static int
nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    int ret;

    ret = nvmm_assist_io(mach, vcpu);
    if (ret == -1) {
        error_report("NVMM: I/O Assist Failed [port=%d]",
            (int)vcpu->exit->u.io.port);
    }

    return ret;
}

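/*
 * Emulate a RDMSR exit. The result is handed back to the guest in
 * EDX:EAX and RIP is advanced past the instruction (exit->u.rdmsr.npc).
 */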
static int
nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    switch (exit->u.rdmsr.msr) {
    case MSR_IA32_APICBASE:
        val = cpu_get_apic_base(x86_cpu->apic_state);
        break;
    case MSR_MTRRcap:
    case MSR_MTRRdefType:
    case MSR_MCG_CAP:
    case MSR_MCG_STATUS:
        val = 0;
        break;
    default: /* More MSRs to add? */
        val = 0;
        error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
            exit->u.rdmsr.msr);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
    state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
    state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}

static int
nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct nvmm_x64_state *state = vcpu->state;
    uint64_t val;
    int ret;

    val = exit->u.wrmsr.val;

    switch (exit->u.wrmsr.msr) {
    case MSR_IA32_APICBASE:
        cpu_set_apic_base(x86_cpu->apic_state, val);
        break;
    case MSR_MTRRdefType:
    case MSR_MCG_STATUS:
        break;
    default: /* More MSRs to add? */
        error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
            exit->u.wrmsr.msr, val);
        break;
    }

    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;

    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
    if (ret == -1) {
        return -1;
    }

    return 0;
}

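/*
 * The guest executed HLT. If no interrupt or NMI is pending that could
 * wake it up, mark the vCPU as halted and leave the inner loop.
 */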
static int
nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
    struct nvmm_vcpu_exit *exit)
{
    int ret = 0;

    bql_lock();

    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (cpu_env(cpu)->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }

    bql_unlock();

    return ret;
}

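/* Inject a #UD (vector 6) exception, used to refuse MONITOR/MWAIT. */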
static int
nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
    struct nvmm_vcpu_event *event = vcpu->event;

    event->type = NVMM_VCPU_EVENT_EXCP;
    event->vector = 6;
    event->u.excp.error = 0;

    return nvmm_vcpu_inject(mach, vcpu);
}

static int
nvmm_vcpu_loop(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;
    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    struct nvmm_vcpu_exit *exit = vcpu->exit;
    int ret;

    /*
     * Some asynchronous events must be handled outside of the inner
     * VCPU loop. They are handled here.
     */
    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        /* set int/nmi windows back to the reset state */
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        nvmm_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }
    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        nvmm_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
            env->tpr_access_type);
    }

    if (cpu->halted) {
        cpu->exception_index = EXCP_HLT;
        qatomic_set(&cpu->exit_request, false);
        return 0;
    }

    bql_unlock();
    cpu_exec_start(cpu);

    /*
     * Inner VCPU loop.
     */
    do {
        if (cpu->accel->dirty) {
            nvmm_set_registers(cpu);
            cpu->accel->dirty = false;
        }

        if (qcpu->stop) {
            cpu->exception_index = EXCP_INTERRUPT;
            qcpu->stop = false;
            ret = 1;
            break;
        }

        nvmm_vcpu_pre_run(cpu);

        if (qatomic_read(&cpu->exit_request)) {
#if NVMM_USER_VERSION >= 2
            nvmm_vcpu_stop(vcpu);
#else
            qemu_cpu_kick_self();
#endif
        }

        /* Read exit_request before the kernel reads the immediate exit flag */
        smp_rmb();
        ret = nvmm_vcpu_run(mach, vcpu);
        if (ret == -1) {
            error_report("NVMM: Failed to exec a virtual processor,"
                " error=%d", errno);
            break;
        }

        nvmm_vcpu_post_run(cpu, exit);

        switch (exit->reason) {
        case NVMM_VCPU_EXIT_NONE:
            break;
#if NVMM_USER_VERSION >= 2
        case NVMM_VCPU_EXIT_STOPPED:
            /*
             * The kernel cleared the immediate exit flag; cpu->exit_request
             * must be cleared after
             */
            smp_wmb();
            qcpu->stop = true;
            break;
#endif
        case NVMM_VCPU_EXIT_MEMORY:
            ret = nvmm_handle_mem(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_IO:
            ret = nvmm_handle_io(mach, vcpu);
            break;
        case NVMM_VCPU_EXIT_INT_READY:
        case NVMM_VCPU_EXIT_NMI_READY:
        case NVMM_VCPU_EXIT_TPR_CHANGED:
            break;
        case NVMM_VCPU_EXIT_HALTED:
            ret = nvmm_handle_halted(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_SHUTDOWN:
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;
        case NVMM_VCPU_EXIT_RDMSR:
            ret = nvmm_handle_rdmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_WRMSR:
            ret = nvmm_handle_wrmsr(mach, cpu, exit);
            break;
        case NVMM_VCPU_EXIT_MONITOR:
        case NVMM_VCPU_EXIT_MWAIT:
            ret = nvmm_inject_ud(mach, vcpu);
            break;
        default:
            error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
                exit->reason, exit->u.inv.hwcode);
            nvmm_get_registers(cpu);
            bql_lock();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            bql_unlock();
            ret = -1;
            break;
        }
    } while (ret == 0);

    cpu_exec_end(cpu);
    bql_lock();

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}

/* -------------------------------------------------------------------------- */

static void
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_get_registers(cpu);
    cpu->accel->dirty = true;
}

static void
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->accel->dirty = false;
}

static void
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    nvmm_set_registers(cpu);
    cpu->accel->dirty = false;
}

static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->accel->dirty = true;
}

void nvmm_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->accel->dirty) {
        run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

/* -------------------------------------------------------------------------- */

static Error *nvmm_migration_blocker;

/*
 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
 * and another thread signaling the vCPU thread to exit.
 */

static void
nvmm_ipi_signal(int sigcpu)
{
    if (current_cpu) {
        AccelCPUState *qcpu = current_cpu->accel;
#if NVMM_USER_VERSION >= 2
        struct nvmm_vcpu *vcpu = &qcpu->vcpu;
        nvmm_vcpu_stop(vcpu);
#else
        qcpu->stop = true;
#endif
    }
}

static void
nvmm_init_cpu_signals(void)
{
    struct sigaction sigact;
    sigset_t set;

    /* Install the IPI handler. */
    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = nvmm_ipi_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Allow IPIs on the current thread. */
    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    pthread_sigmask(SIG_SETMASK, &set, NULL);
}

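/*
 * Create and configure the NVMM vCPU backing a QEMU CPU: install the IPI
 * signal handler, register the migration blocker, create the kernel vCPU,
 * set the MCE/MCA/MTRR bits in CPUID leaf 1, hook up the assist callbacks,
 * and enable TPR-change exits when the kernel supports them.
 */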
int
nvmm_init_vcpu(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    struct nvmm_vcpu_conf_cpuid cpuid;
    struct nvmm_vcpu_conf_tpr tpr;
    Error *local_error = NULL;
    AccelCPUState *qcpu;
    int ret, err;

    nvmm_init_cpu_signals();

    if (nvmm_migration_blocker == NULL) {
        error_setg(&nvmm_migration_blocker,
            "NVMM: Migration not supported");

        if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) {
            error_report_err(local_error);
            return -EINVAL;
        }
    }

    qcpu = g_new0(AccelCPUState, 1);

    ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to create a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    memset(&cpuid, 0, sizeof(cpuid));
    cpuid.mask = 1;
    cpuid.leaf = 0x00000001;
    cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
        &cpuid);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
        &nvmm_callbacks);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Failed to configure a virtual processor,"
            " error=%d", err);
        g_free(qcpu);
        return -err;
    }

    if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
        memset(&tpr, 0, sizeof(tpr));
        tpr.exit_changed = 1;
        ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
        if (ret == -1) {
            err = errno;
            error_report("NVMM: Failed to configure a virtual processor,"
                " error=%d", err);
            g_free(qcpu);
            return -err;
        }
    }

    qcpu->dirty = true;
    cpu->accel = qcpu;

    return 0;
}

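/*
 * Outer execution loop: keep running the inner vCPU loop until it leaves
 * an exception index (EXCP_*) for the main loop to handle, and abort on
 * fatal errors.
 */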
int
nvmm_vcpu_exec(CPUState *cpu)
{
    int ret, fatal;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = nvmm_vcpu_loop(cpu);

        if (fatal) {
            error_report("NVMM: Failed to execute a VCPU.");
            abort();
        }
    }

    return ret;
}

void
nvmm_destroy_vcpu(CPUState *cpu)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    AccelCPUState *qcpu = cpu->accel;

    nvmm_vcpu_destroy(mach, &qcpu->vcpu);
    g_free(cpu->accel);
}

/* -------------------------------------------------------------------------- */

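/*
 * Map or unmap a guest-physical range onto the host virtual address of
 * its backing RAM. ROM regions are mapped without PROT_WRITE.
 */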
static void
nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
    bool add, bool rom, const char *name)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    int ret, prot;

    if (add) {
        prot = PROT_READ | PROT_EXEC;
        if (!rom) {
            prot |= PROT_WRITE;
        }
        ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
    } else {
        ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
    }

    if (ret == -1) {
        error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
            "Size:%p bytes, HostVA:%p, error=%d",
            (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
            (void *)size, (void *)hva, errno);
    }
}

static void
nvmm_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uintptr_t hva;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. */
    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask();
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
        section->offset_within_region + delta;

    nvmm_update_mapping(start_pa, size, hva, add,
        memory_region_is_rom(mr), mr->name);
}

static void
nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    nvmm_process_section(section, 1);
}

static void
nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
{
    nvmm_process_section(section, 0);
    memory_region_unref(section->mr);
}

static void
nvmm_transaction_begin(MemoryListener *listener)
{
    /* nothing */
}

static void
nvmm_transaction_commit(MemoryListener *listener)
{
    /* nothing */
}

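/*
 * Dirty logging is not hooked up to NVMM here, so the whole RAM section
 * is conservatively marked dirty on every sync.
 */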
static void
nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}

static MemoryListener nvmm_memory_listener = {
    .name = "nvmm",
    .begin = nvmm_transaction_begin,
    .commit = nvmm_transaction_commit,
    .region_add = nvmm_region_add,
    .region_del = nvmm_region_del,
    .log_sync = nvmm_log_sync,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};

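/*
 * Register newly added RAM blocks with nvmm_hva_map(), which makes the
 * host virtual range known to the machine so that nvmm_gpa_map() can
 * later map it into guest-physical space.
 */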
static void
nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
                     size_t max_size)
{
    struct nvmm_machine *mach = get_nvmm_mach();
    uintptr_t hva = (uintptr_t)host;
    int ret;

    ret = nvmm_hva_map(mach, hva, max_size);

    if (ret == -1) {
        error_report("NVMM: Failed to map HVA, HostVA:%p "
            "Size:%p bytes, error=%d",
            (void *)hva, (void *)size, errno);
    }
}

static struct RAMBlockNotifier nvmm_ram_notifier = {
    .ram_block_added = nvmm_ram_block_added
};

/* -------------------------------------------------------------------------- */

static int
nvmm_accel_init(MachineState *ms)
{
    int ret, err;

    ret = nvmm_init();
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Initialization failed, error=%d", errno);
        return -err;
    }

    ret = nvmm_capability(&qemu_mach.cap);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Unable to fetch capability, error=%d", errno);
        return -err;
    }
    if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
        error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
        return -EPROGMISMATCH;
    }
    if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
        error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
        return -EPROGMISMATCH;
    }

    ret = nvmm_machine_create(&qemu_mach.mach);
    if (ret == -1) {
        err = errno;
        error_report("NVMM: Machine creation failed, error=%d", errno);
        return -err;
    }

    memory_listener_register(&nvmm_memory_listener, &address_space_memory);
    ram_block_notifier_add(&nvmm_ram_notifier);

    printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
    return 0;
}

int
nvmm_enabled(void)
{
    return nvmm_allowed;
}

static void
nvmm_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "NVMM";
    ac->init_machine = nvmm_accel_init;
    ac->allowed = &nvmm_allowed;
}

static const TypeInfo nvmm_accel_type = {
    .name = ACCEL_CLASS_NAME("nvmm"),
    .parent = TYPE_ACCEL,
    .class_init = nvmm_accel_class_init,
};

static void
nvmm_type_init(void)
{
    type_register_static(&nvmm_accel_type);
}

type_init(nvmm_type_init);