1 /*	$NetBSD: libnvmm_x86.c,v 1.29 2019/04/28 14:22:13 maxv Exp $	*/
2 
3 /*
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Maxime Villard.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45 
46 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
47 #define __cacheline_aligned __attribute__((__aligned__(64)))
48 
49 #include <x86/specialreg.h>
50 
51 /* -------------------------------------------------------------------------- */
52 
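/*
 * Copy the x64 state components selected by 'flags' from _src to _dst.
 */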
53 static void
54 nvmm_arch_copystate(void *_dst, void *_src, uint64_t flags)
55 {
56 	struct nvmm_x64_state *src = _src;
57 	struct nvmm_x64_state *dst = _dst;
58 
59 	if (flags & NVMM_X64_STATE_GPRS) {
60 		memcpy(dst->gprs, src->gprs, sizeof(dst->gprs));
61 	}
62 	if (flags & NVMM_X64_STATE_SEGS) {
63 		memcpy(dst->segs, src->segs, sizeof(dst->segs));
64 	}
65 	if (flags & NVMM_X64_STATE_CRS) {
66 		memcpy(dst->crs, src->crs, sizeof(dst->crs));
67 	}
68 	if (flags & NVMM_X64_STATE_DRS) {
69 		memcpy(dst->drs, src->drs, sizeof(dst->drs));
70 	}
71 	if (flags & NVMM_X64_STATE_MSRS) {
72 		memcpy(dst->msrs, src->msrs, sizeof(dst->msrs));
73 	}
74 	if (flags & NVMM_X64_STATE_INTR) {
75 		memcpy(&dst->intr, &src->intr, sizeof(dst->intr));
76 	}
77 	if (flags & NVMM_X64_STATE_FPU) {
78 		memcpy(&dst->fpu, &src->fpu, sizeof(dst->fpu));
79 	}
80 }
81 
82 /* -------------------------------------------------------------------------- */
83 
84 /*
85  * Undocumented debugging function, dumps the state of a VCPU. Helpful.
86  */
87 int
88 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
89 {
90 	struct nvmm_x64_state state;
91 	uint16_t *attr;
92 	size_t i;
93 	int ret;
94 
95 	const char *segnames[] = {
96 		"ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
97 	};
98 
99 	ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
100 	if (ret == -1)
101 		return -1;
102 
103 	printf("+ VCPU id=%d\n", (int)cpuid);
104 	printf("| -> RIP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RIP]);
105 	printf("| -> RSP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RSP]);
106 	printf("| -> RAX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RAX]);
107 	printf("| -> RBX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RBX]);
108 	printf("| -> RCX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RCX]);
109 	printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
110 	for (i = 0; i < NVMM_X64_NSEG; i++) {
111 		attr = (uint16_t *)&state.segs[i].attrib;
112 		printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
113 		    segnames[i],
114 		    state.segs[i].selector,
115 		    state.segs[i].base,
116 		    state.segs[i].limit,
117 		    *attr);
118 	}
119 	printf("| -> MSR_EFER=%"PRIx64"\n", state.msrs[NVMM_X64_MSR_EFER]);
120 	printf("| -> CR0=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR0]);
121 	printf("| -> CR3=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR3]);
122 	printf("| -> CR4=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR4]);
123 	printf("| -> CR8=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR8]);
124 
125 	return 0;
126 }
127 
128 /* -------------------------------------------------------------------------- */
129 
130 #define PTE32_L1_SHIFT	12
131 #define PTE32_L2_SHIFT	22
132 
133 #define PTE32_L2_MASK	0xffc00000
134 #define PTE32_L1_MASK	0x003ff000
135 
136 #define PTE32_L2_FRAME	(PTE32_L2_MASK)
137 #define PTE32_L1_FRAME	(PTE32_L2_FRAME|PTE32_L1_MASK)
138 
139 #define pte32_l1idx(va)	(((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
140 #define pte32_l2idx(va)	(((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
141 
142 #define CR3_FRAME_32BIT	PG_FRAME
143 
144 typedef uint32_t pte_32bit_t;
145 
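/*
 * Two-level page walk, for plain 32bit paging. CR3 points to the page
 * directory (L2), and each PDE points to a page table (L1). A PDE with
 * PG_PS set maps a 4MB superpage directly, which is only legal if the
 * guest has CR4.PSE set (hence 'has_pse'). The guest tables are reached
 * through nvmm_gpa_to_hva(), and the resulting protection is the
 * intersection of the bits found at each level.
 */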
146 static int
147 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
148     gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
149 {
150 	gpaddr_t L2gpa, L1gpa;
151 	uintptr_t L2hva, L1hva;
152 	pte_32bit_t *pdir, pte;
153 	nvmm_prot_t pageprot;
154 
155 	/* We begin with an RWXU access. */
156 	*prot = NVMM_PROT_ALL;
157 
158 	/* Parse L2. */
159 	L2gpa = (cr3 & CR3_FRAME_32BIT);
160 	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
161 		return -1;
162 	pdir = (pte_32bit_t *)L2hva;
163 	pte = pdir[pte32_l2idx(gva)];
164 	if ((pte & PG_V) == 0)
165 		return -1;
166 	if ((pte & PG_u) == 0)
167 		*prot &= ~NVMM_PROT_USER;
168 	if ((pte & PG_KW) == 0)
169 		*prot &= ~NVMM_PROT_WRITE;
170 	if ((pte & PG_PS) && !has_pse)
171 		return -1;
172 	if (pte & PG_PS) {
173 		*gpa = (pte & PTE32_L2_FRAME);
174 		*gpa = *gpa + (gva & PTE32_L1_MASK);
175 		return 0;
176 	}
177 
178 	/* Parse L1. */
179 	L1gpa = (pte & PG_FRAME);
180 	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
181 		return -1;
182 	pdir = (pte_32bit_t *)L1hva;
183 	pte = pdir[pte32_l1idx(gva)];
184 	if ((pte & PG_V) == 0)
185 		return -1;
186 	if ((pte & PG_u) == 0)
187 		*prot &= ~NVMM_PROT_USER;
188 	if ((pte & PG_KW) == 0)
189 		*prot &= ~NVMM_PROT_WRITE;
190 	if (pte & PG_PS)
191 		return -1;
192 
193 	*gpa = (pte & PG_FRAME);
194 	return 0;
195 }
196 
197 /* -------------------------------------------------------------------------- */
198 
199 #define	PTE32_PAE_L1_SHIFT	12
200 #define	PTE32_PAE_L2_SHIFT	21
201 #define	PTE32_PAE_L3_SHIFT	30
202 
203 #define	PTE32_PAE_L3_MASK	0xc0000000
204 #define	PTE32_PAE_L2_MASK	0x3fe00000
205 #define	PTE32_PAE_L1_MASK	0x001ff000
206 
207 #define	PTE32_PAE_L3_FRAME	(PTE32_PAE_L3_MASK)
208 #define	PTE32_PAE_L2_FRAME	(PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
209 #define	PTE32_PAE_L1_FRAME	(PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
210 
211 #define pte32_pae_l1idx(va)	(((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
212 #define pte32_pae_l2idx(va)	(((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
213 #define pte32_pae_l3idx(va)	(((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
214 
215 #define CR3_FRAME_32BIT_PAE	__BITS(31, 5)
216 
217 typedef uint64_t pte_32bit_pae_t;
218 
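/*
 * Three-level page walk, for 32bit PAE paging. CR3 points to a 4-entry
 * PDPT (L3), which points to a page directory (L2) and then a page table
 * (L1), all with 64bit PTEs. The NX bit is meaningful here, and a PG_PS
 * entry at L2 maps a 2MB superpage.
 */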
219 static int
220 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
221     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
222 {
223 	gpaddr_t L3gpa, L2gpa, L1gpa;
224 	uintptr_t L3hva, L2hva, L1hva;
225 	pte_32bit_pae_t *pdir, pte;
226 	nvmm_prot_t pageprot;
227 
228 	/* We begin with an RWXU access. */
229 	*prot = NVMM_PROT_ALL;
230 
231 	/* Parse L3. */
232 	L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
233 	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
234 		return -1;
235 	pdir = (pte_32bit_pae_t *)L3hva;
236 	pte = pdir[pte32_pae_l3idx(gva)];
237 	if ((pte & PG_V) == 0)
238 		return -1;
239 	if (pte & PG_NX)
240 		*prot &= ~NVMM_PROT_EXEC;
241 	if (pte & PG_PS)
242 		return -1;
243 
244 	/* Parse L2. */
245 	L2gpa = (pte & PG_FRAME);
246 	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
247 		return -1;
248 	pdir = (pte_32bit_pae_t *)L2hva;
249 	pte = pdir[pte32_pae_l2idx(gva)];
250 	if ((pte & PG_V) == 0)
251 		return -1;
252 	if ((pte & PG_u) == 0)
253 		*prot &= ~NVMM_PROT_USER;
254 	if ((pte & PG_KW) == 0)
255 		*prot &= ~NVMM_PROT_WRITE;
256 	if (pte & PG_NX)
257 		*prot &= ~NVMM_PROT_EXEC;
258 	if (pte & PG_PS) {
259 		*gpa = (pte & PTE32_PAE_L2_FRAME);
260 		*gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
261 		return 0;
262 	}
263 
264 	/* Parse L1. */
265 	L1gpa = (pte & PG_FRAME);
266 	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
267 		return -1;
268 	pdir = (pte_32bit_pae_t *)L1hva;
269 	pte = pdir[pte32_pae_l1idx(gva)];
270 	if ((pte & PG_V) == 0)
271 		return -1;
272 	if ((pte & PG_u) == 0)
273 		*prot &= ~NVMM_PROT_USER;
274 	if ((pte & PG_KW) == 0)
275 		*prot &= ~NVMM_PROT_WRITE;
276 	if (pte & PG_NX)
277 		*prot &= ~NVMM_PROT_EXEC;
278 	if (pte & PG_PS)
279 		return -1;
280 
281 	*gpa = (pte & PG_FRAME);
282 	return 0;
283 }
284 
285 /* -------------------------------------------------------------------------- */
286 
287 #define PTE64_L1_SHIFT	12
288 #define PTE64_L2_SHIFT	21
289 #define PTE64_L3_SHIFT	30
290 #define PTE64_L4_SHIFT	39
291 
292 #define PTE64_L4_MASK	0x0000ff8000000000
293 #define PTE64_L3_MASK	0x0000007fc0000000
294 #define PTE64_L2_MASK	0x000000003fe00000
295 #define PTE64_L1_MASK	0x00000000001ff000
296 
297 #define PTE64_L4_FRAME	PTE64_L4_MASK
298 #define PTE64_L3_FRAME	(PTE64_L4_FRAME|PTE64_L3_MASK)
299 #define PTE64_L2_FRAME	(PTE64_L3_FRAME|PTE64_L2_MASK)
300 #define PTE64_L1_FRAME	(PTE64_L2_FRAME|PTE64_L1_MASK)
301 
302 #define pte64_l1idx(va)	(((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
303 #define pte64_l2idx(va)	(((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
304 #define pte64_l3idx(va)	(((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
305 #define pte64_l4idx(va)	(((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
306 
307 #define CR3_FRAME_64BIT	PG_FRAME
308 
309 typedef uint64_t pte_64bit_t;
310 
311 static inline bool
312 x86_gva_64bit_canonical(gvaddr_t gva)
313 {
314 	/* Bits 63:47 must have the same value. */
315 #define SIGN_EXTEND	0xffff800000000000ULL
316 	return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
317 }
318 
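/*
 * Four-level page walk, for 64bit (long mode) paging: PML4 (L4), PDPT
 * (L3), page directory (L2) and page table (L1). The GVA must be
 * canonical. PG_PS at L3 maps a 1GB superpage, PG_PS at L2 a 2MB
 * superpage; in both cases the remaining low bits of the GVA form the
 * offset into the page.
 */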
319 static int
320 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
321     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
322 {
323 	gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
324 	uintptr_t L4hva, L3hva, L2hva, L1hva;
325 	pte_64bit_t *pdir, pte;
326 	nvmm_prot_t pageprot;
327 
328 	/* We begin with an RWXU access. */
329 	*prot = NVMM_PROT_ALL;
330 
331 	if (!x86_gva_64bit_canonical(gva))
332 		return -1;
333 
334 	/* Parse L4. */
335 	L4gpa = (cr3 & CR3_FRAME_64BIT);
336 	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
337 		return -1;
338 	pdir = (pte_64bit_t *)L4hva;
339 	pte = pdir[pte64_l4idx(gva)];
340 	if ((pte & PG_V) == 0)
341 		return -1;
342 	if ((pte & PG_u) == 0)
343 		*prot &= ~NVMM_PROT_USER;
344 	if ((pte & PG_KW) == 0)
345 		*prot &= ~NVMM_PROT_WRITE;
346 	if (pte & PG_NX)
347 		*prot &= ~NVMM_PROT_EXEC;
348 	if (pte & PG_PS)
349 		return -1;
350 
351 	/* Parse L3. */
352 	L3gpa = (pte & PG_FRAME);
353 	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
354 		return -1;
355 	pdir = (pte_64bit_t *)L3hva;
356 	pte = pdir[pte64_l3idx(gva)];
357 	if ((pte & PG_V) == 0)
358 		return -1;
359 	if ((pte & PG_u) == 0)
360 		*prot &= ~NVMM_PROT_USER;
361 	if ((pte & PG_KW) == 0)
362 		*prot &= ~NVMM_PROT_WRITE;
363 	if (pte & PG_NX)
364 		*prot &= ~NVMM_PROT_EXEC;
365 	if (pte & PG_PS) {
366 		*gpa = (pte & PTE64_L3_FRAME);
367 		*gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
368 		return 0;
369 	}
370 
371 	/* Parse L2. */
372 	L2gpa = (pte & PG_FRAME);
373 	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
374 		return -1;
375 	pdir = (pte_64bit_t *)L2hva;
376 	pte = pdir[pte64_l2idx(gva)];
377 	if ((pte & PG_V) == 0)
378 		return -1;
379 	if ((pte & PG_u) == 0)
380 		*prot &= ~NVMM_PROT_USER;
381 	if ((pte & PG_KW) == 0)
382 		*prot &= ~NVMM_PROT_WRITE;
383 	if (pte & PG_NX)
384 		*prot &= ~NVMM_PROT_EXEC;
385 	if (pte & PG_PS) {
386 		*gpa = (pte & PTE64_L2_FRAME);
387 		*gpa = *gpa + (gva & PTE64_L1_MASK);
388 		return 0;
389 	}
390 
391 	/* Parse L1. */
392 	L1gpa = (pte & PG_FRAME);
393 	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
394 		return -1;
395 	pdir = (pte_64bit_t *)L1hva;
396 	pte = pdir[pte64_l1idx(gva)];
397 	if ((pte & PG_V) == 0)
398 		return -1;
399 	if ((pte & PG_u) == 0)
400 		*prot &= ~NVMM_PROT_USER;
401 	if ((pte & PG_KW) == 0)
402 		*prot &= ~NVMM_PROT_WRITE;
403 	if (pte & PG_NX)
404 		*prot &= ~NVMM_PROT_EXEC;
405 	if (pte & PG_PS)
406 		return -1;
407 
408 	*gpa = (pte & PG_FRAME);
409 	return 0;
410 }
411 
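/*
 * Top-level GVA->GPA translation. The paging mode is taken from the guest
 * state: no translation if CR0.PG is clear, otherwise 64bit (CR4.PAE +
 * EFER.LMA), 32bit PAE (CR4.PAE only), or plain 32bit. The page offset is
 * preserved, and errno is set to EFAULT on failure.
 */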
412 static inline int
413 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
414     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
415 {
416 	bool is_pae, is_lng, has_pse;
417 	uint64_t cr3;
418 	size_t off;
419 	int ret;
420 
421 	if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
422 		/* No paging. */
423 		*prot = NVMM_PROT_ALL;
424 		*gpa = gva;
425 		return 0;
426 	}
427 
428 	off = (gva & PAGE_MASK);
429 	gva &= ~PAGE_MASK;
430 
431 	is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
432 	is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
433 	has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
434 	cr3 = state->crs[NVMM_X64_CR_CR3];
435 
436 	if (is_pae && is_lng) {
437 		/* 64bit */
438 		ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
439 	} else if (is_pae && !is_lng) {
440 		/* 32bit PAE */
441 		ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
442 	} else if (!is_pae && !is_lng) {
443 		/* 32bit */
444 		ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
445 	} else {
446 		ret = -1;
447 	}
448 
449 	if (ret == -1) {
450 		errno = EFAULT;
451 	}
452 
453 	*gpa = *gpa + off;
454 
455 	return ret;
456 }
457 
458 int
459 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
460     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
461 {
462 	struct nvmm_x64_state state;
463 	int ret;
464 
465 	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
466 	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
467 	if (ret == -1)
468 		return -1;
469 
470 	return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
471 }
472 
473 /* -------------------------------------------------------------------------- */
474 
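/*
 * The current code size is derived from the CS segment attributes: CS.L
 * set means 64bit, CS.L clear with CS.D set means 32bit, and both clear
 * means 16bit. is_long_mode() checks EFER.LMA instead, i.e. whether long
 * mode (including compatibility mode) is active at all.
 */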
475 static inline bool
476 is_long_mode(struct nvmm_x64_state *state)
477 {
478 	return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
479 }
480 
481 static inline bool
482 is_64bit(struct nvmm_x64_state *state)
483 {
484 	return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
485 }
486 
487 static inline bool
488 is_32bit(struct nvmm_x64_state *state)
489 {
490 	return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
491 	    (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
492 }
493 
494 static inline bool
495 is_16bit(struct nvmm_x64_state *state)
496 {
497 	return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
498 	    (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
499 }
500 
501 static int
502 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
503 {
504 	uint64_t limit;
505 
506 	/*
507 	 * This is incomplete. We should check topdown, etc, really that's
508 	 * tiring.
509 	 */
510 	if (__predict_false(!seg->attrib.p)) {
511 		goto error;
512 	}
513 
514 	limit = (uint64_t)seg->limit + 1;
515 	if (__predict_true(seg->attrib.g)) {
516 		limit *= PAGE_SIZE;
517 	}
518 
519 	if (__predict_false(gva + size > limit)) {
520 		goto error;
521 	}
522 
523 	return 0;
524 
525 error:
526 	errno = EFAULT;
527 	return -1;
528 }
529 
530 static inline void
531 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
532 {
533 	*gva += seg->base;
534 }
535 
536 static inline uint64_t
537 size_to_mask(size_t size)
538 {
539 	switch (size) {
540 	case 1:
541 		return 0x00000000000000FF;
542 	case 2:
543 		return 0x000000000000FFFF;
544 	case 4:
545 		return 0x00000000FFFFFFFF;
546 	case 8:
547 	default:
548 		return 0xFFFFFFFFFFFFFFFF;
549 	}
550 }
551 
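/*
 * The REP iteration count lives in rCX, truncated to the address size of
 * the instruction (CX, ECX or RCX).
 */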
552 static uint64_t
553 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
554 {
555 	uint64_t mask, cnt;
556 
557 	mask = size_to_mask(adsize);
558 	cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
559 
560 	return cnt;
561 }
562 
563 static void
564 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
565 {
566 	uint64_t mask;
567 
568 	/* XXX: should we zero-extend? */
569 	mask = size_to_mask(adsize);
570 	state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
571 	state->gprs[NVMM_X64_GPR_RCX] |= cnt;
572 }
573 
574 static int
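/*
 * read_guest_memory() and write_guest_memory() translate the GVA and
 * either copy through the direct HVA mapping or, if the GPA is not mapped
 * in the machine (MMIO), forward the access to the registered memory
 * callback. An access that crosses a page boundary is split, the
 * remainder being handled by a recursive call on the next page.
 */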
575 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
576     gvaddr_t gva, uint8_t *data, size_t size)
577 {
578 	struct nvmm_mem mem;
579 	nvmm_prot_t prot;
580 	gpaddr_t gpa;
581 	uintptr_t hva;
582 	bool is_mmio;
583 	int ret, remain;
584 
585 	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
586 	if (__predict_false(ret == -1)) {
587 		return -1;
588 	}
589 	if (__predict_false(!(prot & NVMM_PROT_READ))) {
590 		errno = EFAULT;
591 		return -1;
592 	}
593 
594 	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
595 		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
596 	} else {
597 		remain = 0;
598 	}
599 	size -= remain;
600 
601 	ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
602 	is_mmio = (ret == -1);
603 
604 	if (is_mmio) {
605 		mem.data = data;
606 		mem.gpa = gpa;
607 		mem.write = false;
608 		mem.size = size;
609 		(*__callbacks.mem)(&mem);
610 	} else {
611 		if (__predict_false(!(prot & NVMM_PROT_READ))) {
612 			errno = EFAULT;
613 			return -1;
614 		}
615 		memcpy(data, (uint8_t *)hva, size);
616 	}
617 
618 	if (remain > 0) {
619 		ret = read_guest_memory(mach, state, gva + size,
620 		    data + size, remain);
621 	} else {
622 		ret = 0;
623 	}
624 
625 	return ret;
626 }
627 
628 static int
629 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
630     gvaddr_t gva, uint8_t *data, size_t size)
631 {
632 	struct nvmm_mem mem;
633 	nvmm_prot_t prot;
634 	gpaddr_t gpa;
635 	uintptr_t hva;
636 	bool is_mmio;
637 	int ret, remain;
638 
639 	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
640 	if (__predict_false(ret == -1)) {
641 		return -1;
642 	}
643 	if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
644 		errno = EFAULT;
645 		return -1;
646 	}
647 
648 	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
649 		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
650 	} else {
651 		remain = 0;
652 	}
653 	size -= remain;
654 
655 	ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
656 	is_mmio = (ret == -1);
657 
658 	if (is_mmio) {
659 		mem.data = data;
660 		mem.gpa = gpa;
661 		mem.write = true;
662 		mem.size = size;
663 		(*__callbacks.mem)(&mem);
664 	} else {
665 		if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
666 			errno = EFAULT;
667 			return -1;
668 		}
669 		memcpy((uint8_t *)hva, data, size);
670 	}
671 
672 	if (remain > 0) {
673 		ret = write_guest_memory(mach, state, gva + size,
674 		    data + size, remain);
675 	} else {
676 		ret = 0;
677 	}
678 
679 	return ret;
680 }
681 
682 /* -------------------------------------------------------------------------- */
683 
684 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
685 
686 #define NVMM_IO_BATCH_SIZE	32
687 
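/*
 * Batch several iterations of a REP INS/OUTS into a single guest memory
 * access: up to NVMM_IO_BATCH_SIZE bytes are read or written in one go,
 * and the I/O callback is invoked once per element. Returns the number of
 * elements processed, or -1 on failure.
 */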
688 static int
689 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
690     struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
691 {
692 	uint8_t iobuf[NVMM_IO_BATCH_SIZE];
693 	size_t i, iosize, iocnt;
694 	int ret;
695 
696 	cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
697 	iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
698 	iocnt = iosize / io->size;
699 
700 	io->data = iobuf;
701 
702 	if (!io->in) {
703 		ret = read_guest_memory(mach, state, gva, iobuf, iosize);
704 		if (ret == -1)
705 			return -1;
706 	}
707 
708 	for (i = 0; i < iocnt; i++) {
709 		(*__callbacks.io)(io);
710 		io->data += io->size;
711 	}
712 
713 	if (io->in) {
714 		ret = write_guest_memory(mach, state, gva, iobuf, iosize);
715 		if (ret == -1)
716 			return -1;
717 	}
718 
719 	return iocnt;
720 }
721 
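/*
 * Emulate the instruction behind an NVMM_EXIT_IO exit: fetch the relevant
 * VCPU state, compute the operand (register, or string memory operand for
 * INS/OUTS), invoke the I/O callback, then update RSI/RDI, rCX and RIP as
 * required for the string and REP forms.
 *
 * Minimal usage sketch from a VMM run loop. Illustrative only: the loop
 * shape and the error handling are assumptions, not part of this file.
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(&mach, cpuid, &exit) == 0) {
 *		switch (exit.reason) {
 *		case NVMM_EXIT_IO:
 *			if (nvmm_assist_io(&mach, cpuid, &exit) == -1)
 *				err(EXIT_FAILURE, "nvmm_assist_io");
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */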
722 int
723 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
724     struct nvmm_exit *exit)
725 {
726 	struct nvmm_x64_state state;
727 	struct nvmm_io io;
728 	uint64_t cnt = 0; /* GCC */
729 	uint8_t iobuf[8];
730 	int iocnt = 1;
731 	gvaddr_t gva = 0; /* GCC */
732 	int reg = 0; /* GCC */
733 	int ret, seg;
734 	bool psld = false;
735 
736 	if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
737 		errno = EINVAL;
738 		return -1;
739 	}
740 
741 	io.port = exit->u.io.port;
742 	io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
743 	io.size = exit->u.io.operand_size;
744 	io.data = iobuf;
745 
746 	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
747 	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
748 	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
749 	if (ret == -1)
750 		return -1;
751 
752 	if (exit->u.io.rep) {
753 		cnt = rep_get_cnt(&state, exit->u.io.address_size);
754 		if (__predict_false(cnt == 0)) {
755 			state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
756 			goto out;
757 		}
758 	}
759 
760 	if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
761 		psld = true;
762 	}
763 
764 	/*
765 	 * Determine GVA.
766 	 */
767 	if (exit->u.io.str) {
768 		if (io.in) {
769 			reg = NVMM_X64_GPR_RDI;
770 		} else {
771 			reg = NVMM_X64_GPR_RSI;
772 		}
773 
774 		gva = state.gprs[reg];
775 		gva &= size_to_mask(exit->u.io.address_size);
776 
777 		if (exit->u.io.seg != -1) {
778 			seg = exit->u.io.seg;
779 		} else {
780 			if (io.in) {
781 				seg = NVMM_X64_SEG_ES;
782 			} else {
783 				seg = fetch_segment(mach, &state);
784 				if (seg == -1)
785 					return -1;
786 			}
787 		}
788 
789 		if (__predict_true(is_long_mode(&state))) {
790 			if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
791 				segment_apply(&state.segs[seg], &gva);
792 			}
793 		} else {
794 			ret = segment_check(&state.segs[seg], gva, io.size);
795 			if (ret == -1)
796 				return -1;
797 			segment_apply(&state.segs[seg], &gva);
798 		}
799 
800 		if (exit->u.io.rep && !psld) {
801 			iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
802 			if (iocnt == -1)
803 				return -1;
804 			goto done;
805 		}
806 	}
807 
808 	if (!io.in) {
809 		if (!exit->u.io.str) {
810 			memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
811 		} else {
812 			ret = read_guest_memory(mach, &state, gva, io.data,
813 			    io.size);
814 			if (ret == -1)
815 				return -1;
816 		}
817 	}
818 
819 	(*__callbacks.io)(&io);
820 
821 	if (io.in) {
822 		if (!exit->u.io.str) {
823 			memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
824 			if (io.size == 4) {
825 				/* Zero-extend to 64 bits. */
826 				state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
827 			}
828 		} else {
829 			ret = write_guest_memory(mach, &state, gva, io.data,
830 			    io.size);
831 			if (ret == -1)
832 				return -1;
833 		}
834 	}
835 
836 done:
837 	if (exit->u.io.str) {
838 		if (__predict_false(psld)) {
839 			state.gprs[reg] -= iocnt * io.size;
840 		} else {
841 			state.gprs[reg] += iocnt * io.size;
842 		}
843 	}
844 
845 	if (exit->u.io.rep) {
846 		cnt -= iocnt;
847 		rep_set_cnt(&state, exit->u.io.address_size, cnt);
848 		if (cnt == 0) {
849 			state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
850 		}
851 	} else {
852 		state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
853 	}
854 
855 out:
856 	ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
857 	if (ret == -1)
858 		return -1;
859 
860 	return 0;
861 }
862 
863 /* -------------------------------------------------------------------------- */
864 
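/*
 * Emulation descriptors for the MMIO assist. 'read' is set for the
 * read-modify-write operations that must fetch the memory operand before
 * applying the operation (OR, AND, SUB, XOR); 'notouch' for operations
 * that only update the flags and write nothing back (CMP, TEST); 'func'
 * performs the operation on the nvmm_mem transaction.
 */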
865 struct x86_emul {
866 	bool read;
867 	bool notouch;
868 	void (*func)(struct nvmm_mem *, uint64_t *);
869 };
870 
871 static void x86_func_or(struct nvmm_mem *, uint64_t *);
872 static void x86_func_and(struct nvmm_mem *, uint64_t *);
873 static void x86_func_sub(struct nvmm_mem *, uint64_t *);
874 static void x86_func_xor(struct nvmm_mem *, uint64_t *);
875 static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
876 static void x86_func_test(struct nvmm_mem *, uint64_t *);
877 static void x86_func_mov(struct nvmm_mem *, uint64_t *);
878 static void x86_func_stos(struct nvmm_mem *, uint64_t *);
879 static void x86_func_lods(struct nvmm_mem *, uint64_t *);
880 static void x86_func_movs(struct nvmm_mem *, uint64_t *);
881 
882 static const struct x86_emul x86_emul_or = {
883 	.read = true,
884 	.func = x86_func_or
885 };
886 
887 static const struct x86_emul x86_emul_and = {
888 	.read = true,
889 	.func = x86_func_and
890 };
891 
892 static const struct x86_emul x86_emul_sub = {
893 	.read = true,
894 	.func = x86_func_sub
895 };
896 
897 static const struct x86_emul x86_emul_xor = {
898 	.read = true,
899 	.func = x86_func_xor
900 };
901 
902 static const struct x86_emul x86_emul_cmp = {
903 	.notouch = true,
904 	.func = x86_func_cmp
905 };
906 
907 static const struct x86_emul x86_emul_test = {
908 	.notouch = true,
909 	.func = x86_func_test
910 };
911 
912 static const struct x86_emul x86_emul_mov = {
913 	.func = x86_func_mov
914 };
915 
916 static const struct x86_emul x86_emul_stos = {
917 	.func = x86_func_stos
918 };
919 
920 static const struct x86_emul x86_emul_lods = {
921 	.func = x86_func_lods
922 };
923 
924 static const struct x86_emul x86_emul_movs = {
925 	.func = x86_func_movs
926 };
927 
928 /* Legacy prefixes. */
929 #define LEG_LOCK	0xF0
930 #define LEG_REPN	0xF2
931 #define LEG_REP		0xF3
932 #define LEG_OVR_CS	0x2E
933 #define LEG_OVR_SS	0x36
934 #define LEG_OVR_DS	0x3E
935 #define LEG_OVR_ES	0x26
936 #define LEG_OVR_FS	0x64
937 #define LEG_OVR_GS	0x65
938 #define LEG_OPR_OVR	0x66
939 #define LEG_ADR_OVR	0x67
940 
941 struct x86_legpref {
942 	bool opr_ovr:1;
943 	bool adr_ovr:1;
944 	bool rep:1;
945 	bool repn:1;
946 	int8_t seg;
947 };
948 
949 struct x86_rexpref {
950 	bool b:1;
951 	bool x:1;
952 	bool r:1;
953 	bool w:1;
954 	bool present:1;
955 };
956 
957 struct x86_reg {
958 	int num;	/* NVMM GPR state index */
959 	uint64_t mask;
960 };
961 
962 enum x86_disp_type {
963 	DISP_NONE,
964 	DISP_0,
965 	DISP_1,
966 	DISP_4
967 };
968 
969 struct x86_disp {
970 	enum x86_disp_type type;
971 	uint64_t data; /* 4 bytes, but can be sign-extended */
972 };
973 
974 enum REGMODRM__Mod {
975 	MOD_DIS0, /* also, register indirect */
976 	MOD_DIS1,
977 	MOD_DIS4,
978 	MOD_REG
979 };
980 
981 enum REGMODRM__Reg {
982 	REG_000, /* these fields are indexes to the register map */
983 	REG_001,
984 	REG_010,
985 	REG_011,
986 	REG_100,
987 	REG_101,
988 	REG_110,
989 	REG_111
990 };
991 
992 enum REGMODRM__Rm {
993 	RM_000, /* reg */
994 	RM_001, /* reg */
995 	RM_010, /* reg */
996 	RM_011, /* reg */
997 	RM_RSP_SIB, /* reg or SIB, depending on the MOD */
998 	RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
999 	RM_110,
1000 	RM_111
1001 };
1002 
1003 struct x86_regmodrm {
1004 	uint8_t mod:2;
1005 	uint8_t reg:3;
1006 	uint8_t rm:3;
1007 };
1008 
1009 struct x86_immediate {
1010 	uint64_t data;
1011 };
1012 
1013 struct x86_sib {
1014 	uint8_t scale;
1015 	const struct x86_reg *idx;
1016 	const struct x86_reg *bas;
1017 };
1018 
1019 enum x86_store_type {
1020 	STORE_NONE,
1021 	STORE_REG,
1022 	STORE_IMM,
1023 	STORE_SIB,
1024 	STORE_DMO
1025 };
1026 
1027 struct x86_store {
1028 	enum x86_store_type type;
1029 	union {
1030 		const struct x86_reg *reg;
1031 		struct x86_immediate imm;
1032 		struct x86_sib sib;
1033 		uint64_t dmo;
1034 	} u;
1035 	struct x86_disp disp;
1036 	int hardseg;
1037 };
1038 
1039 struct x86_instr {
1040 	uint8_t len;
1041 	struct x86_legpref legpref;
1042 	struct x86_rexpref rexpref;
1043 	struct x86_regmodrm regmodrm;
1044 	uint8_t operand_size;
1045 	uint8_t address_size;
1046 	uint64_t zeroextend_mask;
1047 
1048 	const struct x86_opcode *opcode;
1049 	const struct x86_emul *emul;
1050 
1051 	struct x86_store src;
1052 	struct x86_store dst;
1053 	struct x86_store *strm;
1054 };
1055 
1056 struct x86_decode_fsm {
1057 	/* vcpu */
1058 	bool is64bit;
1059 	bool is32bit;
1060 	bool is16bit;
1061 
1062 	/* fsm */
1063 	int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1064 	uint8_t *buf;
1065 	uint8_t *end;
1066 };
1067 
1068 struct x86_opcode {
1069 	bool valid:1;
1070 	bool regmodrm:1;
1071 	bool regtorm:1;
1072 	bool dmo:1;
1073 	bool todmo:1;
1074 	bool movs:1;
1075 	bool stos:1;
1076 	bool lods:1;
1077 	bool szoverride:1;
1078 	bool group1:1;
1079 	bool group3:1;
1080 	bool group11:1;
1081 	bool immediate:1;
1082 	uint8_t defsize;
1083 	uint8_t flags;
1084 	const struct x86_emul *emul;
1085 };
1086 
1087 struct x86_group_entry {
1088 	const struct x86_emul *emul;
1089 };
1090 
1091 #define OPSIZE_BYTE 0x01
1092 #define OPSIZE_WORD 0x02 /* 2 bytes */
1093 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1094 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1095 
1096 #define FLAG_imm8	0x01
1097 #define FLAG_immz	0x02
1098 #define FLAG_ze		0x04
1099 
1100 static const struct x86_group_entry group1[8] __cacheline_aligned = {
1101 	[1] = { .emul = &x86_emul_or },
1102 	[4] = { .emul = &x86_emul_and },
1103 	[6] = { .emul = &x86_emul_xor },
1104 	[7] = { .emul = &x86_emul_cmp }
1105 };
1106 
1107 static const struct x86_group_entry group3[8] __cacheline_aligned = {
1108 	[0] = { .emul = &x86_emul_test },
1109 	[1] = { .emul = &x86_emul_test }
1110 };
1111 
1112 static const struct x86_group_entry group11[8] __cacheline_aligned = {
1113 	[0] = { .emul = &x86_emul_mov }
1114 };
1115 
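/*
 * Decode tables for the one-byte (primary) and 0x0F-escaped (secondary)
 * opcode maps, indexed by opcode byte. Only the subset of instructions
 * handled by the MMIO assist is described; the operand notation in the
 * comments (Eb, Ev, Gb, Gv, Ib, Iz, ...) follows the Intel SDM.
 */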
1116 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
1117 	/*
1118 	 * Group1
1119 	 */
1120 	[0x80] = {
1121 		/* Eb, Ib */
1122 		.valid = true,
1123 		.regmodrm = true,
1124 		.regtorm = true,
1125 		.szoverride = false,
1126 		.defsize = OPSIZE_BYTE,
1127 		.group1 = true,
1128 		.immediate = true,
1129 		.emul = NULL /* group1 */
1130 	},
1131 	[0x81] = {
1132 		/* Ev, Iz */
1133 		.valid = true,
1134 		.regmodrm = true,
1135 		.regtorm = true,
1136 		.szoverride = true,
1137 		.defsize = -1,
1138 		.group1 = true,
1139 		.immediate = true,
1140 		.flags = FLAG_immz,
1141 		.emul = NULL /* group1 */
1142 	},
1143 	[0x83] = {
1144 		/* Ev, Ib */
1145 		.valid = true,
1146 		.regmodrm = true,
1147 		.regtorm = true,
1148 		.szoverride = true,
1149 		.defsize = -1,
1150 		.group1 = true,
1151 		.immediate = true,
1152 		.flags = FLAG_imm8,
1153 		.emul = NULL /* group1 */
1154 	},
1155 
1156 	/*
1157 	 * Group3
1158 	 */
1159 	[0xF6] = {
1160 		/* Eb, Ib */
1161 		.valid = true,
1162 		.regmodrm = true,
1163 		.regtorm = true,
1164 		.szoverride = false,
1165 		.defsize = OPSIZE_BYTE,
1166 		.group3 = true,
1167 		.immediate = true,
1168 		.emul = NULL /* group3 */
1169 	},
1170 	[0xF7] = {
1171 		/* Ev, Iz */
1172 		.valid = true,
1173 		.regmodrm = true,
1174 		.regtorm = true,
1175 		.szoverride = true,
1176 		.defsize = -1,
1177 		.group3 = true,
1178 		.immediate = true,
1179 		.flags = FLAG_immz,
1180 		.emul = NULL /* group3 */
1181 	},
1182 
1183 	/*
1184 	 * Group11
1185 	 */
1186 	[0xC6] = {
1187 		/* Eb, Ib */
1188 		.valid = true,
1189 		.regmodrm = true,
1190 		.regtorm = true,
1191 		.szoverride = false,
1192 		.defsize = OPSIZE_BYTE,
1193 		.group11 = true,
1194 		.immediate = true,
1195 		.emul = NULL /* group11 */
1196 	},
1197 	[0xC7] = {
1198 		/* Ev, Iz */
1199 		.valid = true,
1200 		.regmodrm = true,
1201 		.regtorm = true,
1202 		.szoverride = true,
1203 		.defsize = -1,
1204 		.group11 = true,
1205 		.immediate = true,
1206 		.flags = FLAG_immz,
1207 		.emul = NULL /* group11 */
1208 	},
1209 
1210 	/*
1211 	 * OR
1212 	 */
1213 	[0x08] = {
1214 		/* Eb, Gb */
1215 		.valid = true,
1216 		.regmodrm = true,
1217 		.regtorm = true,
1218 		.szoverride = false,
1219 		.defsize = OPSIZE_BYTE,
1220 		.emul = &x86_emul_or
1221 	},
1222 	[0x09] = {
1223 		/* Ev, Gv */
1224 		.valid = true,
1225 		.regmodrm = true,
1226 		.regtorm = true,
1227 		.szoverride = true,
1228 		.defsize = -1,
1229 		.emul = &x86_emul_or
1230 	},
1231 	[0x0A] = {
1232 		/* Gb, Eb */
1233 		.valid = true,
1234 		.regmodrm = true,
1235 		.regtorm = false,
1236 		.szoverride = false,
1237 		.defsize = OPSIZE_BYTE,
1238 		.emul = &x86_emul_or
1239 	},
1240 	[0x0B] = {
1241 		/* Gv, Ev */
1242 		.valid = true,
1243 		.regmodrm = true,
1244 		.regtorm = false,
1245 		.szoverride = true,
1246 		.defsize = -1,
1247 		.emul = &x86_emul_or
1248 	},
1249 
1250 	/*
1251 	 * AND
1252 	 */
1253 	[0x20] = {
1254 		/* Eb, Gb */
1255 		.valid = true,
1256 		.regmodrm = true,
1257 		.regtorm = true,
1258 		.szoverride = false,
1259 		.defsize = OPSIZE_BYTE,
1260 		.emul = &x86_emul_and
1261 	},
1262 	[0x21] = {
1263 		/* Ev, Gv */
1264 		.valid = true,
1265 		.regmodrm = true,
1266 		.regtorm = true,
1267 		.szoverride = true,
1268 		.defsize = -1,
1269 		.emul = &x86_emul_and
1270 	},
1271 	[0x22] = {
1272 		/* Gb, Eb */
1273 		.valid = true,
1274 		.regmodrm = true,
1275 		.regtorm = false,
1276 		.szoverride = false,
1277 		.defsize = OPSIZE_BYTE,
1278 		.emul = &x86_emul_and
1279 	},
1280 	[0x23] = {
1281 		/* Gv, Ev */
1282 		.valid = true,
1283 		.regmodrm = true,
1284 		.regtorm = false,
1285 		.szoverride = true,
1286 		.defsize = -1,
1287 		.emul = &x86_emul_and
1288 	},
1289 
1290 	/*
1291 	 * SUB
1292 	 */
1293 	[0x28] = {
1294 		/* Eb, Gb */
1295 		.valid = true,
1296 		.regmodrm = true,
1297 		.regtorm = true,
1298 		.szoverride = false,
1299 		.defsize = OPSIZE_BYTE,
1300 		.emul = &x86_emul_sub
1301 	},
1302 	[0x29] = {
1303 		/* Ev, Gv */
1304 		.valid = true,
1305 		.regmodrm = true,
1306 		.regtorm = true,
1307 		.szoverride = true,
1308 		.defsize = -1,
1309 		.emul = &x86_emul_sub
1310 	},
1311 	[0x2A] = {
1312 		/* Gb, Eb */
1313 		.valid = true,
1314 		.regmodrm = true,
1315 		.regtorm = false,
1316 		.szoverride = false,
1317 		.defsize = OPSIZE_BYTE,
1318 		.emul = &x86_emul_sub
1319 	},
1320 	[0x2B] = {
1321 		/* Gv, Ev */
1322 		.valid = true,
1323 		.regmodrm = true,
1324 		.regtorm = false,
1325 		.szoverride = true,
1326 		.defsize = -1,
1327 		.emul = &x86_emul_sub
1328 	},
1329 
1330 	/*
1331 	 * XOR
1332 	 */
1333 	[0x30] = {
1334 		/* Eb, Gb */
1335 		.valid = true,
1336 		.regmodrm = true,
1337 		.regtorm = true,
1338 		.szoverride = false,
1339 		.defsize = OPSIZE_BYTE,
1340 		.emul = &x86_emul_xor
1341 	},
1342 	[0x31] = {
1343 		/* Ev, Gv */
1344 		.valid = true,
1345 		.regmodrm = true,
1346 		.regtorm = true,
1347 		.szoverride = true,
1348 		.defsize = -1,
1349 		.emul = &x86_emul_xor
1350 	},
1351 	[0x32] = {
1352 		/* Gb, Eb */
1353 		.valid = true,
1354 		.regmodrm = true,
1355 		.regtorm = false,
1356 		.szoverride = false,
1357 		.defsize = OPSIZE_BYTE,
1358 		.emul = &x86_emul_xor
1359 	},
1360 	[0x33] = {
1361 		/* Gv, Ev */
1362 		.valid = true,
1363 		.regmodrm = true,
1364 		.regtorm = false,
1365 		.szoverride = true,
1366 		.defsize = -1,
1367 		.emul = &x86_emul_xor
1368 	},
1369 
1370 	/*
1371 	 * MOV
1372 	 */
1373 	[0x88] = {
1374 		/* Eb, Gb */
1375 		.valid = true,
1376 		.regmodrm = true,
1377 		.regtorm = true,
1378 		.szoverride = false,
1379 		.defsize = OPSIZE_BYTE,
1380 		.emul = &x86_emul_mov
1381 	},
1382 	[0x89] = {
1383 		/* Ev, Gv */
1384 		.valid = true,
1385 		.regmodrm = true,
1386 		.regtorm = true,
1387 		.szoverride = true,
1388 		.defsize = -1,
1389 		.emul = &x86_emul_mov
1390 	},
1391 	[0x8A] = {
1392 		/* Gb, Eb */
1393 		.valid = true,
1394 		.regmodrm = true,
1395 		.regtorm = false,
1396 		.szoverride = false,
1397 		.defsize = OPSIZE_BYTE,
1398 		.emul = &x86_emul_mov
1399 	},
1400 	[0x8B] = {
1401 		/* Gv, Ev */
1402 		.valid = true,
1403 		.regmodrm = true,
1404 		.regtorm = false,
1405 		.szoverride = true,
1406 		.defsize = -1,
1407 		.emul = &x86_emul_mov
1408 	},
1409 	[0xA0] = {
1410 		/* AL, Ob */
1411 		.valid = true,
1412 		.dmo = true,
1413 		.todmo = false,
1414 		.szoverride = false,
1415 		.defsize = OPSIZE_BYTE,
1416 		.emul = &x86_emul_mov
1417 	},
1418 	[0xA1] = {
1419 		/* rAX, Ov */
1420 		.valid = true,
1421 		.dmo = true,
1422 		.todmo = false,
1423 		.szoverride = true,
1424 		.defsize = -1,
1425 		.emul = &x86_emul_mov
1426 	},
1427 	[0xA2] = {
1428 		/* Ob, AL */
1429 		.valid = true,
1430 		.dmo = true,
1431 		.todmo = true,
1432 		.szoverride = false,
1433 		.defsize = OPSIZE_BYTE,
1434 		.emul = &x86_emul_mov
1435 	},
1436 	[0xA3] = {
1437 		/* Ov, rAX */
1438 		.valid = true,
1439 		.dmo = true,
1440 		.todmo = true,
1441 		.szoverride = true,
1442 		.defsize = -1,
1443 		.emul = &x86_emul_mov
1444 	},
1445 
1446 	/*
1447 	 * MOVS
1448 	 */
1449 	[0xA4] = {
1450 		/* Yb, Xb */
1451 		.valid = true,
1452 		.movs = true,
1453 		.szoverride = false,
1454 		.defsize = OPSIZE_BYTE,
1455 		.emul = &x86_emul_movs
1456 	},
1457 	[0xA5] = {
1458 		/* Yv, Xv */
1459 		.valid = true,
1460 		.movs = true,
1461 		.szoverride = true,
1462 		.defsize = -1,
1463 		.emul = &x86_emul_movs
1464 	},
1465 
1466 	/*
1467 	 * STOS
1468 	 */
1469 	[0xAA] = {
1470 		/* Yb, AL */
1471 		.valid = true,
1472 		.stos = true,
1473 		.szoverride = false,
1474 		.defsize = OPSIZE_BYTE,
1475 		.emul = &x86_emul_stos
1476 	},
1477 	[0xAB] = {
1478 		/* Yv, rAX */
1479 		.valid = true,
1480 		.stos = true,
1481 		.szoverride = true,
1482 		.defsize = -1,
1483 		.emul = &x86_emul_stos
1484 	},
1485 
1486 	/*
1487 	 * LODS
1488 	 */
1489 	[0xAC] = {
1490 		/* AL, Xb */
1491 		.valid = true,
1492 		.lods = true,
1493 		.szoverride = false,
1494 		.defsize = OPSIZE_BYTE,
1495 		.emul = &x86_emul_lods
1496 	},
1497 	[0xAD] = {
1498 		/* rAX, Xv */
1499 		.valid = true,
1500 		.lods = true,
1501 		.szoverride = true,
1502 		.defsize = -1,
1503 		.emul = &x86_emul_lods
1504 	},
1505 };
1506 
1507 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
1508 	/*
1509 	 * MOVZX
1510 	 */
1511 	[0xB6] = {
1512 		/* Gv, Eb */
1513 		.valid = true,
1514 		.regmodrm = true,
1515 		.regtorm = false,
1516 		.szoverride = true,
1517 		.defsize = OPSIZE_BYTE,
1518 		.flags = FLAG_ze,
1519 		.emul = &x86_emul_mov
1520 	},
1521 	[0xB7] = {
1522 		/* Gv, Ew */
1523 		.valid = true,
1524 		.regmodrm = true,
1525 		.regtorm = false,
1526 		.szoverride = true,
1527 		.defsize = OPSIZE_WORD,
1528 		.flags = FLAG_ze,
1529 		.emul = &x86_emul_mov
1530 	},
1531 };
1532 
1533 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1534 
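/*
 * Special GPR encodings. Without a REX prefix, the byte-sized encodings
 * 0b100-0b111 designate AH/CH/DH/BH; with a REX prefix (even an empty
 * one) they designate SPL/BPL/SIL/DIL. For larger operand sizes both
 * cases resolve to SP/BP/SI/DI and their wider forms. The table is
 * indexed with the encoding masked down to its low two bits.
 */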
1535 /* [REX-present][enc][opsize] */
1536 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
1537 	[false] = {
1538 		/* No REX prefix. */
1539 		[0b00] = {
1540 			[0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1541 			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1542 			[2] = { -1, 0 },
1543 			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1544 			[4] = { -1, 0 },
1545 			[5] = { -1, 0 },
1546 			[6] = { -1, 0 },
1547 			[7] = { -1, 0 },
1548 		},
1549 		[0b01] = {
1550 			[0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1551 			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1552 			[2] = { -1, 0 },
1553 			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF },	/* EBP */
1554 			[4] = { -1, 0 },
1555 			[5] = { -1, 0 },
1556 			[6] = { -1, 0 },
1557 			[7] = { -1, 0 },
1558 		},
1559 		[0b10] = {
1560 			[0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1561 			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1562 			[2] = { -1, 0 },
1563 			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1564 			[4] = { -1, 0 },
1565 			[5] = { -1, 0 },
1566 			[6] = { -1, 0 },
1567 			[7] = { -1, 0 },
1568 		},
1569 		[0b11] = {
1570 			[0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1571 			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1572 			[2] = { -1, 0 },
1573 			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1574 			[4] = { -1, 0 },
1575 			[5] = { -1, 0 },
1576 			[6] = { -1, 0 },
1577 			[7] = { -1, 0 },
1578 		}
1579 	},
1580 	[true] = {
1581 		/* Has REX prefix. */
1582 		[0b00] = {
1583 			[0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1584 			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1585 			[2] = { -1, 0 },
1586 			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1587 			[4] = { -1, 0 },
1588 			[5] = { -1, 0 },
1589 			[6] = { -1, 0 },
1590 			[7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1591 		},
1592 		[0b01] = {
1593 			[0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1594 			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1595 			[2] = { -1, 0 },
1596 			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1597 			[4] = { -1, 0 },
1598 			[5] = { -1, 0 },
1599 			[6] = { -1, 0 },
1600 			[7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1601 		},
1602 		[0b10] = {
1603 			[0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1604 			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1605 			[2] = { -1, 0 },
1606 			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1607 			[4] = { -1, 0 },
1608 			[5] = { -1, 0 },
1609 			[6] = { -1, 0 },
1610 			[7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1611 		},
1612 		[0b11] = {
1613 			[0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1614 			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1615 			[2] = { -1, 0 },
1616 			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1617 			[4] = { -1, 0 },
1618 			[5] = { -1, 0 },
1619 			[6] = { -1, 0 },
1620 			[7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1621 		}
1622 	}
1623 };
1624 
1625 /* [REX extension bit (R/X/B)][enc][size] */
1626 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
1627 	[false] = {
1628 		/* Not extended. */
1629 		[0b000] = {
1630 			[0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1631 			[1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1632 			[2] = { -1, 0 },
1633 			[3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1634 			[4] = { -1, 0 },
1635 			[5] = { -1, 0 },
1636 			[6] = { -1, 0 },
1637 			[7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1638 		},
1639 		[0b001] = {
1640 			[0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1641 			[1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1642 			[2] = { -1, 0 },
1643 			[3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1644 			[4] = { -1, 0 },
1645 			[5] = { -1, 0 },
1646 			[6] = { -1, 0 },
1647 			[7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1648 		},
1649 		[0b010] = {
1650 			[0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1651 			[1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1652 			[2] = { -1, 0 },
1653 			[3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1654 			[4] = { -1, 0 },
1655 			[5] = { -1, 0 },
1656 			[6] = { -1, 0 },
1657 			[7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1658 		},
1659 		[0b011] = {
1660 			[0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1661 			[1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1662 			[2] = { -1, 0 },
1663 			[3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1664 			[4] = { -1, 0 },
1665 			[5] = { -1, 0 },
1666 			[6] = { -1, 0 },
1667 			[7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1668 		},
1669 		[0b100] = {
1670 			[0] = { -1, 0 }, /* SPECIAL */
1671 			[1] = { -1, 0 }, /* SPECIAL */
1672 			[2] = { -1, 0 },
1673 			[3] = { -1, 0 }, /* SPECIAL */
1674 			[4] = { -1, 0 },
1675 			[5] = { -1, 0 },
1676 			[6] = { -1, 0 },
1677 			[7] = { -1, 0 }, /* SPECIAL */
1678 		},
1679 		[0b101] = {
1680 			[0] = { -1, 0 }, /* SPECIAL */
1681 			[1] = { -1, 0 }, /* SPECIAL */
1682 			[2] = { -1, 0 },
1683 			[3] = { -1, 0 }, /* SPECIAL */
1684 			[4] = { -1, 0 },
1685 			[5] = { -1, 0 },
1686 			[6] = { -1, 0 },
1687 			[7] = { -1, 0 }, /* SPECIAL */
1688 		},
1689 		[0b110] = {
1690 			[0] = { -1, 0 }, /* SPECIAL */
1691 			[1] = { -1, 0 }, /* SPECIAL */
1692 			[2] = { -1, 0 },
1693 			[3] = { -1, 0 }, /* SPECIAL */
1694 			[4] = { -1, 0 },
1695 			[5] = { -1, 0 },
1696 			[6] = { -1, 0 },
1697 			[7] = { -1, 0 }, /* SPECIAL */
1698 		},
1699 		[0b111] = {
1700 			[0] = { -1, 0 }, /* SPECIAL */
1701 			[1] = { -1, 0 }, /* SPECIAL */
1702 			[2] = { -1, 0 },
1703 			[3] = { -1, 0 }, /* SPECIAL */
1704 			[4] = { -1, 0 },
1705 			[5] = { -1, 0 },
1706 			[6] = { -1, 0 },
1707 			[7] = { -1, 0 }, /* SPECIAL */
1708 		},
1709 	},
1710 	[true] = {
1711 		/* Extended. */
1712 		[0b000] = {
1713 			[0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1714 			[1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1715 			[2] = { -1, 0 },
1716 			[3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1717 			[4] = { -1, 0 },
1718 			[5] = { -1, 0 },
1719 			[6] = { -1, 0 },
1720 			[7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1721 		},
1722 		[0b001] = {
1723 			[0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1724 			[1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1725 			[2] = { -1, 0 },
1726 			[3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1727 			[4] = { -1, 0 },
1728 			[5] = { -1, 0 },
1729 			[6] = { -1, 0 },
1730 			[7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1731 		},
1732 		[0b010] = {
1733 			[0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1734 			[1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1735 			[2] = { -1, 0 },
1736 			[3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1737 			[4] = { -1, 0 },
1738 			[5] = { -1, 0 },
1739 			[6] = { -1, 0 },
1740 			[7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1741 		},
1742 		[0b011] = {
1743 			[0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1744 			[1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1745 			[2] = { -1, 0 },
1746 			[3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1747 			[4] = { -1, 0 },
1748 			[5] = { -1, 0 },
1749 			[6] = { -1, 0 },
1750 			[7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1751 		},
1752 		[0b100] = {
1753 			[0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1754 			[1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1755 			[2] = { -1, 0 },
1756 			[3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1757 			[4] = { -1, 0 },
1758 			[5] = { -1, 0 },
1759 			[6] = { -1, 0 },
1760 			[7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1761 		},
1762 		[0b101] = {
1763 			[0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1764 			[1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1765 			[2] = { -1, 0 },
1766 			[3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1767 			[4] = { -1, 0 },
1768 			[5] = { -1, 0 },
1769 			[6] = { -1, 0 },
1770 			[7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1771 		},
1772 		[0b110] = {
1773 			[0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1774 			[1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1775 			[2] = { -1, 0 },
1776 			[3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1777 			[4] = { -1, 0 },
1778 			[5] = { -1, 0 },
1779 			[6] = { -1, 0 },
1780 			[7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1781 		},
1782 		[0b111] = {
1783 			[0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1784 			[1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1785 			[2] = { -1, 0 },
1786 			[3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1787 			[4] = { -1, 0 },
1788 			[5] = { -1, 0 },
1789 			[6] = { -1, 0 },
1790 			[7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1791 		},
1792 	}
1793 };
1794 
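/*
 * Instruction decoder, structured as a small FSM: each node consumes
 * bytes from the instruction buffer and installs the next node via
 * fsm_advance(). A NULL next node means the decoding is complete;
 * node_overflow is installed when the buffer would be overrun.
 */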
1795 static int
1796 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1797 {
1798 	fsm->fn = NULL;
1799 	return -1;
1800 }
1801 
1802 static int
1803 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1804 {
1805 	if (fsm->buf + n > fsm->end) {
1806 		return -1;
1807 	}
1808 	memcpy(bytes, fsm->buf, n);
1809 	return 0;
1810 }
1811 
1812 static inline void
1813 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1814     int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1815 {
1816 	fsm->buf += n;
1817 	if (fsm->buf > fsm->end) {
1818 		fsm->fn = node_overflow;
1819 	} else {
1820 		fsm->fn = fn;
1821 	}
1822 }
1823 
1824 static const struct x86_reg *
1825 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1826 {
1827 	enc &= 0b11;
1828 	if (regsize == 8) {
1829 		/* May be 64bit without REX */
1830 		return &gpr_map__special[1][enc][regsize-1];
1831 	}
1832 	return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1833 }
1834 
1835 /*
1836  * Special node, for MOVS. Fake two displacements of zero on the source and
1837  * destination registers.
1838  */
1839 static int
1840 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1841 {
1842 	size_t adrsize;
1843 
1844 	adrsize = instr->address_size;
1845 
1846 	/* DS:RSI */
1847 	instr->src.type = STORE_REG;
1848 	instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1849 	instr->src.disp.type = DISP_0;
1850 
1851 	/* ES:RDI, force ES */
1852 	instr->dst.type = STORE_REG;
1853 	instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1854 	instr->dst.disp.type = DISP_0;
1855 	instr->dst.hardseg = NVMM_X64_SEG_ES;
1856 
1857 	fsm_advance(fsm, 0, NULL);
1858 
1859 	return 0;
1860 }
1861 
1862 /*
1863  * Special node, for STOS and LODS. Fake a displacement of zero on the
1864  * destination register.
1865  */
1866 static int
1867 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1868 {
1869 	const struct x86_opcode *opcode = instr->opcode;
1870 	struct x86_store *stlo, *streg;
1871 	size_t adrsize, regsize;
1872 
1873 	adrsize = instr->address_size;
1874 	regsize = instr->operand_size;
1875 
1876 	if (opcode->stos) {
1877 		streg = &instr->src;
1878 		stlo = &instr->dst;
1879 	} else {
1880 		streg = &instr->dst;
1881 		stlo = &instr->src;
1882 	}
1883 
1884 	streg->type = STORE_REG;
1885 	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1886 
1887 	stlo->type = STORE_REG;
1888 	if (opcode->stos) {
1889 		/* ES:RDI, force ES */
1890 		stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1891 		stlo->hardseg = NVMM_X64_SEG_ES;
1892 	} else {
1893 		/* DS:RSI */
1894 		stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1895 	}
1896 	stlo->disp.type = DISP_0;
1897 
1898 	fsm_advance(fsm, 0, NULL);
1899 
1900 	return 0;
1901 }
1902 
1903 static int
1904 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1905 {
1906 	const struct x86_opcode *opcode = instr->opcode;
1907 	struct x86_store *stdmo, *streg;
1908 	size_t adrsize, regsize;
1909 
1910 	adrsize = instr->address_size;
1911 	regsize = instr->operand_size;
1912 
1913 	if (opcode->todmo) {
1914 		streg = &instr->src;
1915 		stdmo = &instr->dst;
1916 	} else {
1917 		streg = &instr->dst;
1918 		stdmo = &instr->src;
1919 	}
1920 
1921 	streg->type = STORE_REG;
1922 	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1923 
1924 	stdmo->type = STORE_DMO;
1925 	if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1926 		return -1;
1927 	}
1928 	fsm_advance(fsm, adrsize, NULL);
1929 
1930 	return 0;
1931 }
1932 
1933 static inline uint64_t
1934 sign_extend(uint64_t val, int size)
1935 {
1936 	if (size == 1) {
1937 		if (val & __BIT(7))
1938 			val |= 0xFFFFFFFFFFFFFF00;
1939 	} else if (size == 2) {
1940 		if (val & __BIT(15))
1941 			val |= 0xFFFFFFFFFFFF0000;
1942 	} else if (size == 4) {
1943 		if (val & __BIT(31))
1944 			val |= 0xFFFFFFFF00000000;
1945 	}
1946 	return val;
1947 }
1948 
1949 static int
1950 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1951 {
1952 	const struct x86_opcode *opcode = instr->opcode;
1953 	struct x86_store *store;
1954 	uint8_t immsize;
1955 	size_t sesize = 0;
1956 
1957 	/* The immediate is the source */
1958 	store = &instr->src;
1959 	immsize = instr->operand_size;
1960 
1961 	if (opcode->flags & FLAG_imm8) {
1962 		sesize = immsize;
1963 		immsize = 1;
1964 	} else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1965 		sesize = immsize;
1966 		immsize = 4;
1967 	}
1968 
1969 	store->type = STORE_IMM;
1970 	if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1971 		return -1;
1972 	}
1973 	fsm_advance(fsm, immsize, NULL);
1974 
1975 	if (sesize != 0) {
1976 		store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1977 	}
1978 
1979 	return 0;
1980 }
1981 
1982 static int
1983 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1984 {
1985 	const struct x86_opcode *opcode = instr->opcode;
1986 	uint64_t data = 0;
1987 	size_t n;
1988 
1989 	if (instr->strm->disp.type == DISP_1) {
1990 		n = 1;
1991 	} else { /* DISP4 */
1992 		n = 4;
1993 	}
1994 
1995 	if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1996 		return -1;
1997 	}
1998 
1999 	if (__predict_true(fsm->is64bit)) {
2000 		data = sign_extend(data, n);
2001 	}
2002 
2003 	instr->strm->disp.data = data;
2004 
2005 	if (opcode->immediate) {
2006 		fsm_advance(fsm, n, node_immediate);
2007 	} else {
2008 		fsm_advance(fsm, n, NULL);
2009 	}
2010 
2011 	return 0;
2012 }
2013 
2014 static const struct x86_reg *
2015 get_register_idx(struct x86_instr *instr, uint8_t index)
2016 {
2017 	uint8_t enc = index;
2018 	const struct x86_reg *reg;
2019 	size_t regsize;
2020 
2021 	regsize = instr->address_size;
2022 	reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2023 
2024 	if (reg->num == -1) {
2025 		reg = resolve_special_register(instr, enc, regsize);
2026 	}
2027 
2028 	return reg;
2029 }
2030 
2031 static const struct x86_reg *
2032 get_register_bas(struct x86_instr *instr, uint8_t base)
2033 {
2034 	uint8_t enc = base;
2035 	const struct x86_reg *reg;
2036 	size_t regsize;
2037 
2038 	regsize = instr->address_size;
2039 	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2040 	if (reg->num == -1) {
2041 		reg = resolve_special_register(instr, enc, regsize);
2042 	}
2043 
2044 	return reg;
2045 }
2046 
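/*
 * SIB byte: scale in bits 7:6, index in bits 5:3, base in bits 2:0.
 * index=0b100 without REX.X means "no index", and base=0b101 with
 * mod=0b00 means "no base, a disp32 follows".
 */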
2047 static int
2048 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2049 {
2050 	const struct x86_opcode *opcode;
2051 	uint8_t scale, index, base;
2052 	bool noindex, nobase;
2053 	uint8_t byte;
2054 
2055 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2056 		return -1;
2057 	}
2058 
2059 	scale = ((byte & 0b11000000) >> 6);
2060 	index = ((byte & 0b00111000) >> 3);
2061 	base  = ((byte & 0b00000111) >> 0);
2062 
2063 	opcode = instr->opcode;
2064 
2065 	noindex = false;
2066 	nobase = false;
2067 
2068 	if (index == 0b100 && !instr->rexpref.x) {
2069 		/* Special case: the index is null */
2070 		noindex = true;
2071 	}
2072 
2073 	if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2074 		/* Special case: the base is null + disp32 */
2075 		instr->strm->disp.type = DISP_4;
2076 		nobase = true;
2077 	}
2078 
2079 	instr->strm->type = STORE_SIB;
2080 	instr->strm->u.sib.scale = (1 << scale);
2081 	if (!noindex)
2082 		instr->strm->u.sib.idx = get_register_idx(instr, index);
2083 	if (!nobase)
2084 		instr->strm->u.sib.bas = get_register_bas(instr, base);
2085 
2086 	/* May have a displacement, or an immediate */
2087 	if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2088 		fsm_advance(fsm, 1, node_disp);
2089 	} else if (opcode->immediate) {
2090 		fsm_advance(fsm, 1, node_immediate);
2091 	} else {
2092 		fsm_advance(fsm, 1, NULL);
2093 	}
2094 
2095 	return 0;
2096 }
2097 
2098 static const struct x86_reg *
2099 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2100 {
2101 	uint8_t enc = instr->regmodrm.reg;
2102 	const struct x86_reg *reg;
2103 	size_t regsize;
2104 
2105 	regsize = instr->operand_size;
2106 
2107 	reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2108 	if (reg->num == -1) {
2109 		reg = resolve_special_register(instr, enc, regsize);
2110 	}
2111 
2112 	return reg;
2113 }
2114 
2115 static const struct x86_reg *
2116 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2117 {
2118 	uint8_t enc = instr->regmodrm.rm;
2119 	const struct x86_reg *reg;
2120 	size_t regsize;
2121 
2122 	if (instr->strm->disp.type == DISP_NONE) {
2123 		regsize = instr->operand_size;
2124 	} else {
2125 		/* Indirect access, the size is that of the address. */
2126 		regsize = instr->address_size;
2127 	}
2128 
2129 	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2130 	if (reg->num == -1) {
2131 		reg = resolve_special_register(instr, enc, regsize);
2132 	}
2133 
2134 	return reg;
2135 }
2136 
2137 static inline bool
2138 has_sib(struct x86_instr *instr)
2139 {
2140 	return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2141 }
2142 
2143 static inline bool
2144 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2145 {
2146 	return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2147 	    instr->regmodrm.rm == RM_RBP_DISP32);
2148 }
2149 
2150 static inline bool
2151 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2152 {
2153 	return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2154 	    instr->regmodrm.rm == RM_RBP_DISP32);
2155 }
2156 
2157 static enum x86_disp_type
2158 get_disp_type(struct x86_instr *instr)
2159 {
2160 	switch (instr->regmodrm.mod) {
2161 	case MOD_DIS0:	/* indirect */
2162 		return DISP_0;
2163 	case MOD_DIS1:	/* indirect+1 */
2164 		return DISP_1;
2165 	case MOD_DIS4:	/* indirect+4 */
2166 		return DISP_4;
2167 	case MOD_REG:	/* direct */
2168 	default:	/* gcc */
2169 		return DISP_NONE;
2170 	}
2171 }
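
/*
 * Editor's note: worked example, not original code. ModRM 0x48 is
 * 01 001 000 in binary: mod=0b01 (MOD_DIS1, a 1-byte displacement follows),
 * reg=0b001, rm=0b000. ModRM 0xC8 has mod=0b11 (MOD_REG), so the r/m
 * operand is a register and the displacement type is DISP_NONE.
 */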
2172 
2173 static int
2174 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2175 {
2176 	struct x86_store *strg, *strm;
2177 	const struct x86_opcode *opcode;
2178 	const struct x86_reg *reg;
2179 	uint8_t byte;
2180 
2181 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2182 		return -1;
2183 	}
2184 
2185 	opcode = instr->opcode;
2186 
2187 	instr->regmodrm.rm  = ((byte & 0b00000111) >> 0);
2188 	instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2189 	instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2190 
2191 	if (opcode->regtorm) {
2192 		strg = &instr->src;
2193 		strm = &instr->dst;
2194 	} else { /* RM to REG */
2195 		strm = &instr->src;
2196 		strg = &instr->dst;
2197 	}
2198 
2199 	/* Save for later use. */
2200 	instr->strm = strm;
2201 
2202 	/*
2203 	 * Special cases: Groups. The REG field of REGMODRM is the index in
2204 	 * the group. op1 gets overwritten in the Immediate node, if any.
2205 	 */
2206 	if (opcode->group1) {
2207 		if (group1[instr->regmodrm.reg].emul == NULL) {
2208 			return -1;
2209 		}
2210 		instr->emul = group1[instr->regmodrm.reg].emul;
2211 	} else if (opcode->group3) {
2212 		if (group3[instr->regmodrm.reg].emul == NULL) {
2213 			return -1;
2214 		}
2215 		instr->emul = group3[instr->regmodrm.reg].emul;
2216 	} else if (opcode->group11) {
2217 		if (group11[instr->regmodrm.reg].emul == NULL) {
2218 			return -1;
2219 		}
2220 		instr->emul = group11[instr->regmodrm.reg].emul;
2221 	}
2222 
2223 	if (!opcode->immediate) {
2224 		reg = get_register_reg(instr, opcode);
2225 		if (reg == NULL) {
2226 			return -1;
2227 		}
2228 		strg->type = STORE_REG;
2229 		strg->u.reg = reg;
2230 	}
2231 
2232 	/* The displacement applies to RM. */
2233 	strm->disp.type = get_disp_type(instr);
2234 
2235 	if (has_sib(instr)) {
2236 		/* Overwrites RM */
2237 		fsm_advance(fsm, 1, node_sib);
2238 		return 0;
2239 	}
2240 
2241 	if (is_rip_relative(fsm, instr)) {
2242 		/* Overwrites RM */
2243 		strm->type = STORE_REG;
2244 		strm->u.reg = &gpr_map__rip;
2245 		strm->disp.type = DISP_4;
2246 		fsm_advance(fsm, 1, node_disp);
2247 		return 0;
2248 	}
2249 
2250 	if (is_disp32_only(fsm, instr)) {
2251 		/* Overwrites RM */
2252 		strm->type = STORE_REG;
2253 		strm->u.reg = NULL;
2254 		strm->disp.type = DISP_4;
2255 		fsm_advance(fsm, 1, node_disp);
2256 		return 0;
2257 	}
2258 
2259 	reg = get_register_rm(instr, opcode);
2260 	if (reg == NULL) {
2261 		return -1;
2262 	}
2263 	strm->type = STORE_REG;
2264 	strm->u.reg = reg;
2265 
2266 	if (strm->disp.type == DISP_NONE) {
2267 		/* Direct register addressing mode */
2268 		if (opcode->immediate) {
2269 			fsm_advance(fsm, 1, node_immediate);
2270 		} else {
2271 			fsm_advance(fsm, 1, NULL);
2272 		}
2273 	} else if (strm->disp.type == DISP_0) {
2274 		/* Indirect register addressing mode */
2275 		if (opcode->immediate) {
2276 			fsm_advance(fsm, 1, node_immediate);
2277 		} else {
2278 			fsm_advance(fsm, 1, NULL);
2279 		}
2280 	} else {
2281 		fsm_advance(fsm, 1, node_disp);
2282 	}
2283 
2284 	return 0;
2285 }
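
/*
 * Editor's note: worked example, not original code. In 64-bit mode the
 * encoding mod=0b00, rm=0b101 (e.g. ModRM 0x05) is RIP-relative with a
 * 4-byte displacement, which is why is_rip_relative() swaps the r/m operand
 * for gpr_map__rip and forces DISP_4. The same encoding in 32-bit mode is a
 * bare disp32, handled by is_disp32_only() with a NULL base register.
 */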
2286 
2287 static size_t
2288 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2289 {
2290 	const struct x86_opcode *opcode = instr->opcode;
2291 	int opsize;
2292 
2293 	/* Get the opsize */
2294 	if (!opcode->szoverride) {
2295 		opsize = opcode->defsize;
2296 	} else if (instr->rexpref.present && instr->rexpref.w) {
2297 		opsize = 8;
2298 	} else {
2299 		if (!fsm->is16bit) {
2300 			if (instr->legpref.opr_ovr) {
2301 				opsize = 2;
2302 			} else {
2303 				opsize = 4;
2304 			}
2305 		} else { /* 16bit */
2306 			if (instr->legpref.opr_ovr) {
2307 				opsize = 4;
2308 			} else {
2309 				opsize = 2;
2310 			}
2311 		}
2312 	}
2313 
2314 	return opsize;
2315 }
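
/*
 * Editor's note: summary example, not original text, for an opcode with
 * szoverride set:
 *   64/32-bit code, no prefix        -> 4 bytes
 *   64-bit code, REX.W               -> 8 bytes
 *   64/32-bit code, 0x66 (opr_ovr)   -> 2 bytes
 *   16-bit code, no prefix           -> 2 bytes
 *   16-bit code, 0x66 (opr_ovr)      -> 4 bytes
 */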
2316 
2317 static size_t
2318 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2319 {
2320 	if (fsm->is64bit) {
2321 		if (__predict_false(instr->legpref.adr_ovr)) {
2322 			return 4;
2323 		}
2324 		return 8;
2325 	}
2326 
2327 	if (fsm->is32bit) {
2328 		if (__predict_false(instr->legpref.adr_ovr)) {
2329 			return 2;
2330 		}
2331 		return 4;
2332 	}
2333 
2334 	/* 16bit. */
2335 	if (__predict_false(instr->legpref.adr_ovr)) {
2336 		return 4;
2337 	}
2338 	return 2;
2339 }
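
/*
 * Editor's note: summary example, not original text. The address size is
 * 8 bytes in 64-bit mode (4 with a 0x67 adr_ovr prefix), 4 bytes in 32-bit
 * mode (2 with 0x67), and 2 bytes in 16-bit mode (4 with 0x67).
 */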
2340 
2341 static int
2342 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2343 {
2344 	const struct x86_opcode *opcode;
2345 	uint8_t byte;
2346 
2347 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2348 		return -1;
2349 	}
2350 
2351 	opcode = &primary_opcode_table[byte];
2352 	if (__predict_false(!opcode->valid)) {
2353 		return -1;
2354 	}
2355 
2356 	instr->opcode = opcode;
2357 	instr->emul = opcode->emul;
2358 	instr->operand_size = get_operand_size(fsm, instr);
2359 	instr->address_size = get_address_size(fsm, instr);
2360 
2361 	if (fsm->is64bit && (instr->operand_size == 4)) {
2362 		/* Zero-extend to 64 bits. */
2363 		instr->zeroextend_mask = ~size_to_mask(4);
2364 	}
2365 
2366 	if (opcode->regmodrm) {
2367 		fsm_advance(fsm, 1, node_regmodrm);
2368 	} else if (opcode->dmo) {
2369 		/* Direct-Memory Offsets */
2370 		fsm_advance(fsm, 1, node_dmo);
2371 	} else if (opcode->stos || opcode->lods) {
2372 		fsm_advance(fsm, 1, node_stlo);
2373 	} else if (opcode->movs) {
2374 		fsm_advance(fsm, 1, node_movs);
2375 	} else {
2376 		return -1;
2377 	}
2378 
2379 	return 0;
2380 }
2381 
2382 static int
2383 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2384 {
2385 	const struct x86_opcode *opcode;
2386 	uint8_t byte;
2387 
2388 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2389 		return -1;
2390 	}
2391 
2392 	opcode = &secondary_opcode_table[byte];
2393 	if (__predict_false(!opcode->valid)) {
2394 		return -1;
2395 	}
2396 
2397 	instr->opcode = opcode;
2398 	instr->emul = opcode->emul;
2399 	instr->operand_size = get_operand_size(fsm, instr);
2400 	instr->address_size = get_address_size(fsm, instr);
2401 
2402 	if (fsm->is64bit && (instr->operand_size == 4)) {
2403 		/* Zero-extend to 64 bits. */
2404 		instr->zeroextend_mask = ~size_to_mask(4);
2405 	}
2406 
2407 	if (opcode->flags & FLAG_ze) {
2408 		/*
2409 		 * Compute the mask for zero-extend. Update the operand size;
2410 		 * we move fewer bytes.
2411 		 */
2412 		instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2413 		instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2414 		instr->operand_size = opcode->defsize;
2415 	}
2416 
2417 	if (opcode->regmodrm) {
2418 		fsm_advance(fsm, 1, node_regmodrm);
2419 	} else {
2420 		return -1;
2421 	}
2422 
2423 	return 0;
2424 }
2425 
2426 static int
2427 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2428 {
2429 	uint8_t byte;
2430 
2431 #define ESCAPE	0x0F
2432 #define VEX_1	0xC5
2433 #define VEX_2	0xC4
2434 #define XOP	0x8F
2435 
2436 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2437 		return -1;
2438 	}
2439 
2440 	/*
2441 	 * We don't take XOP. It is AMD-specific, and it was removed shortly
2442 	 * after being introduced.
2443 	 */
2444 	if (byte == ESCAPE) {
2445 		fsm_advance(fsm, 1, node_secondary_opcode);
2446 	} else if (!instr->rexpref.present) {
2447 		if (byte == VEX_1) {
2448 			return -1;
2449 		} else if (byte == VEX_2) {
2450 			return -1;
2451 		} else {
2452 			fsm->fn = node_primary_opcode;
2453 		}
2454 	} else {
2455 		fsm->fn = node_primary_opcode;
2456 	}
2457 
2458 	return 0;
2459 }
2460 
2461 static int
2462 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2463 {
2464 	struct x86_rexpref *rexpref = &instr->rexpref;
2465 	uint8_t byte;
2466 	size_t n = 0;
2467 
2468 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2469 		return -1;
2470 	}
2471 
2472 	if (byte >= 0x40 && byte <= 0x4F) {
2473 		if (__predict_false(!fsm->is64bit)) {
2474 			return -1;
2475 		}
2476 		rexpref->b = ((byte & 0x1) != 0);
2477 		rexpref->x = ((byte & 0x2) != 0);
2478 		rexpref->r = ((byte & 0x4) != 0);
2479 		rexpref->w = ((byte & 0x8) != 0);
2480 		rexpref->present = true;
2481 		n = 1;
2482 	}
2483 
2484 	fsm_advance(fsm, n, node_main);
2485 	return 0;
2486 }
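
/*
 * Editor's note: worked example, not original code. The byte 0x4C
 * (0100 1100) is a REX prefix with W=1, R=1, X=0, B=0: W promotes the
 * operand size to 8 bytes and R extends the ModRM reg field, so the bytes
 * 4C 89 07 decode as "mov %r8, (%rdi)".
 */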
2487 
2488 static int
2489 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2490 {
2491 	uint8_t byte;
2492 
2493 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2494 		return -1;
2495 	}
2496 
2497 	if (byte == LEG_OPR_OVR) {
2498 		instr->legpref.opr_ovr = 1;
2499 	} else if (byte == LEG_OVR_DS) {
2500 		instr->legpref.seg = NVMM_X64_SEG_DS;
2501 	} else if (byte == LEG_OVR_ES) {
2502 		instr->legpref.seg = NVMM_X64_SEG_ES;
2503 	} else if (byte == LEG_REP) {
2504 		instr->legpref.rep = 1;
2505 	} else if (byte == LEG_OVR_GS) {
2506 		instr->legpref.seg = NVMM_X64_SEG_GS;
2507 	} else if (byte == LEG_OVR_FS) {
2508 		instr->legpref.seg = NVMM_X64_SEG_FS;
2509 	} else if (byte == LEG_ADR_OVR) {
2510 		instr->legpref.adr_ovr = 1;
2511 	} else if (byte == LEG_OVR_CS) {
2512 		instr->legpref.seg = NVMM_X64_SEG_CS;
2513 	} else if (byte == LEG_OVR_SS) {
2514 		instr->legpref.seg = NVMM_X64_SEG_SS;
2515 	} else if (byte == LEG_REPN) {
2516 		instr->legpref.repn = 1;
2517 	} else if (byte == LEG_LOCK) {
2518 		/* ignore */
2519 	} else {
2520 		/* not a legacy prefix */
2521 		fsm_advance(fsm, 0, node_rex_prefix);
2522 		return 0;
2523 	}
2524 
2525 	fsm_advance(fsm, 1, node_legacy_prefix);
2526 	return 0;
2527 }
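
/*
 * Editor's note: worked example, not original code. For the bytes
 * 65 48 89 08, this node consumes 0x65 (LEG_OVR_GS, so legpref.seg becomes
 * NVMM_X64_SEG_GS) and then falls through to node_rex_prefix; the REX.W
 * byte 0x48 and opcode byte 0x89 are handled by the later nodes, giving
 * "mov %rcx, %gs:(%rax)".
 */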
2528 
2529 static int
2530 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2531     struct nvmm_x64_state *state)
2532 {
2533 	struct x86_decode_fsm fsm;
2534 	int ret;
2535 
2536 	memset(instr, 0, sizeof(*instr));
2537 	instr->legpref.seg = -1;
2538 	instr->src.hardseg = -1;
2539 	instr->dst.hardseg = -1;
2540 
2541 	fsm.is64bit = is_64bit(state);
2542 	fsm.is32bit = is_32bit(state);
2543 	fsm.is16bit = is_16bit(state);
2544 
2545 	fsm.fn = node_legacy_prefix;
2546 	fsm.buf = inst_bytes;
2547 	fsm.end = inst_bytes + inst_len;
2548 
2549 	while (fsm.fn != NULL) {
2550 		ret = (*fsm.fn)(&fsm, instr);
2551 		if (ret == -1)
2552 			return -1;
2553 	}
2554 
2555 	instr->len = fsm.buf - inst_bytes;
2556 
2557 	return 0;
2558 }
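
/*
 * Editor's note: a minimal usage sketch of the decoder, not part of the
 * original source; the function example_decode is hypothetical.
 */
#if 0
static void
example_decode(struct nvmm_x64_state *state)
{
	/* "88 18" is "mov %bl, (%rax)": primary opcode 0x88 (MOV r/m8, r8),
	 * ModRM 0x18 = mod 0b00, reg 0b011 (BL), rm 0b000 (RAX). */
	uint8_t bytes[] = { 0x88, 0x18 };
	struct x86_instr instr;

	if (x86_decode(bytes, sizeof(bytes), &instr, state) == 0) {
		/* instr.len is 2; instr.emul would be the MOV emulation. */
	}
}
#endif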
2559 
2560 /* -------------------------------------------------------------------------- */
2561 
2562 #define EXEC_INSTR(sz, instr)						\
2563 static uint##sz##_t							\
2564 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags)	\
2565 {									\
2566 	uint##sz##_t res;						\
2567 	__asm __volatile (						\
2568 		#instr " %2, %3;"					\
2569 		"mov %3, %1;"						\
2570 		"pushfq;"						\
2571 		"popq %0"						\
2572 	    : "=r" (*rflags), "=r" (res)				\
2573 	    : "r" (op1), "r" (op2));					\
2574 	return res;							\
2575 }
2576 
2577 #define EXEC_DISPATCHER(instr)						\
2578 static uint64_t								\
2579 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2580 {									\
2581 	switch (opsize) {						\
2582 	case 1:								\
2583 		return exec_##instr##8(op1, op2, rflags);		\
2584 	case 2:								\
2585 		return exec_##instr##16(op1, op2, rflags);		\
2586 	case 4:								\
2587 		return exec_##instr##32(op1, op2, rflags);		\
2588 	default:							\
2589 		return exec_##instr##64(op1, op2, rflags);		\
2590 	}								\
2591 }
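
/*
 * Editor's note: illustrative expansion, not original code; roughly what
 * EXEC_INSTR(8, sub) generates. The inline asm uses AT&T operand order, so
 * the value returned is op2 - op1; the x86_func_* emulation functions below
 * choose op1/op2 with that in mind.
 */
#if 0
static uint8_t
exec_sub8(uint8_t op1, uint8_t op2, uint64_t *rflags)
{
	uint8_t res;
	__asm __volatile (
		"sub %2, %3;"
		"mov %3, %1;"
		"pushfq;"
		"popq %0"
	    : "=r" (*rflags), "=r" (res)
	    : "r" (op1), "r" (op2));
	return res;
}
#endif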
2592 
2593 /* SUB: ret = op2 - op1 (the asm above uses AT&T operand order) */
2594 #define PSL_SUB_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2595 EXEC_INSTR(8, sub)
2596 EXEC_INSTR(16, sub)
2597 EXEC_INSTR(32, sub)
2598 EXEC_INSTR(64, sub)
2599 EXEC_DISPATCHER(sub)
2600 
2601 /* OR:  ret = op1 | op2 */
2602 #define PSL_OR_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2603 EXEC_INSTR(8, or)
2604 EXEC_INSTR(16, or)
2605 EXEC_INSTR(32, or)
2606 EXEC_INSTR(64, or)
2607 EXEC_DISPATCHER(or)
2608 
2609 /* AND: ret = op1 & op2 */
2610 #define PSL_AND_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2611 EXEC_INSTR(8, and)
2612 EXEC_INSTR(16, and)
2613 EXEC_INSTR(32, and)
2614 EXEC_INSTR(64, and)
2615 EXEC_DISPATCHER(and)
2616 
2617 /* XOR: ret = op1 ^ op2 */
2618 #define PSL_XOR_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2619 EXEC_INSTR(8, xor)
2620 EXEC_INSTR(16, xor)
2621 EXEC_INSTR(32, xor)
2622 EXEC_INSTR(64, xor)
2623 EXEC_DISPATCHER(xor)
2624 
2625 /* -------------------------------------------------------------------------- */
2626 
2627 /*
2628  * Emulation functions. We don't care about the order of the operands, except
2629  * for SUB, CMP and TEST. For these we look at mem->write to determine which
2630  * operand is op1 and which is op2.
2631  */
2632 
2633 static void
2634 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
2635 {
2636 	uint64_t *retval = (uint64_t *)mem->data;
2637 	const bool write = mem->write;
2638 	uint64_t *op1, op2, fl, ret;
2639 
2640 	op1 = (uint64_t *)mem->data;
2641 	op2 = 0;
2642 
2643 	/* Fetch the value to be OR'ed (op2). */
2644 	mem->data = (uint8_t *)&op2;
2645 	mem->write = false;
2646 	(*__callbacks.mem)(mem);
2647 
2648 	/* Perform the OR. */
2649 	ret = exec_or(*op1, op2, &fl, mem->size);
2650 
2651 	if (write) {
2652 		/* Write back the result. */
2653 		mem->data = (uint8_t *)&ret;
2654 		mem->write = true;
2655 		(*__callbacks.mem)(mem);
2656 	} else {
2657 		/* Return data to the caller. */
2658 		*retval = ret;
2659 	}
2660 
2661 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2662 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2663 }
2664 
2665 static void
2666 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
2667 {
2668 	uint64_t *retval = (uint64_t *)mem->data;
2669 	const bool write = mem->write;
2670 	uint64_t *op1, op2, fl, ret;
2671 
2672 	op1 = (uint64_t *)mem->data;
2673 	op2 = 0;
2674 
2675 	/* Fetch the value to be AND'ed (op2). */
2676 	mem->data = (uint8_t *)&op2;
2677 	mem->write = false;
2678 	(*__callbacks.mem)(mem);
2679 
2680 	/* Perform the AND. */
2681 	ret = exec_and(*op1, op2, &fl, mem->size);
2682 
2683 	if (write) {
2684 		/* Write back the result. */
2685 		mem->data = (uint8_t *)&ret;
2686 		mem->write = true;
2687 		(*__callbacks.mem)(mem);
2688 	} else {
2689 		/* Return data to the caller. */
2690 		*retval = ret;
2691 	}
2692 
2693 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2694 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2695 }
2696 
2697 static void
2698 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
2699 {
2700 	uint64_t *retval = (uint64_t *)mem->data;
2701 	const bool write = mem->write;
2702 	uint64_t *op1, *op2, fl, ret;
2703 	uint64_t tmp;
2704 	bool memop1;
2705 
2706 	memop1 = !mem->write;
2707 	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2708 	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2709 
2710 	/* Fetch the value to be SUB'ed (op1 or op2). */
2711 	mem->data = (uint8_t *)&tmp;
2712 	mem->write = false;
2713 	(*__callbacks.mem)(mem);
2714 
2715 	/* Perform the SUB. */
2716 	ret = exec_sub(*op1, *op2, &fl, mem->size);
2717 
2718 	if (write) {
2719 		/* Write back the result. */
2720 		mem->data = (uint8_t *)&ret;
2721 		mem->write = true;
2722 		(*__callbacks.mem)(mem);
2723 	} else {
2724 		/* Return data to the caller. */
2725 		*retval = ret;
2726 	}
2727 
2728 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2729 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2730 }
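
/*
 * Editor's note: worked example, not original text. For "sub %rax, (%rbx)"
 * hitting MMIO, mem->write is true on entry: op1 is the RAX value already in
 * mem->data and op2 is the guest-memory word fetched into tmp. exec_sub()
 * returns op2 - op1, i.e. mem - RAX, which is then written back to guest
 * memory, matching the architectural behaviour of SUB with a memory
 * destination.
 */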
2731 
2732 static void
2733 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
2734 {
2735 	uint64_t *retval = (uint64_t *)mem->data;
2736 	const bool write = mem->write;
2737 	uint64_t *op1, op2, fl, ret;
2738 
2739 	op1 = (uint64_t *)mem->data;
2740 	op2 = 0;
2741 
2742 	/* Fetch the value to be XOR'ed (op2). */
2743 	mem->data = (uint8_t *)&op2;
2744 	mem->write = false;
2745 	(*__callbacks.mem)(mem);
2746 
2747 	/* Perform the XOR. */
2748 	ret = exec_xor(*op1, op2, &fl, mem->size);
2749 
2750 	if (write) {
2751 		/* Write back the result. */
2752 		mem->data = (uint8_t *)&ret;
2753 		mem->write = true;
2754 		(*__callbacks.mem)(mem);
2755 	} else {
2756 		/* Return data to the caller. */
2757 		*retval = ret;
2758 	}
2759 
2760 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2761 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2762 }
2763 
2764 static void
2765 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
2766 {
2767 	uint64_t *op1, *op2, fl;
2768 	uint64_t tmp;
2769 	bool memop1;
2770 
2771 	memop1 = !mem->write;
2772 	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2773 	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2774 
2775 	/* Fetch the value to be CMP'ed (op1 or op2). */
2776 	mem->data = (uint8_t *)&tmp;
2777 	mem->write = false;
2778 	(*__callbacks.mem)(mem);
2779 
2780 	/* Perform the CMP. */
2781 	exec_sub(*op1, *op2, &fl, mem->size);
2782 
2783 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2784 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2785 }
2786 
2787 static void
2788 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
2789 {
2790 	uint64_t *op1, *op2, fl;
2791 	uint64_t tmp;
2792 	bool memop1;
2793 
2794 	memop1 = !mem->write;
2795 	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2796 	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2797 
2798 	/* Fetch the value to be TEST'ed (op1 or op2). */
2799 	mem->data = (uint8_t *)&tmp;
2800 	mem->write = false;
2801 	(*__callbacks.mem)(mem);
2802 
2803 	/* Perform the TEST. */
2804 	exec_and(*op1, *op2, &fl, mem->size);
2805 
2806 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2807 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2808 }
2809 
2810 static void
2811 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
2812 {
2813 	/*
2814 	 * Nothing special, just move without emulation.
2815 	 */
2816 	(*__callbacks.mem)(mem);
2817 }
2818 
2819 static void
2820 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
2821 {
2822 	/*
2823 	 * Just move, and update RDI.
2824 	 */
2825 	(*__callbacks.mem)(mem);
2826 
2827 	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2828 		gprs[NVMM_X64_GPR_RDI] -= mem->size;
2829 	} else {
2830 		gprs[NVMM_X64_GPR_RDI] += mem->size;
2831 	}
2832 }
2833 
2834 static void
2835 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
2836 {
2837 	/*
2838 	 * Just move, and update RSI.
2839 	 */
2840 	(*__callbacks.mem)(mem);
2841 
2842 	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2843 		gprs[NVMM_X64_GPR_RSI] -= mem->size;
2844 	} else {
2845 		gprs[NVMM_X64_GPR_RSI] += mem->size;
2846 	}
2847 }
2848 
2849 static void
2850 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
2851 {
2852 	/*
2853 	 * Special instruction: double memory operand. Don't call the cb;
2854 	 * the copy has already been done earlier, by assist_mem_double().
2855 	 */
2856 
2857 	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2858 		gprs[NVMM_X64_GPR_RSI] -= mem->size;
2859 		gprs[NVMM_X64_GPR_RDI] -= mem->size;
2860 	} else {
2861 		gprs[NVMM_X64_GPR_RSI] += mem->size;
2862 		gprs[NVMM_X64_GPR_RDI] += mem->size;
2863 	}
2864 }
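
/*
 * Editor's note: worked example, not original text. For "rep movsb" with
 * RFLAGS.DF clear, each emulated iteration copies one byte and advances both
 * RSI and RDI by 1; the REP count is decremented by nvmm_assist_mem() below,
 * one iteration per VM exit, and RIP only advances once the count reaches 0.
 */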
2865 
2866 /* -------------------------------------------------------------------------- */
2867 
2868 static inline uint64_t
2869 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2870 {
2871 	uint64_t val;
2872 
2873 	val = state->gprs[gpr];
2874 	val &= size_to_mask(instr->address_size);
2875 
2876 	return val;
2877 }
2878 
2879 static int
2880 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2881     struct x86_store *store, gvaddr_t *gvap, size_t size)
2882 {
2883 	struct x86_sib *sib;
2884 	gvaddr_t gva = 0;
2885 	uint64_t reg;
2886 	int ret, seg;
2887 
2888 	if (store->type == STORE_SIB) {
2889 		sib = &store->u.sib;
2890 		if (sib->bas != NULL)
2891 			gva += gpr_read_address(instr, state, sib->bas->num);
2892 		if (sib->idx != NULL) {
2893 			reg = gpr_read_address(instr, state, sib->idx->num);
2894 			gva += sib->scale * reg;
2895 		}
2896 	} else if (store->type == STORE_REG) {
2897 		if (store->u.reg == NULL) {
2898 			/* The base is null. Happens with disp32-only. */
2899 		} else {
2900 			gva = gpr_read_address(instr, state, store->u.reg->num);
2901 		}
2902 	} else {
2903 		gva = store->u.dmo;
2904 	}
2905 
2906 	if (store->disp.type != DISP_NONE) {
2907 		gva += store->disp.data;
2908 	}
2909 
2910 	if (store->hardseg != -1) {
2911 		seg = store->hardseg;
2912 	} else {
2913 		if (__predict_false(instr->legpref.seg != -1)) {
2914 			seg = instr->legpref.seg;
2915 		} else {
2916 			seg = NVMM_X64_SEG_DS;
2917 		}
2918 	}
2919 
2920 	if (__predict_true(is_long_mode(state))) {
2921 		if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2922 			segment_apply(&state->segs[seg], &gva);
2923 		}
2924 	} else {
2925 		ret = segment_check(&state->segs[seg], gva, size);
2926 		if (ret == -1)
2927 			return -1;
2928 		segment_apply(&state->segs[seg], &gva);
2929 	}
2930 
2931 	*gvap = gva;
2932 	return 0;
2933 }
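
/*
 * Editor's note: worked example, not original text. For
 * "mov %eax, %gs:0x8(%rbx,%rcx,4)" in long mode, the computed GVA is
 * GS.base + RBX + 4*RCX + 0x8: the SIB base and scaled index are summed,
 * the displacement is added, and only the FS/GS segment bases are applied
 * (with no limit check) because the guest is in long mode.
 */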
2934 
2935 static int
2936 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2937 {
2938 	uint8_t inst_bytes[5], byte;
2939 	size_t i, fetchsize;
2940 	gvaddr_t gva;
2941 	int ret, seg;
2942 
2943 	fetchsize = sizeof(inst_bytes);
2944 
2945 	gva = state->gprs[NVMM_X64_GPR_RIP];
2946 	if (__predict_false(!is_long_mode(state))) {
2947 		ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2948 		    fetchsize);
2949 		if (ret == -1)
2950 			return -1;
2951 		segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2952 	}
2953 
2954 	ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2955 	if (ret == -1)
2956 		return -1;
2957 
2958 	seg = NVMM_X64_SEG_DS;
2959 	for (i = 0; i < fetchsize; i++) {
2960 		byte = inst_bytes[i];
2961 
2962 		if (byte == LEG_OVR_DS) {
2963 			seg = NVMM_X64_SEG_DS;
2964 		} else if (byte == LEG_OVR_ES) {
2965 			seg = NVMM_X64_SEG_ES;
2966 		} else if (byte == LEG_OVR_GS) {
2967 			seg = NVMM_X64_SEG_GS;
2968 		} else if (byte == LEG_OVR_FS) {
2969 			seg = NVMM_X64_SEG_FS;
2970 		} else if (byte == LEG_OVR_CS) {
2971 			seg = NVMM_X64_SEG_CS;
2972 		} else if (byte == LEG_OVR_SS) {
2973 			seg = NVMM_X64_SEG_SS;
2974 		} else if (byte == LEG_OPR_OVR) {
2975 			/* nothing */
2976 		} else if (byte == LEG_ADR_OVR) {
2977 			/* nothing */
2978 		} else if (byte == LEG_REP) {
2979 			/* nothing */
2980 		} else if (byte == LEG_REPN) {
2981 			/* nothing */
2982 		} else if (byte == LEG_LOCK) {
2983 			/* nothing */
2984 		} else {
2985 			return seg;
2986 		}
2987 	}
2988 
2989 	return seg;
2990 }
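
/*
 * Editor's note: worked example, not original text. For the bytes 64 A4
 * ("movsb" with an FS override), the scan stops at 0xA4, which is not a
 * legacy prefix, and the function returns NVMM_X64_SEG_FS; with no override
 * it falls back to NVMM_X64_SEG_DS.
 */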
2991 
2992 static int
2993 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2994     struct nvmm_exit *exit)
2995 {
2996 	size_t fetchsize;
2997 	gvaddr_t gva;
2998 	int ret;
2999 
3000 	fetchsize = sizeof(exit->u.mem.inst_bytes);
3001 
3002 	gva = state->gprs[NVMM_X64_GPR_RIP];
3003 	if (__predict_false(!is_long_mode(state))) {
3004 		ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3005 		    fetchsize);
3006 		if (ret == -1)
3007 			return -1;
3008 		segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3009 	}
3010 
3011 	ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
3012 	    fetchsize);
3013 	if (ret == -1)
3014 		return -1;
3015 
3016 	exit->u.mem.inst_len = fetchsize;
3017 
3018 	return 0;
3019 }
3020 
3021 static int
3022 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3023     struct x86_instr *instr)
3024 {
3025 	struct nvmm_mem mem;
3026 	uint8_t data[8];
3027 	gvaddr_t gva;
3028 	size_t size;
3029 	int ret;
3030 
3031 	size = instr->operand_size;
3032 
3033 	/* Source. */
3034 	ret = store_to_gva(state, instr, &instr->src, &gva, size);
3035 	if (ret == -1)
3036 		return -1;
3037 	ret = read_guest_memory(mach, state, gva, data, size);
3038 	if (ret == -1)
3039 		return -1;
3040 
3041 	/* Destination. */
3042 	ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3043 	if (ret == -1)
3044 		return -1;
3045 	ret = write_guest_memory(mach, state, gva, data, size);
3046 	if (ret == -1)
3047 		return -1;
3048 
3049 	mem.size = size;
3050 	(*instr->emul->func)(&mem, state->gprs);
3051 
3052 	return 0;
3053 }
3054 
3055 #define DISASSEMBLER_BUG()	\
3056 	do {			\
3057 		errno = EINVAL;	\
3058 		return -1;	\
3059 	} while (0)
3060 
3061 static int
3062 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3063     struct x86_instr *instr, struct nvmm_exit *exit)
3064 {
3065 	struct nvmm_mem mem;
3066 	uint8_t membuf[8];
3067 	uint64_t val;
3068 
3069 	memset(membuf, 0, sizeof(membuf));
3070 
3071 	mem.gpa = exit->u.mem.gpa;
3072 	mem.size = instr->operand_size;
3073 	mem.data = membuf;
3074 
3075 	/* Determine the direction. */
3076 	switch (instr->src.type) {
3077 	case STORE_REG:
3078 		if (instr->src.disp.type != DISP_NONE) {
3079 			/* Indirect access. */
3080 			mem.write = false;
3081 		} else {
3082 			/* Direct access. */
3083 			mem.write = true;
3084 		}
3085 		break;
3086 	case STORE_IMM:
3087 		mem.write = true;
3088 		break;
3089 	case STORE_SIB:
3090 		mem.write = false;
3091 		break;
3092 	case STORE_DMO:
3093 		mem.write = false;
3094 		break;
3095 	default:
3096 		DISASSEMBLER_BUG();
3097 	}
3098 
3099 	if (mem.write) {
3100 		switch (instr->src.type) {
3101 		case STORE_REG:
3102 			if (instr->src.disp.type != DISP_NONE) {
3103 				DISASSEMBLER_BUG();
3104 			}
3105 			val = state->gprs[instr->src.u.reg->num];
3106 			val = __SHIFTOUT(val, instr->src.u.reg->mask);
3107 			memcpy(mem.data, &val, mem.size);
3108 			break;
3109 		case STORE_IMM:
3110 			memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3111 			break;
3112 		default:
3113 			DISASSEMBLER_BUG();
3114 		}
3115 	} else if (instr->emul->read) {
3116 		if (instr->dst.type != STORE_REG) {
3117 			DISASSEMBLER_BUG();
3118 		}
3119 		if (instr->dst.disp.type != DISP_NONE) {
3120 			DISASSEMBLER_BUG();
3121 		}
3122 		val = state->gprs[instr->dst.u.reg->num];
3123 		val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3124 		memcpy(mem.data, &val, mem.size);
3125 	}
3126 
3127 	(*instr->emul->func)(&mem, state->gprs);
3128 
3129 	if (!instr->emul->notouch && !mem.write) {
3130 		if (instr->dst.type != STORE_REG) {
3131 			DISASSEMBLER_BUG();
3132 		}
3133 		memcpy(&val, membuf, sizeof(uint64_t));
3134 		val = __SHIFTIN(val, instr->dst.u.reg->mask);
3135 		state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3136 		state->gprs[instr->dst.u.reg->num] |= val;
3137 		state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3138 	}
3139 
3140 	return 0;
3141 }
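
/*
 * Editor's note: worked example, not original text. For a 4-byte MMIO read
 * such as "mov (%rbx), %eax" in 64-bit mode, zeroextend_mask is
 * ~size_to_mask(4), so after the value is merged into RAX the final
 * "&= ~zeroextend_mask" clears bits 63:32, as the architecture requires for
 * 32-bit destinations; 8- and 16-bit destinations keep their upper bits.
 */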
3142 
3143 int
3144 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
3145     struct nvmm_exit *exit)
3146 {
3147 	struct nvmm_x64_state state;
3148 	struct x86_instr instr;
3149 	uint64_t cnt = 0; /* GCC */
3150 	int ret;
3151 
3152 	if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3153 		errno = EINVAL;
3154 		return -1;
3155 	}
3156 
3157 	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
3158 	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3159 	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3160 	if (ret == -1)
3161 		return -1;
3162 
3163 	if (exit->u.mem.inst_len == 0) {
3164 		/*
3165 		 * The instruction was not fetched from the kernel. Fetch
3166 		 * it ourselves.
3167 		 */
3168 		ret = fetch_instruction(mach, &state, exit);
3169 		if (ret == -1)
3170 			return -1;
3171 	}
3172 
3173 	ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3174 	    &instr, &state);
3175 	if (ret == -1) {
3176 		errno = ENODEV;
3177 		return -1;
3178 	}
3179 
3180 	if (instr.legpref.rep || instr.legpref.repn) {
3181 		cnt = rep_get_cnt(&state, instr.address_size);
3182 		if (__predict_false(cnt == 0)) {
3183 			state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3184 			goto out;
3185 		}
3186 	}
3187 
3188 	if (instr.opcode->movs) {
3189 		ret = assist_mem_double(mach, &state, &instr);
3190 	} else {
3191 		ret = assist_mem_single(mach, &state, &instr, exit);
3192 	}
3193 	if (ret == -1) {
3194 		errno = ENODEV;
3195 		return -1;
3196 	}
3197 
3198 	if (instr.legpref.rep || instr.legpref.repn) {
3199 		cnt -= 1;
3200 		rep_set_cnt(&state, instr.address_size, cnt);
3201 		if (cnt == 0) {
3202 			state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3203 		} else if (__predict_false(instr.legpref.repn)) {
3204 			if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3205 				state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3206 			}
3207 		}
3208 	} else {
3209 		state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3210 	}
3211 
3212 out:
3213 	ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
3214 	if (ret == -1)
3215 		return -1;
3216 
3217 	return 0;
3218 }
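
/*
 * Editor's note: a hypothetical caller sketch, not part of libnvmm. It
 * assumes the nvmm_vcpu_run(mach, cpuid, &exit) entry point of this API
 * revision; example_run_loop is an invented name.
 */
#if 0
static void
example_run_loop(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
{
	struct nvmm_exit exit;

	for (;;) {
		if (nvmm_vcpu_run(mach, cpuid, &exit) == -1)
			break;
		if (exit.reason == NVMM_EXIT_MEMORY) {
			/* Decode and emulate the faulting MMIO access. */
			if (nvmm_assist_mem(mach, cpuid, &exit) == -1)
				break;
		}
	}
}
#endif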
3219