xref: /dragonfly/lib/libnvmm/libnvmm_x86.c (revision 655933d6)
1 /*
2  * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
3  * All rights reserved.
4  *
5  * This code is part of the NVMM hypervisor.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <inttypes.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <fcntl.h>
35 #include <errno.h>
36 
37 #include <machine/psl.h>
38 
39 #define MIN(X, Y)		(((X) < (Y)) ? (X) : (Y))
40 #define __cacheline_aligned	__attribute__((__aligned__(64)))
41 
42 /* -------------------------------------------------------------------------- */
43 
44 /*
45  * Undocumented debugging function. Helpful: dumps the whole VCPU state to stdout.
46  */
47 int
48 nvmm_vcpu_dump(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
49 {
50 	struct nvmm_x64_state *state = vcpu->state;
51 	uint16_t *attr;
52 	size_t i;
53 	int ret;
54 
55 	const char *segnames[] = {
56 		"ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
57 	};
58 
59 	ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_ALL);
60 	if (ret == -1)
61 		return -1;
62 
63 	printf("+ VCPU id=%u\n", vcpu->cpuid);
64 	printf("| -> RAX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RAX]);
65 	printf("| -> RCX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RCX]);
66 	printf("| -> RDX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RDX]);
67 	printf("| -> RBX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBX]);
68 	printf("| -> RSP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSP]);
69 	printf("| -> RBP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBP]);
70 	printf("| -> RSI=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSI]);
71 	printf("| -> RDI=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RDI]);
72 	printf("| -> RIP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RIP]);
73 	printf("| -> RFLAGS=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RFLAGS]);
74 	for (i = 0; i < NVMM_X64_NSEG; i++) {
75 		attr = (uint16_t *)&state->segs[i].attrib;
76 		printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, "
77 		    "attrib=%x [type=%d,l=%d,def=%d]\n",
78 		    segnames[i],
79 		    state->segs[i].selector,
80 		    state->segs[i].base,
81 		    state->segs[i].limit,
82 		    *attr,
83 		    state->segs[i].attrib.type,
84 		    state->segs[i].attrib.l,
85 		    state->segs[i].attrib.def);
86 	}
87 	printf("| -> MSR_EFER=%"PRIx64"\n", state->msrs[NVMM_X64_MSR_EFER]);
88 	printf("| -> CR0=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR0]);
89 	printf("| -> CR3=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR3]);
90 	printf("| -> CR4=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR4]);
91 	printf("| -> CR8=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR8]);
92 
93 	return 0;
94 }
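
/*
 * Illustrative sketch (not part of the library): a VMM might call this
 * helper when it hits an exit it does not know how to handle, e.g.:
 *
 *	default:
 *		nvmm_vcpu_dump(mach, vcpu);
 *		errx(EXIT_FAILURE, "unhandled VM exit");
 *
 * inside the switch on vcpu->exit->reason of its run loop.
 */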
95 
96 /* -------------------------------------------------------------------------- */
97 
98 /*
99  * x86 page size.
100  */
101 #define PAGE_SIZE	0x1000
102 #define PAGE_MASK	(PAGE_SIZE - 1)
103 
104 /*
105  * x86 PTE/PDE bits.
106  */
107 #define PTE_P		0x0000000000000001	/* Present */
108 #define PTE_W		0x0000000000000002	/* Write */
109 #define PTE_U		0x0000000000000004	/* User */
110 #define PTE_PWT		0x0000000000000008	/* Write-Through */
111 #define PTE_PCD		0x0000000000000010	/* Cache-Disable */
112 #define PTE_A		0x0000000000000020	/* Accessed */
113 #define PTE_D		0x0000000000000040	/* Dirty */
114 #define PTE_PAT		0x0000000000000080	/* PAT on 4KB Pages */
115 #define PTE_PS		0x0000000000000080	/* Large Page Size */
116 #define PTE_G		0x0000000000000100	/* Global Translation */
117 #define PTE_AVL1	0x0000000000000200	/* Ignored by Hardware */
118 #define PTE_AVL2	0x0000000000000400	/* Ignored by Hardware */
119 #define PTE_AVL3	0x0000000000000800	/* Ignored by Hardware */
120 #define PTE_LGPAT	0x0000000000001000	/* PAT on Large Pages */
121 #define PTE_NX		0x8000000000000000	/* No Execute */
122 
123 #define PTE_4KFRAME	0x000ffffffffff000
124 #define PTE_2MFRAME	0x000fffffffe00000
125 #define PTE_1GFRAME	0x000fffffc0000000
126 
127 #define PTE_FRAME	PTE_4KFRAME
128 
129 /* -------------------------------------------------------------------------- */
130 
131 #define PTE32_L1_SHIFT	12
132 #define PTE32_L2_SHIFT	22
133 
134 #define PTE32_L2_MASK	0xffc00000
135 #define PTE32_L1_MASK	0x003ff000
136 
137 #define PTE32_L2_FRAME	(PTE32_L2_MASK)
138 #define PTE32_L1_FRAME	(PTE32_L2_FRAME|PTE32_L1_MASK)
139 
140 #define pte32_l1idx(va)	(((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
141 #define pte32_l2idx(va)	(((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
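
/*
 * Worked example (illustrative): for gva 0x00401abc,
 * pte32_l2idx(gva) == 0x1 (bits 31:22) and pte32_l1idx(gva) == 0x1
 * (bits 21:12); the low 12 bits (0xabc) are the offset within the page.
 */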
142 
143 #define CR3_FRAME_32BIT	0xfffff000
144 
145 typedef uint32_t pte_32bit_t;
146 
147 static int
148 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
149     gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
150 {
151 	gpaddr_t L2gpa, L1gpa;
152 	uintptr_t L2hva, L1hva;
153 	pte_32bit_t *pdir, pte;
154 	nvmm_prot_t pageprot;
155 
156 	/* We begin with an RWXU access. */
157 	*prot = NVMM_PROT_ALL;
158 
159 	/* Parse L2. */
160 	L2gpa = (cr3 & CR3_FRAME_32BIT);
161 	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
162 		return -1;
163 	pdir = (pte_32bit_t *)L2hva;
164 	pte = pdir[pte32_l2idx(gva)];
165 	if ((pte & PTE_P) == 0)
166 		return -1;
167 	if ((pte & PTE_U) == 0)
168 		*prot &= ~NVMM_PROT_USER;
169 	if ((pte & PTE_W) == 0)
170 		*prot &= ~NVMM_PROT_WRITE;
171 	if ((pte & PTE_PS) && !has_pse)
172 		return -1;
173 	if (pte & PTE_PS) {
174 		*gpa = (pte & PTE32_L2_FRAME);
175 		*gpa = *gpa + (gva & PTE32_L1_MASK);
176 		return 0;
177 	}
178 
179 	/* Parse L1. */
180 	L1gpa = (pte & PTE_FRAME);
181 	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
182 		return -1;
183 	pdir = (pte_32bit_t *)L1hva;
184 	pte = pdir[pte32_l1idx(gva)];
185 	if ((pte & PTE_P) == 0)
186 		return -1;
187 	if ((pte & PTE_U) == 0)
188 		*prot &= ~NVMM_PROT_USER;
189 	if ((pte & PTE_W) == 0)
190 		*prot &= ~NVMM_PROT_WRITE;
191 	if (pte & PTE_PS)
192 		return -1;
193 
194 	*gpa = (pte & PTE_FRAME);
195 	return 0;
196 }
197 
198 /* -------------------------------------------------------------------------- */
199 
200 #define	PTE32_PAE_L1_SHIFT	12
201 #define	PTE32_PAE_L2_SHIFT	21
202 #define	PTE32_PAE_L3_SHIFT	30
203 
204 #define	PTE32_PAE_L3_MASK	0xc0000000
205 #define	PTE32_PAE_L2_MASK	0x3fe00000
206 #define	PTE32_PAE_L1_MASK	0x001ff000
207 
208 #define	PTE32_PAE_L3_FRAME	(PTE32_PAE_L3_MASK)
209 #define	PTE32_PAE_L2_FRAME	(PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
210 #define	PTE32_PAE_L1_FRAME	(PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
211 
212 #define pte32_pae_l1idx(va)	(((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
213 #define pte32_pae_l2idx(va)	(((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
214 #define pte32_pae_l3idx(va)	(((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
215 
216 #define CR3_FRAME_32BIT_PAE	0xffffffe0
217 
218 typedef uint64_t pte_32bit_pae_t;
219 
220 static int
221 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
222     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
223 {
224 	gpaddr_t L3gpa, L2gpa, L1gpa;
225 	uintptr_t L3hva, L2hva, L1hva;
226 	pte_32bit_pae_t *pdir, pte;
227 	nvmm_prot_t pageprot;
228 
229 	/* We begin with an RWXU access. */
230 	*prot = NVMM_PROT_ALL;
231 
232 	/* Parse L3. */
233 	L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
234 	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
235 		return -1;
236 	pdir = (pte_32bit_pae_t *)L3hva;
237 	pte = pdir[pte32_pae_l3idx(gva)];
238 	if ((pte & PTE_P) == 0)
239 		return -1;
240 	if (pte & PTE_NX)
241 		*prot &= ~NVMM_PROT_EXEC;
242 	if (pte & PTE_PS)
243 		return -1;
244 
245 	/* Parse L2. */
246 	L2gpa = (pte & PTE_FRAME);
247 	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
248 		return -1;
249 	pdir = (pte_32bit_pae_t *)L2hva;
250 	pte = pdir[pte32_pae_l2idx(gva)];
251 	if ((pte & PTE_P) == 0)
252 		return -1;
253 	if ((pte & PTE_U) == 0)
254 		*prot &= ~NVMM_PROT_USER;
255 	if ((pte & PTE_W) == 0)
256 		*prot &= ~NVMM_PROT_WRITE;
257 	if (pte & PTE_NX)
258 		*prot &= ~NVMM_PROT_EXEC;
259 	if (pte & PTE_PS) {
260 		*gpa = (pte & PTE32_PAE_L2_FRAME);
261 		*gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
262 		return 0;
263 	}
264 
265 	/* Parse L1. */
266 	L1gpa = (pte & PTE_FRAME);
267 	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
268 		return -1;
269 	pdir = (pte_32bit_pae_t *)L1hva;
270 	pte = pdir[pte32_pae_l1idx(gva)];
271 	if ((pte & PTE_P) == 0)
272 		return -1;
273 	if ((pte & PTE_U) == 0)
274 		*prot &= ~NVMM_PROT_USER;
275 	if ((pte & PTE_W) == 0)
276 		*prot &= ~NVMM_PROT_WRITE;
277 	if (pte & PTE_NX)
278 		*prot &= ~NVMM_PROT_EXEC;
279 	if (pte & PTE_PS)
280 		return -1;
281 
282 	*gpa = (pte & PTE_FRAME);
283 	return 0;
284 }
285 
286 /* -------------------------------------------------------------------------- */
287 
288 #define PTE64_L1_SHIFT	12
289 #define PTE64_L2_SHIFT	21
290 #define PTE64_L3_SHIFT	30
291 #define PTE64_L4_SHIFT	39
292 
293 #define PTE64_L4_MASK	0x0000ff8000000000
294 #define PTE64_L3_MASK	0x0000007fc0000000
295 #define PTE64_L2_MASK	0x000000003fe00000
296 #define PTE64_L1_MASK	0x00000000001ff000
297 
298 #define PTE64_L4_FRAME	PTE64_L4_MASK
299 #define PTE64_L3_FRAME	(PTE64_L4_FRAME|PTE64_L3_MASK)
300 #define PTE64_L2_FRAME	(PTE64_L3_FRAME|PTE64_L2_MASK)
301 #define PTE64_L1_FRAME	(PTE64_L2_FRAME|PTE64_L1_MASK)
302 
303 #define pte64_l1idx(va)	(((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
304 #define pte64_l2idx(va)	(((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
305 #define pte64_l3idx(va)	(((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
306 #define pte64_l4idx(va)	(((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
307 
308 #define CR3_FRAME_64BIT	0x000ffffffffff000
309 
310 typedef uint64_t pte_64bit_t;
311 
312 static inline bool
313 x86_gva_64bit_canonical(gvaddr_t gva)
314 {
315 	/* Bits 63:47 must have the same value. */
316 #define SIGN_EXTEND	0xffff800000000000ULL
317 	return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
318 }
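
/*
 * Examples (illustrative): with 48-bit virtual addresses, 0x00007fffffffffff
 * and 0xffff800000000000 are canonical, whereas 0x0000800000000000 is not.
 */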
319 
320 static int
321 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
322     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
323 {
324 	gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
325 	uintptr_t L4hva, L3hva, L2hva, L1hva;
326 	pte_64bit_t *pdir, pte;
327 	nvmm_prot_t pageprot;
328 
329 	/* We begin with an RWXU access. */
330 	*prot = NVMM_PROT_ALL;
331 
332 	if (!x86_gva_64bit_canonical(gva))
333 		return -1;
334 
335 	/* Parse L4. */
336 	L4gpa = (cr3 & CR3_FRAME_64BIT);
337 	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
338 		return -1;
339 	pdir = (pte_64bit_t *)L4hva;
340 	pte = pdir[pte64_l4idx(gva)];
341 	if ((pte & PTE_P) == 0)
342 		return -1;
343 	if ((pte & PTE_U) == 0)
344 		*prot &= ~NVMM_PROT_USER;
345 	if ((pte & PTE_W) == 0)
346 		*prot &= ~NVMM_PROT_WRITE;
347 	if (pte & PTE_NX)
348 		*prot &= ~NVMM_PROT_EXEC;
349 	if (pte & PTE_PS)
350 		return -1;
351 
352 	/* Parse L3. */
353 	L3gpa = (pte & PTE_FRAME);
354 	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
355 		return -1;
356 	pdir = (pte_64bit_t *)L3hva;
357 	pte = pdir[pte64_l3idx(gva)];
358 	if ((pte & PTE_P) == 0)
359 		return -1;
360 	if ((pte & PTE_U) == 0)
361 		*prot &= ~NVMM_PROT_USER;
362 	if ((pte & PTE_W) == 0)
363 		*prot &= ~NVMM_PROT_WRITE;
364 	if (pte & PTE_NX)
365 		*prot &= ~NVMM_PROT_EXEC;
366 	if (pte & PTE_PS) {
367 		*gpa = (pte & PTE64_L3_FRAME);
368 		*gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
369 		return 0;
370 	}
371 
372 	/* Parse L2. */
373 	L2gpa = (pte & PTE_FRAME);
374 	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
375 		return -1;
376 	pdir = (pte_64bit_t *)L2hva;
377 	pte = pdir[pte64_l2idx(gva)];
378 	if ((pte & PTE_P) == 0)
379 		return -1;
380 	if ((pte & PTE_U) == 0)
381 		*prot &= ~NVMM_PROT_USER;
382 	if ((pte & PTE_W) == 0)
383 		*prot &= ~NVMM_PROT_WRITE;
384 	if (pte & PTE_NX)
385 		*prot &= ~NVMM_PROT_EXEC;
386 	if (pte & PTE_PS) {
387 		*gpa = (pte & PTE64_L2_FRAME);
388 		*gpa = *gpa + (gva & PTE64_L1_MASK);
389 		return 0;
390 	}
391 
392 	/* Parse L1. */
393 	L1gpa = (pte & PTE_FRAME);
394 	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
395 		return -1;
396 	pdir = (pte_64bit_t *)L1hva;
397 	pte = pdir[pte64_l1idx(gva)];
398 	if ((pte & PTE_P) == 0)
399 		return -1;
400 	if ((pte & PTE_U) == 0)
401 		*prot &= ~NVMM_PROT_USER;
402 	if ((pte & PTE_W) == 0)
403 		*prot &= ~NVMM_PROT_WRITE;
404 	if (pte & PTE_NX)
405 		*prot &= ~NVMM_PROT_EXEC;
406 	if (pte & PTE_PS)
407 		return -1;
408 
409 	*gpa = (pte & PTE_FRAME);
410 	return 0;
411 }
412 
413 static inline int
414 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
415     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
416 {
417 	bool is_pae, is_lng, has_pse;
418 	uint64_t cr3;
419 	size_t off;
420 	int ret;
421 
422 	if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
423 		/* No paging. */
424 		*prot = NVMM_PROT_ALL;
425 		*gpa = gva;
426 		return 0;
427 	}
428 
429 	off = (gva & PAGE_MASK);
430 	gva &= ~PAGE_MASK;
431 
432 	is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
433 	is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
434 	has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
435 	cr3 = state->crs[NVMM_X64_CR_CR3];
436 
437 	if (is_pae && is_lng) {
438 		/* 64bit */
439 		ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
440 	} else if (is_pae && !is_lng) {
441 		/* 32bit PAE */
442 		ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
443 	} else if (!is_pae && !is_lng) {
444 		/* 32bit */
445 		ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
446 	} else {
447 		ret = -1;
448 	}
449 
450 	if (ret == -1) {
451 		errno = EFAULT;
452 	}
453 
454 	*gpa = *gpa + off;
455 
456 	return ret;
457 }
458 
459 int
460 nvmm_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
461     gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
462 {
463 	struct nvmm_x64_state *state = vcpu->state;
464 	int ret;
465 
466 	ret = nvmm_vcpu_getstate(mach, vcpu,
467 	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
468 	if (ret == -1)
469 		return -1;
470 
471 	return x86_gva_to_gpa(mach, state, gva, gpa, prot);
472 }
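
/*
 * Illustrative sketch (not part of the library): translating a guest virtual
 * pointer all the way to a host pointer typically chains this function with
 * nvmm_gpa_to_hva(), ignoring page-boundary crossings:
 *
 *	gpaddr_t gpa;
 *	uintptr_t hva;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(mach, vcpu, gva, &gpa, &prot) == 0 &&
 *	    nvmm_gpa_to_hva(mach, gpa, &hva, &prot) == 0)
 *		memcpy(buf, (void *)hva, len);
 *
 * The mach, vcpu, gva, buf and len variables are assumed to exist in the
 * caller.
 */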
473 
474 /* -------------------------------------------------------------------------- */
475 
476 #define DISASSEMBLER_BUG()	\
477 	do {			\
478 		errno = EINVAL;	\
479 		return -1;	\
480 	} while (0)
481 
482 static inline bool
483 is_long_mode(struct nvmm_x64_state *state)
484 {
485 	return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
486 }
487 
488 static inline bool
489 is_64bit(struct nvmm_x64_state *state)
490 {
491 	return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
492 }
493 
494 static inline bool
495 is_32bit(struct nvmm_x64_state *state)
496 {
497 	return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
498 	    (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
499 }
500 
501 static inline bool
502 is_16bit(struct nvmm_x64_state *state)
503 {
504 	return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
505 	    (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
506 }
507 
508 static int
509 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
510 {
511 	uint64_t limit;
512 
513 	/*
514 	 * This is incomplete. We should check topdown segments, etc.; really,
515 	 * that's tiring.
516 	 */
517 	if (__predict_false(!seg->attrib.p)) {
518 		goto error;
519 	}
520 
521 	limit = (uint64_t)seg->limit + 1;
522 	if (__predict_true(seg->attrib.g)) {
523 		limit *= PAGE_SIZE;
524 	}
525 
526 	if (__predict_false(gva + size > limit)) {
527 		goto error;
528 	}
529 
530 	return 0;
531 
532 error:
533 	errno = EFAULT;
534 	return -1;
535 }
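
/*
 * Example (illustrative): a flat 4GB data segment has limit=0xfffff with the
 * granularity bit set, so the effective limit is (0xfffff + 1) * PAGE_SIZE =
 * 4GB, and any in-range gva/size pair passes the check above.
 */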
536 
537 static inline void
538 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
539 {
540 	*gva += seg->base;
541 }
542 
543 static inline uint64_t
544 size_to_mask(size_t size)
545 {
546 	switch (size) {
547 	case 1:
548 		return 0x00000000000000FF;
549 	case 2:
550 		return 0x000000000000FFFF;
551 	case 4:
552 		return 0x00000000FFFFFFFF;
553 	case 8:
554 	default:
555 		return 0xFFFFFFFFFFFFFFFF;
556 	}
557 }
558 
559 static uint64_t
560 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
561 {
562 	uint64_t mask, cnt;
563 
564 	mask = size_to_mask(adsize);
565 	cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
566 
567 	return cnt;
568 }
569 
570 static void
571 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
572 {
573 	uint64_t mask;
574 
575 	/* XXX: should we zero-extend? */
576 	mask = size_to_mask(adsize);
577 	state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
578 	state->gprs[NVMM_X64_GPR_RCX] |= cnt;
579 }
580 
581 static int
582 read_guest_memory(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
583     gvaddr_t gva, uint8_t *data, size_t size)
584 {
585 	struct nvmm_x64_state *state = vcpu->state;
586 	struct nvmm_mem mem;
587 	nvmm_prot_t prot;
588 	gpaddr_t gpa;
589 	uintptr_t hva;
590 	bool is_mmio;
591 	int ret, remain;
592 
593 	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
594 	if (__predict_false(ret == -1)) {
595 		return -1;
596 	}
597 	if (__predict_false(!(prot & NVMM_PROT_READ))) {
598 		errno = EFAULT;
599 		return -1;
600 	}
601 
602 	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
603 		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
604 	} else {
605 		remain = 0;
606 	}
607 	size -= remain;
608 
609 	ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
610 	is_mmio = (ret == -1);
611 
612 	if (is_mmio) {
613 		mem.mach = mach;
614 		mem.vcpu = vcpu;
615 		mem.data = data;
616 		mem.gpa = gpa;
617 		mem.write = false;
618 		mem.size = size;
619 		(*vcpu->cbs.mem)(&mem);
620 	} else {
621 		if (__predict_false(!(prot & NVMM_PROT_READ))) {
622 			errno = EFAULT;
623 			return -1;
624 		}
625 		memcpy(data, (uint8_t *)hva, size);
626 	}
627 
628 	if (remain > 0) {
629 		ret = read_guest_memory(mach, vcpu, gva + size,
630 		    data + size, remain);
631 	} else {
632 		ret = 0;
633 	}
634 
635 	return ret;
636 }
637 
638 static int
639 write_guest_memory(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
640     gvaddr_t gva, uint8_t *data, size_t size)
641 {
642 	struct nvmm_x64_state *state = vcpu->state;
643 	struct nvmm_mem mem;
644 	nvmm_prot_t prot;
645 	gpaddr_t gpa;
646 	uintptr_t hva;
647 	bool is_mmio;
648 	int ret, remain;
649 
650 	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
651 	if (__predict_false(ret == -1)) {
652 		return -1;
653 	}
654 	if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
655 		errno = EFAULT;
656 		return -1;
657 	}
658 
659 	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
660 		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
661 	} else {
662 		remain = 0;
663 	}
664 	size -= remain;
665 
666 	ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
667 	is_mmio = (ret == -1);
668 
669 	if (is_mmio) {
670 		mem.mach = mach;
671 		mem.vcpu = vcpu;
672 		mem.data = data;
673 		mem.gpa = gpa;
674 		mem.write = true;
675 		mem.size = size;
676 		(*vcpu->cbs.mem)(&mem);
677 	} else {
678 		if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
679 			errno = EFAULT;
680 			return -1;
681 		}
682 		memcpy((uint8_t *)hva, data, size);
683 	}
684 
685 	if (remain > 0) {
686 		ret = write_guest_memory(mach, vcpu, gva + size,
687 		    data + size, remain);
688 	} else {
689 		ret = 0;
690 	}
691 
692 	return ret;
693 }
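
/*
 * Example (illustrative): read_guest_memory()/write_guest_memory() split
 * accesses that cross a page boundary. For gva=0x1ff8 and size=16, the first
 * pass handles the 8 bytes left in the current page and then recurses with
 * gva=0x2000, size=8, so each chunk gets its own GVA->GPA translation.
 */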
694 
695 /* -------------------------------------------------------------------------- */
696 
697 static int fetch_segment(struct nvmm_machine *, struct nvmm_vcpu *);
698 
699 #define NVMM_IO_BATCH_SIZE	32
700 
701 static int
702 assist_io_batch(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
703     struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
704 {
705 	uint8_t iobuf[NVMM_IO_BATCH_SIZE];
706 	size_t i, iosize, iocnt;
707 	int ret;
708 
709 	cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
710 	iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
711 	iocnt = iosize / io->size;
712 
713 	io->data = iobuf;
714 
715 	if (!io->in) {
716 		ret = read_guest_memory(mach, vcpu, gva, iobuf, iosize);
717 		if (ret == -1)
718 			return -1;
719 	}
720 
721 	for (i = 0; i < iocnt; i++) {
722 		(*vcpu->cbs.io)(io);
723 		io->data += io->size;
724 	}
725 
726 	if (io->in) {
727 		ret = write_guest_memory(mach, vcpu, gva, iobuf, iosize);
728 		if (ret == -1)
729 			return -1;
730 	}
731 
732 	return iocnt;
733 }
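
/*
 * Example (illustrative): for a "REP OUTSW" with RCX=100 and io->size=2,
 * iosize = MIN(200, 32) = 32 and iocnt = 16, so one batch emulates 16
 * iterations. Since nvmm_assist_io() only advances RIP once the REP count
 * reaches zero, the guest re-executes the instruction and subsequent exits
 * handle the remaining iterations.
 */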
734 
735 int
736 nvmm_assist_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
737 {
738 	struct nvmm_x64_state *state = vcpu->state;
739 	struct nvmm_vcpu_exit *exit = vcpu->exit;
740 	struct nvmm_io io;
741 	uint64_t cnt = 0; /* GCC */
742 	uint8_t iobuf[8];
743 	int iocnt = 1;
744 	gvaddr_t gva = 0; /* GCC */
745 	int reg = 0; /* GCC */
746 	int ret, seg;
747 	bool psld = false;
748 
749 	if (__predict_false(exit->reason != NVMM_VCPU_EXIT_IO)) {
750 		errno = EINVAL;
751 		return -1;
752 	}
753 
754 	io.mach = mach;
755 	io.vcpu = vcpu;
756 	io.port = exit->u.io.port;
757 	io.in = exit->u.io.in;
758 	io.size = exit->u.io.operand_size;
759 	io.data = iobuf;
760 
761 	ret = nvmm_vcpu_getstate(mach, vcpu,
762 	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
763 	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
764 	if (ret == -1)
765 		return -1;
766 
767 	if (exit->u.io.rep) {
768 		cnt = rep_get_cnt(state, exit->u.io.address_size);
769 		if (__predict_false(cnt == 0)) {
770 			state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
771 			goto out;
772 		}
773 	}
774 
775 	if (__predict_false(state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
776 		psld = true;
777 	}
778 
779 	/*
780 	 * Determine GVA.
781 	 */
782 	if (exit->u.io.str) {
783 		if (io.in) {
784 			reg = NVMM_X64_GPR_RDI;
785 		} else {
786 			reg = NVMM_X64_GPR_RSI;
787 		}
788 
789 		gva = state->gprs[reg];
790 		gva &= size_to_mask(exit->u.io.address_size);
791 
792 		if (exit->u.io.seg != -1) {
793 			seg = exit->u.io.seg;
794 		} else {
795 			if (io.in) {
796 				seg = NVMM_X64_SEG_ES;
797 			} else {
798 				seg = fetch_segment(mach, vcpu);
799 				if (seg == -1)
800 					return -1;
801 			}
802 		}
803 
804 		if (__predict_true(is_long_mode(state))) {
805 			if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
806 				segment_apply(&state->segs[seg], &gva);
807 			}
808 		} else {
809 			ret = segment_check(&state->segs[seg], gva, io.size);
810 			if (ret == -1)
811 				return -1;
812 			segment_apply(&state->segs[seg], &gva);
813 		}
814 
815 		if (exit->u.io.rep && !psld) {
816 			iocnt = assist_io_batch(mach, vcpu, &io, gva, cnt);
817 			if (iocnt == -1)
818 				return -1;
819 			goto done;
820 		}
821 	}
822 
823 	if (!io.in) {
824 		if (!exit->u.io.str) {
825 			memcpy(io.data, &state->gprs[NVMM_X64_GPR_RAX], io.size);
826 		} else {
827 			ret = read_guest_memory(mach, vcpu, gva, io.data,
828 			    io.size);
829 			if (ret == -1)
830 				return -1;
831 		}
832 	}
833 
834 	(*vcpu->cbs.io)(&io);
835 
836 	if (io.in) {
837 		if (!exit->u.io.str) {
838 			memcpy(&state->gprs[NVMM_X64_GPR_RAX], io.data, io.size);
839 			if (io.size == 4) {
840 				/* Zero-extend to 64 bits. */
841 				state->gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
842 			}
843 		} else {
844 			ret = write_guest_memory(mach, vcpu, gva, io.data,
845 			    io.size);
846 			if (ret == -1)
847 				return -1;
848 		}
849 	}
850 
851 done:
852 	if (exit->u.io.str) {
853 		if (__predict_false(psld)) {
854 			state->gprs[reg] -= iocnt * io.size;
855 		} else {
856 			state->gprs[reg] += iocnt * io.size;
857 		}
858 	}
859 
860 	if (exit->u.io.rep) {
861 		cnt -= iocnt;
862 		rep_set_cnt(state, exit->u.io.address_size, cnt);
863 		if (cnt == 0) {
864 			state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
865 		}
866 	} else {
867 		state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
868 	}
869 
870 out:
871 	ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
872 	if (ret == -1)
873 		return -1;
874 
875 	return 0;
876 }
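
/*
 * Illustrative sketch (not part of this file): a typical VMM run loop
 * dispatches I/O exits to this assist function, roughly:
 *
 *	while (nvmm_vcpu_run(mach, vcpu) == 0) {
 *		switch (vcpu->exit->reason) {
 *		case NVMM_VCPU_EXIT_IO:
 *			if (nvmm_assist_io(mach, vcpu) == -1)
 *				err(EXIT_FAILURE, "nvmm_assist_io");
 *			break;
 *		...
 *		}
 *	}
 *
 * nvmm_vcpu_run() is assumed here to be the libnvmm run entry point; the
 * port access itself is performed by the callback registered in
 * vcpu->cbs.io.
 */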
877 
878 /* -------------------------------------------------------------------------- */
879 
880 struct x86_emul {
881 	bool readreg;
882 	bool backprop;
883 	bool notouch;
884 	void (*func)(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
885 };
886 
887 static void x86_func_or(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
888 static void x86_func_and(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
889 static void x86_func_xchg(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
890 static void x86_func_sub(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
891 static void x86_func_xor(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
892 static void x86_func_cmp(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
893 static void x86_func_test(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
894 static void x86_func_mov(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
895 static void x86_func_stos(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
896 static void x86_func_lods(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
897 
898 static const struct x86_emul x86_emul_or = {
899 	.readreg = true,
900 	.func = x86_func_or
901 };
902 
903 static const struct x86_emul x86_emul_and = {
904 	.readreg = true,
905 	.func = x86_func_and
906 };
907 
908 static const struct x86_emul x86_emul_xchg = {
909 	.readreg = true,
910 	.backprop = true,
911 	.func = x86_func_xchg
912 };
913 
914 static const struct x86_emul x86_emul_sub = {
915 	.readreg = true,
916 	.func = x86_func_sub
917 };
918 
919 static const struct x86_emul x86_emul_xor = {
920 	.readreg = true,
921 	.func = x86_func_xor
922 };
923 
924 static const struct x86_emul x86_emul_cmp = {
925 	.notouch = true,
926 	.func = x86_func_cmp
927 };
928 
929 static const struct x86_emul x86_emul_test = {
930 	.notouch = true,
931 	.func = x86_func_test
932 };
933 
934 static const struct x86_emul x86_emul_mov = {
935 	.func = x86_func_mov
936 };
937 
938 static const struct x86_emul x86_emul_stos = {
939 	.func = x86_func_stos
940 };
941 
942 static const struct x86_emul x86_emul_lods = {
943 	.func = x86_func_lods
944 };
945 
946 /* Legacy prefixes. */
947 #define LEG_LOCK	0xF0
948 #define LEG_REPN	0xF2
949 #define LEG_REP		0xF3
950 #define LEG_OVR_CS	0x2E
951 #define LEG_OVR_SS	0x36
952 #define LEG_OVR_DS	0x3E
953 #define LEG_OVR_ES	0x26
954 #define LEG_OVR_FS	0x64
955 #define LEG_OVR_GS	0x65
956 #define LEG_OPR_OVR	0x66
957 #define LEG_ADR_OVR	0x67
958 
959 struct x86_legpref {
960 	bool opr_ovr:1;
961 	bool adr_ovr:1;
962 	bool rep:1;
963 	bool repn:1;
964 	int8_t seg;
965 };
966 
967 struct x86_rexpref {
968 	bool b:1;
969 	bool x:1;
970 	bool r:1;
971 	bool w:1;
972 	bool present:1;
973 };
974 
975 struct x86_reg {
976 	int num;	/* NVMM GPR state index */
977 	uint64_t mask;
978 };
979 
980 struct x86_dualreg {
981 	int reg1;
982 	int reg2;
983 };
984 
985 enum x86_disp_type {
986 	DISP_NONE,
987 	DISP_0,
988 	DISP_1,
989 	DISP_2,
990 	DISP_4
991 };
992 
993 struct x86_disp {
994 	enum x86_disp_type type;
995 	uint64_t data; /* 4 bytes, but can be sign-extended */
996 };
997 
998 struct x86_regmodrm {
999 	uint8_t mod:2;
1000 	uint8_t reg:3;
1001 	uint8_t rm:3;
1002 };
1003 
1004 struct x86_immediate {
1005 	uint64_t data;
1006 };
1007 
1008 struct x86_sib {
1009 	uint8_t scale;
1010 	const struct x86_reg *idx;
1011 	const struct x86_reg *bas;
1012 };
1013 
1014 enum x86_store_type {
1015 	STORE_NONE,
1016 	STORE_REG,
1017 	STORE_DUALREG,
1018 	STORE_IMM,
1019 	STORE_SIB,
1020 	STORE_DMO
1021 };
1022 
1023 struct x86_store {
1024 	enum x86_store_type type;
1025 	union {
1026 		const struct x86_reg *reg;
1027 		struct x86_dualreg dualreg;
1028 		struct x86_immediate imm;
1029 		struct x86_sib sib;
1030 		uint64_t dmo;
1031 	} u;
1032 	struct x86_disp disp;
1033 	int hardseg;
1034 };
1035 
1036 struct x86_instr {
1037 	uint8_t len;
1038 	struct x86_legpref legpref;
1039 	struct x86_rexpref rexpref;
1040 	struct x86_regmodrm regmodrm;
1041 	uint8_t operand_size;
1042 	uint8_t address_size;
1043 	uint64_t zeroextend_mask;
1044 
1045 	const struct x86_opcode *opcode;
1046 	const struct x86_emul *emul;
1047 
1048 	struct x86_store src;
1049 	struct x86_store dst;
1050 	struct x86_store *strm;
1051 };
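
/*
 * Decode example (illustrative): the byte sequence 0x89 0x08 is
 * "mov %ecx,(%rax)". Opcode 0x89 is MOV Ev,Gv (regtorm), and ModRM 0x08
 * gives mod=00, reg=001 (RCX), rm=000 (RAX); the decoder fills instr->src
 * with the RCX register and instr->dst with a memory store through RAX
 * (displacement type DISP_0).
 */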
1052 
1053 struct x86_decode_fsm {
1054 	/* vcpu */
1055 	bool is64bit;
1056 	bool is32bit;
1057 	bool is16bit;
1058 
1059 	/* fsm */
1060 	int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1061 	uint8_t *buf;
1062 	uint8_t *end;
1063 };
1064 
1065 struct x86_opcode {
1066 	bool valid:1;
1067 	bool regmodrm:1;
1068 	bool regtorm:1;
1069 	bool dmo:1;
1070 	bool todmo:1;
1071 	bool movs:1;
1072 	bool stos:1;
1073 	bool lods:1;
1074 	bool szoverride:1;
1075 	bool group1:1;
1076 	bool group3:1;
1077 	bool group11:1;
1078 	bool immediate:1;
1079 	uint8_t defsize;
1080 	uint8_t flags;
1081 	const struct x86_emul *emul;
1082 };
1083 
1084 struct x86_group_entry {
1085 	const struct x86_emul *emul;
1086 };
1087 
1088 #define OPSIZE_BYTE 0x01
1089 #define OPSIZE_WORD 0x02 /* 2 bytes */
1090 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1091 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1092 
1093 #define FLAG_imm8	0x01
1094 #define FLAG_immz	0x02
1095 #define FLAG_ze		0x04
1096 
1097 static const struct x86_group_entry group1[8] __cacheline_aligned = {
1098 	[1] = { .emul = &x86_emul_or },
1099 	[4] = { .emul = &x86_emul_and },
1100 	[6] = { .emul = &x86_emul_xor },
1101 	[7] = { .emul = &x86_emul_cmp }
1102 };
1103 
1104 static const struct x86_group_entry group3[8] __cacheline_aligned = {
1105 	[0] = { .emul = &x86_emul_test },
1106 	[1] = { .emul = &x86_emul_test }
1107 };
1108 
1109 static const struct x86_group_entry group11[8] __cacheline_aligned = {
1110 	[0] = { .emul = &x86_emul_mov }
1111 };
1112 
1113 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
1114 	/*
1115 	 * Group1
1116 	 */
1117 	[0x80] = {
1118 		/* Eb, Ib */
1119 		.valid = true,
1120 		.regmodrm = true,
1121 		.regtorm = true,
1122 		.szoverride = false,
1123 		.defsize = OPSIZE_BYTE,
1124 		.group1 = true,
1125 		.immediate = true,
1126 		.emul = NULL /* group1 */
1127 	},
1128 	[0x81] = {
1129 		/* Ev, Iz */
1130 		.valid = true,
1131 		.regmodrm = true,
1132 		.regtorm = true,
1133 		.szoverride = true,
1134 		.defsize = -1,
1135 		.group1 = true,
1136 		.immediate = true,
1137 		.flags = FLAG_immz,
1138 		.emul = NULL /* group1 */
1139 	},
1140 	[0x83] = {
1141 		/* Ev, Ib */
1142 		.valid = true,
1143 		.regmodrm = true,
1144 		.regtorm = true,
1145 		.szoverride = true,
1146 		.defsize = -1,
1147 		.group1 = true,
1148 		.immediate = true,
1149 		.flags = FLAG_imm8,
1150 		.emul = NULL /* group1 */
1151 	},
1152 
1153 	/*
1154 	 * Group3
1155 	 */
1156 	[0xF6] = {
1157 		/* Eb, Ib */
1158 		.valid = true,
1159 		.regmodrm = true,
1160 		.regtorm = true,
1161 		.szoverride = false,
1162 		.defsize = OPSIZE_BYTE,
1163 		.group3 = true,
1164 		.immediate = true,
1165 		.emul = NULL /* group3 */
1166 	},
1167 	[0xF7] = {
1168 		/* Ev, Iz */
1169 		.valid = true,
1170 		.regmodrm = true,
1171 		.regtorm = true,
1172 		.szoverride = true,
1173 		.defsize = -1,
1174 		.group3 = true,
1175 		.immediate = true,
1176 		.flags = FLAG_immz,
1177 		.emul = NULL /* group3 */
1178 	},
1179 
1180 	/*
1181 	 * Group11
1182 	 */
1183 	[0xC6] = {
1184 		/* Eb, Ib */
1185 		.valid = true,
1186 		.regmodrm = true,
1187 		.regtorm = true,
1188 		.szoverride = false,
1189 		.defsize = OPSIZE_BYTE,
1190 		.group11 = true,
1191 		.immediate = true,
1192 		.emul = NULL /* group11 */
1193 	},
1194 	[0xC7] = {
1195 		/* Ev, Iz */
1196 		.valid = true,
1197 		.regmodrm = true,
1198 		.regtorm = true,
1199 		.szoverride = true,
1200 		.defsize = -1,
1201 		.group11 = true,
1202 		.immediate = true,
1203 		.flags = FLAG_immz,
1204 		.emul = NULL /* group11 */
1205 	},
1206 
1207 	/*
1208 	 * OR
1209 	 */
1210 	[0x08] = {
1211 		/* Eb, Gb */
1212 		.valid = true,
1213 		.regmodrm = true,
1214 		.regtorm = true,
1215 		.szoverride = false,
1216 		.defsize = OPSIZE_BYTE,
1217 		.emul = &x86_emul_or
1218 	},
1219 	[0x09] = {
1220 		/* Ev, Gv */
1221 		.valid = true,
1222 		.regmodrm = true,
1223 		.regtorm = true,
1224 		.szoverride = true,
1225 		.defsize = -1,
1226 		.emul = &x86_emul_or
1227 	},
1228 	[0x0A] = {
1229 		/* Gb, Eb */
1230 		.valid = true,
1231 		.regmodrm = true,
1232 		.regtorm = false,
1233 		.szoverride = false,
1234 		.defsize = OPSIZE_BYTE,
1235 		.emul = &x86_emul_or
1236 	},
1237 	[0x0B] = {
1238 		/* Gv, Ev */
1239 		.valid = true,
1240 		.regmodrm = true,
1241 		.regtorm = false,
1242 		.szoverride = true,
1243 		.defsize = -1,
1244 		.emul = &x86_emul_or
1245 	},
1246 
1247 	/*
1248 	 * AND
1249 	 */
1250 	[0x20] = {
1251 		/* Eb, Gb */
1252 		.valid = true,
1253 		.regmodrm = true,
1254 		.regtorm = true,
1255 		.szoverride = false,
1256 		.defsize = OPSIZE_BYTE,
1257 		.emul = &x86_emul_and
1258 	},
1259 	[0x21] = {
1260 		/* Ev, Gv */
1261 		.valid = true,
1262 		.regmodrm = true,
1263 		.regtorm = true,
1264 		.szoverride = true,
1265 		.defsize = -1,
1266 		.emul = &x86_emul_and
1267 	},
1268 	[0x22] = {
1269 		/* Gb, Eb */
1270 		.valid = true,
1271 		.regmodrm = true,
1272 		.regtorm = false,
1273 		.szoverride = false,
1274 		.defsize = OPSIZE_BYTE,
1275 		.emul = &x86_emul_and
1276 	},
1277 	[0x23] = {
1278 		/* Gv, Ev */
1279 		.valid = true,
1280 		.regmodrm = true,
1281 		.regtorm = false,
1282 		.szoverride = true,
1283 		.defsize = -1,
1284 		.emul = &x86_emul_and
1285 	},
1286 
1287 	/*
1288 	 * SUB
1289 	 */
1290 	[0x28] = {
1291 		/* Eb, Gb */
1292 		.valid = true,
1293 		.regmodrm = true,
1294 		.regtorm = true,
1295 		.szoverride = false,
1296 		.defsize = OPSIZE_BYTE,
1297 		.emul = &x86_emul_sub
1298 	},
1299 	[0x29] = {
1300 		/* Ev, Gv */
1301 		.valid = true,
1302 		.regmodrm = true,
1303 		.regtorm = true,
1304 		.szoverride = true,
1305 		.defsize = -1,
1306 		.emul = &x86_emul_sub
1307 	},
1308 	[0x2A] = {
1309 		/* Gb, Eb */
1310 		.valid = true,
1311 		.regmodrm = true,
1312 		.regtorm = false,
1313 		.szoverride = false,
1314 		.defsize = OPSIZE_BYTE,
1315 		.emul = &x86_emul_sub
1316 	},
1317 	[0x2B] = {
1318 		/* Gv, Ev */
1319 		.valid = true,
1320 		.regmodrm = true,
1321 		.regtorm = false,
1322 		.szoverride = true,
1323 		.defsize = -1,
1324 		.emul = &x86_emul_sub
1325 	},
1326 
1327 	/*
1328 	 * XOR
1329 	 */
1330 	[0x30] = {
1331 		/* Eb, Gb */
1332 		.valid = true,
1333 		.regmodrm = true,
1334 		.regtorm = true,
1335 		.szoverride = false,
1336 		.defsize = OPSIZE_BYTE,
1337 		.emul = &x86_emul_xor
1338 	},
1339 	[0x31] = {
1340 		/* Ev, Gv */
1341 		.valid = true,
1342 		.regmodrm = true,
1343 		.regtorm = true,
1344 		.szoverride = true,
1345 		.defsize = -1,
1346 		.emul = &x86_emul_xor
1347 	},
1348 	[0x32] = {
1349 		/* Gb, Eb */
1350 		.valid = true,
1351 		.regmodrm = true,
1352 		.regtorm = false,
1353 		.szoverride = false,
1354 		.defsize = OPSIZE_BYTE,
1355 		.emul = &x86_emul_xor
1356 	},
1357 	[0x33] = {
1358 		/* Gv, Ev */
1359 		.valid = true,
1360 		.regmodrm = true,
1361 		.regtorm = false,
1362 		.szoverride = true,
1363 		.defsize = -1,
1364 		.emul = &x86_emul_xor
1365 	},
1366 
1367 	/*
1368 	 * XCHG
1369 	 */
1370 	[0x86] = {
1371 		/* Eb, Gb */
1372 		.valid = true,
1373 		.regmodrm = true,
1374 		.regtorm = true,
1375 		.szoverride = false,
1376 		.defsize = OPSIZE_BYTE,
1377 		.emul = &x86_emul_xchg
1378 	},
1379 	[0x87] = {
1380 		/* Ev, Gv */
1381 		.valid = true,
1382 		.regmodrm = true,
1383 		.regtorm = true,
1384 		.szoverride = true,
1385 		.defsize = -1,
1386 		.emul = &x86_emul_xchg
1387 	},
1388 
1389 	/*
1390 	 * MOV
1391 	 */
1392 	[0x88] = {
1393 		/* Eb, Gb */
1394 		.valid = true,
1395 		.regmodrm = true,
1396 		.regtorm = true,
1397 		.szoverride = false,
1398 		.defsize = OPSIZE_BYTE,
1399 		.emul = &x86_emul_mov
1400 	},
1401 	[0x89] = {
1402 		/* Ev, Gv */
1403 		.valid = true,
1404 		.regmodrm = true,
1405 		.regtorm = true,
1406 		.szoverride = true,
1407 		.defsize = -1,
1408 		.emul = &x86_emul_mov
1409 	},
1410 	[0x8A] = {
1411 		/* Gb, Eb */
1412 		.valid = true,
1413 		.regmodrm = true,
1414 		.regtorm = false,
1415 		.szoverride = false,
1416 		.defsize = OPSIZE_BYTE,
1417 		.emul = &x86_emul_mov
1418 	},
1419 	[0x8B] = {
1420 		/* Gv, Ev */
1421 		.valid = true,
1422 		.regmodrm = true,
1423 		.regtorm = false,
1424 		.szoverride = true,
1425 		.defsize = -1,
1426 		.emul = &x86_emul_mov
1427 	},
1428 	[0xA0] = {
1429 		/* AL, Ob */
1430 		.valid = true,
1431 		.dmo = true,
1432 		.todmo = false,
1433 		.szoverride = false,
1434 		.defsize = OPSIZE_BYTE,
1435 		.emul = &x86_emul_mov
1436 	},
1437 	[0xA1] = {
1438 		/* rAX, Ov */
1439 		.valid = true,
1440 		.dmo = true,
1441 		.todmo = false,
1442 		.szoverride = true,
1443 		.defsize = -1,
1444 		.emul = &x86_emul_mov
1445 	},
1446 	[0xA2] = {
1447 		/* Ob, AL */
1448 		.valid = true,
1449 		.dmo = true,
1450 		.todmo = true,
1451 		.szoverride = false,
1452 		.defsize = OPSIZE_BYTE,
1453 		.emul = &x86_emul_mov
1454 	},
1455 	[0xA3] = {
1456 		/* Ov, rAX */
1457 		.valid = true,
1458 		.dmo = true,
1459 		.todmo = true,
1460 		.szoverride = true,
1461 		.defsize = -1,
1462 		.emul = &x86_emul_mov
1463 	},
1464 
1465 	/*
1466 	 * MOVS
1467 	 */
1468 	[0xA4] = {
1469 		/* Yb, Xb */
1470 		.valid = true,
1471 		.movs = true,
1472 		.szoverride = false,
1473 		.defsize = OPSIZE_BYTE,
1474 		.emul = NULL /* assist_mem_double_movs */
1475 	},
1476 	[0xA5] = {
1477 		/* Yv, Xv */
1478 		.valid = true,
1479 		.movs = true,
1480 		.szoverride = true,
1481 		.defsize = -1,
1482 		.emul = NULL /* assist_mem_double_movs */
1483 	},
1484 
1485 	/*
1486 	 * STOS
1487 	 */
1488 	[0xAA] = {
1489 		/* Yb, AL */
1490 		.valid = true,
1491 		.stos = true,
1492 		.szoverride = false,
1493 		.defsize = OPSIZE_BYTE,
1494 		.emul = &x86_emul_stos
1495 	},
1496 	[0xAB] = {
1497 		/* Yv, rAX */
1498 		.valid = true,
1499 		.stos = true,
1500 		.szoverride = true,
1501 		.defsize = -1,
1502 		.emul = &x86_emul_stos
1503 	},
1504 
1505 	/*
1506 	 * LODS
1507 	 */
1508 	[0xAC] = {
1509 		/* AL, Xb */
1510 		.valid = true,
1511 		.lods = true,
1512 		.szoverride = false,
1513 		.defsize = OPSIZE_BYTE,
1514 		.emul = &x86_emul_lods
1515 	},
1516 	[0xAD] = {
1517 		/* rAX, Xv */
1518 		.valid = true,
1519 		.lods = true,
1520 		.szoverride = true,
1521 		.defsize = -1,
1522 		.emul = &x86_emul_lods
1523 	},
1524 };
1525 
1526 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
1527 	/*
1528 	 * MOVZX
1529 	 */
1530 	[0xB6] = {
1531 		/* Gv, Eb */
1532 		.valid = true,
1533 		.regmodrm = true,
1534 		.regtorm = false,
1535 		.szoverride = true,
1536 		.defsize = OPSIZE_BYTE,
1537 		.flags = FLAG_ze,
1538 		.emul = &x86_emul_mov
1539 	},
1540 	[0xB7] = {
1541 		/* Gv, Ew */
1542 		.valid = true,
1543 		.regmodrm = true,
1544 		.regtorm = false,
1545 		.szoverride = true,
1546 		.defsize = OPSIZE_WORD,
1547 		.flags = FLAG_ze,
1548 		.emul = &x86_emul_mov
1549 	},
1550 };
1551 
1552 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1553 
1554 /* [REX-present][enc][opsize] */
1555 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
1556 	[false] = {
1557 		/* No REX prefix. */
1558 		[0b00] = {
1559 			[0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1560 			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1561 			[2] = { -1, 0 },
1562 			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1563 			[4] = { -1, 0 },
1564 			[5] = { -1, 0 },
1565 			[6] = { -1, 0 },
1566 			[7] = { -1, 0 },
1567 		},
1568 		[0b01] = {
1569 			[0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1570 			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1571 			[2] = { -1, 0 },
1572 			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1573 			[4] = { -1, 0 },
1574 			[5] = { -1, 0 },
1575 			[6] = { -1, 0 },
1576 			[7] = { -1, 0 },
1577 		},
1578 		[0b10] = {
1579 			[0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1580 			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1581 			[2] = { -1, 0 },
1582 			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1583 			[4] = { -1, 0 },
1584 			[5] = { -1, 0 },
1585 			[6] = { -1, 0 },
1586 			[7] = { -1, 0 },
1587 		},
1588 		[0b11] = {
1589 			[0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1590 			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1591 			[2] = { -1, 0 },
1592 			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1593 			[4] = { -1, 0 },
1594 			[5] = { -1, 0 },
1595 			[6] = { -1, 0 },
1596 			[7] = { -1, 0 },
1597 		}
1598 	},
1599 	[true] = {
1600 		/* Has REX prefix. */
1601 		[0b00] = {
1602 			[0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1603 			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1604 			[2] = { -1, 0 },
1605 			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1606 			[4] = { -1, 0 },
1607 			[5] = { -1, 0 },
1608 			[6] = { -1, 0 },
1609 			[7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1610 		},
1611 		[0b01] = {
1612 			[0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1613 			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1614 			[2] = { -1, 0 },
1615 			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1616 			[4] = { -1, 0 },
1617 			[5] = { -1, 0 },
1618 			[6] = { -1, 0 },
1619 			[7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1620 		},
1621 		[0b10] = {
1622 			[0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1623 			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1624 			[2] = { -1, 0 },
1625 			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1626 			[4] = { -1, 0 },
1627 			[5] = { -1, 0 },
1628 			[6] = { -1, 0 },
1629 			[7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1630 		},
1631 		[0b11] = {
1632 			[0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1633 			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1634 			[2] = { -1, 0 },
1635 			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1636 			[4] = { -1, 0 },
1637 			[5] = { -1, 0 },
1638 			[6] = { -1, 0 },
1639 			[7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1640 		}
1641 	}
1642 };
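
/*
 * Example (illustrative): with no REX prefix, byte-sized encodings 4-7
 * select AH/CH/DH/BH, so "88 e0" is "mov %ah,%al"; with any REX prefix
 * present they select SPL/BPL/SIL/DIL instead, so "40 88 e0" is
 * "mov %spl,%al". The table above encodes exactly that distinction.
 */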
1643 
1644 /* [extended (REX.B/R/X)][enc][opsize] */
1645 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
1646 	[false] = {
1647 		/* Not extended. */
1648 		[0b000] = {
1649 			[0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1650 			[1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1651 			[2] = { -1, 0 },
1652 			[3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1653 			[4] = { -1, 0 },
1654 			[5] = { -1, 0 },
1655 			[6] = { -1, 0 },
1656 			[7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1657 		},
1658 		[0b001] = {
1659 			[0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1660 			[1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1661 			[2] = { -1, 0 },
1662 			[3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1663 			[4] = { -1, 0 },
1664 			[5] = { -1, 0 },
1665 			[6] = { -1, 0 },
1666 			[7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1667 		},
1668 		[0b010] = {
1669 			[0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1670 			[1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1671 			[2] = { -1, 0 },
1672 			[3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1673 			[4] = { -1, 0 },
1674 			[5] = { -1, 0 },
1675 			[6] = { -1, 0 },
1676 			[7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1677 		},
1678 		[0b011] = {
1679 			[0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1680 			[1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1681 			[2] = { -1, 0 },
1682 			[3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1683 			[4] = { -1, 0 },
1684 			[5] = { -1, 0 },
1685 			[6] = { -1, 0 },
1686 			[7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1687 		},
1688 		[0b100] = {
1689 			[0] = { -1, 0 }, /* SPECIAL */
1690 			[1] = { -1, 0 }, /* SPECIAL */
1691 			[2] = { -1, 0 },
1692 			[3] = { -1, 0 }, /* SPECIAL */
1693 			[4] = { -1, 0 },
1694 			[5] = { -1, 0 },
1695 			[6] = { -1, 0 },
1696 			[7] = { -1, 0 }, /* SPECIAL */
1697 		},
1698 		[0b101] = {
1699 			[0] = { -1, 0 }, /* SPECIAL */
1700 			[1] = { -1, 0 }, /* SPECIAL */
1701 			[2] = { -1, 0 },
1702 			[3] = { -1, 0 }, /* SPECIAL */
1703 			[4] = { -1, 0 },
1704 			[5] = { -1, 0 },
1705 			[6] = { -1, 0 },
1706 			[7] = { -1, 0 }, /* SPECIAL */
1707 		},
1708 		[0b110] = {
1709 			[0] = { -1, 0 }, /* SPECIAL */
1710 			[1] = { -1, 0 }, /* SPECIAL */
1711 			[2] = { -1, 0 },
1712 			[3] = { -1, 0 }, /* SPECIAL */
1713 			[4] = { -1, 0 },
1714 			[5] = { -1, 0 },
1715 			[6] = { -1, 0 },
1716 			[7] = { -1, 0 }, /* SPECIAL */
1717 		},
1718 		[0b111] = {
1719 			[0] = { -1, 0 }, /* SPECIAL */
1720 			[1] = { -1, 0 }, /* SPECIAL */
1721 			[2] = { -1, 0 },
1722 			[3] = { -1, 0 }, /* SPECIAL */
1723 			[4] = { -1, 0 },
1724 			[5] = { -1, 0 },
1725 			[6] = { -1, 0 },
1726 			[7] = { -1, 0 }, /* SPECIAL */
1727 		},
1728 	},
1729 	[true] = {
1730 		/* Extended. */
1731 		[0b000] = {
1732 			[0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1733 			[1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1734 			[2] = { -1, 0 },
1735 			[3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1736 			[4] = { -1, 0 },
1737 			[5] = { -1, 0 },
1738 			[6] = { -1, 0 },
1739 			[7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1740 		},
1741 		[0b001] = {
1742 			[0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1743 			[1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1744 			[2] = { -1, 0 },
1745 			[3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1746 			[4] = { -1, 0 },
1747 			[5] = { -1, 0 },
1748 			[6] = { -1, 0 },
1749 			[7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1750 		},
1751 		[0b010] = {
1752 			[0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1753 			[1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1754 			[2] = { -1, 0 },
1755 			[3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1756 			[4] = { -1, 0 },
1757 			[5] = { -1, 0 },
1758 			[6] = { -1, 0 },
1759 			[7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1760 		},
1761 		[0b011] = {
1762 			[0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1763 			[1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1764 			[2] = { -1, 0 },
1765 			[3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1766 			[4] = { -1, 0 },
1767 			[5] = { -1, 0 },
1768 			[6] = { -1, 0 },
1769 			[7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1770 		},
1771 		[0b100] = {
1772 			[0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1773 			[1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1774 			[2] = { -1, 0 },
1775 			[3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1776 			[4] = { -1, 0 },
1777 			[5] = { -1, 0 },
1778 			[6] = { -1, 0 },
1779 			[7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1780 		},
1781 		[0b101] = {
1782 			[0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1783 			[1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1784 			[2] = { -1, 0 },
1785 			[3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1786 			[4] = { -1, 0 },
1787 			[5] = { -1, 0 },
1788 			[6] = { -1, 0 },
1789 			[7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1790 		},
1791 		[0b110] = {
1792 			[0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1793 			[1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1794 			[2] = { -1, 0 },
1795 			[3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1796 			[4] = { -1, 0 },
1797 			[5] = { -1, 0 },
1798 			[6] = { -1, 0 },
1799 			[7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1800 		},
1801 		[0b111] = {
1802 			[0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1803 			[1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1804 			[2] = { -1, 0 },
1805 			[3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1806 			[4] = { -1, 0 },
1807 			[5] = { -1, 0 },
1808 			[6] = { -1, 0 },
1809 			[7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1810 		},
1811 	}
1812 };
1813 
1814 /* [enc] */
1815 static const int gpr_dual_reg1_rm[8] __cacheline_aligned = {
1816 	[0b000] = NVMM_X64_GPR_RBX, /* BX (+SI) */
1817 	[0b001] = NVMM_X64_GPR_RBX, /* BX (+DI) */
1818 	[0b010] = NVMM_X64_GPR_RBP, /* BP (+SI) */
1819 	[0b011] = NVMM_X64_GPR_RBP, /* BP (+DI) */
1820 	[0b100] = NVMM_X64_GPR_RSI, /* SI */
1821 	[0b101] = NVMM_X64_GPR_RDI, /* DI */
1822 	[0b110] = NVMM_X64_GPR_RBP, /* BP */
1823 	[0b111] = NVMM_X64_GPR_RBX, /* BX */
1824 };
1825 
1826 static int
1827 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr __unused)
1828 {
1829 	fsm->fn = NULL;
1830 	return -1;
1831 }
1832 
1833 static int
1834 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1835 {
1836 	if (fsm->buf + n > fsm->end) {
1837 		return -1;
1838 	}
1839 	memcpy(bytes, fsm->buf, n);
1840 	return 0;
1841 }
1842 
1843 static inline void
1844 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1845     int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1846 {
1847 	fsm->buf += n;
1848 	if (fsm->buf > fsm->end) {
1849 		fsm->fn = node_overflow;
1850 	} else {
1851 		fsm->fn = fn;
1852 	}
1853 }
1854 
1855 static const struct x86_reg *
1856 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1857 {
1858 	enc &= 0b11;
1859 	if (regsize == 8) {
1860 		/* May be 64bit without REX */
1861 		return &gpr_map__special[1][enc][regsize-1];
1862 	}
1863 	return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1864 }
1865 
1866 /*
1867  * Special node, for MOVS. Fake two displacements of zero on the source and
1868  * destination registers.
1869  */
1870 static int
1871 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1872 {
1873 	size_t adrsize;
1874 
1875 	adrsize = instr->address_size;
1876 
1877 	/* DS:RSI */
1878 	instr->src.type = STORE_REG;
1879 	instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1880 	instr->src.disp.type = DISP_0;
1881 
1882 	/* ES:RDI, force ES */
1883 	instr->dst.type = STORE_REG;
1884 	instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1885 	instr->dst.disp.type = DISP_0;
1886 	instr->dst.hardseg = NVMM_X64_SEG_ES;
1887 
1888 	fsm_advance(fsm, 0, NULL);
1889 
1890 	return 0;
1891 }
1892 
1893 /*
1894  * Special node, for STOS and LODS. Fake a displacement of zero on the
1895  * memory operand: the destination register for STOS, the source register
1896  * for LODS.
1896  */
1897 static int
1898 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1899 {
1900 	const struct x86_opcode *opcode = instr->opcode;
1901 	struct x86_store *stlo, *streg;
1902 	size_t adrsize, regsize;
1903 
1904 	adrsize = instr->address_size;
1905 	regsize = instr->operand_size;
1906 
1907 	if (opcode->stos) {
1908 		streg = &instr->src;
1909 		stlo = &instr->dst;
1910 	} else {
1911 		streg = &instr->dst;
1912 		stlo = &instr->src;
1913 	}
1914 
1915 	streg->type = STORE_REG;
1916 	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1917 
1918 	stlo->type = STORE_REG;
1919 	if (opcode->stos) {
1920 		/* ES:RDI, force ES */
1921 		stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1922 		stlo->hardseg = NVMM_X64_SEG_ES;
1923 	} else {
1924 		/* DS:RSI */
1925 		stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1926 	}
1927 	stlo->disp.type = DISP_0;
1928 
1929 	fsm_advance(fsm, 0, NULL);
1930 
1931 	return 0;
1932 }
1933 
1934 static int
1935 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1936 {
1937 	const struct x86_opcode *opcode = instr->opcode;
1938 	struct x86_store *stdmo, *streg;
1939 	size_t adrsize, regsize;
1940 
1941 	adrsize = instr->address_size;
1942 	regsize = instr->operand_size;
1943 
1944 	if (opcode->todmo) {
1945 		streg = &instr->src;
1946 		stdmo = &instr->dst;
1947 	} else {
1948 		streg = &instr->dst;
1949 		stdmo = &instr->src;
1950 	}
1951 
1952 	streg->type = STORE_REG;
1953 	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1954 
1955 	stdmo->type = STORE_DMO;
1956 	if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1957 		return -1;
1958 	}
1959 	fsm_advance(fsm, adrsize, NULL);
1960 
1961 	return 0;
1962 }
1963 
1964 static inline uint64_t
1965 sign_extend(uint64_t val, int size)
1966 {
1967 	if (size == 1) {
1968 		if (val & __BIT(7))
1969 			val |= 0xFFFFFFFFFFFFFF00;
1970 	} else if (size == 2) {
1971 		if (val & __BIT(15))
1972 			val |= 0xFFFFFFFFFFFF0000;
1973 	} else if (size == 4) {
1974 		if (val & __BIT(31))
1975 			val |= 0xFFFFFFFF00000000;
1976 	}
1977 	return val;
1978 }
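
/*
 * Example (illustrative): sign_extend(0x80, 1) == 0xffffffffffffff80, which
 * is how a negative imm8 operand (e.g. of group1 opcode 0x83) is widened
 * before being applied at a larger operand size.
 */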
1979 
1980 static int
1981 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1982 {
1983 	const struct x86_opcode *opcode = instr->opcode;
1984 	struct x86_store *store;
1985 	uint8_t immsize;
1986 	size_t sesize = 0;
1987 
1988 	/* The immediate is the source */
1989 	store = &instr->src;
1990 	immsize = instr->operand_size;
1991 
1992 	if (opcode->flags & FLAG_imm8) {
1993 		sesize = immsize;
1994 		immsize = 1;
1995 	} else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1996 		sesize = immsize;
1997 		immsize = 4;
1998 	}
1999 
2000 	store->type = STORE_IMM;
2001 	if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
2002 		return -1;
2003 	}
2004 	fsm_advance(fsm, immsize, NULL);
2005 
2006 	if (sesize != 0) {
2007 		store->u.imm.data = sign_extend(store->u.imm.data, immsize);
2008 	}
2009 
2010 	return 0;
2011 }
2012 
2013 static int
2014 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2015 {
2016 	const struct x86_opcode *opcode = instr->opcode;
2017 	uint64_t data = 0;
2018 	size_t n;
2019 
2020 	if (instr->strm->disp.type == DISP_1) {
2021 		n = 1;
2022 	} else if (instr->strm->disp.type == DISP_2) {
2023 		n = 2;
2024 	} else if (instr->strm->disp.type == DISP_4) {
2025 		n = 4;
2026 	} else {
2027 		DISASSEMBLER_BUG();
2028 	}
2029 
2030 	if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
2031 		return -1;
2032 	}
2033 
2034 	if (__predict_true(fsm->is64bit)) {
2035 		data = sign_extend(data, n);
2036 	}
2037 
2038 	instr->strm->disp.data = data;
2039 
2040 	if (opcode->immediate) {
2041 		fsm_advance(fsm, n, node_immediate);
2042 	} else {
2043 		fsm_advance(fsm, n, NULL);
2044 	}
2045 
2046 	return 0;
2047 }
2048 
2049 /*
2050  * Special node to handle 16bit addressing encoding, which can reference two
2051  * registers at once.
2052  */
2053 static int
2054 node_dual(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2055 {
2056 	int reg1, reg2;
2057 
2058 	reg1 = gpr_dual_reg1_rm[instr->regmodrm.rm];
2059 
2060 	if (instr->regmodrm.rm == 0b000 ||
2061 	    instr->regmodrm.rm == 0b010) {
2062 		reg2 = NVMM_X64_GPR_RSI;
2063 	} else if (instr->regmodrm.rm == 0b001 ||
2064 	    instr->regmodrm.rm == 0b011) {
2065 		reg2 = NVMM_X64_GPR_RDI;
2066 	} else {
2067 		DISASSEMBLER_BUG();
2068 	}
2069 
2070 	instr->strm->type = STORE_DUALREG;
2071 	instr->strm->u.dualreg.reg1 = reg1;
2072 	instr->strm->u.dualreg.reg2 = reg2;
2073 
2074 	if (instr->strm->disp.type == DISP_NONE) {
2075 		DISASSEMBLER_BUG();
2076 	} else if (instr->strm->disp.type == DISP_0) {
2077 		/* Indirect register addressing mode */
2078 		if (instr->opcode->immediate) {
2079 			fsm_advance(fsm, 1, node_immediate);
2080 		} else {
2081 			fsm_advance(fsm, 1, NULL);
2082 		}
2083 	} else {
2084 		fsm_advance(fsm, 1, node_disp);
2085 	}
2086 
2087 	return 0;
2088 }
2089 
2090 static const struct x86_reg *
2091 get_register_idx(struct x86_instr *instr, uint8_t index)
2092 {
2093 	uint8_t enc = index;
2094 	const struct x86_reg *reg;
2095 	size_t regsize;
2096 
2097 	regsize = instr->address_size;
2098 	reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2099 
2100 	if (reg->num == -1) {
2101 		reg = resolve_special_register(instr, enc, regsize);
2102 	}
2103 
2104 	return reg;
2105 }
2106 
2107 static const struct x86_reg *
2108 get_register_bas(struct x86_instr *instr, uint8_t base)
2109 {
2110 	uint8_t enc = base;
2111 	const struct x86_reg *reg;
2112 	size_t regsize;
2113 
2114 	regsize = instr->address_size;
2115 	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2116 	if (reg->num == -1) {
2117 		reg = resolve_special_register(instr, enc, regsize);
2118 	}
2119 
2120 	return reg;
2121 }
2122 
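/*
 * Decode the SIB byte: scale[7:6], index[5:3], base[2:0]. index=0b100 with
 * REX.X clear means "no index", and base=0b101 with mod=0b00 means "no base,
 * a disp32 follows".
 */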
2123 static int
2124 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2125 {
2126 	const struct x86_opcode *opcode;
2127 	uint8_t scale, index, base;
2128 	bool noindex, nobase;
2129 	uint8_t byte;
2130 
2131 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2132 		return -1;
2133 	}
2134 
2135 	scale = ((byte & 0b11000000) >> 6);
2136 	index = ((byte & 0b00111000) >> 3);
2137 	base  = ((byte & 0b00000111) >> 0);
2138 
2139 	opcode = instr->opcode;
2140 
2141 	noindex = false;
2142 	nobase = false;
2143 
2144 	if (index == 0b100 && !instr->rexpref.x) {
2145 		/* Special case: the index is null */
2146 		noindex = true;
2147 	}
2148 
2149 	if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2150 		/* Special case: the base is null + disp32 */
2151 		instr->strm->disp.type = DISP_4;
2152 		nobase = true;
2153 	}
2154 
2155 	instr->strm->type = STORE_SIB;
2156 	instr->strm->u.sib.scale = (1 << scale);
2157 	if (!noindex)
2158 		instr->strm->u.sib.idx = get_register_idx(instr, index);
2159 	if (!nobase)
2160 		instr->strm->u.sib.bas = get_register_bas(instr, base);
2161 
2162 	/* May have a displacement, or an immediate */
2163 	if (instr->strm->disp.type == DISP_1 ||
2164 	    instr->strm->disp.type == DISP_2 ||
2165 	    instr->strm->disp.type == DISP_4) {
2166 		fsm_advance(fsm, 1, node_disp);
2167 	} else if (opcode->immediate) {
2168 		fsm_advance(fsm, 1, node_immediate);
2169 	} else {
2170 		fsm_advance(fsm, 1, NULL);
2171 	}
2172 
2173 	return 0;
2174 }
2175 
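/*
 * Resolve the REG and RM fields of ModRM to registers, extended by REX.R and
 * REX.B respectively. For RM, an indirect access uses the address size rather
 * than the operand size, since the register then holds an address.
 */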
2176 static const struct x86_reg *
2177 get_register_reg(struct x86_instr *instr)
2178 {
2179 	uint8_t enc = instr->regmodrm.reg;
2180 	const struct x86_reg *reg;
2181 	size_t regsize;
2182 
2183 	regsize = instr->operand_size;
2184 
2185 	reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2186 	if (reg->num == -1) {
2187 		reg = resolve_special_register(instr, enc, regsize);
2188 	}
2189 
2190 	return reg;
2191 }
2192 
2193 static const struct x86_reg *
2194 get_register_rm(struct x86_instr *instr)
2195 {
2196 	uint8_t enc = instr->regmodrm.rm;
2197 	const struct x86_reg *reg;
2198 	size_t regsize;
2199 
2200 	if (instr->strm->disp.type == DISP_NONE) {
2201 		regsize = instr->operand_size;
2202 	} else {
2203 		/* Indirect access, the size is that of the address. */
2204 		regsize = instr->address_size;
2205 	}
2206 
2207 	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2208 	if (reg->num == -1) {
2209 		reg = resolve_special_register(instr, enc, regsize);
2210 	}
2211 
2212 	return reg;
2213 }
2214 
2215 static inline bool
2216 has_sib(struct x86_instr *instr)
2217 {
2218 	return (instr->address_size != 2 && /* no SIB in 16bit addressing */
2219 	    instr->regmodrm.mod != 0b11 &&
2220 	    instr->regmodrm.rm == 0b100);
2221 }
2222 
2223 static inline bool
2224 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2225 {
2226 	return (fsm->is64bit && /* RIP-relative only in 64bit mode */
2227 	    instr->regmodrm.mod == 0b00 &&
2228 	    instr->regmodrm.rm == 0b101);
2229 }
2230 
2231 static inline bool
2232 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2233 {
2234 	return (!fsm->is64bit && /* no disp32-only in 64bit mode */
2235 	    instr->address_size != 2 && /* no disp32-only in 16bit addressing */
2236 	    instr->regmodrm.mod == 0b00 &&
2237 	    instr->regmodrm.rm == 0b101);
2238 }
2239 
2240 static inline bool
2241 is_disp16_only(struct x86_decode_fsm *fsm __unused, struct x86_instr *instr)
2242 {
2243 	return (instr->address_size == 2 && /* disp16-only only in 16bit addr */
2244 	    instr->regmodrm.mod == 0b00 &&
2245 	    instr->regmodrm.rm == 0b110);
2246 }
2247 
2248 static inline bool
2249 is_dual(struct x86_decode_fsm *fsm __unused, struct x86_instr *instr)
2250 {
2251 	return (instr->address_size == 2 &&
2252 	    instr->regmodrm.mod != 0b11 &&
2253 	    instr->regmodrm.rm <= 0b011);
2254 }
2255 
2256 static enum x86_disp_type
2257 get_disp_type(struct x86_instr *instr)
2258 {
2259 	switch (instr->regmodrm.mod) {
2260 	case 0b00:	/* indirect */
2261 		return DISP_0;
2262 	case 0b01:	/* indirect+1 */
2263 		return DISP_1;
2264 	case 0b10:	/* indirect+{2,4} */
2265 		if (__predict_false(instr->address_size == 2)) {
2266 			return DISP_2;
2267 		}
2268 		return DISP_4;
2269 	case 0b11:	/* direct */
2270 	default:	/* llvm */
2271 		return DISP_NONE;
2272 	}
2273 	__unreachable();
2274 }
2275 
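/*
 * Decode the ModRM byte. The REG field (or the immediate, for immediate
 * forms) provides one operand and the RM field the other; the opcode's
 * regtorm flag gives the direction. Group opcodes use REG as an index into
 * the group table to pick the emulation function. Depending on mod/rm we
 * then branch to the SIB, displacement, dual-register or immediate nodes.
 */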
2276 static int
2277 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2278 {
2279 	struct x86_store *strg, *strm;
2280 	const struct x86_opcode *opcode;
2281 	const struct x86_reg *reg;
2282 	uint8_t byte;
2283 
2284 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2285 		return -1;
2286 	}
2287 
2288 	opcode = instr->opcode;
2289 
2290 	instr->regmodrm.rm  = ((byte & 0b00000111) >> 0);
2291 	instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2292 	instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2293 
2294 	if (opcode->regtorm) {
2295 		strg = &instr->src;
2296 		strm = &instr->dst;
2297 	} else { /* RM to REG */
2298 		strm = &instr->src;
2299 		strg = &instr->dst;
2300 	}
2301 
2302 	/* Save for later use. */
2303 	instr->strm = strm;
2304 
2305 	/*
2306 	 * Special cases: Groups. The REG field of REGMODRM is the index in
2307 	 * the group. op1 gets overwritten in the Immediate node, if any.
2308 	 */
2309 	if (opcode->group1) {
2310 		if (group1[instr->regmodrm.reg].emul == NULL) {
2311 			return -1;
2312 		}
2313 		instr->emul = group1[instr->regmodrm.reg].emul;
2314 	} else if (opcode->group3) {
2315 		if (group3[instr->regmodrm.reg].emul == NULL) {
2316 			return -1;
2317 		}
2318 		instr->emul = group3[instr->regmodrm.reg].emul;
2319 	} else if (opcode->group11) {
2320 		if (group11[instr->regmodrm.reg].emul == NULL) {
2321 			return -1;
2322 		}
2323 		instr->emul = group11[instr->regmodrm.reg].emul;
2324 	}
2325 
2326 	if (!opcode->immediate) {
2327 		reg = get_register_reg(instr);
2328 		if (reg == NULL) {
2329 			return -1;
2330 		}
2331 		strg->type = STORE_REG;
2332 		strg->u.reg = reg;
2333 	}
2334 
2335 	/* The displacement applies to RM. */
2336 	strm->disp.type = get_disp_type(instr);
2337 
2338 	if (has_sib(instr)) {
2339 		/* Overwrites RM */
2340 		fsm_advance(fsm, 1, node_sib);
2341 		return 0;
2342 	}
2343 
2344 	if (is_rip_relative(fsm, instr)) {
2345 		/* Overwrites RM */
2346 		strm->type = STORE_REG;
2347 		strm->u.reg = &gpr_map__rip;
2348 		strm->disp.type = DISP_4;
2349 		fsm_advance(fsm, 1, node_disp);
2350 		return 0;
2351 	}
2352 
2353 	if (is_disp32_only(fsm, instr)) {
2354 		/* Overwrites RM */
2355 		strm->type = STORE_REG;
2356 		strm->u.reg = NULL;
2357 		strm->disp.type = DISP_4;
2358 		fsm_advance(fsm, 1, node_disp);
2359 		return 0;
2360 	}
2361 
2362 	if (__predict_false(is_disp16_only(fsm, instr))) {
2363 		/* Overwrites RM */
2364 		strm->type = STORE_REG;
2365 		strm->u.reg = NULL;
2366 		strm->disp.type = DISP_2;
2367 		fsm_advance(fsm, 1, node_disp);
2368 		return 0;
2369 	}
2370 
2371 	if (__predict_false(is_dual(fsm, instr))) {
2372 		/* Overwrites RM */
2373 		fsm_advance(fsm, 0, node_dual);
2374 		return 0;
2375 	}
2376 
2377 	reg = get_register_rm(instr);
2378 	if (reg == NULL) {
2379 		return -1;
2380 	}
2381 	strm->type = STORE_REG;
2382 	strm->u.reg = reg;
2383 
2384 	if (strm->disp.type == DISP_NONE) {
2385 		/* Direct register addressing mode */
2386 		if (opcode->immediate) {
2387 			fsm_advance(fsm, 1, node_immediate);
2388 		} else {
2389 			fsm_advance(fsm, 1, NULL);
2390 		}
2391 	} else if (strm->disp.type == DISP_0) {
2392 		/* Indirect register addressing mode */
2393 		if (opcode->immediate) {
2394 			fsm_advance(fsm, 1, node_immediate);
2395 		} else {
2396 			fsm_advance(fsm, 1, NULL);
2397 		}
2398 	} else {
2399 		fsm_advance(fsm, 1, node_disp);
2400 	}
2401 
2402 	return 0;
2403 }
2404 
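/*
 * Operand size: fixed by the opcode when it allows no override; 8 bytes with
 * REX.W; otherwise the mode's default (4 bytes in 32/64bit mode, 2 in 16bit),
 * flipped between 2 and 4 by the 0x66 operand-size override prefix.
 */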
2405 static size_t
2406 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2407 {
2408 	const struct x86_opcode *opcode = instr->opcode;
2409 	int opsize;
2410 
2411 	/* Get the opsize */
2412 	if (!opcode->szoverride) {
2413 		opsize = opcode->defsize;
2414 	} else if (instr->rexpref.present && instr->rexpref.w) {
2415 		opsize = 8;
2416 	} else {
2417 		if (!fsm->is16bit) {
2418 			if (instr->legpref.opr_ovr) {
2419 				opsize = 2;
2420 			} else {
2421 				opsize = 4;
2422 			}
2423 		} else { /* 16bit */
2424 			if (instr->legpref.opr_ovr) {
2425 				opsize = 4;
2426 			} else {
2427 				opsize = 2;
2428 			}
2429 		}
2430 	}
2431 
2432 	return opsize;
2433 }
2434 
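/*
 * Address size: 8 bytes in 64bit mode, 4 in 32bit, 2 in 16bit. The 0x67
 * address-size override prefix selects 4 bytes in 64bit mode, 2 in 32bit
 * mode and 4 in 16bit mode.
 */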
2435 static size_t
2436 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2437 {
2438 	if (fsm->is64bit) {
2439 		if (__predict_false(instr->legpref.adr_ovr)) {
2440 			return 4;
2441 		}
2442 		return 8;
2443 	}
2444 
2445 	if (fsm->is32bit) {
2446 		if (__predict_false(instr->legpref.adr_ovr)) {
2447 			return 2;
2448 		}
2449 		return 4;
2450 	}
2451 
2452 	/* 16bit. */
2453 	if (__predict_false(instr->legpref.adr_ovr)) {
2454 		return 4;
2455 	}
2456 	return 2;
2457 }
2458 
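/*
 * Look up the single-byte opcode in the primary table, compute the operand
 * and address sizes, and branch to the node matching the encoding: ModRM,
 * direct memory offset, or string operation (STOS/LODS/MOVS).
 */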
2459 static int
2460 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2461 {
2462 	const struct x86_opcode *opcode;
2463 	uint8_t byte;
2464 
2465 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2466 		return -1;
2467 	}
2468 
2469 	opcode = &primary_opcode_table[byte];
2470 	if (__predict_false(!opcode->valid)) {
2471 		return -1;
2472 	}
2473 
2474 	instr->opcode = opcode;
2475 	instr->emul = opcode->emul;
2476 	instr->operand_size = get_operand_size(fsm, instr);
2477 	instr->address_size = get_address_size(fsm, instr);
2478 
2479 	if (fsm->is64bit && (instr->operand_size == 4)) {
2480 		/* Zero-extend to 64 bits. */
2481 		instr->zeroextend_mask = ~size_to_mask(4);
2482 	}
2483 
2484 	if (opcode->regmodrm) {
2485 		fsm_advance(fsm, 1, node_regmodrm);
2486 	} else if (opcode->dmo) {
2487 		/* Direct-Memory Offsets */
2488 		fsm_advance(fsm, 1, node_dmo);
2489 	} else if (opcode->stos || opcode->lods) {
2490 		fsm_advance(fsm, 1, node_stlo);
2491 	} else if (opcode->movs) {
2492 		fsm_advance(fsm, 1, node_movs);
2493 	} else {
2494 		return -1;
2495 	}
2496 
2497 	return 0;
2498 }
2499 
2500 static int
2501 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2502 {
2503 	const struct x86_opcode *opcode;
2504 	uint8_t byte;
2505 
2506 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2507 		return -1;
2508 	}
2509 
2510 	opcode = &secondary_opcode_table[byte];
2511 	if (__predict_false(!opcode->valid)) {
2512 		return -1;
2513 	}
2514 
2515 	instr->opcode = opcode;
2516 	instr->emul = opcode->emul;
2517 	instr->operand_size = get_operand_size(fsm, instr);
2518 	instr->address_size = get_address_size(fsm, instr);
2519 
2520 	if (fsm->is64bit && (instr->operand_size == 4)) {
2521 		/* Zero-extend to 64 bits. */
2522 		instr->zeroextend_mask = ~size_to_mask(4);
2523 	}
2524 
2525 	if (opcode->flags & FLAG_ze) {
2526 		/*
2527 		 * Compute the mask for zero-extend. Update the operand size,
2528 		 * Compute the mask for zero-extension and update the operand
2529 		 * size, since we move fewer bytes.
2530 		instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2531 		instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2532 		instr->operand_size = opcode->defsize;
2533 	}
2534 
2535 	if (opcode->regmodrm) {
2536 		fsm_advance(fsm, 1, node_regmodrm);
2537 	} else {
2538 		return -1;
2539 	}
2540 
2541 	return 0;
2542 }
2543 
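/*
 * First opcode byte: 0x0F escapes to the two-byte opcode table, VEX prefixes
 * are rejected, and anything else is handled as a primary opcode.
 */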
2544 static int
2545 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2546 {
2547 	uint8_t byte;
2548 
2549 #define ESCAPE	0x0F
2550 #define VEX_1	0xC5
2551 #define VEX_2	0xC4
2552 #define XOP	0x8F
2553 
2554 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2555 		return -1;
2556 	}
2557 
2558 	/*
2559 	 * We don't support XOP. It is AMD-specific, and it was removed shortly
2560 	 * after being introduced.
2561 	 */
2562 	if (byte == ESCAPE) {
2563 		fsm_advance(fsm, 1, node_secondary_opcode);
2564 	} else if (!instr->rexpref.present) {
2565 		if (byte == VEX_1) {
2566 			return -1;
2567 		} else if (byte == VEX_2) {
2568 			return -1;
2569 		} else {
2570 			fsm->fn = node_primary_opcode;
2571 		}
2572 	} else {
2573 		fsm->fn = node_primary_opcode;
2574 	}
2575 
2576 	return 0;
2577 }
2578 
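/*
 * Optional REX prefix (0x40-0x4F), accepted in 64bit mode only. Its W, R, X
 * and B bits widen the operand size and extend the register encodings.
 * Without one, fall through to the main opcode node.
 */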
2579 static int
2580 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2581 {
2582 	struct x86_rexpref *rexpref = &instr->rexpref;
2583 	uint8_t byte;
2584 	size_t n = 0;
2585 
2586 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2587 		return -1;
2588 	}
2589 
2590 	if (byte >= 0x40 && byte <= 0x4F) {
2591 		if (__predict_false(!fsm->is64bit)) {
2592 			return -1;
2593 		}
2594 		rexpref->b = ((byte & 0x1) != 0);
2595 		rexpref->x = ((byte & 0x2) != 0);
2596 		rexpref->r = ((byte & 0x4) != 0);
2597 		rexpref->w = ((byte & 0x8) != 0);
2598 		rexpref->present = true;
2599 		n = 1;
2600 	}
2601 
2602 	fsm_advance(fsm, n, node_main);
2603 	return 0;
2604 }
2605 
2606 static int
2607 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2608 {
2609 	uint8_t byte;
2610 
2611 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2612 		return -1;
2613 	}
2614 
2615 	if (byte == LEG_OPR_OVR) {
2616 		instr->legpref.opr_ovr = 1;
2617 	} else if (byte == LEG_OVR_DS) {
2618 		instr->legpref.seg = NVMM_X64_SEG_DS;
2619 	} else if (byte == LEG_OVR_ES) {
2620 		instr->legpref.seg = NVMM_X64_SEG_ES;
2621 	} else if (byte == LEG_REP) {
2622 		instr->legpref.rep = 1;
2623 	} else if (byte == LEG_OVR_GS) {
2624 		instr->legpref.seg = NVMM_X64_SEG_GS;
2625 	} else if (byte == LEG_OVR_FS) {
2626 		instr->legpref.seg = NVMM_X64_SEG_FS;
2627 	} else if (byte == LEG_ADR_OVR) {
2628 		instr->legpref.adr_ovr = 1;
2629 	} else if (byte == LEG_OVR_CS) {
2630 		instr->legpref.seg = NVMM_X64_SEG_CS;
2631 	} else if (byte == LEG_OVR_SS) {
2632 		instr->legpref.seg = NVMM_X64_SEG_SS;
2633 	} else if (byte == LEG_REPN) {
2634 		instr->legpref.repn = 1;
2635 	} else if (byte == LEG_LOCK) {
2636 		/* ignore */
2637 	} else {
2638 		/* not a legacy prefix */
2639 		fsm_advance(fsm, 0, node_rex_prefix);
2640 		return 0;
2641 	}
2642 
2643 	fsm_advance(fsm, 1, node_legacy_prefix);
2644 	return 0;
2645 }
2646 
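/*
 * Decode the instruction bytes by walking the FSM: each node consumes the
 * bytes it understands and designates the next node, until a node sets
 * fsm.fn to NULL. The decoded length ends up in instr->len.
 */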
2647 static int
2648 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2649     struct nvmm_x64_state *state)
2650 {
2651 	struct x86_decode_fsm fsm;
2652 	int ret;
2653 
2654 	memset(instr, 0, sizeof(*instr));
2655 	instr->legpref.seg = -1;
2656 	instr->src.hardseg = -1;
2657 	instr->dst.hardseg = -1;
2658 
2659 	fsm.is64bit = is_64bit(state);
2660 	fsm.is32bit = is_32bit(state);
2661 	fsm.is16bit = is_16bit(state);
2662 
2663 	fsm.fn = node_legacy_prefix;
2664 	fsm.buf = inst_bytes;
2665 	fsm.end = inst_bytes + inst_len;
2666 
2667 	while (fsm.fn != NULL) {
2668 		ret = (*fsm.fn)(&fsm, instr);
2669 		if (ret == -1)
2670 			return -1;
2671 	}
2672 
2673 	instr->len = fsm.buf - inst_bytes;
2674 
2675 	return 0;
2676 }
2677 
2678 /* -------------------------------------------------------------------------- */
2679 
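/*
 * Generate exec_<instr><size>() helpers that execute the operation natively
 * on the host and capture the resulting RFLAGS with PUSHFQ, plus a
 * dispatcher that selects the right width for a given operand size.
 */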
2680 #define EXEC_INSTR(sz, instr)						\
2681 static uint##sz##_t							\
2682 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags)	\
2683 {									\
2684 	uint##sz##_t res;						\
2685 	__asm __volatile (						\
2686 		#instr"	%2, %3;"					\
2687 		"mov	%3, %1;"					\
2688 		"pushfq;"						\
2689 		"popq	%0"						\
2690 	    : "=r" (*rflags), "=r" (res)				\
2691 	    : "r" (op1), "r" (op2));					\
2692 	return res;							\
2693 }
2694 
2695 #define EXEC_DISPATCHER(instr)						\
2696 static uint64_t								\
2697 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2698 {									\
2699 	switch (opsize) {						\
2700 	case 1:								\
2701 		return exec_##instr##8(op1, op2, rflags);		\
2702 	case 2:								\
2703 		return exec_##instr##16(op1, op2, rflags);		\
2704 	case 4:								\
2705 		return exec_##instr##32(op1, op2, rflags);		\
2706 	default:							\
2707 		return exec_##instr##64(op1, op2, rflags);		\
2708 	}								\
2709 }
2710 
2711 /* SUB: ret = op1 - op2 */
2712 #define PSL_SUB_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2713 EXEC_INSTR(8, sub)
2714 EXEC_INSTR(16, sub)
2715 EXEC_INSTR(32, sub)
2716 EXEC_INSTR(64, sub)
2717 EXEC_DISPATCHER(sub)
2718 
2719 /* OR:  ret = op1 | op2 */
2720 #define PSL_OR_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2721 EXEC_INSTR(8, or)
2722 EXEC_INSTR(16, or)
2723 EXEC_INSTR(32, or)
2724 EXEC_INSTR(64, or)
2725 EXEC_DISPATCHER(or)
2726 
2727 /* AND: ret = op1 & op2 */
2728 #define PSL_AND_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2729 EXEC_INSTR(8, and)
2730 EXEC_INSTR(16, and)
2731 EXEC_INSTR(32, and)
2732 EXEC_INSTR(64, and)
2733 EXEC_DISPATCHER(and)
2734 
2735 /* XOR: ret = op1 ^ op2 */
2736 #define PSL_XOR_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2737 EXEC_INSTR(8, xor)
2738 EXEC_INSTR(16, xor)
2739 EXEC_INSTR(32, xor)
2740 EXEC_INSTR(64, xor)
2741 EXEC_DISPATCHER(xor)
2742 
2743 /* -------------------------------------------------------------------------- */
2744 
2745 /*
2746  * Emulation functions. We don't care about the order of the operands, except
2747  * for SUB, CMP and TEST. For those, we look at mem->write to determine which
2748  * operand is op1 and which is op2.
2749  */
2750 
2751 static void
2752 x86_func_or(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2753 {
2754 	uint64_t *retval = (uint64_t *)mem->data;
2755 	const bool write = mem->write;
2756 	uint64_t *op1, op2, fl, ret;
2757 
2758 	op1 = (uint64_t *)mem->data;
2759 	op2 = 0;
2760 
2761 	/* Fetch the value to be OR'ed (op2). */
2762 	mem->data = (uint8_t *)&op2;
2763 	mem->write = false;
2764 	(*vcpu->cbs.mem)(mem);
2765 
2766 	/* Perform the OR. */
2767 	ret = exec_or(*op1, op2, &fl, mem->size);
2768 
2769 	if (write) {
2770 		/* Write back the result. */
2771 		mem->data = (uint8_t *)&ret;
2772 		mem->write = true;
2773 		(*vcpu->cbs.mem)(mem);
2774 	} else {
2775 		/* Return data to the caller. */
2776 		*retval = ret;
2777 	}
2778 
2779 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2780 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2781 }
2782 
2783 static void
2784 x86_func_and(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2785 {
2786 	uint64_t *retval = (uint64_t *)mem->data;
2787 	const bool write = mem->write;
2788 	uint64_t *op1, op2, fl, ret;
2789 
2790 	op1 = (uint64_t *)mem->data;
2791 	op2 = 0;
2792 
2793 	/* Fetch the value to be AND'ed (op2). */
2794 	mem->data = (uint8_t *)&op2;
2795 	mem->write = false;
2796 	(*vcpu->cbs.mem)(mem);
2797 
2798 	/* Perform the AND. */
2799 	ret = exec_and(*op1, op2, &fl, mem->size);
2800 
2801 	if (write) {
2802 		/* Write back the result. */
2803 		mem->data = (uint8_t *)&ret;
2804 		mem->write = true;
2805 		(*vcpu->cbs.mem)(mem);
2806 	} else {
2807 		/* Return data to the caller. */
2808 		*retval = ret;
2809 	}
2810 
2811 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2812 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2813 }
2814 
2815 static void
2816 x86_func_xchg(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs __unused)
2817 {
2818 	uint64_t *op1, op2;
2819 
2820 	op1 = (uint64_t *)mem->data;
2821 	op2 = 0;
2822 
2823 	/* Fetch op2. */
2824 	mem->data = (uint8_t *)&op2;
2825 	mem->write = false;
2826 	(*vcpu->cbs.mem)(mem);
2827 
2828 	/* Write op1 (the register value) into the memory operand. */
2829 	mem->data = (uint8_t *)op1;
2830 	mem->write = true;
2831 	(*vcpu->cbs.mem)(mem);
2832 
2833 	/* Write op2 (the old memory value) back into op1. */
2834 	*op1 = op2;
2835 }
2836 
2837 static void
2838 x86_func_sub(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2839 {
2840 	uint64_t *retval = (uint64_t *)mem->data;
2841 	const bool write = mem->write;
2842 	uint64_t *op1, *op2, fl, ret;
2843 	uint64_t tmp;
2844 	bool memop1;
2845 
2846 	memop1 = !mem->write;
2847 	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2848 	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2849 
2850 	/* Fetch the value to be SUB'ed (op1 or op2). */
2851 	mem->data = (uint8_t *)&tmp;
2852 	mem->write = false;
2853 	(*vcpu->cbs.mem)(mem);
2854 
2855 	/* Perform the SUB. */
2856 	ret = exec_sub(*op1, *op2, &fl, mem->size);
2857 
2858 	if (write) {
2859 		/* Write back the result. */
2860 		mem->data = (uint8_t *)&ret;
2861 		mem->write = true;
2862 		(*vcpu->cbs.mem)(mem);
2863 	} else {
2864 		/* Return data to the caller. */
2865 		*retval = ret;
2866 	}
2867 
2868 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2869 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2870 }
2871 
2872 static void
2873 x86_func_xor(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2874 {
2875 	uint64_t *retval = (uint64_t *)mem->data;
2876 	const bool write = mem->write;
2877 	uint64_t *op1, op2, fl, ret;
2878 
2879 	op1 = (uint64_t *)mem->data;
2880 	op2 = 0;
2881 
2882 	/* Fetch the value to be XOR'ed (op2). */
2883 	mem->data = (uint8_t *)&op2;
2884 	mem->write = false;
2885 	(*vcpu->cbs.mem)(mem);
2886 
2887 	/* Perform the XOR. */
2888 	ret = exec_xor(*op1, op2, &fl, mem->size);
2889 
2890 	if (write) {
2891 		/* Write back the result. */
2892 		mem->data = (uint8_t *)&ret;
2893 		mem->write = true;
2894 		(*vcpu->cbs.mem)(mem);
2895 	} else {
2896 		/* Return data to the caller. */
2897 		*retval = ret;
2898 	}
2899 
2900 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2901 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2902 }
2903 
2904 static void
2905 x86_func_cmp(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2906 {
2907 	uint64_t *op1, *op2, fl;
2908 	uint64_t tmp;
2909 	bool memop1;
2910 
2911 	memop1 = !mem->write;
2912 	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2913 	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2914 
2915 	/* Fetch the value to be CMP'ed (op1 or op2). */
2916 	mem->data = (uint8_t *)&tmp;
2917 	mem->write = false;
2918 	(*vcpu->cbs.mem)(mem);
2919 
2920 	/* Perform the CMP. */
2921 	exec_sub(*op1, *op2, &fl, mem->size);
2922 
2923 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2924 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2925 }
2926 
2927 static void
2928 x86_func_test(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2929 {
2930 	uint64_t *op1, *op2, fl;
2931 	uint64_t tmp;
2932 	bool memop1;
2933 
2934 	memop1 = !mem->write;
2935 	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2936 	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2937 
2938 	/* Fetch the value to be TEST'ed (op1 or op2). */
2939 	mem->data = (uint8_t *)&tmp;
2940 	mem->write = false;
2941 	(*vcpu->cbs.mem)(mem);
2942 
2943 	/* Perform the TEST. */
2944 	exec_and(*op1, *op2, &fl, mem->size);
2945 
2946 	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2947 	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2948 }
2949 
2950 static void
2951 x86_func_mov(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs __unused)
2952 {
2953 	/*
2954 	 * Nothing special, just move without emulation.
2955 	 */
2956 	(*vcpu->cbs.mem)(mem);
2957 }
2958 
2959 static void
2960 x86_func_stos(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2961 {
2962 	/*
2963 	 * Just move, and update RDI.
2964 	 */
2965 	(*vcpu->cbs.mem)(mem);
2966 
2967 	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2968 		gprs[NVMM_X64_GPR_RDI] -= mem->size;
2969 	} else {
2970 		gprs[NVMM_X64_GPR_RDI] += mem->size;
2971 	}
2972 }
2973 
2974 static void
2975 x86_func_lods(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2976 {
2977 	/*
2978 	 * Just move, and update RSI.
2979 	 */
2980 	(*vcpu->cbs.mem)(mem);
2981 
2982 	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2983 		gprs[NVMM_X64_GPR_RSI] -= mem->size;
2984 	} else {
2985 		gprs[NVMM_X64_GPR_RSI] += mem->size;
2986 	}
2987 }
2988 
2989 /* -------------------------------------------------------------------------- */
2990 
2991 static inline uint64_t
2992 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2993 {
2994 	uint64_t val;
2995 
2996 	val = state->gprs[gpr];
2997 	val &= size_to_mask(instr->address_size);
2998 
2999 	return val;
3000 }
3001 
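/*
 * Translate a decoded operand into a guest virtual address: base plus scaled
 * index for SIB, a single register, a register pair for 16bit dual
 * addressing, or a direct memory offset; add the displacement, then apply
 * the segment (only the FS/GS bases in long mode, full checks otherwise).
 */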
3002 static int
3003 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
3004     struct x86_store *store, gvaddr_t *gvap, size_t size)
3005 {
3006 	struct x86_sib *sib;
3007 	gvaddr_t gva = 0;
3008 	uint64_t reg;
3009 	int ret, seg;
3010 
3011 	if (store->type == STORE_SIB) {
3012 		sib = &store->u.sib;
3013 		if (sib->bas != NULL)
3014 			gva += gpr_read_address(instr, state, sib->bas->num);
3015 		if (sib->idx != NULL) {
3016 			reg = gpr_read_address(instr, state, sib->idx->num);
3017 			gva += sib->scale * reg;
3018 		}
3019 	} else if (store->type == STORE_REG) {
3020 		if (store->u.reg == NULL) {
3021 			/* The base is null. Happens with disp32-only and
3022 			 * disp16-only. */
3023 		} else {
3024 			gva = gpr_read_address(instr, state, store->u.reg->num);
3025 		}
3026 	} else if (store->type == STORE_DUALREG) {
3027 		gva = gpr_read_address(instr, state, store->u.dualreg.reg1) +
3028 		    gpr_read_address(instr, state, store->u.dualreg.reg2);
3029 	} else {
3030 		gva = store->u.dmo;
3031 	}
3032 
3033 	if (store->disp.type != DISP_NONE) {
3034 		gva += store->disp.data;
3035 	}
3036 
3037 	if (store->hardseg != -1) {
3038 		seg = store->hardseg;
3039 	} else {
3040 		if (__predict_false(instr->legpref.seg != -1)) {
3041 			seg = instr->legpref.seg;
3042 		} else {
3043 			seg = NVMM_X64_SEG_DS;
3044 		}
3045 	}
3046 
3047 	if (__predict_true(is_long_mode(state))) {
3048 		if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
3049 			segment_apply(&state->segs[seg], &gva);
3050 		}
3051 	} else {
3052 		ret = segment_check(&state->segs[seg], gva, size);
3053 		if (ret == -1)
3054 			return -1;
3055 		segment_apply(&state->segs[seg], &gva);
3056 	}
3057 
3058 	*gvap = gva;
3059 	return 0;
3060 }
3061 
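/*
 * Fetch a few instruction bytes at RIP and scan the legacy prefixes to find
 * the effective segment of the memory operand; defaults to DS when no
 * segment-override prefix is present.
 */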
3062 static int
3063 fetch_segment(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
3064 {
3065 	struct nvmm_x64_state *state = vcpu->state;
3066 	uint8_t inst_bytes[5], byte;
3067 	size_t i, fetchsize;
3068 	gvaddr_t gva;
3069 	int ret, seg;
3070 
3071 	fetchsize = sizeof(inst_bytes);
3072 
3073 	gva = state->gprs[NVMM_X64_GPR_RIP];
3074 	if (__predict_false(!is_long_mode(state))) {
3075 		ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3076 		    fetchsize);
3077 		if (ret == -1)
3078 			return -1;
3079 		segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3080 	}
3081 
3082 	ret = read_guest_memory(mach, vcpu, gva, inst_bytes, fetchsize);
3083 	if (ret == -1)
3084 		return -1;
3085 
3086 	seg = NVMM_X64_SEG_DS;
3087 	for (i = 0; i < fetchsize; i++) {
3088 		byte = inst_bytes[i];
3089 
3090 		if (byte == LEG_OVR_DS) {
3091 			seg = NVMM_X64_SEG_DS;
3092 		} else if (byte == LEG_OVR_ES) {
3093 			seg = NVMM_X64_SEG_ES;
3094 		} else if (byte == LEG_OVR_GS) {
3095 			seg = NVMM_X64_SEG_GS;
3096 		} else if (byte == LEG_OVR_FS) {
3097 			seg = NVMM_X64_SEG_FS;
3098 		} else if (byte == LEG_OVR_CS) {
3099 			seg = NVMM_X64_SEG_CS;
3100 		} else if (byte == LEG_OVR_SS) {
3101 			seg = NVMM_X64_SEG_SS;
3102 		} else if (byte == LEG_OPR_OVR) {
3103 			/* nothing */
3104 		} else if (byte == LEG_ADR_OVR) {
3105 			/* nothing */
3106 		} else if (byte == LEG_REP) {
3107 			/* nothing */
3108 		} else if (byte == LEG_REPN) {
3109 			/* nothing */
3110 		} else if (byte == LEG_LOCK) {
3111 			/* nothing */
3112 		} else {
3113 			return seg;
3114 		}
3115 	}
3116 
3117 	return seg;
3118 }
3119 
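/*
 * Fetch the instruction bytes at RIP into the exit structure, applying the
 * CS segmentation checks when the guest is not in long mode.
 */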
3120 static int
3121 fetch_instruction(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
3122     struct nvmm_vcpu_exit *exit)
3123 {
3124 	struct nvmm_x64_state *state = vcpu->state;
3125 	size_t fetchsize;
3126 	gvaddr_t gva;
3127 	int ret;
3128 
3129 	fetchsize = sizeof(exit->u.mem.inst_bytes);
3130 
3131 	gva = state->gprs[NVMM_X64_GPR_RIP];
3132 	if (__predict_false(!is_long_mode(state))) {
3133 		ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3134 		    fetchsize);
3135 		if (ret == -1)
3136 			return -1;
3137 		segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3138 	}
3139 
3140 	ret = read_guest_memory(mach, vcpu, gva, exit->u.mem.inst_bytes,
3141 	    fetchsize);
3142 	if (ret == -1)
3143 		return -1;
3144 
3145 	exit->u.mem.inst_len = fetchsize;
3146 
3147 	return 0;
3148 }
3149 
3150 /*
3151  * Double memory operand, MOVS only.
3152  */
3153 static int
3154 assist_mem_double_movs(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
3155     struct x86_instr *instr)
3156 {
3157 	struct nvmm_x64_state *state = vcpu->state;
3158 	uint8_t data[8];
3159 	gvaddr_t gva;
3160 	size_t size;
3161 	int ret;
3162 
3163 	size = instr->operand_size;
3164 
3165 	/* Source. */
3166 	ret = store_to_gva(state, instr, &instr->src, &gva, size);
3167 	if (ret == -1)
3168 		return -1;
3169 	ret = read_guest_memory(mach, vcpu, gva, data, size);
3170 	if (ret == -1)
3171 		return -1;
3172 
3173 	/* Destination. */
3174 	ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3175 	if (ret == -1)
3176 		return -1;
3177 	ret = write_guest_memory(mach, vcpu, gva, data, size);
3178 	if (ret == -1)
3179 		return -1;
3180 
3181 	if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
3182 		state->gprs[NVMM_X64_GPR_RSI] -= size;
3183 		state->gprs[NVMM_X64_GPR_RDI] -= size;
3184 	} else {
3185 		state->gprs[NVMM_X64_GPR_RSI] += size;
3186 		state->gprs[NVMM_X64_GPR_RDI] += size;
3187 	}
3188 
3189 	return 0;
3190 }
3191 
3192 /*
3193  * Single memory operand, covers most instructions.
3194  */
3195 static int
3196 assist_mem_single(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
3197     struct x86_instr *instr)
3198 {
3199 	struct nvmm_x64_state *state = vcpu->state;
3200 	struct nvmm_vcpu_exit *exit = vcpu->exit;
3201 	struct nvmm_mem mem;
3202 	uint8_t membuf[8];
3203 	uint64_t val;
3204 
3205 	memset(membuf, 0, sizeof(membuf));
3206 
3207 	mem.mach = mach;
3208 	mem.vcpu = vcpu;
3209 	mem.gpa = exit->u.mem.gpa;
3210 	mem.size = instr->operand_size;
3211 	mem.data = membuf;
3212 
3213 	/* Determine the direction. */
3214 	switch (instr->src.type) {
3215 	case STORE_REG:
3216 		if (instr->src.disp.type != DISP_NONE) {
3217 			/* Indirect access. */
3218 			mem.write = false;
3219 		} else {
3220 			/* Direct access. */
3221 			mem.write = true;
3222 		}
3223 		break;
3224 	case STORE_DUALREG:
3225 		if (instr->src.disp.type == DISP_NONE) {
3226 			DISASSEMBLER_BUG();
3227 		}
3228 		mem.write = false;
3229 		break;
3230 	case STORE_IMM:
3231 		mem.write = true;
3232 		break;
3233 	case STORE_SIB:
3234 		mem.write = false;
3235 		break;
3236 	case STORE_DMO:
3237 		mem.write = false;
3238 		break;
3239 	default:
3240 		DISASSEMBLER_BUG();
3241 	}
3242 
3243 	if (mem.write) {
3244 		switch (instr->src.type) {
3245 		case STORE_REG:
3246 			/* The instruction was "reg -> mem". Fetch the register
3247 			 * in membuf. */
3248 			if (__predict_false(instr->src.disp.type != DISP_NONE)) {
3249 				DISASSEMBLER_BUG();
3250 			}
3251 			val = state->gprs[instr->src.u.reg->num];
3252 			val = __SHIFTOUT(val, instr->src.u.reg->mask);
3253 			memcpy(mem.data, &val, mem.size);
3254 			break;
3255 		case STORE_IMM:
3256 			/* The instruction was "imm -> mem". Fetch the immediate
3257 			 * in membuf. */
3258 			memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3259 			break;
3260 		default:
3261 			DISASSEMBLER_BUG();
3262 		}
3263 	} else if (instr->emul->readreg) {
3264 		/* The instruction was "mem -> reg", but the value of the
3265 		 * register matters for the emul func. Fetch it in membuf. */
3266 		if (__predict_false(instr->dst.type != STORE_REG)) {
3267 			DISASSEMBLER_BUG();
3268 		}
3269 		if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
3270 			DISASSEMBLER_BUG();
3271 		}
3272 		val = state->gprs[instr->dst.u.reg->num];
3273 		val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3274 		memcpy(mem.data, &val, mem.size);
3275 	}
3276 
3277 	(*instr->emul->func)(vcpu, &mem, state->gprs);
3278 
3279 	if (instr->emul->notouch) {
3280 		/* We're done. */
3281 		return 0;
3282 	}
3283 
3284 	if (!mem.write) {
3285 		/* The instruction was "mem -> reg". The emul func has filled
3286 		 * membuf with the memory content. Install membuf in the
3287 		 * register. */
3288 		if (__predict_false(instr->dst.type != STORE_REG)) {
3289 			DISASSEMBLER_BUG();
3290 		}
3291 		if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
3292 			DISASSEMBLER_BUG();
3293 		}
3294 		memcpy(&val, membuf, sizeof(uint64_t));
3295 		val = __SHIFTIN(val, instr->dst.u.reg->mask);
3296 		state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3297 		state->gprs[instr->dst.u.reg->num] |= val;
3298 		state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3299 	} else if (instr->emul->backprop) {
3300 		/* The instruction was "reg -> mem", but the memory must be
3301 		 * back-propagated to the register. Install membuf in the
3302 		 * register. */
3303 		if (__predict_false(instr->src.type != STORE_REG)) {
3304 			DISASSEMBLER_BUG();
3305 		}
3306 		if (__predict_false(instr->src.disp.type != DISP_NONE)) {
3307 			DISASSEMBLER_BUG();
3308 		}
3309 		memcpy(&val, membuf, sizeof(uint64_t));
3310 		val = __SHIFTIN(val, instr->src.u.reg->mask);
3311 		state->gprs[instr->src.u.reg->num] &= ~instr->src.u.reg->mask;
3312 		state->gprs[instr->src.u.reg->num] |= val;
3313 		state->gprs[instr->src.u.reg->num] &= ~instr->zeroextend_mask;
3314 	}
3315 
3316 	return 0;
3317 }
3318 
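/*
 * Entry point for memory-assist: fetch the relevant VCPU state, fetch the
 * instruction bytes if the kernel did not provide them, decode the
 * instruction, emulate the access (single memory operand, or two for MOVS),
 * maintain the REP counter, advance RIP, and write the GPRs back.
 */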
3319 int
3320 nvmm_assist_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
3321 {
3322 	struct nvmm_x64_state *state = vcpu->state;
3323 	struct nvmm_vcpu_exit *exit = vcpu->exit;
3324 	struct x86_instr instr;
3325 	uint64_t cnt = 0; /* GCC */
3326 	int ret;
3327 
3328 	if (__predict_false(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
3329 		errno = EINVAL;
3330 		return -1;
3331 	}
3332 
3333 	ret = nvmm_vcpu_getstate(mach, vcpu,
3334 	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3335 	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3336 	if (ret == -1)
3337 		return -1;
3338 
3339 	if (exit->u.mem.inst_len == 0) {
3340 		/*
3341 		 * The instruction bytes were not provided by the kernel.
3342 		 * Fetch them ourselves.
3343 		 */
3344 		ret = fetch_instruction(mach, vcpu, exit);
3345 		if (ret == -1)
3346 			return -1;
3347 	}
3348 
3349 	ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3350 	    &instr, state);
3351 	if (ret == -1) {
3352 		errno = ENODEV;
3353 		return -1;
3354 	}
3355 
3356 	if (instr.legpref.rep || instr.legpref.repn) {
3357 		cnt = rep_get_cnt(state, instr.address_size);
3358 		if (__predict_false(cnt == 0)) {
3359 			state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3360 			goto out;
3361 		}
3362 	}
3363 
3364 	if (instr.opcode->movs) {
3365 		ret = assist_mem_double_movs(mach, vcpu, &instr);
3366 	} else {
3367 		ret = assist_mem_single(mach, vcpu, &instr);
3368 	}
3369 	if (ret == -1) {
3370 		errno = ENODEV;
3371 		return -1;
3372 	}
3373 
3374 	if (instr.legpref.rep || instr.legpref.repn) {
3375 		cnt -= 1;
3376 		rep_set_cnt(state, instr.address_size, cnt);
3377 		if (cnt == 0) {
3378 			state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3379 		} else if (__predict_false(instr.legpref.repn)) {
3380 			if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3381 				state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3382 			}
3383 		}
3384 	} else {
3385 		state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3386 	}
3387 
3388 out:
3389 	ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
3390 	if (ret == -1)
3391 		return -1;
3392 
3393 	return 0;
3394 }
3395