/*
 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>	/* assumed needed: __predict_*(), __BIT(), __unused */

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>

#include <machine/psl.h>

#include "nvmm.h"	/* assumed needed: nvmm_machine, nvmm_vcpu, gvaddr_t, ... */

#define MIN(X, Y)		(((X) < (Y)) ? (X) : (Y))
#define __cacheline_aligned	__attribute__((__aligned__(64)))

/* -------------------------------------------------------------------------- */

/*
 * Undocumented debugging function. Helpful.
 */
int
nvmm_vcpu_dump(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
	struct nvmm_x64_state *state = vcpu->state;
	uint16_t *attr;
	size_t i;
	int ret;

	const char *segnames[] = {
		"ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
	};

	ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_ALL);
	if (ret == -1)
		return -1;

	printf("+ VCPU id=%u\n", vcpu->cpuid);
	printf("| -> RAX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RAX]);
	printf("| -> RCX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RCX]);
	printf("| -> RDX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RDX]);
	printf("| -> RBX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBX]);
	printf("| -> RSP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSP]);
	printf("| -> RBP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBP]);
	printf("| -> RSI=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSI]);
	printf("| -> RDI=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RDI]);
	printf("| -> RIP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RIP]);
	printf("| -> RFLAGS=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RFLAGS]);
	for (i = 0; i < NVMM_X64_NSEG; i++) {
		attr = (uint16_t *)&state->segs[i].attrib;
		printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, "
		    "attrib=%x [type=%d,l=%d,def=%d]\n",
		    segnames[i],
		    state->segs[i].selector,
		    state->segs[i].base,
		    state->segs[i].limit,
		    *attr,
		    state->segs[i].attrib.type,
		    state->segs[i].attrib.l,
		    state->segs[i].attrib.def);
	}
	printf("| -> MSR_EFER=%"PRIx64"\n", state->msrs[NVMM_X64_MSR_EFER]);
	printf("| -> CR0=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR0]);
	printf("| -> CR3=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR3]);
	printf("| -> CR4=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR4]);
	printf("| -> CR8=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR8]);

	return 0;
}
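
/*
 * Minimal usage sketch (hypothetical caller; assumes mach and vcpu were
 * already set up with nvmm_machine_create()/nvmm_vcpu_create()):
 *
 *	if (nvmm_vcpu_dump(&mach, &vcpu) == -1)
 *		warn("nvmm_vcpu_dump");
 */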

/* -------------------------------------------------------------------------- */

/*
 * x86 page size.
 */
#define PAGE_SIZE	0x1000
#define PAGE_MASK	(PAGE_SIZE - 1)

/*
 * x86 PTE/PDE bits.
 */
#define PTE_P		0x0000000000000001	/* Present */
#define PTE_W		0x0000000000000002	/* Write */
#define PTE_U		0x0000000000000004	/* User */
#define PTE_PWT		0x0000000000000008	/* Write-Through */
#define PTE_PCD		0x0000000000000010	/* Cache-Disable */
#define PTE_A		0x0000000000000020	/* Accessed */
#define PTE_D		0x0000000000000040	/* Dirty */
#define PTE_PAT		0x0000000000000080	/* PAT on 4KB Pages */
#define PTE_PS		0x0000000000000080	/* Large Page Size */
#define PTE_G		0x0000000000000100	/* Global Translation */
#define PTE_AVL1	0x0000000000000200	/* Ignored by Hardware */
#define PTE_AVL2	0x0000000000000400	/* Ignored by Hardware */
#define PTE_AVL3	0x0000000000000800	/* Ignored by Hardware */
#define PTE_LGPAT	0x0000000000001000	/* PAT on Large Pages */
#define PTE_NX		0x8000000000000000	/* No Execute */

#define PTE_4KFRAME	0x000ffffffffff000
#define PTE_2MFRAME	0x000fffffffe00000
#define PTE_1GFRAME	0x000fffffc0000000

#define PTE_FRAME	PTE_4KFRAME

/* -------------------------------------------------------------------------- */

#define PTE32_L1_SHIFT	12
#define PTE32_L2_SHIFT	22

#define PTE32_L2_MASK	0xffc00000
#define PTE32_L1_MASK	0x003ff000

#define PTE32_L2_FRAME	(PTE32_L2_MASK)
#define PTE32_L1_FRAME	(PTE32_L2_FRAME|PTE32_L1_MASK)

#define pte32_l1idx(va)	(((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
#define pte32_l2idx(va)	(((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)

#define CR3_FRAME_32BIT	0xfffff000

typedef uint32_t pte_32bit_t;
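
/*
 * Worked example: a 32bit VA decomposes as 10/10/12 bits. For
 * GVA 0xdeadbeef, pte32_l2idx() = 0x37a, pte32_l1idx() = 0x2db, and the
 * 12bit page offset (re-added by the caller, x86_gva_to_gpa) is 0xeef.
 */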

static int
x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L2gpa, L1gpa;
	uintptr_t L2hva, L1hva;
	pte_32bit_t *pdir, pte;
	nvmm_prot_t pageprot;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L2. */
	L2gpa = (cr3 & CR3_FRAME_32BIT);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_t *)L2hva;
	pte = pdir[pte32_l2idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if ((pte & PTE_PS) && !has_pse)
		return -1;
	if (pte & PTE_PS) {
		*gpa = (pte & PTE32_L2_FRAME);
		*gpa = *gpa + (gva & PTE32_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PTE_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_t *)L1hva;
	pte = pdir[pte32_l1idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PTE_PS)
		return -1;

	*gpa = (pte & PTE_FRAME);
	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE32_PAE_L1_SHIFT	12
#define PTE32_PAE_L2_SHIFT	21
#define PTE32_PAE_L3_SHIFT	30

#define PTE32_PAE_L3_MASK	0xc0000000
#define PTE32_PAE_L2_MASK	0x3fe00000
#define PTE32_PAE_L1_MASK	0x001ff000

#define PTE32_PAE_L3_FRAME	(PTE32_PAE_L3_MASK)
#define PTE32_PAE_L2_FRAME	(PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
#define PTE32_PAE_L1_FRAME	(PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)

#define pte32_pae_l1idx(va)	(((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
#define pte32_pae_l2idx(va)	(((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
#define pte32_pae_l3idx(va)	(((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)

#define CR3_FRAME_32BIT_PAE	0xffffffe0

typedef uint64_t pte_32bit_pae_t;
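
/*
 * In PAE mode the 32bit VA decomposes as 2/9/9/12 bits: four L3 entries
 * (the PDPTEs), then 512-entry L2 and L1 tables of 64bit PTEs, then the
 * 12bit page offset.
 */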

static int
x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	gpaddr_t L3gpa, L2gpa, L1gpa;
	uintptr_t L3hva, L2hva, L1hva;
	pte_32bit_pae_t *pdir, pte;
	nvmm_prot_t pageprot;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L3. */
	L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L3hva;
	pte = pdir[pte32_pae_l3idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if (pte & PTE_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PTE_PS)
		return -1;

	/* Parse L2. */
	L2gpa = (pte & PTE_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L2hva;
	pte = pdir[pte32_pae_l2idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PTE_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PTE_PS) {
		*gpa = (pte & PTE32_PAE_L2_FRAME);
		*gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PTE_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L1hva;
	pte = pdir[pte32_pae_l1idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PTE_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PTE_PS)
		return -1;

	*gpa = (pte & PTE_FRAME);
	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE64_L1_SHIFT	12
#define PTE64_L2_SHIFT	21
#define PTE64_L3_SHIFT	30
#define PTE64_L4_SHIFT	39

#define PTE64_L4_MASK	0x0000ff8000000000
#define PTE64_L3_MASK	0x0000007fc0000000
#define PTE64_L2_MASK	0x000000003fe00000
#define PTE64_L1_MASK	0x00000000001ff000

#define PTE64_L4_FRAME	PTE64_L4_MASK
#define PTE64_L3_FRAME	(PTE64_L4_FRAME|PTE64_L3_MASK)
#define PTE64_L2_FRAME	(PTE64_L3_FRAME|PTE64_L2_MASK)
#define PTE64_L1_FRAME	(PTE64_L2_FRAME|PTE64_L1_MASK)

#define pte64_l1idx(va)	(((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
#define pte64_l2idx(va)	(((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
#define pte64_l3idx(va)	(((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
#define pte64_l4idx(va)	(((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)

#define CR3_FRAME_64BIT	0x000ffffffffff000

typedef uint64_t pte_64bit_t;

static inline bool
x86_gva_64bit_canonical(gvaddr_t gva)
{
	/* Bits 63:47 must have the same value. */
#define SIGN_EXTEND	0xffff800000000000ULL
	return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
}
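
/*
 * Example: 0x00007fffffffffff and 0xffff800000000000 are canonical, while
 * 0x0000800000000000 is not; the walk below rejects the latter before
 * touching the page tables.
 */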

static int
x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
	uintptr_t L4hva, L3hva, L2hva, L1hva;
	pte_64bit_t *pdir, pte;
	nvmm_prot_t pageprot;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	if (!x86_gva_64bit_canonical(gva))
		return -1;

	/* Parse L4. */
	L4gpa = (cr3 & CR3_FRAME_64BIT);
	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L4hva;
	pte = pdir[pte64_l4idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PTE_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PTE_PS)
		return -1;

	/* Parse L3. */
	L3gpa = (pte & PTE_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L3hva;
	pte = pdir[pte64_l3idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PTE_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PTE_PS) {
		*gpa = (pte & PTE64_L3_FRAME);
		*gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
		return 0;
	}

	/* Parse L2. */
	L2gpa = (pte & PTE_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L2hva;
	pte = pdir[pte64_l2idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PTE_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PTE_PS) {
		*gpa = (pte & PTE64_L2_FRAME);
		*gpa = *gpa + (gva & PTE64_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PTE_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L1hva;
	pte = pdir[pte64_l1idx(gva)];
	if ((pte & PTE_P) == 0)
		return -1;
	if ((pte & PTE_U) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PTE_W) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PTE_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PTE_PS)
		return -1;

	*gpa = (pte & PTE_FRAME);
	return 0;
}

static inline int
x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	bool is_pae, is_lng, has_pse;
	uint64_t cr3;
	size_t off;
	int ret;

	if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
		/* No paging. */
		*prot = NVMM_PROT_ALL;
		*gpa = gva;
		return 0;
	}

	off = (gva & PAGE_MASK);
	gva &= ~PAGE_MASK;

	is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
	is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
	has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
	cr3 = state->crs[NVMM_X64_CR_CR3];

	if (is_pae && is_lng) {
		/* 64bit */
		ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
	} else if (is_pae && !is_lng) {
		/* 32bit PAE */
		ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
	} else if (!is_pae && !is_lng) {
		/* 32bit */
		ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
	} else {
		ret = -1;
	}

	if (ret == -1) {
		errno = EFAULT;
	}

	*gpa = *gpa + off;

	return ret;
}

int
nvmm_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	struct nvmm_x64_state *state = vcpu->state;
	int ret;

	ret = nvmm_vcpu_getstate(mach, vcpu,
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	return x86_gva_to_gpa(mach, state, gva, gpa, prot);
}
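
/*
 * Minimal usage sketch (hypothetical caller):
 *
 *	gpaddr_t gpa;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(&mach, &vcpu, gva, &gpa, &prot) == -1)
 *		err(EXIT_FAILURE, "nvmm_gva_to_gpa");
 */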

/* -------------------------------------------------------------------------- */

#define DISASSEMBLER_BUG()	\
	do {			\
		errno = EINVAL;	\
		return -1;	\
	} while (0)

static inline bool
is_long_mode(struct nvmm_x64_state *state)
{
	return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
}

static inline bool
is_64bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
}

static inline bool
is_32bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
	    (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
}

static inline bool
is_16bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
	    (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
}

static int
segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
{
	uint64_t limit;

	/*
	 * This is incomplete. We should also check expand-down ("top-down")
	 * segments, among other things; really, that's tiring.
	 */
	if (__predict_false(!seg->attrib.p)) {
		goto error;
	}

	limit = (uint64_t)seg->limit + 1;
	if (__predict_true(seg->attrib.g)) {
		limit *= PAGE_SIZE;
	}

	if (__predict_false(gva + size > limit)) {
		goto error;
	}

	return 0;

error:
	errno = EFAULT;
	return -1;
}

static inline void
segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
{
	*gva += seg->base;
}

static inline uint64_t
size_to_mask(size_t size)
{
	switch (size) {
	case 1:
		return 0x00000000000000FF;
	case 2:
		return 0x000000000000FFFF;
	case 4:
		return 0x00000000FFFFFFFF;
	case 8:
	default:
		return 0xFFFFFFFFFFFFFFFF;
	}
}

static uint64_t
rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
{
	uint64_t mask, cnt;

	mask = size_to_mask(adsize);
	cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;

	return cnt;
}

static void
rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
{
	uint64_t mask;

	/* XXX: should we zero-extend? */
	mask = size_to_mask(adsize);
	state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
	state->gprs[NVMM_X64_GPR_RCX] |= cnt;
}

static int
read_guest_memory(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    gvaddr_t gva, uint8_t *data, size_t size)
{
	struct nvmm_x64_state *state = vcpu->state;
	struct nvmm_mem mem;
	nvmm_prot_t prot;
	gpaddr_t gpa;
	uintptr_t hva;
	bool is_mmio;
	int ret, remain;

	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
	if (__predict_false(ret == -1)) {
		return -1;
	}
	if (__predict_false(!(prot & NVMM_PROT_READ))) {
		errno = EFAULT;
		return -1;
	}

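	/*
	 * If the access crosses a page boundary, handle the bytes that fit
	 * in the current page now and recurse below for the rest, since the
	 * next page may translate to a non-contiguous GPA.
	 */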
	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
	} else {
		remain = 0;
	}
	size -= remain;

	ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
	is_mmio = (ret == -1);

	if (is_mmio) {
		mem.mach = mach;
		mem.vcpu = vcpu;
		mem.data = data;
		mem.gpa = gpa;
		mem.write = false;
		mem.size = size;
		(*vcpu->cbs.mem)(&mem);
	} else {
		if (__predict_false(!(prot & NVMM_PROT_READ))) {
			errno = EFAULT;
			return -1;
		}
		memcpy(data, (uint8_t *)hva, size);
	}

	if (remain > 0) {
		ret = read_guest_memory(mach, vcpu, gva + size,
		    data + size, remain);
	} else {
		ret = 0;
	}

	return ret;
}

static int
write_guest_memory(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    gvaddr_t gva, uint8_t *data, size_t size)
{
	struct nvmm_x64_state *state = vcpu->state;
	struct nvmm_mem mem;
	nvmm_prot_t prot;
	gpaddr_t gpa;
	uintptr_t hva;
	bool is_mmio;
	int ret, remain;

	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
	if (__predict_false(ret == -1)) {
		return -1;
	}
	if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
		errno = EFAULT;
		return -1;
	}

	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
	} else {
		remain = 0;
	}
	size -= remain;

	ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
	is_mmio = (ret == -1);

	if (is_mmio) {
		mem.mach = mach;
		mem.vcpu = vcpu;
		mem.data = data;
		mem.gpa = gpa;
		mem.write = true;
		mem.size = size;
		(*vcpu->cbs.mem)(&mem);
	} else {
		if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
			errno = EFAULT;
			return -1;
		}
		memcpy((uint8_t *)hva, data, size);
	}

	if (remain > 0) {
		ret = write_guest_memory(mach, vcpu, gva + size,
		    data + size, remain);
	} else {
		ret = 0;
	}

	return ret;
}

/* -------------------------------------------------------------------------- */

static int fetch_segment(struct nvmm_machine *, struct nvmm_vcpu *);

#define NVMM_IO_BATCH_SIZE	32

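/*
 * Batch up to NVMM_IO_BATCH_SIZE bytes of a REP INS/OUTS into a single
 * guest-memory copy plus one I/O callback per element, rather than one
 * round-trip per iteration. Returns the number of elements processed,
 * or -1 on error.
 */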
static int
assist_io_batch(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
{
	uint8_t iobuf[NVMM_IO_BATCH_SIZE];
	size_t i, iosize, iocnt;
	int ret;

	cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
	iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
	iocnt = iosize / io->size;

	io->data = iobuf;

	if (!io->in) {
		ret = read_guest_memory(mach, vcpu, gva, iobuf, iosize);
		if (ret == -1)
			return -1;
	}

	for (i = 0; i < iocnt; i++) {
		(*vcpu->cbs.io)(io);
		io->data += io->size;
	}

	if (io->in) {
		ret = write_guest_memory(mach, vcpu, gva, iobuf, iosize);
		if (ret == -1)
			return -1;
	}

	return iocnt;
}

int
nvmm_assist_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
	struct nvmm_x64_state *state = vcpu->state;
	struct nvmm_vcpu_exit *exit = vcpu->exit;
	struct nvmm_io io;
	uint64_t cnt = 0; /* GCC */
	uint8_t iobuf[8];
	int iocnt = 1;
	gvaddr_t gva = 0; /* GCC */
	int reg = 0; /* GCC */
	int ret, seg;
	bool psld = false;

	if (__predict_false(exit->reason != NVMM_VCPU_EXIT_IO)) {
		errno = EINVAL;
		return -1;
	}

	io.mach = mach;
	io.vcpu = vcpu;
	io.port = exit->u.io.port;
	io.in = exit->u.io.in;
	io.size = exit->u.io.operand_size;
	io.data = iobuf;

	ret = nvmm_vcpu_getstate(mach, vcpu,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	if (exit->u.io.rep) {
		cnt = rep_get_cnt(state, exit->u.io.address_size);
		if (__predict_false(cnt == 0)) {
			state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
			goto out;
		}
	}

	if (__predict_false(state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
		psld = true;
	}

	/*
	 * Determine GVA.
	 */
	if (exit->u.io.str) {
		if (io.in) {
			reg = NVMM_X64_GPR_RDI;
		} else {
			reg = NVMM_X64_GPR_RSI;
		}

		gva = state->gprs[reg];
		gva &= size_to_mask(exit->u.io.address_size);

		if (exit->u.io.seg != -1) {
			seg = exit->u.io.seg;
		} else {
			if (io.in) {
				seg = NVMM_X64_SEG_ES;
			} else {
				seg = fetch_segment(mach, vcpu);
				if (seg == -1)
					return -1;
			}
		}

		if (__predict_true(is_long_mode(state))) {
			if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
				segment_apply(&state->segs[seg], &gva);
			}
		} else {
			ret = segment_check(&state->segs[seg], gva, io.size);
			if (ret == -1)
				return -1;
			segment_apply(&state->segs[seg], &gva);
		}

		if (exit->u.io.rep && !psld) {
			iocnt = assist_io_batch(mach, vcpu, &io, gva, cnt);
			if (iocnt == -1)
				return -1;
			goto done;
		}
	}

	if (!io.in) {
		if (!exit->u.io.str) {
			memcpy(io.data, &state->gprs[NVMM_X64_GPR_RAX], io.size);
		} else {
			ret = read_guest_memory(mach, vcpu, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

	(*vcpu->cbs.io)(&io);

	if (io.in) {
		if (!exit->u.io.str) {
			memcpy(&state->gprs[NVMM_X64_GPR_RAX], io.data, io.size);
			if (io.size == 4) {
				/* Zero-extend to 64 bits. */
				state->gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
			}
		} else {
			ret = write_guest_memory(mach, vcpu, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

done:
	if (exit->u.io.str) {
		if (__predict_false(psld)) {
			state->gprs[reg] -= iocnt * io.size;
		} else {
			state->gprs[reg] += iocnt * io.size;
		}
	}

	if (exit->u.io.rep) {
		cnt -= iocnt;
		rep_set_cnt(state, exit->u.io.address_size, cnt);
		if (cnt == 0) {
			state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
		}
	} else {
		state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
	}

out:
	ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
	if (ret == -1)
		return -1;

	return 0;
}

/* -------------------------------------------------------------------------- */

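/*
 * An emulation callback plus flags describing operand handling, as used by
 * the emulators below: 'readreg' means the register operand is read before
 * calling 'func', 'backprop' means the memory result is propagated back
 * into the register operand (XCHG), and 'notouch' means the destination is
 * left untouched (CMP, TEST).
 */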
struct x86_emul {
	bool readreg;
	bool backprop;
	bool notouch;
	void (*func)(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
};

static void x86_func_or(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_and(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_xchg(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_sub(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_xor(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_cmp(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_test(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_mov(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_stos(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_lods(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);

static const struct x86_emul x86_emul_or = {
	.readreg = true,
	.func = x86_func_or
};

static const struct x86_emul x86_emul_and = {
	.readreg = true,
	.func = x86_func_and
};

static const struct x86_emul x86_emul_xchg = {
	.readreg = true,
	.backprop = true,
	.func = x86_func_xchg
};

static const struct x86_emul x86_emul_sub = {
	.readreg = true,
	.func = x86_func_sub
};

static const struct x86_emul x86_emul_xor = {
	.readreg = true,
	.func = x86_func_xor
};

static const struct x86_emul x86_emul_cmp = {
	.notouch = true,
	.func = x86_func_cmp
};

static const struct x86_emul x86_emul_test = {
	.notouch = true,
	.func = x86_func_test
};

static const struct x86_emul x86_emul_mov = {
	.func = x86_func_mov
};

static const struct x86_emul x86_emul_stos = {
	.func = x86_func_stos
};

static const struct x86_emul x86_emul_lods = {
	.func = x86_func_lods
};

/* Legacy prefixes. */
#define LEG_LOCK	0xF0
#define LEG_REPN	0xF2
#define LEG_REP		0xF3
#define LEG_OVR_CS	0x2E
#define LEG_OVR_SS	0x36
#define LEG_OVR_DS	0x3E
#define LEG_OVR_ES	0x26
#define LEG_OVR_FS	0x64
#define LEG_OVR_GS	0x65
#define LEG_OPR_OVR	0x66
#define LEG_ADR_OVR	0x67

struct x86_legpref {
	bool opr_ovr:1;
	bool adr_ovr:1;
	bool rep:1;
	bool repn:1;
	int8_t seg;
};

struct x86_rexpref {
	bool b:1;
	bool x:1;
	bool r:1;
	bool w:1;
	bool present:1;
};

struct x86_reg {
	int num;	/* NVMM GPR state index */
	uint64_t mask;
};

struct x86_dualreg {
	int reg1;
	int reg2;
};

enum x86_disp_type {
	DISP_NONE,
	DISP_0,
	DISP_1,
	DISP_2,
	DISP_4
};

struct x86_disp {
	enum x86_disp_type type;
	uint64_t data;	/* 4 bytes, but can be sign-extended */
};

struct x86_regmodrm {
	uint8_t mod:2;
	uint8_t reg:3;
	uint8_t rm:3;
};

struct x86_immediate {
	uint64_t data;
};

struct x86_sib {
	uint8_t scale;
	const struct x86_reg *idx;
	const struct x86_reg *bas;
};

enum x86_store_type {
	STORE_NONE,
	STORE_REG,
	STORE_DUALREG,
	STORE_IMM,
	STORE_SIB,
	STORE_DMO
};

struct x86_store {
	enum x86_store_type type;
	union {
		const struct x86_reg *reg;
		struct x86_dualreg dualreg;
		struct x86_immediate imm;
		struct x86_sib sib;
		uint64_t dmo;
	} u;
	struct x86_disp disp;
	int hardseg;
};

struct x86_instr {
	uint8_t len;
	struct x86_legpref legpref;
	struct x86_rexpref rexpref;
	struct x86_regmodrm regmodrm;
	uint8_t operand_size;
	uint8_t address_size;
	uint64_t zeroextend_mask;

	const struct x86_opcode *opcode;
	const struct x86_emul *emul;

	struct x86_store src;
	struct x86_store dst;
	struct x86_store *strm;
};

struct x86_decode_fsm {
	/* vcpu */
	bool is64bit;
	bool is32bit;
	bool is16bit;

	/* fsm */
	int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
	uint8_t *buf;
	uint8_t *end;
};

struct x86_opcode {
	bool valid:1;
	bool regmodrm:1;
	bool regtorm:1;
	bool dmo:1;
	bool todmo:1;
	bool movs:1;
	bool stos:1;
	bool lods:1;
	bool szoverride:1;
	bool group1:1;
	bool group3:1;
	bool group11:1;
	bool immediate:1;
	uint8_t defsize;
	uint8_t flags;
	const struct x86_emul *emul;
};

struct x86_group_entry {
	const struct x86_emul *emul;
};

#define OPSIZE_BYTE	0x01
#define OPSIZE_WORD	0x02	/* 2 bytes */
#define OPSIZE_DOUB	0x04	/* 4 bytes */
#define OPSIZE_QUAD	0x08	/* 8 bytes */

#define FLAG_imm8	0x01
#define FLAG_immz	0x02
#define FLAG_ze		0x04
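
/*
 * FLAG_imm8: the immediate is one byte, sign-extended to the operand size.
 * FLAG_immz: the immediate is at most four bytes, sign-extended to eight
 * when the operand size is eight. FLAG_ze: the result is zero-extended
 * into the destination register (MOVZX). See node_immediate() below.
 */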

static const struct x86_group_entry group1[8] __cacheline_aligned = {
	[1] = { .emul = &x86_emul_or },
	[4] = { .emul = &x86_emul_and },
	[6] = { .emul = &x86_emul_xor },
	[7] = { .emul = &x86_emul_cmp }
};

static const struct x86_group_entry group3[8] __cacheline_aligned = {
	[0] = { .emul = &x86_emul_test },
	[1] = { .emul = &x86_emul_test }
};

static const struct x86_group_entry group11[8] __cacheline_aligned = {
	[0] = { .emul = &x86_emul_mov }
};

static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
	/*
	 * Group1
	 */
	[0x80] = {
		/* Eb, Ib */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.group1 = true,
		.immediate = true,
		.emul = NULL /* group1 */
	},
	[0x81] = {
		/* Ev, Iz */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.group1 = true,
		.immediate = true,
		.flags = FLAG_immz,
		.emul = NULL /* group1 */
	},
	[0x83] = {
		/* Ev, Ib */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.group1 = true,
		.immediate = true,
		.flags = FLAG_imm8,
		.emul = NULL /* group1 */
	},

	/*
	 * Group3
	 */
	[0xF6] = {
		/* Eb, Ib */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.group3 = true,
		.immediate = true,
		.emul = NULL /* group3 */
	},
	[0xF7] = {
		/* Ev, Iz */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.group3 = true,
		.immediate = true,
		.flags = FLAG_immz,
		.emul = NULL /* group3 */
	},

	/*
	 * Group11
	 */
	[0xC6] = {
		/* Eb, Ib */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.group11 = true,
		.immediate = true,
		.emul = NULL /* group11 */
	},
	[0xC7] = {
		/* Ev, Iz */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.group11 = true,
		.immediate = true,
		.flags = FLAG_immz,
		.emul = NULL /* group11 */
	},

	/*
	 * OR
	 */
	[0x08] = {
		/* Eb, Gb */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_or
	},
	[0x09] = {
		/* Ev, Gv */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_or
	},
	[0x0A] = {
		/* Gb, Eb */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_or
	},
	[0x0B] = {
		/* Gv, Ev */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_or
	},

	/*
	 * AND
	 */
	[0x20] = {
		/* Eb, Gb */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_and
	},
	[0x21] = {
		/* Ev, Gv */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_and
	},
	[0x22] = {
		/* Gb, Eb */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_and
	},
	[0x23] = {
		/* Gv, Ev */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_and
	},

	/*
	 * SUB
	 */
	[0x28] = {
		/* Eb, Gb */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_sub
	},
	[0x29] = {
		/* Ev, Gv */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_sub
	},
	[0x2A] = {
		/* Gb, Eb */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_sub
	},
	[0x2B] = {
		/* Gv, Ev */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_sub
	},

	/*
	 * XOR
	 */
	[0x30] = {
		/* Eb, Gb */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_xor
	},
	[0x31] = {
		/* Ev, Gv */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_xor
	},
	[0x32] = {
		/* Gb, Eb */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_xor
	},
	[0x33] = {
		/* Gv, Ev */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_xor
	},

	/*
	 * XCHG
	 */
	[0x86] = {
		/* Eb, Gb */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_xchg
	},
	[0x87] = {
		/* Ev, Gv */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_xchg
	},

	/*
	 * MOV
	 */
	[0x88] = {
		/* Eb, Gb */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_mov
	},
	[0x89] = {
		/* Ev, Gv */
		.valid = true,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_mov
	},
	[0x8A] = {
		/* Gb, Eb */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_mov
	},
	[0x8B] = {
		/* Gv, Ev */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_mov
	},
	[0xA0] = {
		/* AL, Ob */
		.valid = true,
		.dmo = true,
		.todmo = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_mov
	},
	[0xA1] = {
		/* rAX, Ov */
		.valid = true,
		.dmo = true,
		.todmo = false,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_mov
	},
	[0xA2] = {
		/* Ob, AL */
		.valid = true,
		.dmo = true,
		.todmo = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_mov
	},
	[0xA3] = {
		/* Ov, rAX */
		.valid = true,
		.dmo = true,
		.todmo = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_mov
	},

	/*
	 * MOVS
	 */
	[0xA4] = {
		/* Yb, Xb */
		.valid = true,
		.movs = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = NULL /* assist_mem_double_movs */
	},
	[0xA5] = {
		/* Yv, Xv */
		.valid = true,
		.movs = true,
		.szoverride = true,
		.defsize = -1,
		.emul = NULL /* assist_mem_double_movs */
	},

	/*
	 * STOS
	 */
	[0xAA] = {
		/* Yb, AL */
		.valid = true,
		.stos = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_stos
	},
	[0xAB] = {
		/* Yv, rAX */
		.valid = true,
		.stos = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_stos
	},

	/*
	 * LODS
	 */
	[0xAC] = {
		/* AL, Xb */
		.valid = true,
		.lods = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.emul = &x86_emul_lods
	},
	[0xAD] = {
		/* rAX, Xv */
		.valid = true,
		.lods = true,
		.szoverride = true,
		.defsize = -1,
		.emul = &x86_emul_lods
	},
};

static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
	/*
	 * MOVZX
	 */
	[0xB6] = {
		/* Gv, Eb */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = OPSIZE_BYTE,
		.flags = FLAG_ze,
		.emul = &x86_emul_mov
	},
	[0xB7] = {
		/* Gv, Ew */
		.valid = true,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = OPSIZE_WORD,
		.flags = FLAG_ze,
		.emul = &x86_emul_mov
	},
};

static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };

/* [REX-present][enc][opsize-1]; valid sizes 1/2/4/8 use indices 0/1/3/7 */
static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
	[false] = {
		/* No REX prefix. */
		[0b00] = {
			[0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 },	/* AH */
			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF },	/* SP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF },	/* ESP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b01] = {
			[0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 },	/* CH */
			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF },	/* BP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF },	/* EBP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b10] = {
			[0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 },	/* DH */
			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF },	/* SI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF },	/* ESI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b11] = {
			[0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 },	/* BH */
			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF },	/* DI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF },	/* EDI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		}
	},
	[true] = {
		/* Has REX prefix. */
		[0b00] = {
			[0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF },	/* SPL */
			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF },	/* SP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF },	/* ESP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF },	/* RSP */
		},
		[0b01] = {
			[0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF },	/* BPL */
			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF },	/* BP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF },	/* EBP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF },	/* RBP */
		},
		[0b10] = {
			[0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF },	/* SIL */
			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF },	/* SI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF },	/* ESI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF },	/* RSI */
		},
		[0b11] = {
			[0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF },	/* DIL */
			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF },	/* DI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF },	/* EDI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF },	/* RDI */
		}
	}
};

/* [REX.r/x/b extension bit][enc][opsize-1]; sizes 1/2/4/8 use indices 0/1/3/7 */
static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
	[false] = {
		/* Not extended. */
		[0b000] = {
			[0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF },	/* AL */
			[1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF },	/* AX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF },	/* EAX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF },	/* RAX */
		},
		[0b001] = {
			[0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF },	/* CL */
			[1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF },	/* CX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF },	/* ECX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF },	/* RCX */
		},
		[0b010] = {
			[0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF },	/* DL */
			[1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF },	/* DX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF },	/* EDX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF },	/* RDX */
		},
		[0b011] = {
			[0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF },	/* BL */
			[1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF },	/* BX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF },	/* EBX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF },	/* RBX */
		},
		[0b100] = {
			[0] = { -1, 0 },	/* SPECIAL */
			[1] = { -1, 0 },	/* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 },	/* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },	/* SPECIAL */
		},
		[0b101] = {
			[0] = { -1, 0 },	/* SPECIAL */
			[1] = { -1, 0 },	/* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 },	/* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },	/* SPECIAL */
		},
		[0b110] = {
			[0] = { -1, 0 },	/* SPECIAL */
			[1] = { -1, 0 },	/* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 },	/* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },	/* SPECIAL */
		},
		[0b111] = {
			[0] = { -1, 0 },	/* SPECIAL */
			[1] = { -1, 0 },	/* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 },	/* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },	/* SPECIAL */
		},
	},
	[true] = {
		/* Extended. */
		[0b000] = {
			[0] = { NVMM_X64_GPR_R8, 0x00000000000000FF },	/* R8B */
			[1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF },	/* R8W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF },	/* R8D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF },	/* R8 */
		},
		[0b001] = {
			[0] = { NVMM_X64_GPR_R9, 0x00000000000000FF },	/* R9B */
			[1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF },	/* R9W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF },	/* R9D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF },	/* R9 */
		},
		[0b010] = {
			[0] = { NVMM_X64_GPR_R10, 0x00000000000000FF },	/* R10B */
			[1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF },	/* R10W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF },	/* R10D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF },	/* R10 */
		},
		[0b011] = {
			[0] = { NVMM_X64_GPR_R11, 0x00000000000000FF },	/* R11B */
			[1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF },	/* R11W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF },	/* R11D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF },	/* R11 */
		},
		[0b100] = {
			[0] = { NVMM_X64_GPR_R12, 0x00000000000000FF },	/* R12B */
			[1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF },	/* R12W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF },	/* R12D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF },	/* R12 */
		},
		[0b101] = {
			[0] = { NVMM_X64_GPR_R13, 0x00000000000000FF },	/* R13B */
			[1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF },	/* R13W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF },	/* R13D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF },	/* R13 */
		},
		[0b110] = {
			[0] = { NVMM_X64_GPR_R14, 0x00000000000000FF },	/* R14B */
			[1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF },	/* R14W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF },	/* R14D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF },	/* R14 */
		},
		[0b111] = {
			[0] = { NVMM_X64_GPR_R15, 0x00000000000000FF },	/* R15B */
			[1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF },	/* R15W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF },	/* R15D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF },	/* R15 */
		},
	}
};

/* [enc] */
static const int gpr_dual_reg1_rm[8] __cacheline_aligned = {
	[0b000] = NVMM_X64_GPR_RBX,	/* BX (+SI) */
	[0b001] = NVMM_X64_GPR_RBX,	/* BX (+DI) */
	[0b010] = NVMM_X64_GPR_RBP,	/* BP (+SI) */
	[0b011] = NVMM_X64_GPR_RBP,	/* BP (+DI) */
	[0b100] = NVMM_X64_GPR_RSI,	/* SI */
	[0b101] = NVMM_X64_GPR_RDI,	/* DI */
	[0b110] = NVMM_X64_GPR_RBP,	/* BP */
	[0b111] = NVMM_X64_GPR_RBX,	/* BX */
};

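/*
 * The decoder is a small FSM: each node consumes bytes from fsm->buf and
 * selects the next node via fsm_advance(). A NULL next node means the
 * instruction is complete; node_overflow flags a fetch that ran beyond
 * the end of the buffer.
 */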
1826 static int
node_overflow(struct x86_decode_fsm * fsm,struct x86_instr * instr __unused)1827 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr __unused)
1828 {
1829 fsm->fn = NULL;
1830 return -1;
1831 }
1832
1833 static int
fsm_read(struct x86_decode_fsm * fsm,uint8_t * bytes,size_t n)1834 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1835 {
1836 if (fsm->buf + n > fsm->end) {
1837 return -1;
1838 }
1839 memcpy(bytes, fsm->buf, n);
1840 return 0;
1841 }
1842
1843 static inline void
fsm_advance(struct x86_decode_fsm * fsm,size_t n,int (* fn)(struct x86_decode_fsm *,struct x86_instr *))1844 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1845 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1846 {
1847 fsm->buf += n;
1848 if (fsm->buf > fsm->end) {
1849 fsm->fn = node_overflow;
1850 } else {
1851 fsm->fn = fn;
1852 }
1853 }
1854
1855 static const struct x86_reg *
resolve_special_register(struct x86_instr * instr,uint8_t enc,size_t regsize)1856 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1857 {
1858 enc &= 0b11;
1859 if (regsize == 8) {
1860 /* May be 64bit without REX */
1861 return &gpr_map__special[1][enc][regsize-1];
1862 }
1863 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1864 }
1865
1866 /*
1867 * Special node, for MOVS. Fake two displacements of zero on the source and
1868 * destination registers.
1869 */
1870 static int
node_movs(struct x86_decode_fsm * fsm,struct x86_instr * instr)1871 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1872 {
1873 size_t adrsize;
1874
1875 adrsize = instr->address_size;
1876
1877 /* DS:RSI */
1878 instr->src.type = STORE_REG;
1879 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1880 instr->src.disp.type = DISP_0;
1881
1882 /* ES:RDI, force ES */
1883 instr->dst.type = STORE_REG;
1884 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1885 instr->dst.disp.type = DISP_0;
1886 instr->dst.hardseg = NVMM_X64_SEG_ES;
1887
1888 fsm_advance(fsm, 0, NULL);
1889
1890 return 0;
1891 }
1892
1893 /*
1894 * Special node, for STOS and LODS. Fake a displacement of zero on the
1895 * destination register.
1896 */
1897 static int
node_stlo(struct x86_decode_fsm * fsm,struct x86_instr * instr)1898 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1899 {
1900 const struct x86_opcode *opcode = instr->opcode;
1901 struct x86_store *stlo, *streg;
1902 size_t adrsize, regsize;
1903
1904 adrsize = instr->address_size;
1905 regsize = instr->operand_size;
1906
1907 if (opcode->stos) {
1908 streg = &instr->src;
1909 stlo = &instr->dst;
1910 } else {
1911 streg = &instr->dst;
1912 stlo = &instr->src;
1913 }
1914
1915 streg->type = STORE_REG;
1916 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1917
1918 stlo->type = STORE_REG;
1919 if (opcode->stos) {
1920 /* ES:RDI, force ES */
1921 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1922 stlo->hardseg = NVMM_X64_SEG_ES;
1923 } else {
1924 /* DS:RSI */
1925 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1926 }
1927 stlo->disp.type = DISP_0;
1928
1929 fsm_advance(fsm, 0, NULL);
1930
1931 return 0;
1932 }
1933
1934 static int
node_dmo(struct x86_decode_fsm * fsm,struct x86_instr * instr)1935 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1936 {
1937 const struct x86_opcode *opcode = instr->opcode;
1938 struct x86_store *stdmo, *streg;
1939 size_t adrsize, regsize;
1940
1941 adrsize = instr->address_size;
1942 regsize = instr->operand_size;
1943
1944 if (opcode->todmo) {
1945 streg = &instr->src;
1946 stdmo = &instr->dst;
1947 } else {
1948 streg = &instr->dst;
1949 stdmo = &instr->src;
1950 }
1951
1952 streg->type = STORE_REG;
1953 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1954
1955 stdmo->type = STORE_DMO;
1956 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1957 return -1;
1958 }
1959 fsm_advance(fsm, adrsize, NULL);
1960
1961 return 0;
1962 }
1963
1964 static inline uint64_t
sign_extend(uint64_t val,int size)1965 sign_extend(uint64_t val, int size)
1966 {
1967 if (size == 1) {
1968 if (val & __BIT(7))
1969 val |= 0xFFFFFFFFFFFFFF00;
1970 } else if (size == 2) {
1971 if (val & __BIT(15))
1972 val |= 0xFFFFFFFFFFFF0000;
1973 } else if (size == 4) {
1974 if (val & __BIT(31))
1975 val |= 0xFFFFFFFF00000000;
1976 }
1977 return val;
1978 }
1979
1980 static int
node_immediate(struct x86_decode_fsm * fsm,struct x86_instr * instr)1981 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1982 {
1983 const struct x86_opcode *opcode = instr->opcode;
1984 struct x86_store *store;
1985 uint8_t immsize;
1986 size_t sesize = 0;
1987
1988 /* The immediate is the source */
1989 store = &instr->src;
1990 immsize = instr->operand_size;
1991
1992 if (opcode->flags & FLAG_imm8) {
1993 sesize = immsize;
1994 immsize = 1;
1995 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1996 sesize = immsize;
1997 immsize = 4;
1998 }
1999
2000 store->type = STORE_IMM;
2001 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
2002 return -1;
2003 }
2004 fsm_advance(fsm, immsize, NULL);
2005
2006 if (sesize != 0) {
2007 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
2008 }
2009
2010 return 0;
2011 }
2012
2013 static int
node_disp(struct x86_decode_fsm * fsm,struct x86_instr * instr)2014 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2015 {
2016 const struct x86_opcode *opcode = instr->opcode;
2017 uint64_t data = 0;
2018 size_t n;
2019
2020 if (instr->strm->disp.type == DISP_1) {
2021 n = 1;
2022 } else if (instr->strm->disp.type == DISP_2) {
2023 n = 2;
2024 } else if (instr->strm->disp.type == DISP_4) {
2025 n = 4;
2026 } else {
2027 DISASSEMBLER_BUG();
2028 }
2029
2030 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
2031 return -1;
2032 }
2033
2034 if (__predict_true(fsm->is64bit)) {
2035 data = sign_extend(data, n);
2036 }
2037
2038 instr->strm->disp.data = data;
2039
2040 if (opcode->immediate) {
2041 fsm_advance(fsm, n, node_immediate);
2042 } else {
2043 fsm_advance(fsm, n, NULL);
2044 }
2045
2046 return 0;
2047 }
2048
2049 /*
2050 * Special node to handle 16bit addressing encoding, which can reference two
2051 * registers at once.
2052 */
2053 static int
node_dual(struct x86_decode_fsm * fsm,struct x86_instr * instr)2054 node_dual(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2055 {
2056 int reg1, reg2;
2057
2058 reg1 = gpr_dual_reg1_rm[instr->regmodrm.rm];
2059
2060 if (instr->regmodrm.rm == 0b000 ||
2061 instr->regmodrm.rm == 0b010) {
2062 reg2 = NVMM_X64_GPR_RSI;
2063 } else if (instr->regmodrm.rm == 0b001 ||
2064 instr->regmodrm.rm == 0b011) {
2065 reg2 = NVMM_X64_GPR_RDI;
2066 } else {
2067 DISASSEMBLER_BUG();
2068 }
2069
2070 instr->strm->type = STORE_DUALREG;
2071 instr->strm->u.dualreg.reg1 = reg1;
2072 instr->strm->u.dualreg.reg2 = reg2;
2073
2074 if (instr->strm->disp.type == DISP_NONE) {
2075 DISASSEMBLER_BUG();
2076 } else if (instr->strm->disp.type == DISP_0) {
2077 /* Indirect register addressing mode */
2078 if (instr->opcode->immediate) {
2079 fsm_advance(fsm, 1, node_immediate);
2080 } else {
2081 fsm_advance(fsm, 1, NULL);
2082 }
2083 } else {
2084 fsm_advance(fsm, 1, node_disp);
2085 }
2086
2087 return 0;
2088 }
2089
2090 static const struct x86_reg *
get_register_idx(struct x86_instr * instr,uint8_t index)2091 get_register_idx(struct x86_instr *instr, uint8_t index)
2092 {
2093 uint8_t enc = index;
2094 const struct x86_reg *reg;
2095 size_t regsize;
2096
2097 regsize = instr->address_size;
2098 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2099
2100 if (reg->num == -1) {
2101 reg = resolve_special_register(instr, enc, regsize);
2102 }
2103
2104 return reg;
2105 }
2106
2107 static const struct x86_reg *
get_register_bas(struct x86_instr * instr,uint8_t base)2108 get_register_bas(struct x86_instr *instr, uint8_t base)
2109 {
2110 uint8_t enc = base;
2111 const struct x86_reg *reg;
2112 size_t regsize;
2113
2114 regsize = instr->address_size;
2115 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2116 if (reg->num == -1) {
2117 reg = resolve_special_register(instr, enc, regsize);
2118 }
2119
2120 return reg;
2121 }
2122
2123 static int
node_sib(struct x86_decode_fsm * fsm,struct x86_instr * instr)2124 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2125 {
2126 const struct x86_opcode *opcode;
2127 uint8_t scale, index, base;
2128 bool noindex, nobase;
2129 uint8_t byte;
2130
2131 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2132 return -1;
2133 }
2134
2135 scale = ((byte & 0b11000000) >> 6);
2136 index = ((byte & 0b00111000) >> 3);
2137 base = ((byte & 0b00000111) >> 0);
2138
2139 opcode = instr->opcode;
2140
2141 noindex = false;
2142 nobase = false;
2143
2144 if (index == 0b100 && !instr->rexpref.x) {
2145 /* Special case: the index is null */
2146 noindex = true;
2147 }
2148
2149 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2150 /* Special case: the base is null + disp32 */
2151 instr->strm->disp.type = DISP_4;
2152 nobase = true;
2153 }
2154
2155 instr->strm->type = STORE_SIB;
2156 instr->strm->u.sib.scale = (1 << scale);
2157 if (!noindex)
2158 instr->strm->u.sib.idx = get_register_idx(instr, index);
2159 if (!nobase)
2160 instr->strm->u.sib.bas = get_register_bas(instr, base);
2161
2162 /* May have a displacement, or an immediate */
2163 if (instr->strm->disp.type == DISP_1 ||
2164 instr->strm->disp.type == DISP_2 ||
2165 instr->strm->disp.type == DISP_4) {
2166 fsm_advance(fsm, 1, node_disp);
2167 } else if (opcode->immediate) {
2168 fsm_advance(fsm, 1, node_immediate);
2169 } else {
2170 fsm_advance(fsm, 1, NULL);
2171 }
2172
2173 return 0;
2174 }
2175
2176 static const struct x86_reg *
2177 get_register_reg(struct x86_instr *instr)
2178 {
2179 uint8_t enc = instr->regmodrm.reg;
2180 const struct x86_reg *reg;
2181 size_t regsize;
2182
2183 regsize = instr->operand_size;
2184
2185 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2186 if (reg->num == -1) {
2187 reg = resolve_special_register(instr, enc, regsize);
2188 }
2189
2190 return reg;
2191 }
2192
2193 static const struct x86_reg *
2194 get_register_rm(struct x86_instr *instr)
2195 {
2196 uint8_t enc = instr->regmodrm.rm;
2197 const struct x86_reg *reg;
2198 size_t regsize;
2199
2200 if (instr->strm->disp.type == DISP_NONE) {
2201 regsize = instr->operand_size;
2202 } else {
2203 /* Indirect access, the size is that of the address. */
2204 regsize = instr->address_size;
2205 }
2206
2207 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2208 if (reg->num == -1) {
2209 reg = resolve_special_register(instr, enc, regsize);
2210 }
2211
2212 return reg;
2213 }
2214
2215 static inline bool
2216 has_sib(struct x86_instr *instr)
2217 {
2218 return (instr->address_size != 2 && /* no SIB in 16bit addressing */
2219 instr->regmodrm.mod != 0b11 &&
2220 instr->regmodrm.rm == 0b100);
2221 }
2222
2223 static inline bool
2224 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2225 {
2226 return (fsm->is64bit && /* RIP-relative only in 64bit mode */
2227 instr->regmodrm.mod == 0b00 &&
2228 instr->regmodrm.rm == 0b101);
2229 }
2230
2231 static inline bool
2232 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2233 {
2234 return (!fsm->is64bit && /* no disp32-only in 64bit mode */
2235 instr->address_size != 2 && /* no disp32-only in 16bit addressing */
2236 instr->regmodrm.mod == 0b00 &&
2237 instr->regmodrm.rm == 0b101);
2238 }
2239
2240 static inline bool
2241 is_disp16_only(struct x86_decode_fsm *fsm __unused, struct x86_instr *instr)
2242 {
2243 return (instr->address_size == 2 && /* disp16-only only in 16bit addr */
2244 instr->regmodrm.mod == 0b00 &&
2245 instr->regmodrm.rm == 0b110);
2246 }
2247
2248 static inline bool
2249 is_dual(struct x86_decode_fsm *fsm __unused, struct x86_instr *instr)
2250 {
2251 return (instr->address_size == 2 &&
2252 instr->regmodrm.mod != 0b11 &&
2253 instr->regmodrm.rm <= 0b011);
2254 }
2255
2256 static enum x86_disp_type
2257 get_disp_type(struct x86_instr *instr)
2258 {
2259 switch (instr->regmodrm.mod) {
2260 case 0b00: /* indirect */
2261 return DISP_0;
2262 case 0b01: /* indirect+1 */
2263 return DISP_1;
2264 case 0b10: /* indirect+{2,4} */
2265 if (__predict_false(instr->address_size == 2)) {
2266 return DISP_2;
2267 }
2268 return DISP_4;
2269 case 0b11: /* direct */
2270 default: /* unreachable, but keeps LLVM quiet */
2271 return DISP_NONE;
2272 }
2273 __unreachable();
2274 }
2275
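/*
* The ModRM byte is mod[7:6] reg[5:3] rm[2:0]. E.g. 0x45 (mod=01,
* reg=000, rm=101) selects [RBP+disp8] as the RM operand and register
* 000 as the REG operand.
*/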
2276 static int
2277 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2278 {
2279 struct x86_store *strg, *strm;
2280 const struct x86_opcode *opcode;
2281 const struct x86_reg *reg;
2282 uint8_t byte;
2283
2284 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2285 return -1;
2286 }
2287
2288 opcode = instr->opcode;
2289
2290 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2291 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2292 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2293
2294 if (opcode->regtorm) {
2295 strg = &instr->src;
2296 strm = &instr->dst;
2297 } else { /* RM to REG */
2298 strm = &instr->src;
2299 strg = &instr->dst;
2300 }
2301
2302 /* Save for later use. */
2303 instr->strm = strm;
2304
2305 /*
2306 * Special cases: Groups. The REG field of REGMODRM is the index in
2307 * the group. op1 gets overwritten in the Immediate node, if any.
2308 */
2309 if (opcode->group1) {
2310 if (group1[instr->regmodrm.reg].emul == NULL) {
2311 return -1;
2312 }
2313 instr->emul = group1[instr->regmodrm.reg].emul;
2314 } else if (opcode->group3) {
2315 if (group3[instr->regmodrm.reg].emul == NULL) {
2316 return -1;
2317 }
2318 instr->emul = group3[instr->regmodrm.reg].emul;
2319 } else if (opcode->group11) {
2320 if (group11[instr->regmodrm.reg].emul == NULL) {
2321 return -1;
2322 }
2323 instr->emul = group11[instr->regmodrm.reg].emul;
2324 }
2325
2326 if (!opcode->immediate) {
2327 reg = get_register_reg(instr);
2328 if (reg == NULL) {
2329 return -1;
2330 }
2331 strg->type = STORE_REG;
2332 strg->u.reg = reg;
2333 }
2334
2335 /* The displacement applies to RM. */
2336 strm->disp.type = get_disp_type(instr);
2337
2338 if (has_sib(instr)) {
2339 /* Overwrites RM */
2340 fsm_advance(fsm, 1, node_sib);
2341 return 0;
2342 }
2343
2344 if (is_rip_relative(fsm, instr)) {
2345 /* Overwrites RM */
2346 strm->type = STORE_REG;
2347 strm->u.reg = &gpr_map__rip;
2348 strm->disp.type = DISP_4;
2349 fsm_advance(fsm, 1, node_disp);
2350 return 0;
2351 }
2352
2353 if (is_disp32_only(fsm, instr)) {
2354 /* Overwrites RM */
2355 strm->type = STORE_REG;
2356 strm->u.reg = NULL;
2357 strm->disp.type = DISP_4;
2358 fsm_advance(fsm, 1, node_disp);
2359 return 0;
2360 }
2361
2362 if (__predict_false(is_disp16_only(fsm, instr))) {
2363 /* Overwrites RM */
2364 strm->type = STORE_REG;
2365 strm->u.reg = NULL;
2366 strm->disp.type = DISP_2;
2367 fsm_advance(fsm, 1, node_disp);
2368 return 0;
2369 }
2370
2371 if (__predict_false(is_dual(fsm, instr))) {
2372 /* Overwrites RM */
2373 fsm_advance(fsm, 0, node_dual);
2374 return 0;
2375 }
2376
2377 reg = get_register_rm(instr);
2378 if (reg == NULL) {
2379 return -1;
2380 }
2381 strm->type = STORE_REG;
2382 strm->u.reg = reg;
2383
2384 if (strm->disp.type == DISP_NONE) {
2385 /* Direct register addressing mode */
2386 if (opcode->immediate) {
2387 fsm_advance(fsm, 1, node_immediate);
2388 } else {
2389 fsm_advance(fsm, 1, NULL);
2390 }
2391 } else if (strm->disp.type == DISP_0) {
2392 /* Indirect register addressing mode */
2393 if (opcode->immediate) {
2394 fsm_advance(fsm, 1, node_immediate);
2395 } else {
2396 fsm_advance(fsm, 1, NULL);
2397 }
2398 } else {
2399 fsm_advance(fsm, 1, node_disp);
2400 }
2401
2402 return 0;
2403 }
2404
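/*
* Operand size: fixed-size opcodes use defsize; REX.W forces 8 bytes;
* otherwise the 0x66 prefix toggles between 4 and 2 bytes (32/64bit
* modes), or between 2 and 4 bytes (16bit mode).
*/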
2405 static size_t
2406 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2407 {
2408 const struct x86_opcode *opcode = instr->opcode;
2409 int opsize;
2410
2411 /* Get the opsize */
2412 if (!opcode->szoverride) {
2413 opsize = opcode->defsize;
2414 } else if (instr->rexpref.present && instr->rexpref.w) {
2415 opsize = 8;
2416 } else {
2417 if (!fsm->is16bit) {
2418 if (instr->legpref.opr_ovr) {
2419 opsize = 2;
2420 } else {
2421 opsize = 4;
2422 }
2423 } else { /* 16bit */
2424 if (instr->legpref.opr_ovr) {
2425 opsize = 4;
2426 } else {
2427 opsize = 2;
2428 }
2429 }
2430 }
2431
2432 return opsize;
2433 }
2434
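/*
* Address size: 8 bytes in 64bit mode (4 with the 0x67 prefix), 4 in
* 32bit mode (2 with 0x67), 2 in 16bit mode (4 with 0x67).
*/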
2435 static size_t
2436 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2437 {
2438 if (fsm->is64bit) {
2439 if (__predict_false(instr->legpref.adr_ovr)) {
2440 return 4;
2441 }
2442 return 8;
2443 }
2444
2445 if (fsm->is32bit) {
2446 if (__predict_false(instr->legpref.adr_ovr)) {
2447 return 2;
2448 }
2449 return 4;
2450 }
2451
2452 /* 16bit. */
2453 if (__predict_false(instr->legpref.adr_ovr)) {
2454 return 4;
2455 }
2456 return 2;
2457 }
2458
2459 static int
2460 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2461 {
2462 const struct x86_opcode *opcode;
2463 uint8_t byte;
2464
2465 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2466 return -1;
2467 }
2468
2469 opcode = &primary_opcode_table[byte];
2470 if (__predict_false(!opcode->valid)) {
2471 return -1;
2472 }
2473
2474 instr->opcode = opcode;
2475 instr->emul = opcode->emul;
2476 instr->operand_size = get_operand_size(fsm, instr);
2477 instr->address_size = get_address_size(fsm, instr);
2478
2479 if (fsm->is64bit && (instr->operand_size == 4)) {
2480 /* Zero-extend to 64 bits. */
2481 instr->zeroextend_mask = ~size_to_mask(4);
2482 }
2483
2484 if (opcode->regmodrm) {
2485 fsm_advance(fsm, 1, node_regmodrm);
2486 } else if (opcode->dmo) {
2487 /* Direct-Memory Offsets */
2488 fsm_advance(fsm, 1, node_dmo);
2489 } else if (opcode->stos || opcode->lods) {
2490 fsm_advance(fsm, 1, node_stlo);
2491 } else if (opcode->movs) {
2492 fsm_advance(fsm, 1, node_movs);
2493 } else {
2494 return -1;
2495 }
2496
2497 return 0;
2498 }
2499
2500 static int
2501 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2502 {
2503 const struct x86_opcode *opcode;
2504 uint8_t byte;
2505
2506 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2507 return -1;
2508 }
2509
2510 opcode = &secondary_opcode_table[byte];
2511 if (__predict_false(!opcode->valid)) {
2512 return -1;
2513 }
2514
2515 instr->opcode = opcode;
2516 instr->emul = opcode->emul;
2517 instr->operand_size = get_operand_size(fsm, instr);
2518 instr->address_size = get_address_size(fsm, instr);
2519
2520 if (fsm->is64bit && (instr->operand_size == 4)) {
2521 /* Zero-extend to 64 bits. */
2522 instr->zeroextend_mask = ~size_to_mask(4);
2523 }
2524
2525 if (opcode->flags & FLAG_ze) {
2526 /*
2527 * Compute the mask for zero-extend. Update the operand size,
2528 * we move fewer bytes.
2529 */
2530 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2531 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2532 instr->operand_size = opcode->defsize;
2533 }
2534
2535 if (opcode->regmodrm) {
2536 fsm_advance(fsm, 1, node_regmodrm);
2537 } else {
2538 return -1;
2539 }
2540
2541 return 0;
2542 }
2543
2544 static int
2545 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2546 {
2547 uint8_t byte;
2548
2549 #define ESCAPE 0x0F
2550 #define VEX_1 0xC5
2551 #define VEX_2 0xC4
2552 #define XOP 0x8F
2553
2554 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2555 return -1;
2556 }
2557
2558 /*
2559 * We don't take XOP. It is AMD-specific, and it was removed shortly
2560 * after being introduced.
2561 */
2562 if (byte == ESCAPE) {
2563 fsm_advance(fsm, 1, node_secondary_opcode);
2564 } else if (!instr->rexpref.present) {
2565 if (byte == VEX_1) {
2566 return -1;
2567 } else if (byte == VEX_2) {
2568 return -1;
2569 } else {
2570 fsm->fn = node_primary_opcode;
2571 }
2572 } else {
2573 fsm->fn = node_primary_opcode;
2574 }
2575
2576 return 0;
2577 }
2578
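/*
* A REX prefix is a single byte in the 0x40-0x4F range, laid out as
* 0100WRXB; e.g. 0x48 sets only REX.W. It exists in 64bit mode only,
* so we reject it elsewhere.
*/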
2579 static int
2580 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2581 {
2582 struct x86_rexpref *rexpref = &instr->rexpref;
2583 uint8_t byte;
2584 size_t n = 0;
2585
2586 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2587 return -1;
2588 }
2589
2590 if (byte >= 0x40 && byte <= 0x4F) {
2591 if (__predict_false(!fsm->is64bit)) {
2592 return -1;
2593 }
2594 rexpref->b = ((byte & 0x1) != 0);
2595 rexpref->x = ((byte & 0x2) != 0);
2596 rexpref->r = ((byte & 0x4) != 0);
2597 rexpref->w = ((byte & 0x8) != 0);
2598 rexpref->present = true;
2599 n = 1;
2600 }
2601
2602 fsm_advance(fsm, n, node_main);
2603 return 0;
2604 }
2605
2606 static int
2607 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2608 {
2609 uint8_t byte;
2610
2611 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2612 return -1;
2613 }
2614
2615 if (byte == LEG_OPR_OVR) {
2616 instr->legpref.opr_ovr = 1;
2617 } else if (byte == LEG_OVR_DS) {
2618 instr->legpref.seg = NVMM_X64_SEG_DS;
2619 } else if (byte == LEG_OVR_ES) {
2620 instr->legpref.seg = NVMM_X64_SEG_ES;
2621 } else if (byte == LEG_REP) {
2622 instr->legpref.rep = 1;
2623 } else if (byte == LEG_OVR_GS) {
2624 instr->legpref.seg = NVMM_X64_SEG_GS;
2625 } else if (byte == LEG_OVR_FS) {
2626 instr->legpref.seg = NVMM_X64_SEG_FS;
2627 } else if (byte == LEG_ADR_OVR) {
2628 instr->legpref.adr_ovr = 1;
2629 } else if (byte == LEG_OVR_CS) {
2630 instr->legpref.seg = NVMM_X64_SEG_CS;
2631 } else if (byte == LEG_OVR_SS) {
2632 instr->legpref.seg = NVMM_X64_SEG_SS;
2633 } else if (byte == LEG_REPN) {
2634 instr->legpref.repn = 1;
2635 } else if (byte == LEG_LOCK) {
2636 /* ignore */
2637 } else {
2638 /* not a legacy prefix */
2639 fsm_advance(fsm, 0, node_rex_prefix);
2640 return 0;
2641 }
2642
2643 fsm_advance(fsm, 1, node_legacy_prefix);
2644 return 0;
2645 }
2646
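/*
* Decode the instruction bytes by running the FSM: legacy prefixes ->
* REX prefix -> opcode (primary, or secondary after the 0x0F escape) ->
* ModRM/SIB/displacement/immediate, until a node sets fn to NULL or
* fails.
*/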
2647 static int
2648 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2649 struct nvmm_x64_state *state)
2650 {
2651 struct x86_decode_fsm fsm;
2652 int ret;
2653
2654 memset(instr, 0, sizeof(*instr));
2655 instr->legpref.seg = -1;
2656 instr->src.hardseg = -1;
2657 instr->dst.hardseg = -1;
2658
2659 fsm.is64bit = is_64bit(state);
2660 fsm.is32bit = is_32bit(state);
2661 fsm.is16bit = is_16bit(state);
2662
2663 fsm.fn = node_legacy_prefix;
2664 fsm.buf = inst_bytes;
2665 fsm.end = inst_bytes + inst_len;
2666
2667 while (fsm.fn != NULL) {
2668 ret = (*fsm.fn)(&fsm, instr);
2669 if (ret == -1)
2670 return -1;
2671 }
2672
2673 instr->len = fsm.buf - inst_bytes;
2674
2675 return 0;
2676 }
2677
2678 /* -------------------------------------------------------------------------- */
2679
2680 #define EXEC_INSTR(sz, instr) \
2681 static uint##sz##_t \
2682 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2683 { \
2684 uint##sz##_t res; \
2685 __asm __volatile ( \
2686 #instr" %2, %3;" \
2687 "mov %3, %1;" \
2688 "pushfq;" \
2689 "popq %0" \
2690 : "=r" (*rflags), "=r" (res) \
2691 : "r" (op1), "r" (op2)); \
2692 return res; \
2693 }
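/*
* The generated helpers execute the instruction natively on the host.
* AT&T operand order applies: "instr %2, %3" applies op1 to op2 and the
* result is read back from op2. The resulting host RFLAGS is captured
* with PUSHFQ, so the relevant PSL_* bits can be merged into the
* guest's RFLAGS.
*/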
2694
2695 #define EXEC_DISPATCHER(instr) \
2696 static uint64_t \
2697 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2698 { \
2699 switch (opsize) { \
2700 case 1: \
2701 return exec_##instr##8(op1, op2, rflags); \
2702 case 2: \
2703 return exec_##instr##16(op1, op2, rflags); \
2704 case 4: \
2705 return exec_##instr##32(op1, op2, rflags); \
2706 default: \
2707 return exec_##instr##64(op1, op2, rflags); \
2708 } \
2709 }
2710
2711 /* SUB: ret = op1 - op2 */
2712 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2713 EXEC_INSTR(8, sub)
2714 EXEC_INSTR(16, sub)
2715 EXEC_INSTR(32, sub)
2716 EXEC_INSTR(64, sub)
2717 EXEC_DISPATCHER(sub)
2718
2719 /* OR: ret = op1 | op2 */
2720 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2721 EXEC_INSTR(8, or)
2722 EXEC_INSTR(16, or)
2723 EXEC_INSTR(32, or)
2724 EXEC_INSTR(64, or)
2725 EXEC_DISPATCHER(or)
2726
2727 /* AND: ret = op1 & op2 */
2728 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2729 EXEC_INSTR(8, and)
2730 EXEC_INSTR(16, and)
2731 EXEC_INSTR(32, and)
2732 EXEC_INSTR(64, and)
2733 EXEC_DISPATCHER(and)
2734
2735 /* XOR: ret = op1 ^ op2 */
2736 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2737 EXEC_INSTR(8, xor)
2738 EXEC_INSTR(16, xor)
2739 EXEC_INSTR(32, xor)
2740 EXEC_INSTR(64, xor)
2741 EXEC_DISPATCHER(xor)
2742
2743 /* -------------------------------------------------------------------------- */
2744
2745 /*
2746 * Emulation functions. We don't care about the order of the operands, except
2747 * for SUB, CMP and TEST. For those, we look at mem->write to determine which
2748 * operand is op1 and which is op2.
2749 */
2750
2751 static void
2752 x86_func_or(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2753 {
2754 uint64_t *retval = (uint64_t *)mem->data;
2755 const bool write = mem->write;
2756 uint64_t *op1, op2, fl, ret;
2757
2758 op1 = (uint64_t *)mem->data;
2759 op2 = 0;
2760
2761 /* Fetch the value to be OR'ed (op2). */
2762 mem->data = (uint8_t *)&op2;
2763 mem->write = false;
2764 (*vcpu->cbs.mem)(mem);
2765
2766 /* Perform the OR. */
2767 ret = exec_or(*op1, op2, &fl, mem->size);
2768
2769 if (write) {
2770 /* Write back the result. */
2771 mem->data = (uint8_t *)&ret;
2772 mem->write = true;
2773 (*vcpu->cbs.mem)(mem);
2774 } else {
2775 /* Return data to the caller. */
2776 *retval = ret;
2777 }
2778
2779 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2780 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2781 }
2782
2783 static void
2784 x86_func_and(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2785 {
2786 uint64_t *retval = (uint64_t *)mem->data;
2787 const bool write = mem->write;
2788 uint64_t *op1, op2, fl, ret;
2789
2790 op1 = (uint64_t *)mem->data;
2791 op2 = 0;
2792
2793 /* Fetch the value to be AND'ed (op2). */
2794 mem->data = (uint8_t *)&op2;
2795 mem->write = false;
2796 (*vcpu->cbs.mem)(mem);
2797
2798 /* Perform the AND. */
2799 ret = exec_and(*op1, op2, &fl, mem->size);
2800
2801 if (write) {
2802 /* Write back the result. */
2803 mem->data = (uint8_t *)&ret;
2804 mem->write = true;
2805 (*vcpu->cbs.mem)(mem);
2806 } else {
2807 /* Return data to the caller. */
2808 *retval = ret;
2809 }
2810
2811 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2812 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2813 }
2814
2815 static void
2816 x86_func_xchg(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs __unused)
2817 {
2818 uint64_t *op1, op2;
2819
2820 op1 = (uint64_t *)mem->data;
2821 op2 = 0;
2822
2823 /* Fetch op2. */
2824 mem->data = (uint8_t *)&op2;
2825 mem->write = false;
2826 (*vcpu->cbs.mem)(mem);
2827
2828 /* Write op1 in op2. */
2829 mem->data = (uint8_t *)op1;
2830 mem->write = true;
2831 (*vcpu->cbs.mem)(mem);
2832
2833 /* Write op2 in op1. */
2834 *op1 = op2;
2835 }
2836
2837 static void
2838 x86_func_sub(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2839 {
2840 uint64_t *retval = (uint64_t *)mem->data;
2841 const bool write = mem->write;
2842 uint64_t *op1, *op2, fl, ret;
2843 uint64_t tmp;
2844 bool memop1;
2845
2846 memop1 = !mem->write;
2847 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2848 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2849
2850 /* Fetch the value to be SUB'ed (op1 or op2). */
2851 mem->data = (uint8_t *)&tmp;
2852 mem->write = false;
2853 (*vcpu->cbs.mem)(mem);
2854
2855 /* Perform the SUB. */
2856 ret = exec_sub(*op1, *op2, &fl, mem->size);
2857
2858 if (write) {
2859 /* Write back the result. */
2860 mem->data = (uint8_t *)&ret;
2861 mem->write = true;
2862 (*vcpu->cbs.mem)(mem);
2863 } else {
2864 /* Return data to the caller. */
2865 *retval = ret;
2866 }
2867
2868 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2869 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2870 }
2871
2872 static void
2873 x86_func_xor(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2874 {
2875 uint64_t *retval = (uint64_t *)mem->data;
2876 const bool write = mem->write;
2877 uint64_t *op1, op2, fl, ret;
2878
2879 op1 = (uint64_t *)mem->data;
2880 op2 = 0;
2881
2882 /* Fetch the value to be XOR'ed (op2). */
2883 mem->data = (uint8_t *)&op2;
2884 mem->write = false;
2885 (*vcpu->cbs.mem)(mem);
2886
2887 /* Perform the XOR. */
2888 ret = exec_xor(*op1, op2, &fl, mem->size);
2889
2890 if (write) {
2891 /* Write back the result. */
2892 mem->data = (uint8_t *)&ret;
2893 mem->write = true;
2894 (*vcpu->cbs.mem)(mem);
2895 } else {
2896 /* Return data to the caller. */
2897 *retval = ret;
2898 }
2899
2900 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2901 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2902 }
2903
2904 static void
2905 x86_func_cmp(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2906 {
2907 uint64_t *op1, *op2, fl;
2908 uint64_t tmp;
2909 bool memop1;
2910
2911 memop1 = !mem->write;
2912 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2913 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2914
2915 /* Fetch the value to be CMP'ed (op1 or op2). */
2916 mem->data = (uint8_t *)&tmp;
2917 mem->write = false;
2918 (*vcpu->cbs.mem)(mem);
2919
2920 /* Perform the CMP. */
2921 exec_sub(*op1, *op2, &fl, mem->size);
2922
2923 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2924 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2925 }
2926
2927 static void
2928 x86_func_test(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2929 {
2930 uint64_t *op1, *op2, fl;
2931 uint64_t tmp;
2932 bool memop1;
2933
2934 memop1 = !mem->write;
2935 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2936 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2937
2938 /* Fetch the value to be TEST'ed (op1 or op2). */
2939 mem->data = (uint8_t *)&tmp;
2940 mem->write = false;
2941 (*vcpu->cbs.mem)(mem);
2942
2943 /* Perform the TEST. */
2944 exec_and(*op1, *op2, &fl, mem->size);
2945
2946 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2947 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2948 }
2949
2950 static void
2951 x86_func_mov(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs __unused)
2952 {
2953 /*
2954 * Nothing special, just move without emulation.
2955 */
2956 (*vcpu->cbs.mem)(mem);
2957 }
2958
2959 static void
2960 x86_func_stos(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2961 {
2962 /*
2963 * Just move, and update RDI.
2964 */
2965 (*vcpu->cbs.mem)(mem);
2966
2967 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2968 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2969 } else {
2970 gprs[NVMM_X64_GPR_RDI] += mem->size;
2971 }
2972 }
2973
2974 static void
2975 x86_func_lods(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
2976 {
2977 /*
2978 * Just move, and update RSI.
2979 */
2980 (*vcpu->cbs.mem)(mem);
2981
2982 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2983 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2984 } else {
2985 gprs[NVMM_X64_GPR_RSI] += mem->size;
2986 }
2987 }
2988
2989 /* -------------------------------------------------------------------------- */
2990
2991 static inline uint64_t
2992 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2993 {
2994 uint64_t val;
2995
2996 val = state->gprs[gpr];
2997 val &= size_to_mask(instr->address_size);
2998
2999 return val;
3000 }
3001
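/*
* Compute the guest virtual address referenced by a store: base
* register + scaled index (SIB) + displacement, then apply the selected
* segment (in long mode only FS/GS have an effect). E.g. a SIB operand
* [RBX+RCX*4+0x10] yields RBX + 4*RCX + 0x10.
*/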
3002 static int
3003 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
3004 struct x86_store *store, gvaddr_t *gvap, size_t size)
3005 {
3006 struct x86_sib *sib;
3007 gvaddr_t gva = 0;
3008 uint64_t reg;
3009 int ret, seg;
3010
3011 if (store->type == STORE_SIB) {
3012 sib = &store->u.sib;
3013 if (sib->bas != NULL)
3014 gva += gpr_read_address(instr, state, sib->bas->num);
3015 if (sib->idx != NULL) {
3016 reg = gpr_read_address(instr, state, sib->idx->num);
3017 gva += sib->scale * reg;
3018 }
3019 } else if (store->type == STORE_REG) {
3020 if (store->u.reg == NULL) {
3021 /* The base is null. Happens with disp32-only and
3022 * disp16-only. */
3023 } else {
3024 gva = gpr_read_address(instr, state, store->u.reg->num);
3025 }
3026 } else if (store->type == STORE_DUALREG) {
3027 gva = gpr_read_address(instr, state, store->u.dualreg.reg1) +
3028 gpr_read_address(instr, state, store->u.dualreg.reg2);
3029 } else {
3030 gva = store->u.dmo;
3031 }
3032
3033 if (store->disp.type != DISP_NONE) {
3034 gva += store->disp.data;
3035 }
3036
3037 if (store->hardseg != -1) {
3038 seg = store->hardseg;
3039 } else {
3040 if (__predict_false(instr->legpref.seg != -1)) {
3041 seg = instr->legpref.seg;
3042 } else {
3043 seg = NVMM_X64_SEG_DS;
3044 }
3045 }
3046
3047 if (__predict_true(is_long_mode(state))) {
3048 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
3049 segment_apply(&state->segs[seg], &gva);
3050 }
3051 } else {
3052 ret = segment_check(&state->segs[seg], gva, size);
3053 if (ret == -1)
3054 return -1;
3055 segment_apply(&state->segs[seg], &gva);
3056 }
3057
3058 *gvap = gva;
3059 return 0;
3060 }
3061
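/*
* Fetch the first bytes of the current instruction, and return the
* segment selected by its legacy prefixes, defaulting to DS.
*/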
3062 static int
3063 fetch_segment(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
3064 {
3065 struct nvmm_x64_state *state = vcpu->state;
3066 uint8_t inst_bytes[5], byte;
3067 size_t i, fetchsize;
3068 gvaddr_t gva;
3069 int ret, seg;
3070
3071 fetchsize = sizeof(inst_bytes);
3072
3073 gva = state->gprs[NVMM_X64_GPR_RIP];
3074 if (__predict_false(!is_long_mode(state))) {
3075 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3076 fetchsize);
3077 if (ret == -1)
3078 return -1;
3079 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3080 }
3081
3082 ret = read_guest_memory(mach, vcpu, gva, inst_bytes, fetchsize);
3083 if (ret == -1)
3084 return -1;
3085
3086 seg = NVMM_X64_SEG_DS;
3087 for (i = 0; i < fetchsize; i++) {
3088 byte = inst_bytes[i];
3089
3090 if (byte == LEG_OVR_DS) {
3091 seg = NVMM_X64_SEG_DS;
3092 } else if (byte == LEG_OVR_ES) {
3093 seg = NVMM_X64_SEG_ES;
3094 } else if (byte == LEG_OVR_GS) {
3095 seg = NVMM_X64_SEG_GS;
3096 } else if (byte == LEG_OVR_FS) {
3097 seg = NVMM_X64_SEG_FS;
3098 } else if (byte == LEG_OVR_CS) {
3099 seg = NVMM_X64_SEG_CS;
3100 } else if (byte == LEG_OVR_SS) {
3101 seg = NVMM_X64_SEG_SS;
3102 } else if (byte == LEG_OPR_OVR) {
3103 /* nothing */
3104 } else if (byte == LEG_ADR_OVR) {
3105 /* nothing */
3106 } else if (byte == LEG_REP) {
3107 /* nothing */
3108 } else if (byte == LEG_REPN) {
3109 /* nothing */
3110 } else if (byte == LEG_LOCK) {
3111 /* nothing */
3112 } else {
3113 return seg;
3114 }
3115 }
3116
3117 return seg;
3118 }
3119
3120 static int
3121 fetch_instruction(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
3122 struct nvmm_vcpu_exit *exit)
3123 {
3124 struct nvmm_x64_state *state = vcpu->state;
3125 size_t fetchsize;
3126 gvaddr_t gva;
3127 int ret;
3128
3129 fetchsize = sizeof(exit->u.mem.inst_bytes);
3130
3131 gva = state->gprs[NVMM_X64_GPR_RIP];
3132 if (__predict_false(!is_long_mode(state))) {
3133 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3134 fetchsize);
3135 if (ret == -1)
3136 return -1;
3137 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3138 }
3139
3140 ret = read_guest_memory(mach, vcpu, gva, exit->u.mem.inst_bytes,
3141 fetchsize);
3142 if (ret == -1)
3143 return -1;
3144
3145 exit->u.mem.inst_len = fetchsize;
3146
3147 return 0;
3148 }
3149
3150 /*
3151 * Double memory operand, MOVS only.
3152 */
3153 static int
3154 assist_mem_double_movs(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
3155 struct x86_instr *instr)
3156 {
3157 struct nvmm_x64_state *state = vcpu->state;
3158 uint8_t data[8];
3159 gvaddr_t gva;
3160 size_t size;
3161 int ret;
3162
3163 size = instr->operand_size;
3164
3165 /* Source. */
3166 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3167 if (ret == -1)
3168 return -1;
3169 ret = read_guest_memory(mach, vcpu, gva, data, size);
3170 if (ret == -1)
3171 return -1;
3172
3173 /* Destination. */
3174 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3175 if (ret == -1)
3176 return -1;
3177 ret = write_guest_memory(mach, vcpu, gva, data, size);
3178 if (ret == -1)
3179 return -1;
3180
3181 if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
3182 state->gprs[NVMM_X64_GPR_RSI] -= size;
3183 state->gprs[NVMM_X64_GPR_RDI] -= size;
3184 } else {
3185 state->gprs[NVMM_X64_GPR_RSI] += size;
3186 state->gprs[NVMM_X64_GPR_RDI] += size;
3187 }
3188
3189 return 0;
3190 }
3191
3192 /*
3193 * Single memory operand, covers most instructions.
3194 */
3195 static int
3196 assist_mem_single(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
3197 struct x86_instr *instr)
3198 {
3199 struct nvmm_x64_state *state = vcpu->state;
3200 struct nvmm_vcpu_exit *exit = vcpu->exit;
3201 struct nvmm_mem mem;
3202 uint8_t membuf[8];
3203 uint64_t val;
3204
3205 memset(membuf, 0, sizeof(membuf));
3206
3207 mem.mach = mach;
3208 mem.vcpu = vcpu;
3209 mem.gpa = exit->u.mem.gpa;
3210 mem.size = instr->operand_size;
3211 mem.data = membuf;
3212
3213 /* Determine the direction. */
3214 switch (instr->src.type) {
3215 case STORE_REG:
3216 if (instr->src.disp.type != DISP_NONE) {
3217 /* Indirect access. */
3218 mem.write = false;
3219 } else {
3220 /* Direct access. */
3221 mem.write = true;
3222 }
3223 break;
3224 case STORE_DUALREG:
3225 if (instr->src.disp.type == DISP_NONE) {
3226 DISASSEMBLER_BUG();
3227 }
3228 mem.write = false;
3229 break;
3230 case STORE_IMM:
3231 mem.write = true;
3232 break;
3233 case STORE_SIB:
3234 mem.write = false;
3235 break;
3236 case STORE_DMO:
3237 mem.write = false;
3238 break;
3239 default:
3240 DISASSEMBLER_BUG();
3241 }
3242
3243 if (mem.write) {
3244 switch (instr->src.type) {
3245 case STORE_REG:
3246 /* The instruction was "reg -> mem". Fetch the register
3247 * in membuf. */
3248 if (__predict_false(instr->src.disp.type != DISP_NONE)) {
3249 DISASSEMBLER_BUG();
3250 }
3251 val = state->gprs[instr->src.u.reg->num];
3252 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3253 memcpy(mem.data, &val, mem.size);
3254 break;
3255 case STORE_IMM:
3256 /* The instruction was "imm -> mem". Fetch the immediate
3257 * in membuf. */
3258 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3259 break;
3260 default:
3261 DISASSEMBLER_BUG();
3262 }
3263 } else if (instr->emul->readreg) {
3264 /* The instruction was "mem -> reg", but the value of the
3265 * register matters for the emul func. Fetch it in membuf. */
3266 if (__predict_false(instr->dst.type != STORE_REG)) {
3267 DISASSEMBLER_BUG();
3268 }
3269 if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
3270 DISASSEMBLER_BUG();
3271 }
3272 val = state->gprs[instr->dst.u.reg->num];
3273 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3274 memcpy(mem.data, &val, mem.size);
3275 }
3276
3277 (*instr->emul->func)(vcpu, &mem, state->gprs);
3278
3279 if (instr->emul->notouch) {
3280 /* We're done. */
3281 return 0;
3282 }
3283
3284 if (!mem.write) {
3285 /* The instruction was "mem -> reg". The emul func has filled
3286 * membuf with the memory content. Install membuf in the
3287 * register. */
3288 if (__predict_false(instr->dst.type != STORE_REG)) {
3289 DISASSEMBLER_BUG();
3290 }
3291 if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
3292 DISASSEMBLER_BUG();
3293 }
3294 memcpy(&val, membuf, sizeof(uint64_t));
3295 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3296 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3297 state->gprs[instr->dst.u.reg->num] |= val;
3298 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3299 } else if (instr->emul->backprop) {
3300 /* The instruction was "reg -> mem", but the memory must be
3301 * back-propagated to the register. Install membuf in the
3302 * register. */
3303 if (__predict_false(instr->src.type != STORE_REG)) {
3304 DISASSEMBLER_BUG();
3305 }
3306 if (__predict_false(instr->src.disp.type != DISP_NONE)) {
3307 DISASSEMBLER_BUG();
3308 }
3309 memcpy(&val, membuf, sizeof(uint64_t));
3310 val = __SHIFTIN(val, instr->src.u.reg->mask);
3311 state->gprs[instr->src.u.reg->num] &= ~instr->src.u.reg->mask;
3312 state->gprs[instr->src.u.reg->num] |= val;
3313 state->gprs[instr->src.u.reg->num] &= ~instr->zeroextend_mask;
3314 }
3315
3316 return 0;
3317 }
3318
3319 int
3320 nvmm_assist_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
3321 {
3322 struct nvmm_x64_state *state = vcpu->state;
3323 struct nvmm_vcpu_exit *exit = vcpu->exit;
3324 struct x86_instr instr;
3325 uint64_t cnt = 0; /* initialized only to appease GCC */
3326 int ret;
3327
3328 if (__predict_false(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
3329 errno = EINVAL;
3330 return -1;
3331 }
3332
3333 ret = nvmm_vcpu_getstate(mach, vcpu,
3334 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3335 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3336 if (ret == -1)
3337 return -1;
3338
3339 if (exit->u.mem.inst_len == 0) {
3340 /*
3341 * The instruction was not fetched from the kernel. Fetch
3342 * it ourselves.
3343 */
3344 ret = fetch_instruction(mach, vcpu, exit);
3345 if (ret == -1)
3346 return -1;
3347 }
3348
3349 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3350 &instr, state);
3351 if (ret == -1) {
3352 errno = ENODEV;
3353 return -1;
3354 }
3355
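/*
* For REP/REPN the iteration count lives in RCX, truncated to the
* address size (CX/ECX/RCX). A zero count makes the instruction a
* no-op: just skip over it.
*/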
3356 if (instr.legpref.rep || instr.legpref.repn) {
3357 cnt = rep_get_cnt(state, instr.address_size);
3358 if (__predict_false(cnt == 0)) {
3359 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3360 goto out;
3361 }
3362 }
3363
3364 if (instr.opcode->movs) {
3365 ret = assist_mem_double_movs(mach, vcpu, &instr);
3366 } else {
3367 ret = assist_mem_single(mach, vcpu, &instr);
3368 }
3369 if (ret == -1) {
3370 errno = ENODEV;
3371 return -1;
3372 }
3373
3374 if (instr.legpref.rep || instr.legpref.repn) {
3375 cnt -= 1;
3376 rep_set_cnt(state, instr.address_size, cnt);
3377 if (cnt == 0) {
3378 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3379 } else if (__predict_false(instr.legpref.repn)) {
3380 if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3381 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3382 }
3383 }
3384 } else {
3385 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3386 }
3387
3388 out:
3389 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
3390 if (ret == -1)
3391 return -1;
3392
3393 return 0;
3394 }
3395