1 /* $NetBSD: libnvmm_x86.c,v 1.29 2019/04/28 14:22:13 maxv Exp $ */ 2 3 /* 4 * Copyright (c) 2018 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Maxime Villard. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <unistd.h> 38 #include <fcntl.h> 39 #include <errno.h> 40 #include <sys/ioctl.h> 41 #include <sys/mman.h> 42 #include <machine/vmparam.h> 43 #include <machine/pte.h> 44 #include <machine/psl.h> 45 46 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) 47 #define __cacheline_aligned __attribute__((__aligned__(64))) 48 49 #include <x86/specialreg.h> 50 51 /* -------------------------------------------------------------------------- */ 52 53 static void 54 nvmm_arch_copystate(void *_dst, void *_src, uint64_t flags) 55 { 56 struct nvmm_x64_state *src = _src; 57 struct nvmm_x64_state *dst = _dst; 58 59 if (flags & NVMM_X64_STATE_GPRS) { 60 memcpy(dst->gprs, src->gprs, sizeof(dst->gprs)); 61 } 62 if (flags & NVMM_X64_STATE_SEGS) { 63 memcpy(dst->segs, src->segs, sizeof(dst->segs)); 64 } 65 if (flags & NVMM_X64_STATE_CRS) { 66 memcpy(dst->crs, src->crs, sizeof(dst->crs)); 67 } 68 if (flags & NVMM_X64_STATE_DRS) { 69 memcpy(dst->drs, src->drs, sizeof(dst->drs)); 70 } 71 if (flags & NVMM_X64_STATE_MSRS) { 72 memcpy(dst->msrs, src->msrs, sizeof(dst->msrs)); 73 } 74 if (flags & NVMM_X64_STATE_INTR) { 75 memcpy(&dst->intr, &src->intr, sizeof(dst->intr)); 76 } 77 if (flags & NVMM_X64_STATE_FPU) { 78 memcpy(&dst->fpu, &src->fpu, sizeof(dst->fpu)); 79 } 80 } 81 82 /* -------------------------------------------------------------------------- */ 83 84 /* 85 * Undocumented debugging function. Helpful. 86 */ 87 int 88 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid) 89 { 90 struct nvmm_x64_state state; 91 uint16_t *attr; 92 size_t i; 93 int ret; 94 95 const char *segnames[] = { 96 "ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR" 97 }; 98 99 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL); 100 if (ret == -1) 101 return -1; 102 103 printf("+ VCPU id=%d\n", (int)cpuid); 104 printf("| -> RIP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RIP]); 105 printf("| -> RSP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RSP]); 106 printf("| -> RAX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RAX]); 107 printf("| -> RBX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RBX]); 108 printf("| -> RCX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RCX]); 109 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]); 110 for (i = 0; i < NVMM_X64_NSEG; i++) { 111 attr = (uint16_t *)&state.segs[i].attrib; 112 printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n", 113 segnames[i], 114 state.segs[i].selector, 115 state.segs[i].base, 116 state.segs[i].limit, 117 *attr); 118 } 119 printf("| -> MSR_EFER=%"PRIx64"\n", state.msrs[NVMM_X64_MSR_EFER]); 120 printf("| -> CR0=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR0]); 121 printf("| -> CR3=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR3]); 122 printf("| -> CR4=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR4]); 123 printf("| -> CR8=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR8]); 124 125 return 0; 126 } 127 128 /* -------------------------------------------------------------------------- */ 129 130 #define PTE32_L1_SHIFT 12 131 #define PTE32_L2_SHIFT 22 132 133 #define PTE32_L2_MASK 0xffc00000 134 #define PTE32_L1_MASK 0x003ff000 135 136 #define PTE32_L2_FRAME (PTE32_L2_MASK) 137 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK) 138 139 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT) 140 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT) 141 142 #define CR3_FRAME_32BIT PG_FRAME 143 144 typedef uint32_t pte_32bit_t; 145 146 static int 147 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3, 148 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot) 149 { 150 gpaddr_t L2gpa, L1gpa; 151 uintptr_t L2hva, L1hva; 152 pte_32bit_t *pdir, pte; 153 nvmm_prot_t pageprot; 154 155 /* We begin with an RWXU access. */ 156 *prot = NVMM_PROT_ALL; 157 158 /* Parse L2. */ 159 L2gpa = (cr3 & CR3_FRAME_32BIT); 160 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1) 161 return -1; 162 pdir = (pte_32bit_t *)L2hva; 163 pte = pdir[pte32_l2idx(gva)]; 164 if ((pte & PG_V) == 0) 165 return -1; 166 if ((pte & PG_u) == 0) 167 *prot &= ~NVMM_PROT_USER; 168 if ((pte & PG_KW) == 0) 169 *prot &= ~NVMM_PROT_WRITE; 170 if ((pte & PG_PS) && !has_pse) 171 return -1; 172 if (pte & PG_PS) { 173 *gpa = (pte & PTE32_L2_FRAME); 174 *gpa = *gpa + (gva & PTE32_L1_MASK); 175 return 0; 176 } 177 178 /* Parse L1. */ 179 L1gpa = (pte & PG_FRAME); 180 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1) 181 return -1; 182 pdir = (pte_32bit_t *)L1hva; 183 pte = pdir[pte32_l1idx(gva)]; 184 if ((pte & PG_V) == 0) 185 return -1; 186 if ((pte & PG_u) == 0) 187 *prot &= ~NVMM_PROT_USER; 188 if ((pte & PG_KW) == 0) 189 *prot &= ~NVMM_PROT_WRITE; 190 if (pte & PG_PS) 191 return -1; 192 193 *gpa = (pte & PG_FRAME); 194 return 0; 195 } 196 197 /* -------------------------------------------------------------------------- */ 198 199 #define PTE32_PAE_L1_SHIFT 12 200 #define PTE32_PAE_L2_SHIFT 21 201 #define PTE32_PAE_L3_SHIFT 30 202 203 #define PTE32_PAE_L3_MASK 0xc0000000 204 #define PTE32_PAE_L2_MASK 0x3fe00000 205 #define PTE32_PAE_L1_MASK 0x001ff000 206 207 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK) 208 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK) 209 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK) 210 211 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT) 212 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT) 213 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT) 214 215 #define CR3_FRAME_32BIT_PAE __BITS(31, 5) 216 217 typedef uint64_t pte_32bit_pae_t; 218 219 static int 220 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3, 221 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot) 222 { 223 gpaddr_t L3gpa, L2gpa, L1gpa; 224 uintptr_t L3hva, L2hva, L1hva; 225 pte_32bit_pae_t *pdir, pte; 226 nvmm_prot_t pageprot; 227 228 /* We begin with an RWXU access. */ 229 *prot = NVMM_PROT_ALL; 230 231 /* Parse L3. */ 232 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE); 233 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1) 234 return -1; 235 pdir = (pte_32bit_pae_t *)L3hva; 236 pte = pdir[pte32_pae_l3idx(gva)]; 237 if ((pte & PG_V) == 0) 238 return -1; 239 if (pte & PG_NX) 240 *prot &= ~NVMM_PROT_EXEC; 241 if (pte & PG_PS) 242 return -1; 243 244 /* Parse L2. */ 245 L2gpa = (pte & PG_FRAME); 246 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1) 247 return -1; 248 pdir = (pte_32bit_pae_t *)L2hva; 249 pte = pdir[pte32_pae_l2idx(gva)]; 250 if ((pte & PG_V) == 0) 251 return -1; 252 if ((pte & PG_u) == 0) 253 *prot &= ~NVMM_PROT_USER; 254 if ((pte & PG_KW) == 0) 255 *prot &= ~NVMM_PROT_WRITE; 256 if (pte & PG_NX) 257 *prot &= ~NVMM_PROT_EXEC; 258 if (pte & PG_PS) { 259 *gpa = (pte & PTE32_PAE_L2_FRAME); 260 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK); 261 return 0; 262 } 263 264 /* Parse L1. */ 265 L1gpa = (pte & PG_FRAME); 266 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1) 267 return -1; 268 pdir = (pte_32bit_pae_t *)L1hva; 269 pte = pdir[pte32_pae_l1idx(gva)]; 270 if ((pte & PG_V) == 0) 271 return -1; 272 if ((pte & PG_u) == 0) 273 *prot &= ~NVMM_PROT_USER; 274 if ((pte & PG_KW) == 0) 275 *prot &= ~NVMM_PROT_WRITE; 276 if (pte & PG_NX) 277 *prot &= ~NVMM_PROT_EXEC; 278 if (pte & PG_PS) 279 return -1; 280 281 *gpa = (pte & PG_FRAME); 282 return 0; 283 } 284 285 /* -------------------------------------------------------------------------- */ 286 287 #define PTE64_L1_SHIFT 12 288 #define PTE64_L2_SHIFT 21 289 #define PTE64_L3_SHIFT 30 290 #define PTE64_L4_SHIFT 39 291 292 #define PTE64_L4_MASK 0x0000ff8000000000 293 #define PTE64_L3_MASK 0x0000007fc0000000 294 #define PTE64_L2_MASK 0x000000003fe00000 295 #define PTE64_L1_MASK 0x00000000001ff000 296 297 #define PTE64_L4_FRAME PTE64_L4_MASK 298 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK) 299 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK) 300 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK) 301 302 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT) 303 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT) 304 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT) 305 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT) 306 307 #define CR3_FRAME_64BIT PG_FRAME 308 309 typedef uint64_t pte_64bit_t; 310 311 static inline bool 312 x86_gva_64bit_canonical(gvaddr_t gva) 313 { 314 /* Bits 63:47 must have the same value. */ 315 #define SIGN_EXTEND 0xffff800000000000ULL 316 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND; 317 } 318 319 static int 320 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3, 321 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot) 322 { 323 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa; 324 uintptr_t L4hva, L3hva, L2hva, L1hva; 325 pte_64bit_t *pdir, pte; 326 nvmm_prot_t pageprot; 327 328 /* We begin with an RWXU access. */ 329 *prot = NVMM_PROT_ALL; 330 331 if (!x86_gva_64bit_canonical(gva)) 332 return -1; 333 334 /* Parse L4. */ 335 L4gpa = (cr3 & CR3_FRAME_64BIT); 336 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1) 337 return -1; 338 pdir = (pte_64bit_t *)L4hva; 339 pte = pdir[pte64_l4idx(gva)]; 340 if ((pte & PG_V) == 0) 341 return -1; 342 if ((pte & PG_u) == 0) 343 *prot &= ~NVMM_PROT_USER; 344 if ((pte & PG_KW) == 0) 345 *prot &= ~NVMM_PROT_WRITE; 346 if (pte & PG_NX) 347 *prot &= ~NVMM_PROT_EXEC; 348 if (pte & PG_PS) 349 return -1; 350 351 /* Parse L3. */ 352 L3gpa = (pte & PG_FRAME); 353 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1) 354 return -1; 355 pdir = (pte_64bit_t *)L3hva; 356 pte = pdir[pte64_l3idx(gva)]; 357 if ((pte & PG_V) == 0) 358 return -1; 359 if ((pte & PG_u) == 0) 360 *prot &= ~NVMM_PROT_USER; 361 if ((pte & PG_KW) == 0) 362 *prot &= ~NVMM_PROT_WRITE; 363 if (pte & PG_NX) 364 *prot &= ~NVMM_PROT_EXEC; 365 if (pte & PG_PS) { 366 *gpa = (pte & PTE64_L3_FRAME); 367 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK)); 368 return 0; 369 } 370 371 /* Parse L2. */ 372 L2gpa = (pte & PG_FRAME); 373 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1) 374 return -1; 375 pdir = (pte_64bit_t *)L2hva; 376 pte = pdir[pte64_l2idx(gva)]; 377 if ((pte & PG_V) == 0) 378 return -1; 379 if ((pte & PG_u) == 0) 380 *prot &= ~NVMM_PROT_USER; 381 if ((pte & PG_KW) == 0) 382 *prot &= ~NVMM_PROT_WRITE; 383 if (pte & PG_NX) 384 *prot &= ~NVMM_PROT_EXEC; 385 if (pte & PG_PS) { 386 *gpa = (pte & PTE64_L2_FRAME); 387 *gpa = *gpa + (gva & PTE64_L1_MASK); 388 return 0; 389 } 390 391 /* Parse L1. */ 392 L1gpa = (pte & PG_FRAME); 393 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1) 394 return -1; 395 pdir = (pte_64bit_t *)L1hva; 396 pte = pdir[pte64_l1idx(gva)]; 397 if ((pte & PG_V) == 0) 398 return -1; 399 if ((pte & PG_u) == 0) 400 *prot &= ~NVMM_PROT_USER; 401 if ((pte & PG_KW) == 0) 402 *prot &= ~NVMM_PROT_WRITE; 403 if (pte & PG_NX) 404 *prot &= ~NVMM_PROT_EXEC; 405 if (pte & PG_PS) 406 return -1; 407 408 *gpa = (pte & PG_FRAME); 409 return 0; 410 } 411 412 static inline int 413 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state, 414 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot) 415 { 416 bool is_pae, is_lng, has_pse; 417 uint64_t cr3; 418 size_t off; 419 int ret; 420 421 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) { 422 /* No paging. */ 423 *prot = NVMM_PROT_ALL; 424 *gpa = gva; 425 return 0; 426 } 427 428 off = (gva & PAGE_MASK); 429 gva &= ~PAGE_MASK; 430 431 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0; 432 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0; 433 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0; 434 cr3 = state->crs[NVMM_X64_CR_CR3]; 435 436 if (is_pae && is_lng) { 437 /* 64bit */ 438 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot); 439 } else if (is_pae && !is_lng) { 440 /* 32bit PAE */ 441 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot); 442 } else if (!is_pae && !is_lng) { 443 /* 32bit */ 444 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot); 445 } else { 446 ret = -1; 447 } 448 449 if (ret == -1) { 450 errno = EFAULT; 451 } 452 453 *gpa = *gpa + off; 454 455 return ret; 456 } 457 458 int 459 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid, 460 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot) 461 { 462 struct nvmm_x64_state state; 463 int ret; 464 465 ret = nvmm_vcpu_getstate(mach, cpuid, &state, 466 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 467 if (ret == -1) 468 return -1; 469 470 return x86_gva_to_gpa(mach, &state, gva, gpa, prot); 471 } 472 473 /* -------------------------------------------------------------------------- */ 474 475 static inline bool 476 is_long_mode(struct nvmm_x64_state *state) 477 { 478 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0; 479 } 480 481 static inline bool 482 is_64bit(struct nvmm_x64_state *state) 483 { 484 return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0); 485 } 486 487 static inline bool 488 is_32bit(struct nvmm_x64_state *state) 489 { 490 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) && 491 (state->segs[NVMM_X64_SEG_CS].attrib.def == 1); 492 } 493 494 static inline bool 495 is_16bit(struct nvmm_x64_state *state) 496 { 497 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) && 498 (state->segs[NVMM_X64_SEG_CS].attrib.def == 0); 499 } 500 501 static int 502 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size) 503 { 504 uint64_t limit; 505 506 /* 507 * This is incomplete. We should check topdown, etc, really that's 508 * tiring. 509 */ 510 if (__predict_false(!seg->attrib.p)) { 511 goto error; 512 } 513 514 limit = (uint64_t)seg->limit + 1; 515 if (__predict_true(seg->attrib.g)) { 516 limit *= PAGE_SIZE; 517 } 518 519 if (__predict_false(gva + size > limit)) { 520 goto error; 521 } 522 523 return 0; 524 525 error: 526 errno = EFAULT; 527 return -1; 528 } 529 530 static inline void 531 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva) 532 { 533 *gva += seg->base; 534 } 535 536 static inline uint64_t 537 size_to_mask(size_t size) 538 { 539 switch (size) { 540 case 1: 541 return 0x00000000000000FF; 542 case 2: 543 return 0x000000000000FFFF; 544 case 4: 545 return 0x00000000FFFFFFFF; 546 case 8: 547 default: 548 return 0xFFFFFFFFFFFFFFFF; 549 } 550 } 551 552 static uint64_t 553 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize) 554 { 555 uint64_t mask, cnt; 556 557 mask = size_to_mask(adsize); 558 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask; 559 560 return cnt; 561 } 562 563 static void 564 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt) 565 { 566 uint64_t mask; 567 568 /* XXX: should we zero-extend? */ 569 mask = size_to_mask(adsize); 570 state->gprs[NVMM_X64_GPR_RCX] &= ~mask; 571 state->gprs[NVMM_X64_GPR_RCX] |= cnt; 572 } 573 574 static int 575 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state, 576 gvaddr_t gva, uint8_t *data, size_t size) 577 { 578 struct nvmm_mem mem; 579 nvmm_prot_t prot; 580 gpaddr_t gpa; 581 uintptr_t hva; 582 bool is_mmio; 583 int ret, remain; 584 585 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot); 586 if (__predict_false(ret == -1)) { 587 return -1; 588 } 589 if (__predict_false(!(prot & NVMM_PROT_READ))) { 590 errno = EFAULT; 591 return -1; 592 } 593 594 if ((gva & PAGE_MASK) + size > PAGE_SIZE) { 595 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE); 596 } else { 597 remain = 0; 598 } 599 size -= remain; 600 601 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot); 602 is_mmio = (ret == -1); 603 604 if (is_mmio) { 605 mem.data = data; 606 mem.gpa = gpa; 607 mem.write = false; 608 mem.size = size; 609 (*__callbacks.mem)(&mem); 610 } else { 611 if (__predict_false(!(prot & NVMM_PROT_READ))) { 612 errno = EFAULT; 613 return -1; 614 } 615 memcpy(data, (uint8_t *)hva, size); 616 } 617 618 if (remain > 0) { 619 ret = read_guest_memory(mach, state, gva + size, 620 data + size, remain); 621 } else { 622 ret = 0; 623 } 624 625 return ret; 626 } 627 628 static int 629 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state, 630 gvaddr_t gva, uint8_t *data, size_t size) 631 { 632 struct nvmm_mem mem; 633 nvmm_prot_t prot; 634 gpaddr_t gpa; 635 uintptr_t hva; 636 bool is_mmio; 637 int ret, remain; 638 639 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot); 640 if (__predict_false(ret == -1)) { 641 return -1; 642 } 643 if (__predict_false(!(prot & NVMM_PROT_WRITE))) { 644 errno = EFAULT; 645 return -1; 646 } 647 648 if ((gva & PAGE_MASK) + size > PAGE_SIZE) { 649 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE); 650 } else { 651 remain = 0; 652 } 653 size -= remain; 654 655 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot); 656 is_mmio = (ret == -1); 657 658 if (is_mmio) { 659 mem.data = data; 660 mem.gpa = gpa; 661 mem.write = true; 662 mem.size = size; 663 (*__callbacks.mem)(&mem); 664 } else { 665 if (__predict_false(!(prot & NVMM_PROT_WRITE))) { 666 errno = EFAULT; 667 return -1; 668 } 669 memcpy((uint8_t *)hva, data, size); 670 } 671 672 if (remain > 0) { 673 ret = write_guest_memory(mach, state, gva + size, 674 data + size, remain); 675 } else { 676 ret = 0; 677 } 678 679 return ret; 680 } 681 682 /* -------------------------------------------------------------------------- */ 683 684 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *); 685 686 #define NVMM_IO_BATCH_SIZE 32 687 688 static int 689 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state, 690 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt) 691 { 692 uint8_t iobuf[NVMM_IO_BATCH_SIZE]; 693 size_t i, iosize, iocnt; 694 int ret; 695 696 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE); 697 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE); 698 iocnt = iosize / io->size; 699 700 io->data = iobuf; 701 702 if (!io->in) { 703 ret = read_guest_memory(mach, state, gva, iobuf, iosize); 704 if (ret == -1) 705 return -1; 706 } 707 708 for (i = 0; i < iocnt; i++) { 709 (*__callbacks.io)(io); 710 io->data += io->size; 711 } 712 713 if (io->in) { 714 ret = write_guest_memory(mach, state, gva, iobuf, iosize); 715 if (ret == -1) 716 return -1; 717 } 718 719 return iocnt; 720 } 721 722 int 723 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid, 724 struct nvmm_exit *exit) 725 { 726 struct nvmm_x64_state state; 727 struct nvmm_io io; 728 uint64_t cnt = 0; /* GCC */ 729 uint8_t iobuf[8]; 730 int iocnt = 1; 731 gvaddr_t gva = 0; /* GCC */ 732 int reg = 0; /* GCC */ 733 int ret, seg; 734 bool psld = false; 735 736 if (__predict_false(exit->reason != NVMM_EXIT_IO)) { 737 errno = EINVAL; 738 return -1; 739 } 740 741 io.port = exit->u.io.port; 742 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN); 743 io.size = exit->u.io.operand_size; 744 io.data = iobuf; 745 746 ret = nvmm_vcpu_getstate(mach, cpuid, &state, 747 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 748 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 749 if (ret == -1) 750 return -1; 751 752 if (exit->u.io.rep) { 753 cnt = rep_get_cnt(&state, exit->u.io.address_size); 754 if (__predict_false(cnt == 0)) { 755 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc; 756 goto out; 757 } 758 } 759 760 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) { 761 psld = true; 762 } 763 764 /* 765 * Determine GVA. 766 */ 767 if (exit->u.io.str) { 768 if (io.in) { 769 reg = NVMM_X64_GPR_RDI; 770 } else { 771 reg = NVMM_X64_GPR_RSI; 772 } 773 774 gva = state.gprs[reg]; 775 gva &= size_to_mask(exit->u.io.address_size); 776 777 if (exit->u.io.seg != -1) { 778 seg = exit->u.io.seg; 779 } else { 780 if (io.in) { 781 seg = NVMM_X64_SEG_ES; 782 } else { 783 seg = fetch_segment(mach, &state); 784 if (seg == -1) 785 return -1; 786 } 787 } 788 789 if (__predict_true(is_long_mode(&state))) { 790 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) { 791 segment_apply(&state.segs[seg], &gva); 792 } 793 } else { 794 ret = segment_check(&state.segs[seg], gva, io.size); 795 if (ret == -1) 796 return -1; 797 segment_apply(&state.segs[seg], &gva); 798 } 799 800 if (exit->u.io.rep && !psld) { 801 iocnt = assist_io_batch(mach, &state, &io, gva, cnt); 802 if (iocnt == -1) 803 return -1; 804 goto done; 805 } 806 } 807 808 if (!io.in) { 809 if (!exit->u.io.str) { 810 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size); 811 } else { 812 ret = read_guest_memory(mach, &state, gva, io.data, 813 io.size); 814 if (ret == -1) 815 return -1; 816 } 817 } 818 819 (*__callbacks.io)(&io); 820 821 if (io.in) { 822 if (!exit->u.io.str) { 823 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size); 824 if (io.size == 4) { 825 /* Zero-extend to 64 bits. */ 826 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4); 827 } 828 } else { 829 ret = write_guest_memory(mach, &state, gva, io.data, 830 io.size); 831 if (ret == -1) 832 return -1; 833 } 834 } 835 836 done: 837 if (exit->u.io.str) { 838 if (__predict_false(psld)) { 839 state.gprs[reg] -= iocnt * io.size; 840 } else { 841 state.gprs[reg] += iocnt * io.size; 842 } 843 } 844 845 if (exit->u.io.rep) { 846 cnt -= iocnt; 847 rep_set_cnt(&state, exit->u.io.address_size, cnt); 848 if (cnt == 0) { 849 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc; 850 } 851 } else { 852 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc; 853 } 854 855 out: 856 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS); 857 if (ret == -1) 858 return -1; 859 860 return 0; 861 } 862 863 /* -------------------------------------------------------------------------- */ 864 865 struct x86_emul { 866 bool read; 867 bool notouch; 868 void (*func)(struct nvmm_mem *, uint64_t *); 869 }; 870 871 static void x86_func_or(struct nvmm_mem *, uint64_t *); 872 static void x86_func_and(struct nvmm_mem *, uint64_t *); 873 static void x86_func_sub(struct nvmm_mem *, uint64_t *); 874 static void x86_func_xor(struct nvmm_mem *, uint64_t *); 875 static void x86_func_cmp(struct nvmm_mem *, uint64_t *); 876 static void x86_func_test(struct nvmm_mem *, uint64_t *); 877 static void x86_func_mov(struct nvmm_mem *, uint64_t *); 878 static void x86_func_stos(struct nvmm_mem *, uint64_t *); 879 static void x86_func_lods(struct nvmm_mem *, uint64_t *); 880 static void x86_func_movs(struct nvmm_mem *, uint64_t *); 881 882 static const struct x86_emul x86_emul_or = { 883 .read = true, 884 .func = x86_func_or 885 }; 886 887 static const struct x86_emul x86_emul_and = { 888 .read = true, 889 .func = x86_func_and 890 }; 891 892 static const struct x86_emul x86_emul_sub = { 893 .read = true, 894 .func = x86_func_sub 895 }; 896 897 static const struct x86_emul x86_emul_xor = { 898 .read = true, 899 .func = x86_func_xor 900 }; 901 902 static const struct x86_emul x86_emul_cmp = { 903 .notouch = true, 904 .func = x86_func_cmp 905 }; 906 907 static const struct x86_emul x86_emul_test = { 908 .notouch = true, 909 .func = x86_func_test 910 }; 911 912 static const struct x86_emul x86_emul_mov = { 913 .func = x86_func_mov 914 }; 915 916 static const struct x86_emul x86_emul_stos = { 917 .func = x86_func_stos 918 }; 919 920 static const struct x86_emul x86_emul_lods = { 921 .func = x86_func_lods 922 }; 923 924 static const struct x86_emul x86_emul_movs = { 925 .func = x86_func_movs 926 }; 927 928 /* Legacy prefixes. */ 929 #define LEG_LOCK 0xF0 930 #define LEG_REPN 0xF2 931 #define LEG_REP 0xF3 932 #define LEG_OVR_CS 0x2E 933 #define LEG_OVR_SS 0x36 934 #define LEG_OVR_DS 0x3E 935 #define LEG_OVR_ES 0x26 936 #define LEG_OVR_FS 0x64 937 #define LEG_OVR_GS 0x65 938 #define LEG_OPR_OVR 0x66 939 #define LEG_ADR_OVR 0x67 940 941 struct x86_legpref { 942 bool opr_ovr:1; 943 bool adr_ovr:1; 944 bool rep:1; 945 bool repn:1; 946 int8_t seg; 947 }; 948 949 struct x86_rexpref { 950 bool b:1; 951 bool x:1; 952 bool r:1; 953 bool w:1; 954 bool present:1; 955 }; 956 957 struct x86_reg { 958 int num; /* NVMM GPR state index */ 959 uint64_t mask; 960 }; 961 962 enum x86_disp_type { 963 DISP_NONE, 964 DISP_0, 965 DISP_1, 966 DISP_4 967 }; 968 969 struct x86_disp { 970 enum x86_disp_type type; 971 uint64_t data; /* 4 bytes, but can be sign-extended */ 972 }; 973 974 enum REGMODRM__Mod { 975 MOD_DIS0, /* also, register indirect */ 976 MOD_DIS1, 977 MOD_DIS4, 978 MOD_REG 979 }; 980 981 enum REGMODRM__Reg { 982 REG_000, /* these fields are indexes to the register map */ 983 REG_001, 984 REG_010, 985 REG_011, 986 REG_100, 987 REG_101, 988 REG_110, 989 REG_111 990 }; 991 992 enum REGMODRM__Rm { 993 RM_000, /* reg */ 994 RM_001, /* reg */ 995 RM_010, /* reg */ 996 RM_011, /* reg */ 997 RM_RSP_SIB, /* reg or SIB, depending on the MOD */ 998 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */ 999 RM_110, 1000 RM_111 1001 }; 1002 1003 struct x86_regmodrm { 1004 uint8_t mod:2; 1005 uint8_t reg:3; 1006 uint8_t rm:3; 1007 }; 1008 1009 struct x86_immediate { 1010 uint64_t data; 1011 }; 1012 1013 struct x86_sib { 1014 uint8_t scale; 1015 const struct x86_reg *idx; 1016 const struct x86_reg *bas; 1017 }; 1018 1019 enum x86_store_type { 1020 STORE_NONE, 1021 STORE_REG, 1022 STORE_IMM, 1023 STORE_SIB, 1024 STORE_DMO 1025 }; 1026 1027 struct x86_store { 1028 enum x86_store_type type; 1029 union { 1030 const struct x86_reg *reg; 1031 struct x86_immediate imm; 1032 struct x86_sib sib; 1033 uint64_t dmo; 1034 } u; 1035 struct x86_disp disp; 1036 int hardseg; 1037 }; 1038 1039 struct x86_instr { 1040 uint8_t len; 1041 struct x86_legpref legpref; 1042 struct x86_rexpref rexpref; 1043 struct x86_regmodrm regmodrm; 1044 uint8_t operand_size; 1045 uint8_t address_size; 1046 uint64_t zeroextend_mask; 1047 1048 const struct x86_opcode *opcode; 1049 const struct x86_emul *emul; 1050 1051 struct x86_store src; 1052 struct x86_store dst; 1053 struct x86_store *strm; 1054 }; 1055 1056 struct x86_decode_fsm { 1057 /* vcpu */ 1058 bool is64bit; 1059 bool is32bit; 1060 bool is16bit; 1061 1062 /* fsm */ 1063 int (*fn)(struct x86_decode_fsm *, struct x86_instr *); 1064 uint8_t *buf; 1065 uint8_t *end; 1066 }; 1067 1068 struct x86_opcode { 1069 bool valid:1; 1070 bool regmodrm:1; 1071 bool regtorm:1; 1072 bool dmo:1; 1073 bool todmo:1; 1074 bool movs:1; 1075 bool stos:1; 1076 bool lods:1; 1077 bool szoverride:1; 1078 bool group1:1; 1079 bool group3:1; 1080 bool group11:1; 1081 bool immediate:1; 1082 uint8_t defsize; 1083 uint8_t flags; 1084 const struct x86_emul *emul; 1085 }; 1086 1087 struct x86_group_entry { 1088 const struct x86_emul *emul; 1089 }; 1090 1091 #define OPSIZE_BYTE 0x01 1092 #define OPSIZE_WORD 0x02 /* 2 bytes */ 1093 #define OPSIZE_DOUB 0x04 /* 4 bytes */ 1094 #define OPSIZE_QUAD 0x08 /* 8 bytes */ 1095 1096 #define FLAG_imm8 0x01 1097 #define FLAG_immz 0x02 1098 #define FLAG_ze 0x04 1099 1100 static const struct x86_group_entry group1[8] __cacheline_aligned = { 1101 [1] = { .emul = &x86_emul_or }, 1102 [4] = { .emul = &x86_emul_and }, 1103 [6] = { .emul = &x86_emul_xor }, 1104 [7] = { .emul = &x86_emul_cmp } 1105 }; 1106 1107 static const struct x86_group_entry group3[8] __cacheline_aligned = { 1108 [0] = { .emul = &x86_emul_test }, 1109 [1] = { .emul = &x86_emul_test } 1110 }; 1111 1112 static const struct x86_group_entry group11[8] __cacheline_aligned = { 1113 [0] = { .emul = &x86_emul_mov } 1114 }; 1115 1116 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = { 1117 /* 1118 * Group1 1119 */ 1120 [0x80] = { 1121 /* Eb, Ib */ 1122 .valid = true, 1123 .regmodrm = true, 1124 .regtorm = true, 1125 .szoverride = false, 1126 .defsize = OPSIZE_BYTE, 1127 .group1 = true, 1128 .immediate = true, 1129 .emul = NULL /* group1 */ 1130 }, 1131 [0x81] = { 1132 /* Ev, Iz */ 1133 .valid = true, 1134 .regmodrm = true, 1135 .regtorm = true, 1136 .szoverride = true, 1137 .defsize = -1, 1138 .group1 = true, 1139 .immediate = true, 1140 .flags = FLAG_immz, 1141 .emul = NULL /* group1 */ 1142 }, 1143 [0x83] = { 1144 /* Ev, Ib */ 1145 .valid = true, 1146 .regmodrm = true, 1147 .regtorm = true, 1148 .szoverride = true, 1149 .defsize = -1, 1150 .group1 = true, 1151 .immediate = true, 1152 .flags = FLAG_imm8, 1153 .emul = NULL /* group1 */ 1154 }, 1155 1156 /* 1157 * Group3 1158 */ 1159 [0xF6] = { 1160 /* Eb, Ib */ 1161 .valid = true, 1162 .regmodrm = true, 1163 .regtorm = true, 1164 .szoverride = false, 1165 .defsize = OPSIZE_BYTE, 1166 .group3 = true, 1167 .immediate = true, 1168 .emul = NULL /* group3 */ 1169 }, 1170 [0xF7] = { 1171 /* Ev, Iz */ 1172 .valid = true, 1173 .regmodrm = true, 1174 .regtorm = true, 1175 .szoverride = true, 1176 .defsize = -1, 1177 .group3 = true, 1178 .immediate = true, 1179 .flags = FLAG_immz, 1180 .emul = NULL /* group3 */ 1181 }, 1182 1183 /* 1184 * Group11 1185 */ 1186 [0xC6] = { 1187 /* Eb, Ib */ 1188 .valid = true, 1189 .regmodrm = true, 1190 .regtorm = true, 1191 .szoverride = false, 1192 .defsize = OPSIZE_BYTE, 1193 .group11 = true, 1194 .immediate = true, 1195 .emul = NULL /* group11 */ 1196 }, 1197 [0xC7] = { 1198 /* Ev, Iz */ 1199 .valid = true, 1200 .regmodrm = true, 1201 .regtorm = true, 1202 .szoverride = true, 1203 .defsize = -1, 1204 .group11 = true, 1205 .immediate = true, 1206 .flags = FLAG_immz, 1207 .emul = NULL /* group11 */ 1208 }, 1209 1210 /* 1211 * OR 1212 */ 1213 [0x08] = { 1214 /* Eb, Gb */ 1215 .valid = true, 1216 .regmodrm = true, 1217 .regtorm = true, 1218 .szoverride = false, 1219 .defsize = OPSIZE_BYTE, 1220 .emul = &x86_emul_or 1221 }, 1222 [0x09] = { 1223 /* Ev, Gv */ 1224 .valid = true, 1225 .regmodrm = true, 1226 .regtorm = true, 1227 .szoverride = true, 1228 .defsize = -1, 1229 .emul = &x86_emul_or 1230 }, 1231 [0x0A] = { 1232 /* Gb, Eb */ 1233 .valid = true, 1234 .regmodrm = true, 1235 .regtorm = false, 1236 .szoverride = false, 1237 .defsize = OPSIZE_BYTE, 1238 .emul = &x86_emul_or 1239 }, 1240 [0x0B] = { 1241 /* Gv, Ev */ 1242 .valid = true, 1243 .regmodrm = true, 1244 .regtorm = false, 1245 .szoverride = true, 1246 .defsize = -1, 1247 .emul = &x86_emul_or 1248 }, 1249 1250 /* 1251 * AND 1252 */ 1253 [0x20] = { 1254 /* Eb, Gb */ 1255 .valid = true, 1256 .regmodrm = true, 1257 .regtorm = true, 1258 .szoverride = false, 1259 .defsize = OPSIZE_BYTE, 1260 .emul = &x86_emul_and 1261 }, 1262 [0x21] = { 1263 /* Ev, Gv */ 1264 .valid = true, 1265 .regmodrm = true, 1266 .regtorm = true, 1267 .szoverride = true, 1268 .defsize = -1, 1269 .emul = &x86_emul_and 1270 }, 1271 [0x22] = { 1272 /* Gb, Eb */ 1273 .valid = true, 1274 .regmodrm = true, 1275 .regtorm = false, 1276 .szoverride = false, 1277 .defsize = OPSIZE_BYTE, 1278 .emul = &x86_emul_and 1279 }, 1280 [0x23] = { 1281 /* Gv, Ev */ 1282 .valid = true, 1283 .regmodrm = true, 1284 .regtorm = false, 1285 .szoverride = true, 1286 .defsize = -1, 1287 .emul = &x86_emul_and 1288 }, 1289 1290 /* 1291 * SUB 1292 */ 1293 [0x28] = { 1294 /* Eb, Gb */ 1295 .valid = true, 1296 .regmodrm = true, 1297 .regtorm = true, 1298 .szoverride = false, 1299 .defsize = OPSIZE_BYTE, 1300 .emul = &x86_emul_sub 1301 }, 1302 [0x29] = { 1303 /* Ev, Gv */ 1304 .valid = true, 1305 .regmodrm = true, 1306 .regtorm = true, 1307 .szoverride = true, 1308 .defsize = -1, 1309 .emul = &x86_emul_sub 1310 }, 1311 [0x2A] = { 1312 /* Gb, Eb */ 1313 .valid = true, 1314 .regmodrm = true, 1315 .regtorm = false, 1316 .szoverride = false, 1317 .defsize = OPSIZE_BYTE, 1318 .emul = &x86_emul_sub 1319 }, 1320 [0x2B] = { 1321 /* Gv, Ev */ 1322 .valid = true, 1323 .regmodrm = true, 1324 .regtorm = false, 1325 .szoverride = true, 1326 .defsize = -1, 1327 .emul = &x86_emul_sub 1328 }, 1329 1330 /* 1331 * XOR 1332 */ 1333 [0x30] = { 1334 /* Eb, Gb */ 1335 .valid = true, 1336 .regmodrm = true, 1337 .regtorm = true, 1338 .szoverride = false, 1339 .defsize = OPSIZE_BYTE, 1340 .emul = &x86_emul_xor 1341 }, 1342 [0x31] = { 1343 /* Ev, Gv */ 1344 .valid = true, 1345 .regmodrm = true, 1346 .regtorm = true, 1347 .szoverride = true, 1348 .defsize = -1, 1349 .emul = &x86_emul_xor 1350 }, 1351 [0x32] = { 1352 /* Gb, Eb */ 1353 .valid = true, 1354 .regmodrm = true, 1355 .regtorm = false, 1356 .szoverride = false, 1357 .defsize = OPSIZE_BYTE, 1358 .emul = &x86_emul_xor 1359 }, 1360 [0x33] = { 1361 /* Gv, Ev */ 1362 .valid = true, 1363 .regmodrm = true, 1364 .regtorm = false, 1365 .szoverride = true, 1366 .defsize = -1, 1367 .emul = &x86_emul_xor 1368 }, 1369 1370 /* 1371 * MOV 1372 */ 1373 [0x88] = { 1374 /* Eb, Gb */ 1375 .valid = true, 1376 .regmodrm = true, 1377 .regtorm = true, 1378 .szoverride = false, 1379 .defsize = OPSIZE_BYTE, 1380 .emul = &x86_emul_mov 1381 }, 1382 [0x89] = { 1383 /* Ev, Gv */ 1384 .valid = true, 1385 .regmodrm = true, 1386 .regtorm = true, 1387 .szoverride = true, 1388 .defsize = -1, 1389 .emul = &x86_emul_mov 1390 }, 1391 [0x8A] = { 1392 /* Gb, Eb */ 1393 .valid = true, 1394 .regmodrm = true, 1395 .regtorm = false, 1396 .szoverride = false, 1397 .defsize = OPSIZE_BYTE, 1398 .emul = &x86_emul_mov 1399 }, 1400 [0x8B] = { 1401 /* Gv, Ev */ 1402 .valid = true, 1403 .regmodrm = true, 1404 .regtorm = false, 1405 .szoverride = true, 1406 .defsize = -1, 1407 .emul = &x86_emul_mov 1408 }, 1409 [0xA0] = { 1410 /* AL, Ob */ 1411 .valid = true, 1412 .dmo = true, 1413 .todmo = false, 1414 .szoverride = false, 1415 .defsize = OPSIZE_BYTE, 1416 .emul = &x86_emul_mov 1417 }, 1418 [0xA1] = { 1419 /* rAX, Ov */ 1420 .valid = true, 1421 .dmo = true, 1422 .todmo = false, 1423 .szoverride = true, 1424 .defsize = -1, 1425 .emul = &x86_emul_mov 1426 }, 1427 [0xA2] = { 1428 /* Ob, AL */ 1429 .valid = true, 1430 .dmo = true, 1431 .todmo = true, 1432 .szoverride = false, 1433 .defsize = OPSIZE_BYTE, 1434 .emul = &x86_emul_mov 1435 }, 1436 [0xA3] = { 1437 /* Ov, rAX */ 1438 .valid = true, 1439 .dmo = true, 1440 .todmo = true, 1441 .szoverride = true, 1442 .defsize = -1, 1443 .emul = &x86_emul_mov 1444 }, 1445 1446 /* 1447 * MOVS 1448 */ 1449 [0xA4] = { 1450 /* Yb, Xb */ 1451 .valid = true, 1452 .movs = true, 1453 .szoverride = false, 1454 .defsize = OPSIZE_BYTE, 1455 .emul = &x86_emul_movs 1456 }, 1457 [0xA5] = { 1458 /* Yv, Xv */ 1459 .valid = true, 1460 .movs = true, 1461 .szoverride = true, 1462 .defsize = -1, 1463 .emul = &x86_emul_movs 1464 }, 1465 1466 /* 1467 * STOS 1468 */ 1469 [0xAA] = { 1470 /* Yb, AL */ 1471 .valid = true, 1472 .stos = true, 1473 .szoverride = false, 1474 .defsize = OPSIZE_BYTE, 1475 .emul = &x86_emul_stos 1476 }, 1477 [0xAB] = { 1478 /* Yv, rAX */ 1479 .valid = true, 1480 .stos = true, 1481 .szoverride = true, 1482 .defsize = -1, 1483 .emul = &x86_emul_stos 1484 }, 1485 1486 /* 1487 * LODS 1488 */ 1489 [0xAC] = { 1490 /* AL, Xb */ 1491 .valid = true, 1492 .lods = true, 1493 .szoverride = false, 1494 .defsize = OPSIZE_BYTE, 1495 .emul = &x86_emul_lods 1496 }, 1497 [0xAD] = { 1498 /* rAX, Xv */ 1499 .valid = true, 1500 .lods = true, 1501 .szoverride = true, 1502 .defsize = -1, 1503 .emul = &x86_emul_lods 1504 }, 1505 }; 1506 1507 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = { 1508 /* 1509 * MOVZX 1510 */ 1511 [0xB6] = { 1512 /* Gv, Eb */ 1513 .valid = true, 1514 .regmodrm = true, 1515 .regtorm = false, 1516 .szoverride = true, 1517 .defsize = OPSIZE_BYTE, 1518 .flags = FLAG_ze, 1519 .emul = &x86_emul_mov 1520 }, 1521 [0xB7] = { 1522 /* Gv, Ew */ 1523 .valid = true, 1524 .regmodrm = true, 1525 .regtorm = false, 1526 .szoverride = true, 1527 .defsize = OPSIZE_WORD, 1528 .flags = FLAG_ze, 1529 .emul = &x86_emul_mov 1530 }, 1531 }; 1532 1533 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF }; 1534 1535 /* [REX-present][enc][opsize] */ 1536 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = { 1537 [false] = { 1538 /* No REX prefix. */ 1539 [0b00] = { 1540 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */ 1541 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */ 1542 [2] = { -1, 0 }, 1543 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */ 1544 [4] = { -1, 0 }, 1545 [5] = { -1, 0 }, 1546 [6] = { -1, 0 }, 1547 [7] = { -1, 0 }, 1548 }, 1549 [0b01] = { 1550 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */ 1551 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */ 1552 [2] = { -1, 0 }, 1553 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */ 1554 [4] = { -1, 0 }, 1555 [5] = { -1, 0 }, 1556 [6] = { -1, 0 }, 1557 [7] = { -1, 0 }, 1558 }, 1559 [0b10] = { 1560 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */ 1561 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */ 1562 [2] = { -1, 0 }, 1563 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */ 1564 [4] = { -1, 0 }, 1565 [5] = { -1, 0 }, 1566 [6] = { -1, 0 }, 1567 [7] = { -1, 0 }, 1568 }, 1569 [0b11] = { 1570 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */ 1571 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */ 1572 [2] = { -1, 0 }, 1573 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */ 1574 [4] = { -1, 0 }, 1575 [5] = { -1, 0 }, 1576 [6] = { -1, 0 }, 1577 [7] = { -1, 0 }, 1578 } 1579 }, 1580 [true] = { 1581 /* Has REX prefix. */ 1582 [0b00] = { 1583 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */ 1584 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */ 1585 [2] = { -1, 0 }, 1586 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */ 1587 [4] = { -1, 0 }, 1588 [5] = { -1, 0 }, 1589 [6] = { -1, 0 }, 1590 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */ 1591 }, 1592 [0b01] = { 1593 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */ 1594 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */ 1595 [2] = { -1, 0 }, 1596 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */ 1597 [4] = { -1, 0 }, 1598 [5] = { -1, 0 }, 1599 [6] = { -1, 0 }, 1600 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */ 1601 }, 1602 [0b10] = { 1603 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */ 1604 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */ 1605 [2] = { -1, 0 }, 1606 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */ 1607 [4] = { -1, 0 }, 1608 [5] = { -1, 0 }, 1609 [6] = { -1, 0 }, 1610 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */ 1611 }, 1612 [0b11] = { 1613 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */ 1614 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */ 1615 [2] = { -1, 0 }, 1616 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */ 1617 [4] = { -1, 0 }, 1618 [5] = { -1, 0 }, 1619 [6] = { -1, 0 }, 1620 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */ 1621 } 1622 } 1623 }; 1624 1625 /* [depends][enc][size] */ 1626 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = { 1627 [false] = { 1628 /* Not extended. */ 1629 [0b000] = { 1630 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */ 1631 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */ 1632 [2] = { -1, 0 }, 1633 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */ 1634 [4] = { -1, 0 }, 1635 [5] = { -1, 0 }, 1636 [6] = { -1, 0 }, 1637 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */ 1638 }, 1639 [0b001] = { 1640 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */ 1641 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */ 1642 [2] = { -1, 0 }, 1643 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */ 1644 [4] = { -1, 0 }, 1645 [5] = { -1, 0 }, 1646 [6] = { -1, 0 }, 1647 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */ 1648 }, 1649 [0b010] = { 1650 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */ 1651 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */ 1652 [2] = { -1, 0 }, 1653 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */ 1654 [4] = { -1, 0 }, 1655 [5] = { -1, 0 }, 1656 [6] = { -1, 0 }, 1657 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */ 1658 }, 1659 [0b011] = { 1660 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */ 1661 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */ 1662 [2] = { -1, 0 }, 1663 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */ 1664 [4] = { -1, 0 }, 1665 [5] = { -1, 0 }, 1666 [6] = { -1, 0 }, 1667 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */ 1668 }, 1669 [0b100] = { 1670 [0] = { -1, 0 }, /* SPECIAL */ 1671 [1] = { -1, 0 }, /* SPECIAL */ 1672 [2] = { -1, 0 }, 1673 [3] = { -1, 0 }, /* SPECIAL */ 1674 [4] = { -1, 0 }, 1675 [5] = { -1, 0 }, 1676 [6] = { -1, 0 }, 1677 [7] = { -1, 0 }, /* SPECIAL */ 1678 }, 1679 [0b101] = { 1680 [0] = { -1, 0 }, /* SPECIAL */ 1681 [1] = { -1, 0 }, /* SPECIAL */ 1682 [2] = { -1, 0 }, 1683 [3] = { -1, 0 }, /* SPECIAL */ 1684 [4] = { -1, 0 }, 1685 [5] = { -1, 0 }, 1686 [6] = { -1, 0 }, 1687 [7] = { -1, 0 }, /* SPECIAL */ 1688 }, 1689 [0b110] = { 1690 [0] = { -1, 0 }, /* SPECIAL */ 1691 [1] = { -1, 0 }, /* SPECIAL */ 1692 [2] = { -1, 0 }, 1693 [3] = { -1, 0 }, /* SPECIAL */ 1694 [4] = { -1, 0 }, 1695 [5] = { -1, 0 }, 1696 [6] = { -1, 0 }, 1697 [7] = { -1, 0 }, /* SPECIAL */ 1698 }, 1699 [0b111] = { 1700 [0] = { -1, 0 }, /* SPECIAL */ 1701 [1] = { -1, 0 }, /* SPECIAL */ 1702 [2] = { -1, 0 }, 1703 [3] = { -1, 0 }, /* SPECIAL */ 1704 [4] = { -1, 0 }, 1705 [5] = { -1, 0 }, 1706 [6] = { -1, 0 }, 1707 [7] = { -1, 0 }, /* SPECIAL */ 1708 }, 1709 }, 1710 [true] = { 1711 /* Extended. */ 1712 [0b000] = { 1713 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */ 1714 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */ 1715 [2] = { -1, 0 }, 1716 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */ 1717 [4] = { -1, 0 }, 1718 [5] = { -1, 0 }, 1719 [6] = { -1, 0 }, 1720 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */ 1721 }, 1722 [0b001] = { 1723 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */ 1724 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */ 1725 [2] = { -1, 0 }, 1726 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */ 1727 [4] = { -1, 0 }, 1728 [5] = { -1, 0 }, 1729 [6] = { -1, 0 }, 1730 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */ 1731 }, 1732 [0b010] = { 1733 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */ 1734 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */ 1735 [2] = { -1, 0 }, 1736 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */ 1737 [4] = { -1, 0 }, 1738 [5] = { -1, 0 }, 1739 [6] = { -1, 0 }, 1740 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */ 1741 }, 1742 [0b011] = { 1743 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */ 1744 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */ 1745 [2] = { -1, 0 }, 1746 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */ 1747 [4] = { -1, 0 }, 1748 [5] = { -1, 0 }, 1749 [6] = { -1, 0 }, 1750 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */ 1751 }, 1752 [0b100] = { 1753 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */ 1754 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */ 1755 [2] = { -1, 0 }, 1756 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */ 1757 [4] = { -1, 0 }, 1758 [5] = { -1, 0 }, 1759 [6] = { -1, 0 }, 1760 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */ 1761 }, 1762 [0b101] = { 1763 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */ 1764 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */ 1765 [2] = { -1, 0 }, 1766 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */ 1767 [4] = { -1, 0 }, 1768 [5] = { -1, 0 }, 1769 [6] = { -1, 0 }, 1770 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */ 1771 }, 1772 [0b110] = { 1773 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */ 1774 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */ 1775 [2] = { -1, 0 }, 1776 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */ 1777 [4] = { -1, 0 }, 1778 [5] = { -1, 0 }, 1779 [6] = { -1, 0 }, 1780 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */ 1781 }, 1782 [0b111] = { 1783 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */ 1784 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */ 1785 [2] = { -1, 0 }, 1786 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */ 1787 [4] = { -1, 0 }, 1788 [5] = { -1, 0 }, 1789 [6] = { -1, 0 }, 1790 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */ 1791 }, 1792 } 1793 }; 1794 1795 static int 1796 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1797 { 1798 fsm->fn = NULL; 1799 return -1; 1800 } 1801 1802 static int 1803 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n) 1804 { 1805 if (fsm->buf + n > fsm->end) { 1806 return -1; 1807 } 1808 memcpy(bytes, fsm->buf, n); 1809 return 0; 1810 } 1811 1812 static inline void 1813 fsm_advance(struct x86_decode_fsm *fsm, size_t n, 1814 int (*fn)(struct x86_decode_fsm *, struct x86_instr *)) 1815 { 1816 fsm->buf += n; 1817 if (fsm->buf > fsm->end) { 1818 fsm->fn = node_overflow; 1819 } else { 1820 fsm->fn = fn; 1821 } 1822 } 1823 1824 static const struct x86_reg * 1825 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize) 1826 { 1827 enc &= 0b11; 1828 if (regsize == 8) { 1829 /* May be 64bit without REX */ 1830 return &gpr_map__special[1][enc][regsize-1]; 1831 } 1832 return &gpr_map__special[instr->rexpref.present][enc][regsize-1]; 1833 } 1834 1835 /* 1836 * Special node, for MOVS. Fake two displacements of zero on the source and 1837 * destination registers. 1838 */ 1839 static int 1840 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1841 { 1842 size_t adrsize; 1843 1844 adrsize = instr->address_size; 1845 1846 /* DS:RSI */ 1847 instr->src.type = STORE_REG; 1848 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1]; 1849 instr->src.disp.type = DISP_0; 1850 1851 /* ES:RDI, force ES */ 1852 instr->dst.type = STORE_REG; 1853 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1]; 1854 instr->dst.disp.type = DISP_0; 1855 instr->dst.hardseg = NVMM_X64_SEG_ES; 1856 1857 fsm_advance(fsm, 0, NULL); 1858 1859 return 0; 1860 } 1861 1862 /* 1863 * Special node, for STOS and LODS. Fake a displacement of zero on the 1864 * destination register. 1865 */ 1866 static int 1867 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1868 { 1869 const struct x86_opcode *opcode = instr->opcode; 1870 struct x86_store *stlo, *streg; 1871 size_t adrsize, regsize; 1872 1873 adrsize = instr->address_size; 1874 regsize = instr->operand_size; 1875 1876 if (opcode->stos) { 1877 streg = &instr->src; 1878 stlo = &instr->dst; 1879 } else { 1880 streg = &instr->dst; 1881 stlo = &instr->src; 1882 } 1883 1884 streg->type = STORE_REG; 1885 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */ 1886 1887 stlo->type = STORE_REG; 1888 if (opcode->stos) { 1889 /* ES:RDI, force ES */ 1890 stlo->u.reg = &gpr_map__special[1][3][adrsize-1]; 1891 stlo->hardseg = NVMM_X64_SEG_ES; 1892 } else { 1893 /* DS:RSI */ 1894 stlo->u.reg = &gpr_map__special[1][2][adrsize-1]; 1895 } 1896 stlo->disp.type = DISP_0; 1897 1898 fsm_advance(fsm, 0, NULL); 1899 1900 return 0; 1901 } 1902 1903 static int 1904 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1905 { 1906 const struct x86_opcode *opcode = instr->opcode; 1907 struct x86_store *stdmo, *streg; 1908 size_t adrsize, regsize; 1909 1910 adrsize = instr->address_size; 1911 regsize = instr->operand_size; 1912 1913 if (opcode->todmo) { 1914 streg = &instr->src; 1915 stdmo = &instr->dst; 1916 } else { 1917 streg = &instr->dst; 1918 stdmo = &instr->src; 1919 } 1920 1921 streg->type = STORE_REG; 1922 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */ 1923 1924 stdmo->type = STORE_DMO; 1925 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) { 1926 return -1; 1927 } 1928 fsm_advance(fsm, adrsize, NULL); 1929 1930 return 0; 1931 } 1932 1933 static inline uint64_t 1934 sign_extend(uint64_t val, int size) 1935 { 1936 if (size == 1) { 1937 if (val & __BIT(7)) 1938 val |= 0xFFFFFFFFFFFFFF00; 1939 } else if (size == 2) { 1940 if (val & __BIT(15)) 1941 val |= 0xFFFFFFFFFFFF0000; 1942 } else if (size == 4) { 1943 if (val & __BIT(31)) 1944 val |= 0xFFFFFFFF00000000; 1945 } 1946 return val; 1947 } 1948 1949 static int 1950 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1951 { 1952 const struct x86_opcode *opcode = instr->opcode; 1953 struct x86_store *store; 1954 uint8_t immsize; 1955 size_t sesize = 0; 1956 1957 /* The immediate is the source */ 1958 store = &instr->src; 1959 immsize = instr->operand_size; 1960 1961 if (opcode->flags & FLAG_imm8) { 1962 sesize = immsize; 1963 immsize = 1; 1964 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) { 1965 sesize = immsize; 1966 immsize = 4; 1967 } 1968 1969 store->type = STORE_IMM; 1970 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) { 1971 return -1; 1972 } 1973 fsm_advance(fsm, immsize, NULL); 1974 1975 if (sesize != 0) { 1976 store->u.imm.data = sign_extend(store->u.imm.data, sesize); 1977 } 1978 1979 return 0; 1980 } 1981 1982 static int 1983 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1984 { 1985 const struct x86_opcode *opcode = instr->opcode; 1986 uint64_t data = 0; 1987 size_t n; 1988 1989 if (instr->strm->disp.type == DISP_1) { 1990 n = 1; 1991 } else { /* DISP4 */ 1992 n = 4; 1993 } 1994 1995 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) { 1996 return -1; 1997 } 1998 1999 if (__predict_true(fsm->is64bit)) { 2000 data = sign_extend(data, n); 2001 } 2002 2003 instr->strm->disp.data = data; 2004 2005 if (opcode->immediate) { 2006 fsm_advance(fsm, n, node_immediate); 2007 } else { 2008 fsm_advance(fsm, n, NULL); 2009 } 2010 2011 return 0; 2012 } 2013 2014 static const struct x86_reg * 2015 get_register_idx(struct x86_instr *instr, uint8_t index) 2016 { 2017 uint8_t enc = index; 2018 const struct x86_reg *reg; 2019 size_t regsize; 2020 2021 regsize = instr->address_size; 2022 reg = &gpr_map[instr->rexpref.x][enc][regsize-1]; 2023 2024 if (reg->num == -1) { 2025 reg = resolve_special_register(instr, enc, regsize); 2026 } 2027 2028 return reg; 2029 } 2030 2031 static const struct x86_reg * 2032 get_register_bas(struct x86_instr *instr, uint8_t base) 2033 { 2034 uint8_t enc = base; 2035 const struct x86_reg *reg; 2036 size_t regsize; 2037 2038 regsize = instr->address_size; 2039 reg = &gpr_map[instr->rexpref.b][enc][regsize-1]; 2040 if (reg->num == -1) { 2041 reg = resolve_special_register(instr, enc, regsize); 2042 } 2043 2044 return reg; 2045 } 2046 2047 static int 2048 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2049 { 2050 const struct x86_opcode *opcode; 2051 uint8_t scale, index, base; 2052 bool noindex, nobase; 2053 uint8_t byte; 2054 2055 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2056 return -1; 2057 } 2058 2059 scale = ((byte & 0b11000000) >> 6); 2060 index = ((byte & 0b00111000) >> 3); 2061 base = ((byte & 0b00000111) >> 0); 2062 2063 opcode = instr->opcode; 2064 2065 noindex = false; 2066 nobase = false; 2067 2068 if (index == 0b100 && !instr->rexpref.x) { 2069 /* Special case: the index is null */ 2070 noindex = true; 2071 } 2072 2073 if (instr->regmodrm.mod == 0b00 && base == 0b101) { 2074 /* Special case: the base is null + disp32 */ 2075 instr->strm->disp.type = DISP_4; 2076 nobase = true; 2077 } 2078 2079 instr->strm->type = STORE_SIB; 2080 instr->strm->u.sib.scale = (1 << scale); 2081 if (!noindex) 2082 instr->strm->u.sib.idx = get_register_idx(instr, index); 2083 if (!nobase) 2084 instr->strm->u.sib.bas = get_register_bas(instr, base); 2085 2086 /* May have a displacement, or an immediate */ 2087 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) { 2088 fsm_advance(fsm, 1, node_disp); 2089 } else if (opcode->immediate) { 2090 fsm_advance(fsm, 1, node_immediate); 2091 } else { 2092 fsm_advance(fsm, 1, NULL); 2093 } 2094 2095 return 0; 2096 } 2097 2098 static const struct x86_reg * 2099 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode) 2100 { 2101 uint8_t enc = instr->regmodrm.reg; 2102 const struct x86_reg *reg; 2103 size_t regsize; 2104 2105 regsize = instr->operand_size; 2106 2107 reg = &gpr_map[instr->rexpref.r][enc][regsize-1]; 2108 if (reg->num == -1) { 2109 reg = resolve_special_register(instr, enc, regsize); 2110 } 2111 2112 return reg; 2113 } 2114 2115 static const struct x86_reg * 2116 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode) 2117 { 2118 uint8_t enc = instr->regmodrm.rm; 2119 const struct x86_reg *reg; 2120 size_t regsize; 2121 2122 if (instr->strm->disp.type == DISP_NONE) { 2123 regsize = instr->operand_size; 2124 } else { 2125 /* Indirect access, the size is that of the address. */ 2126 regsize = instr->address_size; 2127 } 2128 2129 reg = &gpr_map[instr->rexpref.b][enc][regsize-1]; 2130 if (reg->num == -1) { 2131 reg = resolve_special_register(instr, enc, regsize); 2132 } 2133 2134 return reg; 2135 } 2136 2137 static inline bool 2138 has_sib(struct x86_instr *instr) 2139 { 2140 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4); 2141 } 2142 2143 static inline bool 2144 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2145 { 2146 return (fsm->is64bit && instr->strm->disp.type == DISP_0 && 2147 instr->regmodrm.rm == RM_RBP_DISP32); 2148 } 2149 2150 static inline bool 2151 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2152 { 2153 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 && 2154 instr->regmodrm.rm == RM_RBP_DISP32); 2155 } 2156 2157 static enum x86_disp_type 2158 get_disp_type(struct x86_instr *instr) 2159 { 2160 switch (instr->regmodrm.mod) { 2161 case MOD_DIS0: /* indirect */ 2162 return DISP_0; 2163 case MOD_DIS1: /* indirect+1 */ 2164 return DISP_1; 2165 case MOD_DIS4: /* indirect+4 */ 2166 return DISP_4; 2167 case MOD_REG: /* direct */ 2168 default: /* gcc */ 2169 return DISP_NONE; 2170 } 2171 } 2172 2173 static int 2174 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2175 { 2176 struct x86_store *strg, *strm; 2177 const struct x86_opcode *opcode; 2178 const struct x86_reg *reg; 2179 uint8_t byte; 2180 2181 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2182 return -1; 2183 } 2184 2185 opcode = instr->opcode; 2186 2187 instr->regmodrm.rm = ((byte & 0b00000111) >> 0); 2188 instr->regmodrm.reg = ((byte & 0b00111000) >> 3); 2189 instr->regmodrm.mod = ((byte & 0b11000000) >> 6); 2190 2191 if (opcode->regtorm) { 2192 strg = &instr->src; 2193 strm = &instr->dst; 2194 } else { /* RM to REG */ 2195 strm = &instr->src; 2196 strg = &instr->dst; 2197 } 2198 2199 /* Save for later use. */ 2200 instr->strm = strm; 2201 2202 /* 2203 * Special cases: Groups. The REG field of REGMODRM is the index in 2204 * the group. op1 gets overwritten in the Immediate node, if any. 2205 */ 2206 if (opcode->group1) { 2207 if (group1[instr->regmodrm.reg].emul == NULL) { 2208 return -1; 2209 } 2210 instr->emul = group1[instr->regmodrm.reg].emul; 2211 } else if (opcode->group3) { 2212 if (group3[instr->regmodrm.reg].emul == NULL) { 2213 return -1; 2214 } 2215 instr->emul = group3[instr->regmodrm.reg].emul; 2216 } else if (opcode->group11) { 2217 if (group11[instr->regmodrm.reg].emul == NULL) { 2218 return -1; 2219 } 2220 instr->emul = group11[instr->regmodrm.reg].emul; 2221 } 2222 2223 if (!opcode->immediate) { 2224 reg = get_register_reg(instr, opcode); 2225 if (reg == NULL) { 2226 return -1; 2227 } 2228 strg->type = STORE_REG; 2229 strg->u.reg = reg; 2230 } 2231 2232 /* The displacement applies to RM. */ 2233 strm->disp.type = get_disp_type(instr); 2234 2235 if (has_sib(instr)) { 2236 /* Overwrites RM */ 2237 fsm_advance(fsm, 1, node_sib); 2238 return 0; 2239 } 2240 2241 if (is_rip_relative(fsm, instr)) { 2242 /* Overwrites RM */ 2243 strm->type = STORE_REG; 2244 strm->u.reg = &gpr_map__rip; 2245 strm->disp.type = DISP_4; 2246 fsm_advance(fsm, 1, node_disp); 2247 return 0; 2248 } 2249 2250 if (is_disp32_only(fsm, instr)) { 2251 /* Overwrites RM */ 2252 strm->type = STORE_REG; 2253 strm->u.reg = NULL; 2254 strm->disp.type = DISP_4; 2255 fsm_advance(fsm, 1, node_disp); 2256 return 0; 2257 } 2258 2259 reg = get_register_rm(instr, opcode); 2260 if (reg == NULL) { 2261 return -1; 2262 } 2263 strm->type = STORE_REG; 2264 strm->u.reg = reg; 2265 2266 if (strm->disp.type == DISP_NONE) { 2267 /* Direct register addressing mode */ 2268 if (opcode->immediate) { 2269 fsm_advance(fsm, 1, node_immediate); 2270 } else { 2271 fsm_advance(fsm, 1, NULL); 2272 } 2273 } else if (strm->disp.type == DISP_0) { 2274 /* Indirect register addressing mode */ 2275 if (opcode->immediate) { 2276 fsm_advance(fsm, 1, node_immediate); 2277 } else { 2278 fsm_advance(fsm, 1, NULL); 2279 } 2280 } else { 2281 fsm_advance(fsm, 1, node_disp); 2282 } 2283 2284 return 0; 2285 } 2286 2287 static size_t 2288 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2289 { 2290 const struct x86_opcode *opcode = instr->opcode; 2291 int opsize; 2292 2293 /* Get the opsize */ 2294 if (!opcode->szoverride) { 2295 opsize = opcode->defsize; 2296 } else if (instr->rexpref.present && instr->rexpref.w) { 2297 opsize = 8; 2298 } else { 2299 if (!fsm->is16bit) { 2300 if (instr->legpref.opr_ovr) { 2301 opsize = 2; 2302 } else { 2303 opsize = 4; 2304 } 2305 } else { /* 16bit */ 2306 if (instr->legpref.opr_ovr) { 2307 opsize = 4; 2308 } else { 2309 opsize = 2; 2310 } 2311 } 2312 } 2313 2314 return opsize; 2315 } 2316 2317 static size_t 2318 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2319 { 2320 if (fsm->is64bit) { 2321 if (__predict_false(instr->legpref.adr_ovr)) { 2322 return 4; 2323 } 2324 return 8; 2325 } 2326 2327 if (fsm->is32bit) { 2328 if (__predict_false(instr->legpref.adr_ovr)) { 2329 return 2; 2330 } 2331 return 4; 2332 } 2333 2334 /* 16bit. */ 2335 if (__predict_false(instr->legpref.adr_ovr)) { 2336 return 4; 2337 } 2338 return 2; 2339 } 2340 2341 static int 2342 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2343 { 2344 const struct x86_opcode *opcode; 2345 uint8_t byte; 2346 2347 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2348 return -1; 2349 } 2350 2351 opcode = &primary_opcode_table[byte]; 2352 if (__predict_false(!opcode->valid)) { 2353 return -1; 2354 } 2355 2356 instr->opcode = opcode; 2357 instr->emul = opcode->emul; 2358 instr->operand_size = get_operand_size(fsm, instr); 2359 instr->address_size = get_address_size(fsm, instr); 2360 2361 if (fsm->is64bit && (instr->operand_size == 4)) { 2362 /* Zero-extend to 64 bits. */ 2363 instr->zeroextend_mask = ~size_to_mask(4); 2364 } 2365 2366 if (opcode->regmodrm) { 2367 fsm_advance(fsm, 1, node_regmodrm); 2368 } else if (opcode->dmo) { 2369 /* Direct-Memory Offsets */ 2370 fsm_advance(fsm, 1, node_dmo); 2371 } else if (opcode->stos || opcode->lods) { 2372 fsm_advance(fsm, 1, node_stlo); 2373 } else if (opcode->movs) { 2374 fsm_advance(fsm, 1, node_movs); 2375 } else { 2376 return -1; 2377 } 2378 2379 return 0; 2380 } 2381 2382 static int 2383 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2384 { 2385 const struct x86_opcode *opcode; 2386 uint8_t byte; 2387 2388 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2389 return -1; 2390 } 2391 2392 opcode = &secondary_opcode_table[byte]; 2393 if (__predict_false(!opcode->valid)) { 2394 return -1; 2395 } 2396 2397 instr->opcode = opcode; 2398 instr->emul = opcode->emul; 2399 instr->operand_size = get_operand_size(fsm, instr); 2400 instr->address_size = get_address_size(fsm, instr); 2401 2402 if (fsm->is64bit && (instr->operand_size == 4)) { 2403 /* Zero-extend to 64 bits. */ 2404 instr->zeroextend_mask = ~size_to_mask(4); 2405 } 2406 2407 if (opcode->flags & FLAG_ze) { 2408 /* 2409 * Compute the mask for zero-extend. Update the operand size, 2410 * we move fewer bytes. 2411 */ 2412 instr->zeroextend_mask |= size_to_mask(instr->operand_size); 2413 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize); 2414 instr->operand_size = opcode->defsize; 2415 } 2416 2417 if (opcode->regmodrm) { 2418 fsm_advance(fsm, 1, node_regmodrm); 2419 } else { 2420 return -1; 2421 } 2422 2423 return 0; 2424 } 2425 2426 static int 2427 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2428 { 2429 uint8_t byte; 2430 2431 #define ESCAPE 0x0F 2432 #define VEX_1 0xC5 2433 #define VEX_2 0xC4 2434 #define XOP 0x8F 2435 2436 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2437 return -1; 2438 } 2439 2440 /* 2441 * We don't take XOP. It is AMD-specific, and it was removed shortly 2442 * after being introduced. 2443 */ 2444 if (byte == ESCAPE) { 2445 fsm_advance(fsm, 1, node_secondary_opcode); 2446 } else if (!instr->rexpref.present) { 2447 if (byte == VEX_1) { 2448 return -1; 2449 } else if (byte == VEX_2) { 2450 return -1; 2451 } else { 2452 fsm->fn = node_primary_opcode; 2453 } 2454 } else { 2455 fsm->fn = node_primary_opcode; 2456 } 2457 2458 return 0; 2459 } 2460 2461 static int 2462 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2463 { 2464 struct x86_rexpref *rexpref = &instr->rexpref; 2465 uint8_t byte; 2466 size_t n = 0; 2467 2468 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2469 return -1; 2470 } 2471 2472 if (byte >= 0x40 && byte <= 0x4F) { 2473 if (__predict_false(!fsm->is64bit)) { 2474 return -1; 2475 } 2476 rexpref->b = ((byte & 0x1) != 0); 2477 rexpref->x = ((byte & 0x2) != 0); 2478 rexpref->r = ((byte & 0x4) != 0); 2479 rexpref->w = ((byte & 0x8) != 0); 2480 rexpref->present = true; 2481 n = 1; 2482 } 2483 2484 fsm_advance(fsm, n, node_main); 2485 return 0; 2486 } 2487 2488 static int 2489 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2490 { 2491 uint8_t byte; 2492 2493 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2494 return -1; 2495 } 2496 2497 if (byte == LEG_OPR_OVR) { 2498 instr->legpref.opr_ovr = 1; 2499 } else if (byte == LEG_OVR_DS) { 2500 instr->legpref.seg = NVMM_X64_SEG_DS; 2501 } else if (byte == LEG_OVR_ES) { 2502 instr->legpref.seg = NVMM_X64_SEG_ES; 2503 } else if (byte == LEG_REP) { 2504 instr->legpref.rep = 1; 2505 } else if (byte == LEG_OVR_GS) { 2506 instr->legpref.seg = NVMM_X64_SEG_GS; 2507 } else if (byte == LEG_OVR_FS) { 2508 instr->legpref.seg = NVMM_X64_SEG_FS; 2509 } else if (byte == LEG_ADR_OVR) { 2510 instr->legpref.adr_ovr = 1; 2511 } else if (byte == LEG_OVR_CS) { 2512 instr->legpref.seg = NVMM_X64_SEG_CS; 2513 } else if (byte == LEG_OVR_SS) { 2514 instr->legpref.seg = NVMM_X64_SEG_SS; 2515 } else if (byte == LEG_REPN) { 2516 instr->legpref.repn = 1; 2517 } else if (byte == LEG_LOCK) { 2518 /* ignore */ 2519 } else { 2520 /* not a legacy prefix */ 2521 fsm_advance(fsm, 0, node_rex_prefix); 2522 return 0; 2523 } 2524 2525 fsm_advance(fsm, 1, node_legacy_prefix); 2526 return 0; 2527 } 2528 2529 static int 2530 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr, 2531 struct nvmm_x64_state *state) 2532 { 2533 struct x86_decode_fsm fsm; 2534 int ret; 2535 2536 memset(instr, 0, sizeof(*instr)); 2537 instr->legpref.seg = -1; 2538 instr->src.hardseg = -1; 2539 instr->dst.hardseg = -1; 2540 2541 fsm.is64bit = is_64bit(state); 2542 fsm.is32bit = is_32bit(state); 2543 fsm.is16bit = is_16bit(state); 2544 2545 fsm.fn = node_legacy_prefix; 2546 fsm.buf = inst_bytes; 2547 fsm.end = inst_bytes + inst_len; 2548 2549 while (fsm.fn != NULL) { 2550 ret = (*fsm.fn)(&fsm, instr); 2551 if (ret == -1) 2552 return -1; 2553 } 2554 2555 instr->len = fsm.buf - inst_bytes; 2556 2557 return 0; 2558 } 2559 2560 /* -------------------------------------------------------------------------- */ 2561 2562 #define EXEC_INSTR(sz, instr) \ 2563 static uint##sz##_t \ 2564 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \ 2565 { \ 2566 uint##sz##_t res; \ 2567 __asm __volatile ( \ 2568 #instr " %2, %3;" \ 2569 "mov %3, %1;" \ 2570 "pushfq;" \ 2571 "popq %0" \ 2572 : "=r" (*rflags), "=r" (res) \ 2573 : "r" (op1), "r" (op2)); \ 2574 return res; \ 2575 } 2576 2577 #define EXEC_DISPATCHER(instr) \ 2578 static uint64_t \ 2579 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \ 2580 { \ 2581 switch (opsize) { \ 2582 case 1: \ 2583 return exec_##instr##8(op1, op2, rflags); \ 2584 case 2: \ 2585 return exec_##instr##16(op1, op2, rflags); \ 2586 case 4: \ 2587 return exec_##instr##32(op1, op2, rflags); \ 2588 default: \ 2589 return exec_##instr##64(op1, op2, rflags); \ 2590 } \ 2591 } 2592 2593 /* SUB: ret = op1 - op2 */ 2594 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF) 2595 EXEC_INSTR(8, sub) 2596 EXEC_INSTR(16, sub) 2597 EXEC_INSTR(32, sub) 2598 EXEC_INSTR(64, sub) 2599 EXEC_DISPATCHER(sub) 2600 2601 /* OR: ret = op1 | op2 */ 2602 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF) 2603 EXEC_INSTR(8, or) 2604 EXEC_INSTR(16, or) 2605 EXEC_INSTR(32, or) 2606 EXEC_INSTR(64, or) 2607 EXEC_DISPATCHER(or) 2608 2609 /* AND: ret = op1 & op2 */ 2610 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF) 2611 EXEC_INSTR(8, and) 2612 EXEC_INSTR(16, and) 2613 EXEC_INSTR(32, and) 2614 EXEC_INSTR(64, and) 2615 EXEC_DISPATCHER(and) 2616 2617 /* XOR: ret = op1 ^ op2 */ 2618 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF) 2619 EXEC_INSTR(8, xor) 2620 EXEC_INSTR(16, xor) 2621 EXEC_INSTR(32, xor) 2622 EXEC_INSTR(64, xor) 2623 EXEC_DISPATCHER(xor) 2624 2625 /* -------------------------------------------------------------------------- */ 2626 2627 /* 2628 * Emulation functions. We don't care about the order of the operands, except 2629 * for SUB, CMP and TEST. For these ones we look at mem->write todetermine who 2630 * is op1 and who is op2. 2631 */ 2632 2633 static void 2634 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs) 2635 { 2636 uint64_t *retval = (uint64_t *)mem->data; 2637 const bool write = mem->write; 2638 uint64_t *op1, op2, fl, ret; 2639 2640 op1 = (uint64_t *)mem->data; 2641 op2 = 0; 2642 2643 /* Fetch the value to be OR'ed (op2). */ 2644 mem->data = (uint8_t *)&op2; 2645 mem->write = false; 2646 (*__callbacks.mem)(mem); 2647 2648 /* Perform the OR. */ 2649 ret = exec_or(*op1, op2, &fl, mem->size); 2650 2651 if (write) { 2652 /* Write back the result. */ 2653 mem->data = (uint8_t *)&ret; 2654 mem->write = true; 2655 (*__callbacks.mem)(mem); 2656 } else { 2657 /* Return data to the caller. */ 2658 *retval = ret; 2659 } 2660 2661 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK; 2662 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK); 2663 } 2664 2665 static void 2666 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs) 2667 { 2668 uint64_t *retval = (uint64_t *)mem->data; 2669 const bool write = mem->write; 2670 uint64_t *op1, op2, fl, ret; 2671 2672 op1 = (uint64_t *)mem->data; 2673 op2 = 0; 2674 2675 /* Fetch the value to be AND'ed (op2). */ 2676 mem->data = (uint8_t *)&op2; 2677 mem->write = false; 2678 (*__callbacks.mem)(mem); 2679 2680 /* Perform the AND. */ 2681 ret = exec_and(*op1, op2, &fl, mem->size); 2682 2683 if (write) { 2684 /* Write back the result. */ 2685 mem->data = (uint8_t *)&ret; 2686 mem->write = true; 2687 (*__callbacks.mem)(mem); 2688 } else { 2689 /* Return data to the caller. */ 2690 *retval = ret; 2691 } 2692 2693 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK; 2694 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK); 2695 } 2696 2697 static void 2698 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs) 2699 { 2700 uint64_t *retval = (uint64_t *)mem->data; 2701 const bool write = mem->write; 2702 uint64_t *op1, *op2, fl, ret; 2703 uint64_t tmp; 2704 bool memop1; 2705 2706 memop1 = !mem->write; 2707 op1 = memop1 ? &tmp : (uint64_t *)mem->data; 2708 op2 = memop1 ? (uint64_t *)mem->data : &tmp; 2709 2710 /* Fetch the value to be SUB'ed (op1 or op2). */ 2711 mem->data = (uint8_t *)&tmp; 2712 mem->write = false; 2713 (*__callbacks.mem)(mem); 2714 2715 /* Perform the SUB. */ 2716 ret = exec_sub(*op1, *op2, &fl, mem->size); 2717 2718 if (write) { 2719 /* Write back the result. */ 2720 mem->data = (uint8_t *)&ret; 2721 mem->write = true; 2722 (*__callbacks.mem)(mem); 2723 } else { 2724 /* Return data to the caller. */ 2725 *retval = ret; 2726 } 2727 2728 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK; 2729 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK); 2730 } 2731 2732 static void 2733 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs) 2734 { 2735 uint64_t *retval = (uint64_t *)mem->data; 2736 const bool write = mem->write; 2737 uint64_t *op1, op2, fl, ret; 2738 2739 op1 = (uint64_t *)mem->data; 2740 op2 = 0; 2741 2742 /* Fetch the value to be XOR'ed (op2). */ 2743 mem->data = (uint8_t *)&op2; 2744 mem->write = false; 2745 (*__callbacks.mem)(mem); 2746 2747 /* Perform the XOR. */ 2748 ret = exec_xor(*op1, op2, &fl, mem->size); 2749 2750 if (write) { 2751 /* Write back the result. */ 2752 mem->data = (uint8_t *)&ret; 2753 mem->write = true; 2754 (*__callbacks.mem)(mem); 2755 } else { 2756 /* Return data to the caller. */ 2757 *retval = ret; 2758 } 2759 2760 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK; 2761 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK); 2762 } 2763 2764 static void 2765 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs) 2766 { 2767 uint64_t *op1, *op2, fl; 2768 uint64_t tmp; 2769 bool memop1; 2770 2771 memop1 = !mem->write; 2772 op1 = memop1 ? &tmp : (uint64_t *)mem->data; 2773 op2 = memop1 ? (uint64_t *)mem->data : &tmp; 2774 2775 /* Fetch the value to be CMP'ed (op1 or op2). */ 2776 mem->data = (uint8_t *)&tmp; 2777 mem->write = false; 2778 (*__callbacks.mem)(mem); 2779 2780 /* Perform the CMP. */ 2781 exec_sub(*op1, *op2, &fl, mem->size); 2782 2783 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK; 2784 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK); 2785 } 2786 2787 static void 2788 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs) 2789 { 2790 uint64_t *op1, *op2, fl; 2791 uint64_t tmp; 2792 bool memop1; 2793 2794 memop1 = !mem->write; 2795 op1 = memop1 ? &tmp : (uint64_t *)mem->data; 2796 op2 = memop1 ? (uint64_t *)mem->data : &tmp; 2797 2798 /* Fetch the value to be TEST'ed (op1 or op2). */ 2799 mem->data = (uint8_t *)&tmp; 2800 mem->write = false; 2801 (*__callbacks.mem)(mem); 2802 2803 /* Perform the TEST. */ 2804 exec_and(*op1, *op2, &fl, mem->size); 2805 2806 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK; 2807 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK); 2808 } 2809 2810 static void 2811 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs) 2812 { 2813 /* 2814 * Nothing special, just move without emulation. 2815 */ 2816 (*__callbacks.mem)(mem); 2817 } 2818 2819 static void 2820 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs) 2821 { 2822 /* 2823 * Just move, and update RDI. 2824 */ 2825 (*__callbacks.mem)(mem); 2826 2827 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { 2828 gprs[NVMM_X64_GPR_RDI] -= mem->size; 2829 } else { 2830 gprs[NVMM_X64_GPR_RDI] += mem->size; 2831 } 2832 } 2833 2834 static void 2835 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs) 2836 { 2837 /* 2838 * Just move, and update RSI. 2839 */ 2840 (*__callbacks.mem)(mem); 2841 2842 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { 2843 gprs[NVMM_X64_GPR_RSI] -= mem->size; 2844 } else { 2845 gprs[NVMM_X64_GPR_RSI] += mem->size; 2846 } 2847 } 2848 2849 static void 2850 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs) 2851 { 2852 /* 2853 * Special instruction: double memory operand. Don't call the cb, 2854 * because the storage has already been performed earlier. 2855 */ 2856 2857 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { 2858 gprs[NVMM_X64_GPR_RSI] -= mem->size; 2859 gprs[NVMM_X64_GPR_RDI] -= mem->size; 2860 } else { 2861 gprs[NVMM_X64_GPR_RSI] += mem->size; 2862 gprs[NVMM_X64_GPR_RDI] += mem->size; 2863 } 2864 } 2865 2866 /* -------------------------------------------------------------------------- */ 2867 2868 static inline uint64_t 2869 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr) 2870 { 2871 uint64_t val; 2872 2873 val = state->gprs[gpr]; 2874 val &= size_to_mask(instr->address_size); 2875 2876 return val; 2877 } 2878 2879 static int 2880 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr, 2881 struct x86_store *store, gvaddr_t *gvap, size_t size) 2882 { 2883 struct x86_sib *sib; 2884 gvaddr_t gva = 0; 2885 uint64_t reg; 2886 int ret, seg; 2887 2888 if (store->type == STORE_SIB) { 2889 sib = &store->u.sib; 2890 if (sib->bas != NULL) 2891 gva += gpr_read_address(instr, state, sib->bas->num); 2892 if (sib->idx != NULL) { 2893 reg = gpr_read_address(instr, state, sib->idx->num); 2894 gva += sib->scale * reg; 2895 } 2896 } else if (store->type == STORE_REG) { 2897 if (store->u.reg == NULL) { 2898 /* The base is null. Happens with disp32-only. */ 2899 } else { 2900 gva = gpr_read_address(instr, state, store->u.reg->num); 2901 } 2902 } else { 2903 gva = store->u.dmo; 2904 } 2905 2906 if (store->disp.type != DISP_NONE) { 2907 gva += store->disp.data; 2908 } 2909 2910 if (store->hardseg != -1) { 2911 seg = store->hardseg; 2912 } else { 2913 if (__predict_false(instr->legpref.seg != -1)) { 2914 seg = instr->legpref.seg; 2915 } else { 2916 seg = NVMM_X64_SEG_DS; 2917 } 2918 } 2919 2920 if (__predict_true(is_long_mode(state))) { 2921 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) { 2922 segment_apply(&state->segs[seg], &gva); 2923 } 2924 } else { 2925 ret = segment_check(&state->segs[seg], gva, size); 2926 if (ret == -1) 2927 return -1; 2928 segment_apply(&state->segs[seg], &gva); 2929 } 2930 2931 *gvap = gva; 2932 return 0; 2933 } 2934 2935 static int 2936 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state) 2937 { 2938 uint8_t inst_bytes[5], byte; 2939 size_t i, fetchsize; 2940 gvaddr_t gva; 2941 int ret, seg; 2942 2943 fetchsize = sizeof(inst_bytes); 2944 2945 gva = state->gprs[NVMM_X64_GPR_RIP]; 2946 if (__predict_false(!is_long_mode(state))) { 2947 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva, 2948 fetchsize); 2949 if (ret == -1) 2950 return -1; 2951 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva); 2952 } 2953 2954 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize); 2955 if (ret == -1) 2956 return -1; 2957 2958 seg = NVMM_X64_SEG_DS; 2959 for (i = 0; i < fetchsize; i++) { 2960 byte = inst_bytes[i]; 2961 2962 if (byte == LEG_OVR_DS) { 2963 seg = NVMM_X64_SEG_DS; 2964 } else if (byte == LEG_OVR_ES) { 2965 seg = NVMM_X64_SEG_ES; 2966 } else if (byte == LEG_OVR_GS) { 2967 seg = NVMM_X64_SEG_GS; 2968 } else if (byte == LEG_OVR_FS) { 2969 seg = NVMM_X64_SEG_FS; 2970 } else if (byte == LEG_OVR_CS) { 2971 seg = NVMM_X64_SEG_CS; 2972 } else if (byte == LEG_OVR_SS) { 2973 seg = NVMM_X64_SEG_SS; 2974 } else if (byte == LEG_OPR_OVR) { 2975 /* nothing */ 2976 } else if (byte == LEG_ADR_OVR) { 2977 /* nothing */ 2978 } else if (byte == LEG_REP) { 2979 /* nothing */ 2980 } else if (byte == LEG_REPN) { 2981 /* nothing */ 2982 } else if (byte == LEG_LOCK) { 2983 /* nothing */ 2984 } else { 2985 return seg; 2986 } 2987 } 2988 2989 return seg; 2990 } 2991 2992 static int 2993 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state, 2994 struct nvmm_exit *exit) 2995 { 2996 size_t fetchsize; 2997 gvaddr_t gva; 2998 int ret; 2999 3000 fetchsize = sizeof(exit->u.mem.inst_bytes); 3001 3002 gva = state->gprs[NVMM_X64_GPR_RIP]; 3003 if (__predict_false(!is_long_mode(state))) { 3004 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva, 3005 fetchsize); 3006 if (ret == -1) 3007 return -1; 3008 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva); 3009 } 3010 3011 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes, 3012 fetchsize); 3013 if (ret == -1) 3014 return -1; 3015 3016 exit->u.mem.inst_len = fetchsize; 3017 3018 return 0; 3019 } 3020 3021 static int 3022 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state, 3023 struct x86_instr *instr) 3024 { 3025 struct nvmm_mem mem; 3026 uint8_t data[8]; 3027 gvaddr_t gva; 3028 size_t size; 3029 int ret; 3030 3031 size = instr->operand_size; 3032 3033 /* Source. */ 3034 ret = store_to_gva(state, instr, &instr->src, &gva, size); 3035 if (ret == -1) 3036 return -1; 3037 ret = read_guest_memory(mach, state, gva, data, size); 3038 if (ret == -1) 3039 return -1; 3040 3041 /* Destination. */ 3042 ret = store_to_gva(state, instr, &instr->dst, &gva, size); 3043 if (ret == -1) 3044 return -1; 3045 ret = write_guest_memory(mach, state, gva, data, size); 3046 if (ret == -1) 3047 return -1; 3048 3049 mem.size = size; 3050 (*instr->emul->func)(&mem, state->gprs); 3051 3052 return 0; 3053 } 3054 3055 #define DISASSEMBLER_BUG() \ 3056 do { \ 3057 errno = EINVAL; \ 3058 return -1; \ 3059 } while (0); 3060 3061 static int 3062 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state, 3063 struct x86_instr *instr, struct nvmm_exit *exit) 3064 { 3065 struct nvmm_mem mem; 3066 uint8_t membuf[8]; 3067 uint64_t val; 3068 3069 memset(membuf, 0, sizeof(membuf)); 3070 3071 mem.gpa = exit->u.mem.gpa; 3072 mem.size = instr->operand_size; 3073 mem.data = membuf; 3074 3075 /* Determine the direction. */ 3076 switch (instr->src.type) { 3077 case STORE_REG: 3078 if (instr->src.disp.type != DISP_NONE) { 3079 /* Indirect access. */ 3080 mem.write = false; 3081 } else { 3082 /* Direct access. */ 3083 mem.write = true; 3084 } 3085 break; 3086 case STORE_IMM: 3087 mem.write = true; 3088 break; 3089 case STORE_SIB: 3090 mem.write = false; 3091 break; 3092 case STORE_DMO: 3093 mem.write = false; 3094 break; 3095 default: 3096 DISASSEMBLER_BUG(); 3097 } 3098 3099 if (mem.write) { 3100 switch (instr->src.type) { 3101 case STORE_REG: 3102 if (instr->src.disp.type != DISP_NONE) { 3103 DISASSEMBLER_BUG(); 3104 } 3105 val = state->gprs[instr->src.u.reg->num]; 3106 val = __SHIFTOUT(val, instr->src.u.reg->mask); 3107 memcpy(mem.data, &val, mem.size); 3108 break; 3109 case STORE_IMM: 3110 memcpy(mem.data, &instr->src.u.imm.data, mem.size); 3111 break; 3112 default: 3113 DISASSEMBLER_BUG(); 3114 } 3115 } else if (instr->emul->read) { 3116 if (instr->dst.type != STORE_REG) { 3117 DISASSEMBLER_BUG(); 3118 } 3119 if (instr->dst.disp.type != DISP_NONE) { 3120 DISASSEMBLER_BUG(); 3121 } 3122 val = state->gprs[instr->dst.u.reg->num]; 3123 val = __SHIFTOUT(val, instr->dst.u.reg->mask); 3124 memcpy(mem.data, &val, mem.size); 3125 } 3126 3127 (*instr->emul->func)(&mem, state->gprs); 3128 3129 if (!instr->emul->notouch && !mem.write) { 3130 if (instr->dst.type != STORE_REG) { 3131 DISASSEMBLER_BUG(); 3132 } 3133 memcpy(&val, membuf, sizeof(uint64_t)); 3134 val = __SHIFTIN(val, instr->dst.u.reg->mask); 3135 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask; 3136 state->gprs[instr->dst.u.reg->num] |= val; 3137 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask; 3138 } 3139 3140 return 0; 3141 } 3142 3143 int 3144 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid, 3145 struct nvmm_exit *exit) 3146 { 3147 struct nvmm_x64_state state; 3148 struct x86_instr instr; 3149 uint64_t cnt = 0; /* GCC */ 3150 int ret; 3151 3152 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) { 3153 errno = EINVAL; 3154 return -1; 3155 } 3156 3157 ret = nvmm_vcpu_getstate(mach, cpuid, &state, 3158 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 3159 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 3160 if (ret == -1) 3161 return -1; 3162 3163 if (exit->u.mem.inst_len == 0) { 3164 /* 3165 * The instruction was not fetched from the kernel. Fetch 3166 * it ourselves. 3167 */ 3168 ret = fetch_instruction(mach, &state, exit); 3169 if (ret == -1) 3170 return -1; 3171 } 3172 3173 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len, 3174 &instr, &state); 3175 if (ret == -1) { 3176 errno = ENODEV; 3177 return -1; 3178 } 3179 3180 if (instr.legpref.rep || instr.legpref.repn) { 3181 cnt = rep_get_cnt(&state, instr.address_size); 3182 if (__predict_false(cnt == 0)) { 3183 state.gprs[NVMM_X64_GPR_RIP] += instr.len; 3184 goto out; 3185 } 3186 } 3187 3188 if (instr.opcode->movs) { 3189 ret = assist_mem_double(mach, &state, &instr); 3190 } else { 3191 ret = assist_mem_single(mach, &state, &instr, exit); 3192 } 3193 if (ret == -1) { 3194 errno = ENODEV; 3195 return -1; 3196 } 3197 3198 if (instr.legpref.rep || instr.legpref.repn) { 3199 cnt -= 1; 3200 rep_set_cnt(&state, instr.address_size, cnt); 3201 if (cnt == 0) { 3202 state.gprs[NVMM_X64_GPR_RIP] += instr.len; 3203 } else if (__predict_false(instr.legpref.repn)) { 3204 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) { 3205 state.gprs[NVMM_X64_GPR_RIP] += instr.len; 3206 } 3207 } 3208 } else { 3209 state.gprs[NVMM_X64_GPR_RIP] += instr.len; 3210 } 3211 3212 out: 3213 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS); 3214 if (ret == -1) 3215 return -1; 3216 3217 return 0; 3218 } 3219