1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
2 *
3 * The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===*
9 *
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
13 *
14 *===----------------------------------------------------------------------===*/
15
16 /* Capstone Disassembly Engine */
17 /* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
18
19 #ifdef CAPSTONE_HAS_X86
20
21 #include <stdarg.h> /* for va_*() */
22 #if defined(CAPSTONE_HAS_OSXKERNEL)
23 #include <libkern/libkern.h>
24 #else
25 #include <stdlib.h> /* for exit() */
26 #endif
27
28 #include <string.h>
29
30 #include "../../cs_priv.h"
31 #include "../../utils.h"
32
33 #include "X86DisassemblerDecoder.h"
34 #include "X86Mapping.h"
35
/*
 * ModRMDecision - One opcode-table entry. Records whether a ModR/M byte is
 * needed and, if so, how its value selects among the candidate instructions.
 * Once this entry is resolved, a single instruction remains.
 */
struct ModRMDecision {
	/* Selection scheme; decode() switches on this against the MODRM_* values. */
	uint8_t modrm_type;

	/* Base offset of this entry's instruction ID(s) within modRMTable. */
	uint16_t instructionIDs;
};
43
44 /// Specifies which set of ModR/M->instruction tables to look at
45 /// given a particular opcode.
46 struct OpcodeDecision {
47 struct ModRMDecision modRMDecisions[256];
48 };
49
50 /// Specifies which opcode->instruction tables to look at given
51 /// a particular context (set of attributes). Since there are many possible
52 /// contexts, the decoder first uses CONTEXTS_SYM to determine which context
53 /// applies given a specific set of attributes. Hence there are only IC_max
54 /// entries in this table, rather than 2^(ATTR_max).
55 struct ContextDecision {
56 struct OpcodeDecision opcodeDecisions[IC_max];
57 };
58
59 #ifdef CAPSTONE_X86_REDUCE
60 #include "X86GenDisassemblerTables_reduce.inc"
61 #include "X86GenDisassemblerTables_reduce2.inc"
62 #include "X86Lookup16_reduce.inc"
63 #else
64 #include "X86GenDisassemblerTables.inc"
65 #include "X86GenDisassemblerTables2.inc"
66 #include "X86Lookup16.inc"
67 #endif
68
69 /*
70 * contextForAttrs - Client for the instruction context table. Takes a set of
71 * attributes and returns the appropriate decode context.
72 *
73 * @param attrMask - Attributes, from the enumeration attributeBits.
74 * @return - The InstructionContext to use when looking up an
75 * an instruction with these attributes.
76 */
contextForAttrs(uint16_t attrMask)77 static InstructionContext contextForAttrs(uint16_t attrMask)
78 {
79 return CONTEXTS_SYM[attrMask];
80 }
81
82 /*
83 * modRMRequired - Reads the appropriate instruction table to determine whether
84 * the ModR/M byte is required to decode a particular instruction.
85 *
86 * @param type - The opcode type (i.e., how many bytes it has).
87 * @param insnContext - The context for the instruction, as returned by
88 * contextForAttrs.
89 * @param opcode - The last byte of the instruction's opcode, not counting
90 * ModR/M extensions and escapes.
91 * @return - true if the ModR/M byte is required, false otherwise.
92 */
modRMRequired(OpcodeType type,InstructionContext insnContext,uint16_t opcode)93 static int modRMRequired(OpcodeType type,
94 InstructionContext insnContext,
95 uint16_t opcode)
96 {
97 const struct OpcodeDecision *decision = NULL;
98 const uint8_t *indextable = NULL;
99 unsigned int index;
100
101 switch (type) {
102 default: break;
103 case ONEBYTE:
104 decision = ONEBYTE_SYM;
105 indextable = index_x86DisassemblerOneByteOpcodes;
106 break;
107 case TWOBYTE:
108 decision = TWOBYTE_SYM;
109 indextable = index_x86DisassemblerTwoByteOpcodes;
110 break;
111 case THREEBYTE_38:
112 decision = THREEBYTE38_SYM;
113 indextable = index_x86DisassemblerThreeByte38Opcodes;
114 break;
115 case THREEBYTE_3A:
116 decision = THREEBYTE3A_SYM;
117 indextable = index_x86DisassemblerThreeByte3AOpcodes;
118 break;
119 #ifndef CAPSTONE_X86_REDUCE
120 case XOP8_MAP:
121 decision = XOP8_MAP_SYM;
122 indextable = index_x86DisassemblerXOP8Opcodes;
123 break;
124 case XOP9_MAP:
125 decision = XOP9_MAP_SYM;
126 indextable = index_x86DisassemblerXOP9Opcodes;
127 break;
128 case XOPA_MAP:
129 decision = XOPA_MAP_SYM;
130 indextable = index_x86DisassemblerXOPAOpcodes;
131 break;
132 case THREEDNOW_MAP:
133 // 3DNow instructions always have ModRM byte
134 return true;
135 #endif
136 }
137
138 // return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY;
139 index = indextable[insnContext];
140 if (index)
141 return decision[index - 1].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY;
142 else
143 return false;
144 }
145
146 /*
147 * decode - Reads the appropriate instruction table to obtain the unique ID of
148 * an instruction.
149 *
150 * @param type - See modRMRequired().
151 * @param insnContext - See modRMRequired().
152 * @param opcode - See modRMRequired().
153 * @param modRM - The ModR/M byte if required, or any value if not.
154 * @return - The UID of the instruction, or 0 on failure.
155 */
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)156 static InstrUID decode(OpcodeType type,
157 InstructionContext insnContext,
158 uint8_t opcode,
159 uint8_t modRM)
160 {
161 const struct ModRMDecision *dec = NULL;
162 unsigned int index;
163 static const struct OpcodeDecision emptyDecision = { 0 };
164
165 switch (type) {
166 default: break; // never reach
167 case ONEBYTE:
168 // dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
169 index = index_x86DisassemblerOneByteOpcodes[insnContext];
170 if (index)
171 dec = &ONEBYTE_SYM[index - 1].modRMDecisions[opcode];
172 else
173 dec = &emptyDecision.modRMDecisions[opcode];
174 break;
175 case TWOBYTE:
176 //dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
177 index = index_x86DisassemblerTwoByteOpcodes[insnContext];
178 if (index)
179 dec = &TWOBYTE_SYM[index - 1].modRMDecisions[opcode];
180 else
181 dec = &emptyDecision.modRMDecisions[opcode];
182 break;
183 case THREEBYTE_38:
184 // dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
185 index = index_x86DisassemblerThreeByte38Opcodes[insnContext];
186 if (index)
187 dec = &THREEBYTE38_SYM[index - 1].modRMDecisions[opcode];
188 else
189 dec = &emptyDecision.modRMDecisions[opcode];
190 break;
191 case THREEBYTE_3A:
192 //dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
193 index = index_x86DisassemblerThreeByte3AOpcodes[insnContext];
194 if (index)
195 dec = &THREEBYTE3A_SYM[index - 1].modRMDecisions[opcode];
196 else
197 dec = &emptyDecision.modRMDecisions[opcode];
198 break;
199 #ifndef CAPSTONE_X86_REDUCE
200 case XOP8_MAP:
201 // dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
202 index = index_x86DisassemblerXOP8Opcodes[insnContext];
203 if (index)
204 dec = &XOP8_MAP_SYM[index - 1].modRMDecisions[opcode];
205 else
206 dec = &emptyDecision.modRMDecisions[opcode];
207 break;
208 case XOP9_MAP:
209 // dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
210 index = index_x86DisassemblerXOP9Opcodes[insnContext];
211 if (index)
212 dec = &XOP9_MAP_SYM[index - 1].modRMDecisions[opcode];
213 else
214 dec = &emptyDecision.modRMDecisions[opcode];
215 break;
216 case XOPA_MAP:
217 // dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
218 index = index_x86DisassemblerXOPAOpcodes[insnContext];
219 if (index)
220 dec = &XOPA_MAP_SYM[index - 1].modRMDecisions[opcode];
221 else
222 dec = &emptyDecision.modRMDecisions[opcode];
223 break;
224 case THREEDNOW_MAP:
225 // dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
226 index = index_x86Disassembler3DNowOpcodes[insnContext];
227 if (index)
228 dec = &THREEDNOW_MAP_SYM[index - 1].modRMDecisions[opcode];
229 else
230 dec = &emptyDecision.modRMDecisions[opcode];
231 break;
232 #endif
233 }
234
235 switch (dec->modrm_type) {
236 default:
237 // debug("Corrupt table! Unknown modrm_type");
238 return 0;
239 case MODRM_ONEENTRY:
240 return modRMTable[dec->instructionIDs];
241 case MODRM_SPLITRM:
242 if (modFromModRM(modRM) == 0x3)
243 return modRMTable[dec->instructionIDs + 1];
244 return modRMTable[dec->instructionIDs];
245 case MODRM_SPLITREG:
246 if (modFromModRM(modRM) == 0x3)
247 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3) + 8];
248 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
249 case MODRM_SPLITMISC:
250 if (modFromModRM(modRM) == 0x3)
251 return modRMTable[dec->instructionIDs+(modRM & 0x3f) + 8];
252 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
253 case MODRM_FULL:
254 return modRMTable[dec->instructionIDs+modRM];
255 }
256 }
257
258 /*
259 * specifierForUID - Given a UID, returns the name and operand specification for
260 * that instruction.
261 *
262 * @param uid - The unique ID for the instruction. This should be returned by
263 * decode(); specifierForUID will not check bounds.
264 * @return - A pointer to the specification for that instruction.
265 */
specifierForUID(InstrUID uid)266 static const struct InstructionSpecifier *specifierForUID(InstrUID uid)
267 {
268 return &INSTRUCTIONS_SYM[uid];
269 }
270
271 /*
272 * consumeByte - Uses the reader function provided by the user to consume one
273 * byte from the instruction's memory and advance the cursor.
274 *
275 * @param insn - The instruction with the reader function to use. The cursor
276 * for this instruction is advanced.
277 * @param byte - A pointer to a pre-allocated memory buffer to be populated
278 * with the data read.
279 * @return - 0 if the read was successful; nonzero otherwise.
280 */
consumeByte(struct InternalInstruction * insn,uint8_t * byte)281 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte)
282 {
283 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
284
285 if (!ret)
286 ++(insn->readerCursor);
287
288 return ret;
289 }
290
291 /*
292 * lookAtByte - Like consumeByte, but does not advance the cursor.
293 *
294 * @param insn - See consumeByte().
295 * @param byte - See consumeByte().
296 * @return - See consumeByte().
297 */
lookAtByte(struct InternalInstruction * insn,uint8_t * byte)298 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte)
299 {
300 return insn->reader(insn->readerArg, byte, insn->readerCursor);
301 }
302
unconsumeByte(struct InternalInstruction * insn)303 static void unconsumeByte(struct InternalInstruction* insn)
304 {
305 insn->readerCursor--;
306 }
307
308 #define CONSUME_FUNC(name, type) \
309 static int name(struct InternalInstruction* insn, type* ptr) { \
310 type combined = 0; \
311 unsigned offset; \
312 for (offset = 0; offset < sizeof(type); ++offset) { \
313 uint8_t byte; \
314 int ret = insn->reader(insn->readerArg, \
315 &byte, \
316 insn->readerCursor + offset); \
317 if (ret) \
318 return ret; \
319 combined = combined | ((uint64_t)byte << (offset * 8)); \
320 } \
321 *ptr = combined; \
322 insn->readerCursor += sizeof(type); \
323 return 0; \
324 }
325
326 /*
327 * consume* - Use the reader function provided by the user to consume data
328 * values of various sizes from the instruction's memory and advance the
329 * cursor appropriately. These readers perform endian conversion.
330 *
331 * @param insn - See consumeByte().
332 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
333 * be populated with the data read.
334 * @return - See consumeByte().
335 */
CONSUME_FUNC(consumeInt8,int8_t)336 CONSUME_FUNC(consumeInt8, int8_t)
337 CONSUME_FUNC(consumeInt16, int16_t)
338 CONSUME_FUNC(consumeInt32, int32_t)
339 CONSUME_FUNC(consumeUInt16, uint16_t)
340 CONSUME_FUNC(consumeUInt32, uint32_t)
341 CONSUME_FUNC(consumeUInt64, uint64_t)
342
343 static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
344 {
345 if (insn->mode == MODE_64BIT)
346 return prefix >= 0x40 && prefix <= 0x4f;
347
348 return false;
349 }
350
351 /*
352 * setPrefixPresent - Marks that a particular prefix is present as mandatory
353 *
354 * @param insn - The instruction to be marked as having the prefix.
355 * @param prefix - The prefix that is present.
356 */
setPrefixPresent(struct InternalInstruction * insn,uint8_t prefix)357 static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix)
358 {
359 uint8_t nextByte;
360
361 switch (prefix) {
362 case 0xf0: // LOCK
363 insn->hasLockPrefix = true;
364 insn->repeatPrefix = 0;
365 break;
366
367 case 0xf2: // REPNE/REPNZ
368 case 0xf3: // REP or REPE/REPZ
369 if (lookAtByte(insn, &nextByte))
370 break;
371 // TODO:
372 // 1. There could be several 0x66
373 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
374 // it's not mandatory prefix
375 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
376 // 0x0f exactly after it to be mandatory prefix
377 if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
378 // The last of 0xf2 /0xf3 is mandatory prefix
379 insn->mandatoryPrefix = prefix;
380
381 insn->repeatPrefix = prefix;
382 insn->hasLockPrefix = false;
383 break;
384
385 case 0x66:
386 if (lookAtByte(insn, &nextByte))
387 break;
388 // 0x66 can't overwrite existing mandatory prefix and should be ignored
389 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
390 insn->mandatoryPrefix = prefix;
391 break;
392 }
393 }
394
395 /*
396 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
397 * instruction as having them. Also sets the instruction's default operand,
398 * address, and other relevant data sizes to report operands correctly.
399 *
400 * @param insn - The instruction whose prefixes are to be read.
401 * @return - 0 if the instruction could be read until the end of the prefix
402 * bytes, and no prefixes conflicted; nonzero otherwise.
403 */
readPrefixes(struct InternalInstruction * insn)404 static int readPrefixes(struct InternalInstruction* insn)
405 {
406 bool isPrefix = true;
407 uint8_t byte = 0;
408 uint8_t nextByte;
409
410 while (isPrefix) {
411 if (insn->mode == MODE_64BIT) {
412 // eliminate consecutive redundant REX bytes in front
413 if (consumeByte(insn, &byte))
414 return -1;
415
416 if ((byte & 0xf0) == 0x40) {
417 while(true) {
418 if (lookAtByte(insn, &byte)) // out of input code
419 return -1;
420 if ((byte & 0xf0) == 0x40) {
421 // another REX prefix, but we only remember the last one
422 if (consumeByte(insn, &byte))
423 return -1;
424 } else
425 break;
426 }
427
428 // recover the last REX byte if next byte is not a legacy prefix
429 switch (byte) {
430 case 0xf2: /* REPNE/REPNZ */
431 case 0xf3: /* REP or REPE/REPZ */
432 case 0xf0: /* LOCK */
433 case 0x2e: /* CS segment override -OR- Branch not taken */
434 case 0x36: /* SS segment override -OR- Branch taken */
435 case 0x3e: /* DS segment override */
436 case 0x26: /* ES segment override */
437 case 0x64: /* FS segment override */
438 case 0x65: /* GS segment override */
439 case 0x66: /* Operand-size override */
440 case 0x67: /* Address-size override */
441 break;
442 default: /* Not a prefix byte */
443 unconsumeByte(insn);
444 break;
445 }
446 } else {
447 unconsumeByte(insn);
448 }
449 }
450
451 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
452 if (consumeByte(insn, &byte))
453 return -1;
454
455 if (insn->readerCursor - 1 == insn->startLocation
456 && (byte == 0xf2 || byte == 0xf3)) {
457 // prefix requires next byte
458 if (lookAtByte(insn, &nextByte))
459 return -1;
460
461 /*
462 * If the byte is 0xf2 or 0xf3, and any of the following conditions are
463 * met:
464 * - it is followed by a LOCK (0xf0) prefix
465 * - it is followed by an xchg instruction
466 * then it should be disassembled as a xacquire/xrelease not repne/rep.
467 */
468 if (((nextByte == 0xf0) ||
469 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
470 insn->xAcquireRelease = byte;
471 }
472
473 /*
474 * Also if the byte is 0xf3, and the following condition is met:
475 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
476 * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
477 * then it should be disassembled as an xrelease not rep.
478 */
479 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
480 nextByte == 0xc6 || nextByte == 0xc7)) {
481 insn->xAcquireRelease = byte;
482 }
483
484 if (isREX(insn, nextByte)) {
485 uint8_t nnextByte;
486
487 // Go to REX prefix after the current one
488 if (consumeByte(insn, &nnextByte))
489 return -1;
490
491 // We should be able to read next byte after REX prefix
492 if (lookAtByte(insn, &nnextByte))
493 return -1;
494
495 unconsumeByte(insn);
496 }
497 }
498
499 switch (byte) {
500 case 0xf0: /* LOCK */
501 case 0xf2: /* REPNE/REPNZ */
502 case 0xf3: /* REP or REPE/REPZ */
503 // only accept the last prefix
504 setPrefixPresent(insn, byte);
505 insn->prefix0 = byte;
506 break;
507
508 case 0x2e: /* CS segment override -OR- Branch not taken */
509 case 0x36: /* SS segment override -OR- Branch taken */
510 case 0x3e: /* DS segment override */
511 case 0x26: /* ES segment override */
512 case 0x64: /* FS segment override */
513 case 0x65: /* GS segment override */
514 switch (byte) {
515 case 0x2e:
516 insn->segmentOverride = SEG_OVERRIDE_CS;
517 insn->prefix1 = byte;
518 break;
519 case 0x36:
520 insn->segmentOverride = SEG_OVERRIDE_SS;
521 insn->prefix1 = byte;
522 break;
523 case 0x3e:
524 insn->segmentOverride = SEG_OVERRIDE_DS;
525 insn->prefix1 = byte;
526 break;
527 case 0x26:
528 insn->segmentOverride = SEG_OVERRIDE_ES;
529 insn->prefix1 = byte;
530 break;
531 case 0x64:
532 insn->segmentOverride = SEG_OVERRIDE_FS;
533 insn->prefix1 = byte;
534 break;
535 case 0x65:
536 insn->segmentOverride = SEG_OVERRIDE_GS;
537 insn->prefix1 = byte;
538 break;
539 default:
540 // debug("Unhandled override");
541 return -1;
542 }
543 setPrefixPresent(insn, byte);
544 break;
545
546 case 0x66: /* Operand-size override */
547 insn->hasOpSize = true;
548 setPrefixPresent(insn, byte);
549 insn->prefix2 = byte;
550 break;
551
552 case 0x67: /* Address-size override */
553 insn->hasAdSize = true;
554 setPrefixPresent(insn, byte);
555 insn->prefix3 = byte;
556 break;
557 default: /* Not a prefix byte */
558 isPrefix = false;
559 break;
560 }
561 }
562
563 insn->vectorExtensionType = TYPE_NO_VEX_XOP;
564
565 if (byte == 0x62) {
566 uint8_t byte1, byte2;
567
568 if (consumeByte(insn, &byte1)) {
569 // dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
570 return -1;
571 }
572
573 if (lookAtByte(insn, &byte2)) {
574 // dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
575 unconsumeByte(insn); /* unconsume byte1 */
576 unconsumeByte(insn); /* unconsume byte */
577 } else {
578 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
579 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
580 insn->vectorExtensionType = TYPE_EVEX;
581 } else {
582 unconsumeByte(insn); /* unconsume byte1 */
583 unconsumeByte(insn); /* unconsume byte */
584 }
585 }
586
587 if (insn->vectorExtensionType == TYPE_EVEX) {
588 insn->vectorExtensionPrefix[0] = byte;
589 insn->vectorExtensionPrefix[1] = byte1;
590 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
591 // dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
592 return -1;
593 }
594
595 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
596 // dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
597 return -1;
598 }
599
600 /* We simulate the REX prefix for simplicity's sake */
601 if (insn->mode == MODE_64BIT) {
602 insn->rexPrefix = 0x40
603 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
604 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
605 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
606 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
607 }
608
609 // dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
610 // insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
611 // insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
612 }
613 } else if (byte == 0xc4) {
614 uint8_t byte1;
615
616 if (lookAtByte(insn, &byte1)) {
617 // dbgprintf(insn, "Couldn't read second byte of VEX");
618 return -1;
619 }
620
621 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
622 insn->vectorExtensionType = TYPE_VEX_3B;
623 else
624 unconsumeByte(insn);
625
626 if (insn->vectorExtensionType == TYPE_VEX_3B) {
627 insn->vectorExtensionPrefix[0] = byte;
628 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
629 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
630
631 /* We simulate the REX prefix for simplicity's sake */
632 if (insn->mode == MODE_64BIT)
633 insn->rexPrefix = 0x40
634 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
635 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
636 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
637 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
638
639 // dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
640 // insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
641 // insn->vectorExtensionPrefix[2]);
642 }
643 } else if (byte == 0xc5) {
644 uint8_t byte1;
645
646 if (lookAtByte(insn, &byte1)) {
647 // dbgprintf(insn, "Couldn't read second byte of VEX");
648 return -1;
649 }
650
651 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
652 insn->vectorExtensionType = TYPE_VEX_2B;
653 else
654 unconsumeByte(insn);
655
656 if (insn->vectorExtensionType == TYPE_VEX_2B) {
657 insn->vectorExtensionPrefix[0] = byte;
658 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
659
660 if (insn->mode == MODE_64BIT)
661 insn->rexPrefix = 0x40
662 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
663
664 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
665 default:
666 break;
667 case VEX_PREFIX_66:
668 insn->hasOpSize = true;
669 break;
670 }
671
672 // dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
673 // insn->vectorExtensionPrefix[0],
674 // insn->vectorExtensionPrefix[1]);
675 }
676 } else if (byte == 0x8f) {
677 uint8_t byte1;
678
679 if (lookAtByte(insn, &byte1)) {
680 // dbgprintf(insn, "Couldn't read second byte of XOP");
681 return -1;
682 }
683
684 if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */
685 insn->vectorExtensionType = TYPE_XOP;
686 else
687 unconsumeByte(insn);
688
689 if (insn->vectorExtensionType == TYPE_XOP) {
690 insn->vectorExtensionPrefix[0] = byte;
691 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
692 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
693
694 /* We simulate the REX prefix for simplicity's sake */
695 if (insn->mode == MODE_64BIT)
696 insn->rexPrefix = 0x40
697 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
698 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
699 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
700 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
701
702 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
703 default:
704 break;
705 case VEX_PREFIX_66:
706 insn->hasOpSize = true;
707 break;
708 }
709
710 // dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
711 // insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
712 // insn->vectorExtensionPrefix[2]);
713 }
714 } else if (isREX(insn, byte)) {
715 if (lookAtByte(insn, &nextByte))
716 return -1;
717
718 insn->rexPrefix = byte;
719 // dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
720 } else
721 unconsumeByte(insn);
722
723 if (insn->mode == MODE_16BIT) {
724 insn->registerSize = (insn->hasOpSize ? 4 : 2);
725 insn->addressSize = (insn->hasAdSize ? 4 : 2);
726 insn->displacementSize = (insn->hasAdSize ? 4 : 2);
727 insn->immediateSize = (insn->hasOpSize ? 4 : 2);
728 insn->immSize = (insn->hasOpSize ? 4 : 2);
729 } else if (insn->mode == MODE_32BIT) {
730 insn->registerSize = (insn->hasOpSize ? 2 : 4);
731 insn->addressSize = (insn->hasAdSize ? 2 : 4);
732 insn->displacementSize = (insn->hasAdSize ? 2 : 4);
733 insn->immediateSize = (insn->hasOpSize ? 2 : 4);
734 insn->immSize = (insn->hasOpSize ? 2 : 4);
735 } else if (insn->mode == MODE_64BIT) {
736 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
737 insn->registerSize = 8;
738 insn->addressSize = (insn->hasAdSize ? 4 : 8);
739 insn->displacementSize = 4;
740 insn->immediateSize = 4;
741 insn->immSize = 4;
742 } else {
743 insn->registerSize = (insn->hasOpSize ? 2 : 4);
744 insn->addressSize = (insn->hasAdSize ? 4 : 8);
745 insn->displacementSize = (insn->hasOpSize ? 2 : 4);
746 insn->immediateSize = (insn->hasOpSize ? 2 : 4);
747 insn->immSize = (insn->hasOpSize ? 4 : 8);
748 }
749 }
750
751 return 0;
752 }
753
754 static int readModRM(struct InternalInstruction* insn);
755
756 /*
757 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
758 * extended or escape opcodes).
759 *
760 * @param insn - The instruction whose opcode is to be read.
761 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
762 */
readOpcode(struct InternalInstruction * insn)763 static int readOpcode(struct InternalInstruction* insn)
764 {
765 uint8_t current;
766
767 // dbgprintf(insn, "readOpcode()");
768
769 insn->opcodeType = ONEBYTE;
770
771 if (insn->vectorExtensionType == TYPE_EVEX) {
772 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
773 default:
774 // dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
775 // mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
776 return -1;
777 case VEX_LOB_0F:
778 insn->opcodeType = TWOBYTE;
779 return consumeByte(insn, &insn->opcode);
780 case VEX_LOB_0F38:
781 insn->opcodeType = THREEBYTE_38;
782 return consumeByte(insn, &insn->opcode);
783 case VEX_LOB_0F3A:
784 insn->opcodeType = THREEBYTE_3A;
785 return consumeByte(insn, &insn->opcode);
786 }
787 } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
788 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
789 default:
790 // dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
791 // mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
792 return -1;
793 case VEX_LOB_0F:
794 //insn->twoByteEscape = 0x0f;
795 insn->opcodeType = TWOBYTE;
796 return consumeByte(insn, &insn->opcode);
797 case VEX_LOB_0F38:
798 //insn->twoByteEscape = 0x0f;
799 insn->opcodeType = THREEBYTE_38;
800 return consumeByte(insn, &insn->opcode);
801 case VEX_LOB_0F3A:
802 //insn->twoByteEscape = 0x0f;
803 insn->opcodeType = THREEBYTE_3A;
804 return consumeByte(insn, &insn->opcode);
805 }
806 } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
807 //insn->twoByteEscape = 0x0f;
808 insn->opcodeType = TWOBYTE;
809 return consumeByte(insn, &insn->opcode);
810 } else if (insn->vectorExtensionType == TYPE_XOP) {
811 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
812 default:
813 // dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
814 // mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
815 return -1;
816 case XOP_MAP_SELECT_8:
817 insn->opcodeType = XOP8_MAP;
818 return consumeByte(insn, &insn->opcode);
819 case XOP_MAP_SELECT_9:
820 insn->opcodeType = XOP9_MAP;
821 return consumeByte(insn, &insn->opcode);
822 case XOP_MAP_SELECT_A:
823 insn->opcodeType = XOPA_MAP;
824 return consumeByte(insn, &insn->opcode);
825 }
826 }
827
828 if (consumeByte(insn, ¤t))
829 return -1;
830
831 // save this first byte for MOVcr, MOVdr, MOVrc, MOVrd
832 insn->firstByte = current;
833
834 if (current == 0x0f) {
835 // dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
836 insn->twoByteEscape = current;
837
838 if (consumeByte(insn, ¤t))
839 return -1;
840
841 if (current == 0x38) {
842 // dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
843 if (consumeByte(insn, ¤t))
844 return -1;
845
846 insn->opcodeType = THREEBYTE_38;
847 } else if (current == 0x3a) {
848 // dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
849 if (consumeByte(insn, ¤t))
850 return -1;
851
852 insn->opcodeType = THREEBYTE_3A;
853 } else if (current == 0x0f) {
854 // dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current);
855 // Consume operands before the opcode to comply with the 3DNow encoding
856 if (readModRM(insn))
857 return -1;
858
859 if (consumeByte(insn, ¤t))
860 return -1;
861
862 insn->opcodeType = THREEDNOW_MAP;
863 } else {
864 // dbgprintf(insn, "Didn't find a three-byte escape prefix");
865 insn->opcodeType = TWOBYTE;
866 }
867 } else if (insn->mandatoryPrefix)
868 // The opcode with mandatory prefix must start with opcode escape.
869 // If not it's legacy repeat prefix
870 insn->mandatoryPrefix = 0;
871
872 /*
873 * At this point we have consumed the full opcode.
874 * Anything we consume from here on must be unconsumed.
875 */
876
877 insn->opcode = current;
878
879 return 0;
880 }
881
882 // Hacky for FEMMS
883 #define GET_INSTRINFO_ENUM
884 #ifndef CAPSTONE_X86_REDUCE
885 #include "X86GenInstrInfo.inc"
886 #else
887 #include "X86GenInstrInfo_reduce.inc"
888 #endif
889
890 /*
891 * getIDWithAttrMask - Determines the ID of an instruction, consuming
892 * the ModR/M byte as appropriate for extended and escape opcodes,
893 * and using a supplied attribute mask.
894 *
895 * @param instructionID - A pointer whose target is filled in with the ID of the
896 * instruction.
897 * @param insn - The instruction whose ID is to be determined.
898 * @param attrMask - The attribute mask to search.
899 * @return - 0 if the ModR/M could be read when needed or was not
900 * needed; nonzero otherwise.
901 */
getIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint16_t attrMask)902 static int getIDWithAttrMask(uint16_t *instructionID,
903 struct InternalInstruction* insn,
904 uint16_t attrMask)
905 {
906 bool hasModRMExtension;
907
908 InstructionContext instructionClass = contextForAttrs(attrMask);
909
910 hasModRMExtension = modRMRequired(insn->opcodeType,
911 instructionClass,
912 insn->opcode);
913
914 if (hasModRMExtension) {
915 if (readModRM(insn))
916 return -1;
917
918 *instructionID = decode(insn->opcodeType,
919 instructionClass,
920 insn->opcode,
921 insn->modRM);
922 } else {
923 *instructionID = decode(insn->opcodeType,
924 instructionClass,
925 insn->opcode,
926 0);
927 }
928
929 return 0;
930 }
931
932 /*
933 * is16BitEquivalent - Determines whether two instruction names refer to
934 * equivalent instructions but one is 16-bit whereas the other is not.
935 *
936 * @param orig - The instruction ID that is not 16-bit
937 * @param equiv - The instruction ID that is 16-bit
938 */
is16BitEquivalent(unsigned orig,unsigned equiv)939 static bool is16BitEquivalent(unsigned orig, unsigned equiv)
940 {
941 size_t i;
942 uint16_t idx;
943
944 if ((idx = x86_16_bit_eq_lookup[orig]) != 0) {
945 for (i = idx - 1; i < ARR_SIZE(x86_16_bit_eq_tbl) && x86_16_bit_eq_tbl[i].first == orig; i++) {
946 if (x86_16_bit_eq_tbl[i].second == equiv)
947 return true;
948 }
949 }
950
951 return false;
952 }
953
954 /*
955 * is64Bit - Determines whether this instruction is a 64-bit instruction.
956 *
957 * @param name - The instruction that is not 16-bit
958 */
is64Bit(uint16_t id)959 static bool is64Bit(uint16_t id)
960 {
961 unsigned int i = find_insn(id);
962 if (i != -1) {
963 return insns[i].is64bit;
964 }
965
966 // not found??
967 return false;
968 }
969
/*
 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
 *   appropriate for extended and escape opcodes.  Determines the attributes and
 *   context for the instruction before doing so.
 *
 * The function first builds an attribute mask from the decoding mode, the
 * VEX/EVEX/XOP prefix fields (or the legacy/mandatory prefixes), and REX.W,
 * then looks the instruction up with getIDWithAttrMask().  Several special
 * cases afterwards retry the lookup with an adjusted mask or opcode; when such
 * a retry fails, the first result is kept.  On success insn->instructionID and
 * insn->spec are filled in.
 *
 * @param insn  - The instruction whose ID is to be determined.
 * @return      - 0 on success; -1 if the vector extension type is not one of
 *                the handled kinds, or if the primary lookup (or the lookup
 *                for absolute moves and the other opcodes handled with them)
 *                fails.
 */
static int getID(struct InternalInstruction *insn)
{
	uint16_t attrMask;
	uint16_t instructionID;

	attrMask = ATTR_NONE;

	if (insn->mode == MODE_64BIT)
		attrMask |= ATTR_64BIT;

	if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
		/* Vector-extension prefix present: attributes come from its fields. */
		attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

		if (insn->vectorExtensionType == TYPE_EVEX) {
			/* The pp field selects the implied 66/F3/F2 prefix. */
			switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			/* Remaining EVEX attributes are taken from the fourth prefix byte. */
			if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXKZ;
			if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXB;
			if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXK;
			if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXL;
			if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXL2;
		} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
			switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
				attrMask |= ATTR_VEXL;
		} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
			/* The two-byte VEX form carries pp and L in its second byte. */
			switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
				attrMask |= ATTR_VEXL;
		} else if (insn->vectorExtensionType == TYPE_XOP) {
			switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
				attrMask |= ATTR_VEXL;
		} else {
			/* Unrecognized vector extension type. */
			return -1;
		}
	} else if (!insn->mandatoryPrefix) {
		// If we don't have a mandatory prefix we should use legacy prefixes here
		if (insn->hasOpSize && (insn->mode != MODE_16BIT))
			attrMask |= ATTR_OPSIZE;
		if (insn->hasAdSize)
			attrMask |= ATTR_ADSIZE;
		if (insn->opcodeType == ONEBYTE) {
			if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
				// Special support for PAUSE
				attrMask |= ATTR_XS;
		} else {
			if (insn->repeatPrefix == 0xf2)
				attrMask |= ATTR_XD;
			else if (insn->repeatPrefix == 0xf3)
				attrMask |= ATTR_XS;
		}
	} else {
		/* A mandatory prefix was recorded: map it directly to its attribute. */
		switch (insn->mandatoryPrefix) {
			case 0xf2:
				attrMask |= ATTR_XD;
				break;
			case 0xf3:
				attrMask |= ATTR_XS;
				break;
			case 0x66:
				if (insn->mode != MODE_16BIT)
					attrMask |= ATTR_OPSIZE;
				break;
			case 0x67:
				attrMask |= ATTR_ADSIZE;
				break;
		}

	}

	/* REX.W (bit 3 of the REX prefix) sets REXW and clears ADSIZE. */
	if (insn->rexPrefix & 0x08) {
		attrMask |= ATTR_REXW;
		attrMask &= ~ATTR_ADSIZE;
	}

	/*
	 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
	 * of the AdSize prefix is inverted w.r.t. 32-bit mode.
	 */
	if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
			insn->opcode == 0xE3)
		attrMask ^= ATTR_ADSIZE;

	/*
	 * In 64-bit mode all f64 superscripted opcodes ignore the operand-size
	 * prefix.  CALL/JMP/JCC instructions need to ignore 0x66 and consume
	 * 4 bytes.
	 */
	if ((insn->mode == MODE_64BIT) && insn->hasOpSize) {
		switch (insn->opcode) {
			case 0xE8:
			case 0xE9:
				// Only the one-byte opcodes are adjusted; the same opcode
				// values in other maps (psubsb and other MMX instructions)
				// are left alone.
				if (insn->opcodeType == ONEBYTE) {
					attrMask ^= ATTR_OPSIZE;
					insn->immediateSize = 4;
					insn->displacementSize = 4;
				}
				break;
			case 0x82:
			case 0x83:
			case 0x84:
			case 0x85:
			case 0x86:
			case 0x87:
			case 0x88:
			case 0x89:
			case 0x8A:
			case 0x8B:
			case 0x8C:
			case 0x8D:
			case 0x8E:
			case 0x8F:
				// Only the two-byte opcodes are adjusted; the same opcode
				// values elsewhere (lea and three-byte ops) are left alone.
				if (insn->opcodeType == TWOBYTE) {
					attrMask ^= ATTR_OPSIZE;
					insn->immediateSize = 4;
					insn->displacementSize = 4;
				}
				break;
		}
	}

	/* Primary lookup with the attribute mask computed above. */
	if (getIDWithAttrMask(&instructionID, insn, attrMask)) {
		return -1;
	}

	/* The following clauses compensate for limitations of the tables. */
	if (insn->mode != MODE_64BIT &&
			insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
		/*
		 * The tables can't distinguish between cases where the W-bit is used to
		 * select register size and cases where it's a required part of the opcode.
		 */
		if ((insn->vectorExtensionType == TYPE_EVEX &&
					wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
				(insn->vectorExtensionType == TYPE_VEX_3B &&
				 wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
				(insn->vectorExtensionType == TYPE_XOP &&
				 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
			uint16_t instructionIDWithREXW;

			/* Retry with REXW added; if that lookup fails keep the first ID. */
			if (getIDWithAttrMask(&instructionIDWithREXW,
						insn, attrMask | ATTR_REXW)) {
				insn->instructionID = instructionID;
				insn->spec = specifierForUID(instructionID);
				return 0;
			}

			// If the REXW result is not a 64-bit instruction, switch to it.
			if (!is64Bit(instructionIDWithREXW)) {
				insn->instructionID = instructionIDWithREXW;
				insn->spec = specifierForUID(instructionIDWithREXW);

				return 0;
			}
		}
	}

	/*
	 * Absolute moves, umonitor, and movdir64b need special handling.
	 * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
	 *  inverted w.r.t. 32-bit mode.
	 * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
	 *  any position.
	 */
	if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
			(insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
			(insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
		/* Make sure we observed the prefixes in any position. */
		if (insn->hasAdSize)
			attrMask |= ATTR_ADSIZE;

		if (insn->hasOpSize)
			attrMask |= ATTR_OPSIZE;

		/* In 16-bit, invert the attributes. */
		if (insn->mode == MODE_16BIT) {
			attrMask ^= ATTR_ADSIZE;

			/* The OpSize attribute is only valid with the absolute moves. */
			if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
				attrMask ^= ATTR_OPSIZE;
		}

		/* Unlike the other retries, a failure here is reported to the caller. */
		if (getIDWithAttrMask(&instructionID, insn, attrMask)) {
			return -1;
		}

		insn->instructionID = instructionID;
		insn->spec = specifierForUID(instructionID);

		return 0;
	}

	if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
			!(attrMask & ATTR_OPSIZE)) {
		/*
		 * The instruction tables make no distinction between instructions that
		 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
		 * particular spot (i.e., many MMX operations).  In general we're
		 * conservative, but in the specific case where OpSize is present but not
		 * in the right place we check if there's a 16-bit operation.
		 */
		const struct InstructionSpecifier *spec;
		uint16_t instructionIDWithOpsize;

		spec = specifierForUID(instructionID);

		if (getIDWithAttrMask(&instructionIDWithOpsize,
					insn,
					attrMask | ATTR_OPSIZE)) {
			/*
			 * ModRM required with OpSize but not present; give up and return version
			 * without OpSize set
			 */
			insn->instructionID = instructionID;
			insn->spec = spec;

			return 0;
		}

		/*
		 * Use the OpSize variant only when it is the registered 16-bit
		 * equivalent and exactly one of "16-bit mode" / "OpSize prefix
		 * present" holds.
		 */
		if (is16BitEquivalent(instructionID, instructionIDWithOpsize) &&
				(insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
			insn->instructionID = instructionIDWithOpsize;
			insn->spec = specifierForUID(instructionIDWithOpsize);
		} else {
			insn->instructionID = instructionID;
			insn->spec = spec;
		}

		return 0;
	}

	if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
			insn->rexPrefix & 0x01) {
		/*
		 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
		 * it should decode as XCHG %r8, %eax.
		 */
		const struct InstructionSpecifier *spec;
		uint16_t instructionIDWithNewOpcode;
		const struct InstructionSpecifier *specWithNewOpcode;

		spec = specifierForUID(instructionID);

		/* Borrow opcode from one of the other XCHGar opcodes */
		insn->opcode = 0x91;

		if (getIDWithAttrMask(&instructionIDWithNewOpcode, insn, attrMask)) {
			/* Lookup failed: restore the opcode and keep the original ID. */
			insn->opcode = 0x90;

			insn->instructionID = instructionID;
			insn->spec = spec;

			return 0;
		}

		specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);

		/* Change back */
		insn->opcode = 0x90;

		insn->instructionID = instructionIDWithNewOpcode;
		insn->spec = specWithNewOpcode;

		return 0;
	}

	/* No special case applied: use the result of the primary lookup. */
	insn->instructionID = instructionID;
	insn->spec = specifierForUID(insn->instructionID);

	return 0;
}
1301
1302 /*
1303 * readSIB - Consumes the SIB byte to determine addressing information for an
1304 * instruction.
1305 *
1306 * @param insn - The instruction whose SIB byte is to be read.
1307 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1308 */
readSIB(struct InternalInstruction * insn)1309 static int readSIB(struct InternalInstruction* insn)
1310 {
1311 SIBBase sibBaseBase = SIB_BASE_NONE;
1312 uint8_t index, base;
1313
1314 // dbgprintf(insn, "readSIB()");
1315
1316 if (insn->consumedSIB)
1317 return 0;
1318
1319 insn->consumedSIB = true;
1320
1321 switch (insn->addressSize) {
1322 case 2:
1323 // dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1324 return -1;
1325 case 4:
1326 insn->sibIndexBase = SIB_INDEX_EAX;
1327 sibBaseBase = SIB_BASE_EAX;
1328 break;
1329 case 8:
1330 insn->sibIndexBase = SIB_INDEX_RAX;
1331 sibBaseBase = SIB_BASE_RAX;
1332 break;
1333 }
1334
1335 if (consumeByte(insn, &insn->sib))
1336 return -1;
1337
1338 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1339
1340 if (index == 0x4) {
1341 insn->sibIndex = SIB_INDEX_NONE;
1342 } else {
1343 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
1344 }
1345
1346 insn->sibScale = 1 << scaleFromSIB(insn->sib);
1347
1348 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1349
1350 switch (base) {
1351 case 0x5:
1352 case 0xd:
1353 switch (modFromModRM(insn->modRM)) {
1354 case 0x0:
1355 insn->eaDisplacement = EA_DISP_32;
1356 insn->sibBase = SIB_BASE_NONE;
1357 break;
1358 case 0x1:
1359 insn->eaDisplacement = EA_DISP_8;
1360 insn->sibBase = (SIBBase)(sibBaseBase + base);
1361 break;
1362 case 0x2:
1363 insn->eaDisplacement = EA_DISP_32;
1364 insn->sibBase = (SIBBase)(sibBaseBase + base);
1365 break;
1366 case 0x3:
1367 // debug("Cannot have Mod = 0b11 and a SIB byte");
1368 return -1;
1369 }
1370 break;
1371 default:
1372 insn->sibBase = (SIBBase)(sibBaseBase + base);
1373 break;
1374 }
1375
1376 return 0;
1377 }
1378
1379 /*
1380 * readDisplacement - Consumes the displacement of an instruction.
1381 *
1382 * @param insn - The instruction whose displacement is to be read.
1383 * @return - 0 if the displacement byte was successfully read; nonzero
1384 * otherwise.
1385 */
readDisplacement(struct InternalInstruction * insn)1386 static int readDisplacement(struct InternalInstruction* insn)
1387 {
1388 int8_t d8;
1389 int16_t d16;
1390 int32_t d32;
1391
1392 // dbgprintf(insn, "readDisplacement()");
1393
1394 if (insn->consumedDisplacement)
1395 return 0;
1396
1397 insn->consumedDisplacement = true;
1398 insn->displacementOffset = insn->readerCursor - insn->startLocation;
1399
1400 switch (insn->eaDisplacement) {
1401 case EA_DISP_NONE:
1402 insn->consumedDisplacement = false;
1403 break;
1404 case EA_DISP_8:
1405 if (consumeInt8(insn, &d8))
1406 return -1;
1407 insn->displacement = d8;
1408 break;
1409 case EA_DISP_16:
1410 if (consumeInt16(insn, &d16))
1411 return -1;
1412 insn->displacement = d16;
1413 break;
1414 case EA_DISP_32:
1415 if (consumeInt32(insn, &d32))
1416 return -1;
1417 insn->displacement = d32;
1418 break;
1419 }
1420
1421
1422 return 0;
1423 }
1424
1425 /*
1426 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1427 * displacement) for an instruction and interprets it.
1428 *
1429 * @param insn - The instruction whose addressing information is to be read.
1430 * @return - 0 if the information was successfully read; nonzero otherwise.
1431 */
readModRM(struct InternalInstruction * insn)1432 static int readModRM(struct InternalInstruction* insn)
1433 {
1434 uint8_t mod, rm, reg, evexrm;
1435
1436 // dbgprintf(insn, "readModRM()");
1437
1438 if (insn->consumedModRM)
1439 return 0;
1440
1441 insn->modRMOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1442
1443 if (consumeByte(insn, &insn->modRM))
1444 return -1;
1445
1446 insn->consumedModRM = true;
1447
1448 // save original ModRM for later reference
1449 insn->orgModRM = insn->modRM;
1450
1451 // handle MOVcr, MOVdr, MOVrc, MOVrd by pretending they have MRM.mod = 3
1452 if ((insn->firstByte == 0x0f && insn->opcodeType == TWOBYTE) &&
1453 (insn->opcode >= 0x20 && insn->opcode <= 0x23 ))
1454 insn->modRM |= 0xC0;
1455
1456 mod = modFromModRM(insn->modRM);
1457 rm = rmFromModRM(insn->modRM);
1458 reg = regFromModRM(insn->modRM);
1459
1460 /*
1461 * This goes by insn->registerSize to pick the correct register, which messes
1462 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1463 * fixupReg().
1464 */
1465 switch (insn->registerSize) {
1466 case 2:
1467 insn->regBase = MODRM_REG_AX;
1468 insn->eaRegBase = EA_REG_AX;
1469 break;
1470 case 4:
1471 insn->regBase = MODRM_REG_EAX;
1472 insn->eaRegBase = EA_REG_EAX;
1473 break;
1474 case 8:
1475 insn->regBase = MODRM_REG_RAX;
1476 insn->eaRegBase = EA_REG_RAX;
1477 break;
1478 }
1479
1480 reg |= rFromREX(insn->rexPrefix) << 3;
1481 rm |= bFromREX(insn->rexPrefix) << 3;
1482
1483 evexrm = 0;
1484 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
1485 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1486 evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1487 }
1488
1489 insn->reg = (Reg)(insn->regBase + reg);
1490
1491 switch (insn->addressSize) {
1492 case 2: {
1493 EABase eaBaseBase = EA_BASE_BX_SI;
1494
1495 switch (mod) {
1496 case 0x0:
1497 if (rm == 0x6) {
1498 insn->eaBase = EA_BASE_NONE;
1499 insn->eaDisplacement = EA_DISP_16;
1500 if (readDisplacement(insn))
1501 return -1;
1502 } else {
1503 insn->eaBase = (EABase)(eaBaseBase + rm);
1504 insn->eaDisplacement = EA_DISP_NONE;
1505 }
1506 break;
1507 case 0x1:
1508 insn->eaBase = (EABase)(eaBaseBase + rm);
1509 insn->eaDisplacement = EA_DISP_8;
1510 insn->displacementSize = 1;
1511 if (readDisplacement(insn))
1512 return -1;
1513 break;
1514 case 0x2:
1515 insn->eaBase = (EABase)(eaBaseBase + rm);
1516 insn->eaDisplacement = EA_DISP_16;
1517 if (readDisplacement(insn))
1518 return -1;
1519 break;
1520 case 0x3:
1521 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1522 if (readDisplacement(insn))
1523 return -1;
1524 break;
1525 }
1526 break;
1527 }
1528
1529 case 4:
1530 case 8: {
1531 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1532
1533 switch (mod) {
1534 default: break;
1535 case 0x0:
1536 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1537 // In determining whether RIP-relative mode is used (rm=5),
1538 // or whether a SIB byte is present (rm=4),
1539 // the extension bits (REX.b and EVEX.x) are ignored.
1540 switch (rm & 7) {
1541 case 0x4: // SIB byte is present
1542 insn->eaBase = (insn->addressSize == 4 ?
1543 EA_BASE_sib : EA_BASE_sib64);
1544 if (readSIB(insn) || readDisplacement(insn))
1545 return -1;
1546 break;
1547 case 0x5: // RIP-relative
1548 insn->eaBase = EA_BASE_NONE;
1549 insn->eaDisplacement = EA_DISP_32;
1550 if (readDisplacement(insn))
1551 return -1;
1552 break;
1553 default:
1554 insn->eaBase = (EABase)(eaBaseBase + rm);
1555 break;
1556 }
1557 break;
1558 case 0x1:
1559 insn->displacementSize = 1;
1560 /* FALLTHROUGH */
1561 case 0x2:
1562 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1563 switch (rm & 7) {
1564 case 0x4: // SIB byte is present
1565 insn->eaBase = EA_BASE_sib;
1566 if (readSIB(insn) || readDisplacement(insn))
1567 return -1;
1568 break;
1569 default:
1570 insn->eaBase = (EABase)(eaBaseBase + rm);
1571 if (readDisplacement(insn))
1572 return -1;
1573 break;
1574 }
1575 break;
1576 case 0x3:
1577 insn->eaDisplacement = EA_DISP_NONE;
1578 insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
1579 break;
1580 }
1581
1582 break;
1583 }
1584 } /* switch (insn->addressSize) */
1585
1586 return 0;
1587 }
1588
/*
 * GENERIC_FIXUP_FUNC - Defines a static function that maps a register index
 *   to the register enumerator appropriate for a given operand type.
 *
 * @param name   - Name of the function to define.
 * @param base   - Expression added to the index for TYPE_Rv operands.
 * @param prefix - Token pasted in front of the register enumerator names
 *                 (e.g. prefix##_AL, prefix##_XMM0).
 * @param mask   - Mask applied to the index for the R8/R16/R32/R64 classes
 *                 before it is checked against 0xf.
 *
 * The generated function sets *valid to 1 up front and clears it when the
 * operand type is not handled or the index is out of range for its class.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
	static uint16_t name(struct InternalInstruction *insn, \
			OperandType type, \
			uint8_t index, \
			uint8_t *valid) { \
		*valid = 1; \
		switch (type) { \
			default: \
				*valid = 0; \
				return 0; \
			case TYPE_Rv: \
				return base + index; \
			case TYPE_R8: \
				index &= mask; \
				if (index > 0xf) \
					*valid = 0; \
				if (insn->rexPrefix && \
						index >= 4 && index <= 7) { \
					return prefix##_SPL + (index - 4); \
				} else { \
					return prefix##_AL + index; \
				} \
			case TYPE_R16: \
				index &= mask; \
				if (index > 0xf) \
					*valid = 0; \
				return prefix##_AX + index; \
			case TYPE_R32: \
				index &= mask; \
				if (index > 0xf) \
					*valid = 0; \
				return prefix##_EAX + index; \
			case TYPE_R64: \
				index &= mask; \
				if (index > 0xf) \
					*valid = 0; \
				return prefix##_RAX + index; \
			case TYPE_ZMM: \
				return prefix##_ZMM0 + index; \
			case TYPE_YMM: \
				return prefix##_YMM0 + index; \
			case TYPE_XMM: \
				return prefix##_XMM0 + index; \
			case TYPE_VK: \
				index &= 0xf; \
				if (index > 7) \
					*valid = 0; \
				return prefix##_K0 + index; \
			case TYPE_MM64: \
				return prefix##_MM0 + (index & 0x7); \
			case TYPE_SEGMENTREG: \
				if ((index & 7) > 5) \
					*valid = 0; \
				return prefix##_ES + (index & 7); \
			case TYPE_DEBUGREG: \
				return prefix##_DR0 + index; \
			case TYPE_CONTROLREG: \
				return prefix##_CR0 + index; \
			case TYPE_BNDR: \
				if (index > 3) \
					*valid = 0; \
				return prefix##_BND0 + index; \
			case TYPE_MVSIBX: \
				return prefix##_XMM0 + index; \
			case TYPE_MVSIBY: \
				return prefix##_YMM0 + index; \
			case TYPE_MVSIBZ: \
				return prefix##_ZMM0 + index; \
		} \
	}

/*
 * fixup*Value - Consults an operand type to determine the meaning of the
 *   reg or R/M field.  If the operand is an XMM operand, for example, the
 *   operand should be XMM0 rather than AX, which is what readModRM() would
 *   otherwise have interpreted it as.
 *
 * @param insn  - The instruction containing the operand.
 * @param type  - The operand type.
 * @param index - The existing value of the field as reported by readModRM().
 * @param valid - The address of a uint8_t.  The target is set to 1 if the
 *                field is valid for the register class; 0 if not.
 * @return      - The proper value.
 */
/* reg/vvvv variant: indices are masked to 5 bits for the GPR classes. */
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
/* r/m variant: indices are masked to 4 bits for the GPR classes. */
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
1675
1676 /*
1677 * fixupReg - Consults an operand specifier to determine which of the
1678 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1679 *
1680 * @param insn - See fixup*Value().
1681 * @param op - The operand specifier.
1682 * @return - 0 if fixup was successful; -1 if the register returned was
1683 * invalid for its class.
1684 */
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)1685 static int fixupReg(struct InternalInstruction *insn,
1686 const struct OperandSpecifier *op)
1687 {
1688 uint8_t valid;
1689
1690 switch ((OperandEncoding)op->encoding) {
1691 default:
1692 // debug("Expected a REG or R/M encoding in fixupReg");
1693 return -1;
1694 case ENCODING_VVVV:
1695 insn->vvvv = (Reg)fixupRegValue(insn,
1696 (OperandType)op->type,
1697 insn->vvvv,
1698 &valid);
1699 if (!valid)
1700 return -1;
1701 break;
1702 case ENCODING_REG:
1703 insn->reg = (Reg)fixupRegValue(insn,
1704 (OperandType)op->type,
1705 insn->reg - insn->regBase,
1706 &valid);
1707 if (!valid)
1708 return -1;
1709 break;
1710 CASE_ENCODING_RM:
1711 if (insn->eaBase >= insn->eaRegBase) {
1712 insn->eaBase = (EABase)fixupRMValue(insn,
1713 (OperandType)op->type,
1714 insn->eaBase - insn->eaRegBase,
1715 &valid);
1716 if (!valid)
1717 return -1;
1718 }
1719 break;
1720 }
1721
1722 return 0;
1723 }
1724
1725 /*
1726 * readOpcodeRegister - Reads an operand from the opcode field of an
1727 * instruction and interprets it appropriately given the operand width.
1728 * Handles AddRegFrm instructions.
1729 *
1730 * @param insn - the instruction whose opcode field is to be read.
1731 * @param size - The width (in bytes) of the register being specified.
1732 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1733 * RAX.
1734 * @return - 0 on success; nonzero otherwise.
1735 */
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)1736 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size)
1737 {
1738 if (size == 0)
1739 size = insn->registerSize;
1740
1741 switch (size) {
1742 case 1:
1743 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1744 | (insn->opcode & 7)));
1745 if (insn->rexPrefix &&
1746 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1747 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1748 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1749 + (insn->opcodeRegister - MODRM_REG_AL - 4));
1750 }
1751
1752 break;
1753 case 2:
1754 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1755 + ((bFromREX(insn->rexPrefix) << 3)
1756 | (insn->opcode & 7)));
1757 break;
1758 case 4:
1759 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1760 + ((bFromREX(insn->rexPrefix) << 3)
1761 | (insn->opcode & 7)));
1762 break;
1763 case 8:
1764 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1765 + ((bFromREX(insn->rexPrefix) << 3)
1766 | (insn->opcode & 7)));
1767 break;
1768 }
1769
1770 return 0;
1771 }
1772
1773 /*
1774 * readImmediate - Consumes an immediate operand from an instruction, given the
1775 * desired operand size.
1776 *
1777 * @param insn - The instruction whose operand is to be read.
1778 * @param size - The width (in bytes) of the operand.
1779 * @return - 0 if the immediate was successfully consumed; nonzero
1780 * otherwise.
1781 */
readImmediate(struct InternalInstruction * insn,uint8_t size)1782 static int readImmediate(struct InternalInstruction* insn, uint8_t size)
1783 {
1784 uint8_t imm8;
1785 uint16_t imm16;
1786 uint32_t imm32;
1787 uint64_t imm64;
1788
1789 if (insn->numImmediatesConsumed == 2) {
1790 // debug("Already consumed two immediates");
1791 return -1;
1792 }
1793
1794 if (size == 0)
1795 size = insn->immediateSize;
1796 else
1797 insn->immediateSize = size;
1798
1799 insn->immediateOffset = insn->readerCursor - insn->startLocation;
1800
1801 switch (size) {
1802 case 1:
1803 if (consumeByte(insn, &imm8))
1804 return -1;
1805
1806 insn->immediates[insn->numImmediatesConsumed] = imm8;
1807 break;
1808 case 2:
1809 if (consumeUInt16(insn, &imm16))
1810 return -1;
1811
1812 insn->immediates[insn->numImmediatesConsumed] = imm16;
1813 break;
1814 case 4:
1815 if (consumeUInt32(insn, &imm32))
1816 return -1;
1817
1818 insn->immediates[insn->numImmediatesConsumed] = imm32;
1819 break;
1820 case 8:
1821 if (consumeUInt64(insn, &imm64))
1822 return -1;
1823 insn->immediates[insn->numImmediatesConsumed] = imm64;
1824 break;
1825 }
1826
1827 insn->numImmediatesConsumed++;
1828
1829 return 0;
1830 }
1831
1832 /*
1833 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1834 *
1835 * @param insn - The instruction whose operand is to be read.
1836 * @return - 0 if the vvvv was successfully consumed; nonzero
1837 * otherwise.
1838 */
readVVVV(struct InternalInstruction * insn)1839 static int readVVVV(struct InternalInstruction* insn)
1840 {
1841 int vvvv;
1842
1843 if (insn->vectorExtensionType == TYPE_EVEX)
1844 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1845 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1846 else if (insn->vectorExtensionType == TYPE_VEX_3B)
1847 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1848 else if (insn->vectorExtensionType == TYPE_VEX_2B)
1849 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1850 else if (insn->vectorExtensionType == TYPE_XOP)
1851 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1852 else
1853 return -1;
1854
1855 if (insn->mode != MODE_64BIT)
1856 vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1857
1858 insn->vvvv = (Reg)vvvv;
1859
1860 return 0;
1861 }
1862
1863 /*
1864 * readMaskRegister - Reads an mask register from the opcode field of an
1865 * instruction.
1866 *
1867 * @param insn - The instruction whose opcode field is to be read.
1868 * @return - 0 on success; nonzero otherwise.
1869 */
readMaskRegister(struct InternalInstruction * insn)1870 static int readMaskRegister(struct InternalInstruction* insn)
1871 {
1872 if (insn->vectorExtensionType != TYPE_EVEX)
1873 return -1;
1874
1875 insn->writemask = (Reg)(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1876
1877 return 0;
1878 }
1879
/*
 * readOperands - Consults the specifier for an instruction and consumes all
 *   operands for that instruction, interpreting them as it goes.
 *
 * Walks the X86_MAX_OPERANDS operand specifiers selected by insn->spec and,
 * per encoding, reads the ModR/M information, immediates, opcode-embedded
 * registers, the vvvv field or the write mask.  Decoding fails if the prefix
 * carries a non-zero vvvv value that no operand uses.
 *
 * @param insn  - The instruction whose operands are to be read and interpreted.
 * @return      - 0 if all operands could be read; nonzero otherwise.
 */
static int readOperands(struct InternalInstruction* insn)
{
	int hasVVVV, needVVVV;
	/* Set once an 8-bit immediate that encodes an XMM/YMM register was read. */
	int sawRegImm = 0;
	int i;

	/* If non-zero vvvv specified, need to make sure one of the operands
	   uses it. */
	hasVVVV = !readVVVV(insn);
	needVVVV = hasVVVV && (insn->vvvv != 0);

	for (i = 0; i < X86_MAX_OPERANDS; ++i) {
		const OperandSpecifier *op = &x86OperandSets[insn->spec->operands][i];
		switch (op->encoding) {
			case ENCODING_NONE:
			case ENCODING_SI:
			case ENCODING_DI:
				/* Nothing to read for these encodings. */
				break;

			CASE_ENCODING_VSIB:
				// VSIB can use the V2 bit so check only the other bits.
				if (needVVVV)
					needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);

				if (readModRM(insn))
					return -1;

				// Reject if SIB wasn't used.
				if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
					return -1;

				// If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
				if (insn->sibIndex == SIB_INDEX_NONE)
					insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);

				// If EVEX.v2 is set this is one of the 16-31 registers.
				if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
						v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
					insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);

				// Adjust the index register to the correct size.
				switch (op->type) {
					default:
						// Unhandled VSIB index type
						return -1;
					case TYPE_MVSIBX:
						insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
								(insn->sibIndex - insn->sibIndexBase));
						break;
					case TYPE_MVSIBY:
						insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
								(insn->sibIndex - insn->sibIndexBase));
						break;
					case TYPE_MVSIBZ:
						insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
								(insn->sibIndex - insn->sibIndexBase));
						break;
				}

				// Apply the AVX512 compressed displacement scaling factor:
				// an 8-bit displacement is multiplied by a power of two given
				// by the encoding's distance from ENCODING_VSIB.
				if (op->encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
					insn->displacement *= 1 << (op->encoding - ENCODING_VSIB);
				break;

			case ENCODING_REG:
			CASE_ENCODING_RM:
				if (readModRM(insn))
					return -1;

				/* Reinterpret the register according to the operand type. */
				if (fixupReg(insn, op))
					return -1;

				// Apply the AVX512 compressed displacement scaling factor:
				// an 8-bit displacement is multiplied by a power of two given
				// by the encoding's distance from ENCODING_RM.
				if (op->encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
					insn->displacement *= 1 << (op->encoding - ENCODING_RM);
				break;

			case ENCODING_IB:
				if (sawRegImm) {
					/* Saw a register immediate so don't read again and instead split the
					   previous immediate: its low four bits become the next immediate.
					   FIXME: This is a hack. */
					insn->immediates[insn->numImmediatesConsumed] =
						insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
					++insn->numImmediatesConsumed;
					break;
				}
				if (readImmediate(insn, 1))
					return -1;
				if (op->type == TYPE_XMM || op->type == TYPE_YMM)
					sawRegImm = 1;
				break;

			case ENCODING_IW:
				if (readImmediate(insn, 2))
					return -1;
				break;

			case ENCODING_ID:
				if (readImmediate(insn, 4))
					return -1;
				break;

			case ENCODING_IO:
				if (readImmediate(insn, 8))
					return -1;
				break;

			case ENCODING_Iv:
				/* Immediate whose width is insn->immediateSize. */
				if (readImmediate(insn, insn->immediateSize))
					return -1;
				break;

			case ENCODING_Ia:
				/* Immediate whose width is insn->addressSize. */
				if (readImmediate(insn, insn->addressSize))
					return -1;
				break;

			case ENCODING_IRC:
				/* RC is built from the EVEX L2 (high bit) and L (low bit) fields. */
				insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
					lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
				break;

			case ENCODING_RB:
				if (readOpcodeRegister(insn, 1))
					return -1;
				break;

			case ENCODING_RW:
				if (readOpcodeRegister(insn, 2))
					return -1;
				break;

			case ENCODING_RD:
				if (readOpcodeRegister(insn, 4))
					return -1;
				break;

			case ENCODING_RO:
				if (readOpcodeRegister(insn, 8))
					return -1;
				break;

			case ENCODING_Rv:
				/* Width 0 makes readOpcodeRegister() use insn->registerSize. */
				if (readOpcodeRegister(insn, 0))
					return -1;
				break;

			case ENCODING_FP:
				break;

			case ENCODING_VVVV:
				if (!hasVVVV)
					return -1;

				needVVVV = 0; /* Mark that we have found a VVVV operand. */

				/* Outside 64-bit mode only the low three bits are kept. */
				if (insn->mode != MODE_64BIT)
					insn->vvvv = (Reg)(insn->vvvv & 0x7);

				if (fixupReg(insn, op))
					return -1;
				break;

			case ENCODING_WRITEMASK:
				if (readMaskRegister(insn))
					return -1;
				break;

			case ENCODING_DUP:
				break;

			default:
				// Encountered an operand with an unknown encoding.
				return -1;
		}
	}

	/* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
	if (needVVVV)
		return -1;

	return 0;
}
2070
// Returns true if the instruction is illegal to use with its prefixes.
// This also checks and fixes isPrefixNN when a prefix is irrelevant.
checkPrefix(struct InternalInstruction * insn)2073 static bool checkPrefix(struct InternalInstruction *insn)
2074 {
2075 // LOCK prefix
2076 if (insn->hasLockPrefix) {
2077 switch(insn->instructionID) {
2078 default:
2079 // invalid LOCK
2080 return true;
2081
2082 // nop dword [rax]
2083 case X86_NOOPL:
2084
2085 // DEC
2086 case X86_DEC16m:
2087 case X86_DEC32m:
2088 case X86_DEC64m:
2089 case X86_DEC8m:
2090
2091 // ADC
2092 case X86_ADC16mi:
2093 case X86_ADC16mi8:
2094 case X86_ADC16mr:
2095 case X86_ADC32mi:
2096 case X86_ADC32mi8:
2097 case X86_ADC32mr:
2098 case X86_ADC64mi32:
2099 case X86_ADC64mi8:
2100 case X86_ADC64mr:
2101 case X86_ADC8mi:
2102 case X86_ADC8mi8:
2103 case X86_ADC8mr:
2104 case X86_ADC8rm:
2105 case X86_ADC16rm:
2106 case X86_ADC32rm:
2107 case X86_ADC64rm:
2108
2109 // ADD
2110 case X86_ADD16mi:
2111 case X86_ADD16mi8:
2112 case X86_ADD16mr:
2113 case X86_ADD32mi:
2114 case X86_ADD32mi8:
2115 case X86_ADD32mr:
2116 case X86_ADD64mi32:
2117 case X86_ADD64mi8:
2118 case X86_ADD64mr:
2119 case X86_ADD8mi:
2120 case X86_ADD8mi8:
2121 case X86_ADD8mr:
2122 case X86_ADD8rm:
2123 case X86_ADD16rm:
2124 case X86_ADD32rm:
2125 case X86_ADD64rm:
2126
2127 // AND
2128 case X86_AND16mi:
2129 case X86_AND16mi8:
2130 case X86_AND16mr:
2131 case X86_AND32mi:
2132 case X86_AND32mi8:
2133 case X86_AND32mr:
2134 case X86_AND64mi32:
2135 case X86_AND64mi8:
2136 case X86_AND64mr:
2137 case X86_AND8mi:
2138 case X86_AND8mi8:
2139 case X86_AND8mr:
2140 case X86_AND8rm:
2141 case X86_AND16rm:
2142 case X86_AND32rm:
2143 case X86_AND64rm:
2144
2145 // BTC
2146 case X86_BTC16mi8:
2147 case X86_BTC16mr:
2148 case X86_BTC32mi8:
2149 case X86_BTC32mr:
2150 case X86_BTC64mi8:
2151 case X86_BTC64mr:
2152
2153 // BTR
2154 case X86_BTR16mi8:
2155 case X86_BTR16mr:
2156 case X86_BTR32mi8:
2157 case X86_BTR32mr:
2158 case X86_BTR64mi8:
2159 case X86_BTR64mr:
2160
2161 // BTS
2162 case X86_BTS16mi8:
2163 case X86_BTS16mr:
2164 case X86_BTS32mi8:
2165 case X86_BTS32mr:
2166 case X86_BTS64mi8:
2167 case X86_BTS64mr:
2168
2169 // CMPXCHG
2170 case X86_CMPXCHG16B:
2171 case X86_CMPXCHG16rm:
2172 case X86_CMPXCHG32rm:
2173 case X86_CMPXCHG64rm:
2174 case X86_CMPXCHG8rm:
2175 case X86_CMPXCHG8B:
2176
2177 // INC
2178 case X86_INC16m:
2179 case X86_INC32m:
2180 case X86_INC64m:
2181 case X86_INC8m:
2182
2183 // NEG
2184 case X86_NEG16m:
2185 case X86_NEG32m:
2186 case X86_NEG64m:
2187 case X86_NEG8m:
2188
2189 // NOT
2190 case X86_NOT16m:
2191 case X86_NOT32m:
2192 case X86_NOT64m:
2193 case X86_NOT8m:
2194
2195 // OR
2196 case X86_OR16mi:
2197 case X86_OR16mi8:
2198 case X86_OR16mr:
2199 case X86_OR32mi:
2200 case X86_OR32mi8:
2201 case X86_OR32mr:
2202 case X86_OR64mi32:
2203 case X86_OR64mi8:
2204 case X86_OR64mr:
2205 case X86_OR8mi8:
2206 case X86_OR8mi:
2207 case X86_OR8mr:
2208 case X86_OR8rm:
2209 case X86_OR16rm:
2210 case X86_OR32rm:
2211 case X86_OR64rm:
2212
2213 // SBB
2214 case X86_SBB16mi:
2215 case X86_SBB16mi8:
2216 case X86_SBB16mr:
2217 case X86_SBB32mi:
2218 case X86_SBB32mi8:
2219 case X86_SBB32mr:
2220 case X86_SBB64mi32:
2221 case X86_SBB64mi8:
2222 case X86_SBB64mr:
2223 case X86_SBB8mi:
2224 case X86_SBB8mi8:
2225 case X86_SBB8mr:
2226
2227 // SUB
2228 case X86_SUB16mi:
2229 case X86_SUB16mi8:
2230 case X86_SUB16mr:
2231 case X86_SUB32mi:
2232 case X86_SUB32mi8:
2233 case X86_SUB32mr:
2234 case X86_SUB64mi32:
2235 case X86_SUB64mi8:
2236 case X86_SUB64mr:
2237 case X86_SUB8mi8:
2238 case X86_SUB8mi:
2239 case X86_SUB8mr:
2240 case X86_SUB8rm:
2241 case X86_SUB16rm:
2242 case X86_SUB32rm:
2243 case X86_SUB64rm:
2244
2245 // XADD
2246 case X86_XADD16rm:
2247 case X86_XADD32rm:
2248 case X86_XADD64rm:
2249 case X86_XADD8rm:
2250
2251 // XCHG
2252 case X86_XCHG16rm:
2253 case X86_XCHG32rm:
2254 case X86_XCHG64rm:
2255 case X86_XCHG8rm:
2256
2257 // XOR
2258 case X86_XOR16mi:
2259 case X86_XOR16mi8:
2260 case X86_XOR16mr:
2261 case X86_XOR32mi:
2262 case X86_XOR32mi8:
2263 case X86_XOR32mr:
2264 case X86_XOR64mi32:
2265 case X86_XOR64mi8:
2266 case X86_XOR64mr:
2267 case X86_XOR8mi8:
2268 case X86_XOR8mi:
2269 case X86_XOR8mr:
2270 case X86_XOR8rm:
2271 case X86_XOR16rm:
2272 case X86_XOR32rm:
2273 case X86_XOR64rm:
2274
2275 // this instruction can be used with LOCK prefix
2276 return false;
2277 }
2278 }
2279
2280 #if 0
2281 // REPNE prefix
2282 if (insn->repeatPrefix) {
2283 // 0xf2 can be a part of instruction encoding, but not really a prefix.
2284 // In such a case, clear it.
2285 if (insn->twoByteEscape == 0x0f) {
2286 insn->prefix0 = 0;
2287 }
2288 }
2289 #endif
2290
2291 // no invalid prefixes
2292 return false;
2293 }
2294
2295 /*
2296 * decodeInstruction - Reads and interprets a full instruction provided by the
2297 * user.
2298 *
2299 * @param insn - A pointer to the instruction to be populated. Must be
2300 * pre-allocated.
2301 * @param reader - The function to be used to read the instruction's bytes.
2302 * @param readerArg - A generic argument to be passed to the reader to store
2303 * any internal state.
2304 * @param startLoc - The address (in the reader's address space) of the first
2305 * byte in the instruction.
2306 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
2307 * decode the instruction in.
2308 * @return - 0 if instruction is valid; nonzero if not.
2309 */
decodeInstruction(struct InternalInstruction * insn,byteReader_t reader,const void * readerArg,uint64_t startLoc,DisassemblerMode mode)2310 int decodeInstruction(struct InternalInstruction *insn,
2311 byteReader_t reader,
2312 const void *readerArg,
2313 uint64_t startLoc,
2314 DisassemblerMode mode)
2315 {
2316 insn->reader = reader;
2317 insn->readerArg = readerArg;
2318 insn->startLocation = startLoc;
2319 insn->readerCursor = startLoc;
2320 insn->mode = mode;
2321 insn->numImmediatesConsumed = 0;
2322
2323 if (readPrefixes(insn) ||
2324 readOpcode(insn) ||
2325 getID(insn) ||
2326 insn->instructionID == 0 ||
2327 checkPrefix(insn) ||
2328 readOperands(insn))
2329 return -1;
2330
2331 insn->length = (size_t)(insn->readerCursor - insn->startLocation);
2332
2333 // instruction length must be <= 15 to be valid
2334 if (insn->length > 15)
2335 return -1;
2336
2337 if (insn->operandSize == 0)
2338 insn->operandSize = insn->registerSize;
2339
2340 insn->operands = &x86OperandSets[insn->spec->operands][0];
2341
2342 return 0;
2343 }
2344
2345 #endif
2346
2347