1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
2  *
3  *                     The LLVM Compiler Infrastructure
4  *
5  * This file is distributed under the University of Illinois Open Source
6  * License. See LICENSE.TXT for details.
7  *
8  *===----------------------------------------------------------------------===*
9  *
10  * This file is part of the X86 Disassembler.
11  * It contains the implementation of the instruction decoder.
12  * Documentation for the disassembler can be found in X86Disassembler.h.
13  *
14  *===----------------------------------------------------------------------===*/
15 
16 /* Capstone Disassembly Engine */
17 /* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2015 */
18 
19 #ifdef CAPSTONE_HAS_X86
20 
21 #include <stdarg.h>   /* for va_*()       */
22 #if defined(CAPSTONE_HAS_OSXKERNEL)
23 #include <libkern/libkern.h>
24 #else
25 #include <stdlib.h>   /* for exit()       */
26 #endif
27 
28 #include "../../cs_priv.h"
29 #include "../../utils.h"
30 
31 #include "X86DisassemblerDecoder.h"
32 
/// Specifies whether a ModR/M byte is needed and (if so) which
/// instruction each possible value of the ModR/M byte corresponds to.  Once
/// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
	/// How the ModR/M byte selects among the candidate instructions; decode()
	/// switches on this value using the MODRM_* constants.
	uint8_t modrm_type;
	/// Base offset into modRMTable for this decision.  Despite the plural
	/// name this is a single index; decode() adds a ModR/M-derived offset.
	uint16_t instructionIDs;
};
40 
/// Specifies which set of ModR/M->instruction tables to look at
/// given a particular opcode.
struct OpcodeDecision {
	/// One entry per possible opcode byte value; indexed by the opcode byte.
	struct ModRMDecision modRMDecisions[256];
};
46 
/// Specifies which opcode->instruction tables to look at given
/// a particular context (set of attributes).  Since there are many possible
/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
/// applies given a specific set of attributes.  Hence there are only IC_max
/// entries in this table, rather than 2^(ATTR_max).
struct ContextDecision {
	/// One opcode table per instruction context.
	struct OpcodeDecision opcodeDecisions[IC_max];
};
55 
56 #ifdef CAPSTONE_X86_REDUCE
57 #include "X86GenDisassemblerTables_reduce.inc"
58 #else
59 #include "X86GenDisassemblerTables.inc"
60 #endif
61 
62 //#define GET_INSTRINFO_ENUM
63 #define GET_INSTRINFO_MC_DESC
64 #ifdef CAPSTONE_X86_REDUCE
65 #include "X86GenInstrInfo_reduce.inc"
66 #else
67 #include "X86GenInstrInfo.inc"
68 #endif
69 
70 /*
71  * contextForAttrs - Client for the instruction context table.  Takes a set of
72  *   attributes and returns the appropriate decode context.
73  *
74  * @param attrMask  - Attributes, from the enumeration attributeBits.
75  * @return          - The InstructionContext to use when looking up an
76  *                    an instruction with these attributes.
77  */
contextForAttrs(uint16_t attrMask)78 static InstructionContext contextForAttrs(uint16_t attrMask)
79 {
80 	return CONTEXTS_SYM[attrMask];
81 }
82 
83 /*
84  * modRMRequired - Reads the appropriate instruction table to determine whether
85  *   the ModR/M byte is required to decode a particular instruction.
86  *
87  * @param type        - The opcode type (i.e., how many bytes it has).
88  * @param insnContext - The context for the instruction, as returned by
89  *                      contextForAttrs.
90  * @param opcode      - The last byte of the instruction's opcode, not counting
91  *                      ModR/M extensions and escapes.
92  * @return            - true if the ModR/M byte is required, false otherwise.
93  */
modRMRequired(OpcodeType type,InstructionContext insnContext,uint16_t opcode)94 static int modRMRequired(OpcodeType type,
95 		InstructionContext insnContext,
96 		uint16_t opcode)
97 {
98 	const struct OpcodeDecision *decision = NULL;
99 	const uint8_t *indextable = NULL;
100 	uint8_t index;
101 
102 	switch (type) {
103 		default:
104 		case ONEBYTE:
105 			decision = ONEBYTE_SYM;
106 			indextable = index_x86DisassemblerOneByteOpcodes;
107 			break;
108 		case TWOBYTE:
109 			decision = TWOBYTE_SYM;
110 			indextable = index_x86DisassemblerTwoByteOpcodes;
111 			break;
112 		case THREEBYTE_38:
113 			decision = THREEBYTE38_SYM;
114 			indextable = index_x86DisassemblerThreeByte38Opcodes;
115 			break;
116 		case THREEBYTE_3A:
117 			decision = THREEBYTE3A_SYM;
118 			indextable = index_x86DisassemblerThreeByte3AOpcodes;
119 			break;
120 #ifndef CAPSTONE_X86_REDUCE
121 		case XOP8_MAP:
122 			decision = XOP8_MAP_SYM;
123 			indextable = index_x86DisassemblerXOP8Opcodes;
124 			break;
125 		case XOP9_MAP:
126 			decision = XOP9_MAP_SYM;
127 			indextable = index_x86DisassemblerXOP9Opcodes;
128 			break;
129 		case XOPA_MAP:
130 			decision = XOPA_MAP_SYM;
131 			indextable = index_x86DisassemblerXOPAOpcodes;
132 			break;
133 		case T3DNOW_MAP:
134 			// 3DNow instructions always have ModRM byte
135 			return true;
136 #endif
137 	}
138 
139 	index = indextable[insnContext];
140 	if (index)
141 		return decision[index - 1].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY;
142 	else
143 		return false;
144 }
145 
146 /*
147  * decode - Reads the appropriate instruction table to obtain the unique ID of
148  *   an instruction.
149  *
150  * @param type        - See modRMRequired().
151  * @param insnContext - See modRMRequired().
152  * @param opcode      - See modRMRequired().
153  * @param modRM       - The ModR/M byte if required, or any value if not.
154  * @return            - The UID of the instruction, or 0 on failure.
155  */
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)156 static InstrUID decode(OpcodeType type,
157 		InstructionContext insnContext,
158 		uint8_t opcode,
159 		uint8_t modRM)
160 {
161 	const struct ModRMDecision *dec = NULL;
162 	const uint8_t *indextable = NULL;
163 	uint8_t index;
164 
165 	switch (type) {
166 		default:
167 		case ONEBYTE:
168 			indextable = index_x86DisassemblerOneByteOpcodes;
169 			index = indextable[insnContext];
170 			if (index)
171 				dec = &ONEBYTE_SYM[index - 1].modRMDecisions[opcode];
172 			else
173 				dec = &emptyTable.modRMDecisions[opcode];
174 			break;
175 		case TWOBYTE:
176 			indextable = index_x86DisassemblerTwoByteOpcodes;
177 			index = indextable[insnContext];
178 			if (index)
179 				dec = &TWOBYTE_SYM[index - 1].modRMDecisions[opcode];
180 			else
181 				dec = &emptyTable.modRMDecisions[opcode];
182 			break;
183 		case THREEBYTE_38:
184 			indextable = index_x86DisassemblerThreeByte38Opcodes;
185 			index = indextable[insnContext];
186 			if (index)
187 				dec = &THREEBYTE38_SYM[index - 1].modRMDecisions[opcode];
188 			else
189 				dec = &emptyTable.modRMDecisions[opcode];
190 			break;
191 		case THREEBYTE_3A:
192 			indextable = index_x86DisassemblerThreeByte3AOpcodes;
193 			index = indextable[insnContext];
194 			if (index)
195 				dec = &THREEBYTE3A_SYM[index - 1].modRMDecisions[opcode];
196 			else
197 				dec = &emptyTable.modRMDecisions[opcode];
198 			break;
199 #ifndef CAPSTONE_X86_REDUCE
200 		case XOP8_MAP:
201 			indextable = index_x86DisassemblerXOP8Opcodes;
202 			index = indextable[insnContext];
203 			if (index)
204 				dec = &XOP8_MAP_SYM[index - 1].modRMDecisions[opcode];
205 			else
206 				dec = &emptyTable.modRMDecisions[opcode];
207 			break;
208 		case XOP9_MAP:
209 			indextable = index_x86DisassemblerXOP9Opcodes;
210 			index = indextable[insnContext];
211 			if (index)
212 				dec = &XOP9_MAP_SYM[index - 1].modRMDecisions[opcode];
213 			else
214 				dec = &emptyTable.modRMDecisions[opcode];
215 			break;
216 		case XOPA_MAP:
217 			indextable = index_x86DisassemblerXOPAOpcodes;
218 			index = indextable[insnContext];
219 			if (index)
220 				dec = &XOPA_MAP_SYM[index - 1].modRMDecisions[opcode];
221 			else
222 				dec = &emptyTable.modRMDecisions[opcode];
223 			break;
224 		case T3DNOW_MAP:
225 			indextable = index_x86DisassemblerT3DNOWOpcodes;
226 			index = indextable[insnContext];
227 			if (index)
228 				dec = &T3DNOW_MAP_SYM[index - 1].modRMDecisions[opcode];
229 			else
230 				dec = &emptyTable.modRMDecisions[opcode];
231 			break;
232 #endif
233 	}
234 
235 	switch (dec->modrm_type) {
236 		default:
237 			//debug("Corrupt table!  Unknown modrm_type");
238 			return 0;
239 		case MODRM_ONEENTRY:
240 			return modRMTable[dec->instructionIDs];
241 		case MODRM_SPLITRM:
242 			if (modFromModRM(modRM) == 0x3)
243 				return modRMTable[dec->instructionIDs+1];
244 			return modRMTable[dec->instructionIDs];
245 		case MODRM_SPLITREG:
246 			if (modFromModRM(modRM) == 0x3)
247 				return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
248 			return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
249 		case MODRM_SPLITMISC:
250 			if (modFromModRM(modRM) == 0x3)
251 				return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
252 			return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
253 		case MODRM_FULL:
254 			return modRMTable[dec->instructionIDs+modRM];
255 	}
256 }
257 
258 /*
259  * specifierForUID - Given a UID, returns the name and operand specification for
260  *   that instruction.
261  *
262  * @param uid - The unique ID for the instruction.  This should be returned by
263  *              decode(); specifierForUID will not check bounds.
264  * @return    - A pointer to the specification for that instruction.
265  */
specifierForUID(InstrUID uid)266 static const struct InstructionSpecifier *specifierForUID(InstrUID uid)
267 {
268 	return &INSTRUCTIONS_SYM[uid];
269 }
270 
271 /*
272  * consumeByte - Uses the reader function provided by the user to consume one
273  *   byte from the instruction's memory and advance the cursor.
274  *
275  * @param insn  - The instruction with the reader function to use.  The cursor
276  *                for this instruction is advanced.
277  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
278  *                with the data read.
279  * @return      - 0 if the read was successful; nonzero otherwise.
280  */
consumeByte(struct InternalInstruction * insn,uint8_t * byte)281 static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
282 {
283 	int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
284 
285 	if (!ret)
286 		++(insn->readerCursor);
287 
288 	return ret;
289 }
290 
291 /*
292  * lookAtByte - Like consumeByte, but does not advance the cursor.
293  *
294  * @param insn  - See consumeByte().
295  * @param byte  - See consumeByte().
296  * @return      - See consumeByte().
297  */
lookAtByte(struct InternalInstruction * insn,uint8_t * byte)298 static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
299 {
300 	return insn->reader(insn->readerArg, byte, insn->readerCursor);
301 }
302 
unconsumeByte(struct InternalInstruction * insn)303 static void unconsumeByte(struct InternalInstruction *insn)
304 {
305 	insn->readerCursor--;
306 }
307 
/*
 * CONSUME_FUNC - Defines a static function `name` that reads sizeof(type)
 *   bytes starting at the cursor and combines them into a value of `type`,
 *   with the first byte read becoming the least significant one.  The cursor
 *   is advanced and *ptr written only if every byte could be read; on a failed
 *   read the reader's nonzero status is returned and nothing is modified.
 */
#define CONSUME_FUNC(name, type)                                        \
	static int name(struct InternalInstruction *insn, type *ptr)        \
	{                                                                   \
		type value = 0;                                                 \
		unsigned i;                                                     \
		for (i = 0; i < sizeof(type); ++i) {                            \
			uint8_t b;                                                  \
			int status = insn->reader(insn->readerArg, &b,              \
					insn->readerCursor + i);                            \
			if (status)                                                 \
				return status;                                          \
			value |= (type)((uint64_t)b << (i * 8));                    \
		}                                                               \
		*ptr = value;                                                   \
		insn->readerCursor += sizeof(type);                             \
		return 0;                                                       \
	}
325 
/*
 * consume* - Use the reader function provided by the user to consume data
 *   values of various sizes from the instruction's memory and advance the
 *   cursor appropriately.  The bytes are combined arithmetically with the
 *   first byte read as the least significant one (little-endian input), so
 *   the result does not depend on the byte order of the host.
 *
 * @param insn    - See consumeByte().
 * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
 *                  be populated with the data read.
 * @return        - See consumeByte().
 */
CONSUME_FUNC(consumeInt8, int8_t)
CONSUME_FUNC(consumeInt16, int16_t)
CONSUME_FUNC(consumeInt32, int32_t)
CONSUME_FUNC(consumeUInt16, uint16_t)
CONSUME_FUNC(consumeUInt32, uint32_t)
CONSUME_FUNC(consumeUInt64, uint64_t)
342 
343 /*
344  * setPrefixPresent - Marks that a particular prefix is present at a particular
345  *   location.
346  *
347  * @param insn      - The instruction to be marked as having the prefix.
348  * @param prefix    - The prefix that is present.
349  * @param location  - The location where the prefix is located (in the address
350  *                    space of the instruction's reader).
351  */
352 static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix, uint64_t location)
353 {
354 	switch (prefix) {
355 	case 0x26:
356 		insn->isPrefix26 = true;
357 		insn->prefix26 = location;
358 		break;
359 	case 0x2e:
360 		insn->isPrefix2e = true;
361 		insn->prefix2e = location;
362 		break;
363 	case 0x36:
364 		insn->isPrefix36 = true;
365 		insn->prefix36 = location;
366 		break;
367 	case 0x3e:
368 		insn->isPrefix3e = true;
369 		insn->prefix3e = location;
370 		break;
371 	case 0x64:
372 		insn->isPrefix64 = true;
373 		insn->prefix64 = location;
374 		break;
375 	case 0x65:
376 		insn->isPrefix65 = true;
377 		insn->prefix65 = location;
378 		break;
379 	case 0x66:
380 		insn->isPrefix66 = true;
381 		insn->prefix66 = location;
382 		break;
383 	case 0x67:
384 		insn->isPrefix67 = true;
385 		insn->prefix67 = location;
386 		break;
387 	case 0xf0:
388 		insn->isPrefixf0 = true;
389 		insn->prefixf0 = location;
390 		break;
391 	case 0xf2:
392 		insn->isPrefixf2 = true;
393 		insn->prefixf2 = location;
394 		break;
395 	case 0xf3:
396 		insn->isPrefixf3 = true;
397 		insn->prefixf3 = location;
398 		break;
399 	default:
400 		break;
401 	}
402 }
403 
404 /*
405  * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
406  *   present at a given location.
407  *
408  * @param insn      - The instruction to be queried.
409  * @param prefix    - The prefix.
410  * @param location  - The location to query.
411  * @return          - Whether the prefix is at that location.
412  */
isPrefixAtLocation(struct InternalInstruction * insn,uint8_t prefix,uint64_t location)413 static bool isPrefixAtLocation(struct InternalInstruction *insn, uint8_t prefix,
414 		uint64_t location)
415 {
416 	switch (prefix) {
417 	case 0x26:
418 		if (insn->isPrefix26 && insn->prefix26 == location)
419 			return true;
420 		break;
421 	case 0x2e:
422 		if (insn->isPrefix2e && insn->prefix2e == location)
423 			return true;
424 		break;
425 	case 0x36:
426 		if (insn->isPrefix36 && insn->prefix36 == location)
427 			return true;
428 		break;
429 	case 0x3e:
430 		if (insn->isPrefix3e && insn->prefix3e == location)
431 			return true;
432 		break;
433 	case 0x64:
434 		if (insn->isPrefix64 && insn->prefix64 == location)
435 			return true;
436 		break;
437 	case 0x65:
438 		if (insn->isPrefix65 && insn->prefix65 == location)
439 			return true;
440 		break;
441 	case 0x66:
442 		if (insn->isPrefix66 && insn->prefix66 == location)
443 			return true;
444 		break;
445 	case 0x67:
446 		if (insn->isPrefix67 && insn->prefix67 == location)
447 			return true;
448 		break;
449 	case 0xf0:
450 		if (insn->isPrefixf0 && insn->prefixf0 == location)
451 			return true;
452 		break;
453 	case 0xf2:
454 		if (insn->isPrefixf2 && insn->prefixf2 == location)
455 			return true;
456 		break;
457 	case 0xf3:
458 		if (insn->isPrefixf3 && insn->prefixf3 == location)
459 			return true;
460 		break;
461 	default:
462 		break;
463 	}
464 	return false;
465 }
466 
/*
 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
 *   instruction as having them.  Also sets the instruction's default operand,
 *   address, and other relevant data sizes to report operands correctly.
 *
 * The function works in three phases:
 *   1. A loop consumes legacy prefix bytes (and, in 64-bit mode, runs of REX
 *      bytes that precede them) until the first non-prefix byte.
 *   2. That byte is examined for an EVEX (0x62), VEX (0xc4/0xc5), XOP (0x8f)
 *      or REX (0x4x, 64-bit mode only) prefix.  If it is none of these, it is
 *      pushed back so that the opcode reader sees it.
 *   3. Register, address, displacement and immediate sizes are derived from
 *      the mode and the size-override prefixes that were seen.
 *
 * @param insn  - The instruction whose prefixes are to be read.
 * @return      - 0 if the instruction could be read until the end of the prefix
 *                bytes; -1 if the reader ran out of input.  Conflicting or
 *                repeated prefixes are not reported as errors: within a
 *                group, the last one seen wins.
 */
static int readPrefixes(struct InternalInstruction *insn)
{
	bool isPrefix = true;
	uint64_t prefixLocation;
	uint8_t byte = 0, nextByte;

	bool hasAdSize = false;
	bool hasOpSize = false;

	//initialize to an impossible value
	insn->necessaryPrefixLocation = insn->readerCursor - 1;

	/* Phase 1: legacy prefixes.  One iteration per byte examined. */
	while (isPrefix) {
		if (insn->mode == MODE_64BIT) {
			// eliminate consecutive redundant REX bytes in front
			if (consumeByte(insn, &byte))
				return -1;

			if ((byte & 0xf0) == 0x40) {
				/* Swallow the whole run of REX bytes.  When the loop ends,
				 * `byte` holds the first non-REX byte, which has only been
				 * peeked at, not consumed. */
				while(true) {
					if (lookAtByte(insn, &byte))	// out of input code
						return -1;
					if ((byte & 0xf0) == 0x40) {
						// another REX prefix, but we only remember the last one
						if (consumeByte(insn, &byte))
							return -1;
					} else
						break;
				}

				// recover the last REX byte if next byte is not a legacy prefix
				switch (byte) {
					case 0xf2:  /* REPNE/REPNZ */
					case 0xf3:  /* REP or REPE/REPZ */
					case 0xf0:  /* LOCK */
					case 0x2e:  /* CS segment override -OR- Branch not taken */
					case 0x36:  /* SS segment override -OR- Branch taken */
					case 0x3e:  /* DS segment override */
					case 0x26:  /* ES segment override */
					case 0x64:  /* FS segment override */
					case 0x65:  /* GS segment override */
					case 0x66:  /* Operand-size override */
					case 0x67:  /* Address-size override */
						/* A legacy prefix follows: the REX run stays consumed
						 * and is thereby dropped. */
						break;
					default:    /* Not a prefix byte */
						unconsumeByte(insn);
						break;
				}
			} else {
				/* Not a REX byte: put it back for the normal path below. */
				unconsumeByte(insn);
			}
		}

		prefixLocation = insn->readerCursor;

		/* Running out of input while reading prefixes is a decode failure. */
		if (consumeByte(insn, &byte))
			return -1;

		/* Only a 0xf2/0xf3 in the very first byte of the instruction is
		 * considered for XACQUIRE/XRELEASE treatment. */
		if (insn->readerCursor - 1 == insn->startLocation
				&& (byte == 0xf2 || byte == 0xf3)) {

			if (lookAtByte(insn, &nextByte))
				return -1;

			/*
			 * If the byte is 0xf2 or 0xf3, and any of the following conditions are
			 * met:
			 * - it is followed by a LOCK (0xf0) prefix
			 * - it is followed by an xchg instruction
			 * then it should be disassembled as a xacquire/xrelease not repne/rep.
			 */
			if (((nextByte == 0xf0) ||
				((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
				insn->xAcquireRelease = byte;

			/*
			 * Also if the byte is 0xf3, and the following condition is met:
			 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
			 *                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
			 * then it should be disassembled as an xrelease not rep.
			 */
			if (byte == 0xf3 &&
					(nextByte == 0x88 || nextByte == 0x89 ||
					 nextByte == 0xc6 || nextByte == 0xc7))
				insn->xAcquireRelease = byte;

			/* In 64-bit mode, step over a REX byte to verify that another
			 * byte is readable behind it, then restore the cursor.  The value
			 * left in nextByte is not read again in this function. */
			if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
				if (consumeByte(insn, &nextByte))
					return -1;
				if (lookAtByte(insn, &nextByte))
					return -1;
				unconsumeByte(insn);
			}
		}

		/* Classify the byte.  Within the REP/LOCK group and within the
		 * segment-override group, earlier occurrences are cleared so that
		 * only the last prefix of the group stays marked. */
		switch (byte) {
			case 0xf2:  /* REPNE/REPNZ */
			case 0xf3:  /* REP or REPE/REPZ */
			case 0xf0:  /* LOCK */
				// only accept the last prefix
				insn->isPrefixf2 = false;
				insn->isPrefixf3 = false;
				insn->isPrefixf0 = false;
				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix0 = byte;
				break;
			case 0x2e:  /* CS segment override -OR- Branch not taken */
				insn->segmentOverride = SEG_OVERRIDE_CS;
				// only accept the last prefix
				insn->isPrefix2e = false;
				insn->isPrefix36 = false;
				insn->isPrefix3e = false;
				insn->isPrefix26 = false;
				insn->isPrefix64 = false;
				insn->isPrefix65 = false;

				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix1 = byte;
				break;
			case 0x36:  /* SS segment override -OR- Branch taken */
				insn->segmentOverride = SEG_OVERRIDE_SS;
				// only accept the last prefix
				insn->isPrefix2e = false;
				insn->isPrefix36 = false;
				insn->isPrefix3e = false;
				insn->isPrefix26 = false;
				insn->isPrefix64 = false;
				insn->isPrefix65 = false;

				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix1 = byte;
				break;
			case 0x3e:  /* DS segment override */
				insn->segmentOverride = SEG_OVERRIDE_DS;
				// only accept the last prefix
				insn->isPrefix2e = false;
				insn->isPrefix36 = false;
				insn->isPrefix3e = false;
				insn->isPrefix26 = false;
				insn->isPrefix64 = false;
				insn->isPrefix65 = false;

				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix1 = byte;
				break;
			case 0x26:  /* ES segment override */
				insn->segmentOverride = SEG_OVERRIDE_ES;
				// only accept the last prefix
				insn->isPrefix2e = false;
				insn->isPrefix36 = false;
				insn->isPrefix3e = false;
				insn->isPrefix26 = false;
				insn->isPrefix64 = false;
				insn->isPrefix65 = false;

				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix1 = byte;
				break;
			case 0x64:  /* FS segment override */
				insn->segmentOverride = SEG_OVERRIDE_FS;
				// only accept the last prefix
				insn->isPrefix2e = false;
				insn->isPrefix36 = false;
				insn->isPrefix3e = false;
				insn->isPrefix26 = false;
				insn->isPrefix64 = false;
				insn->isPrefix65 = false;

				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix1 = byte;
				break;
			case 0x65:  /* GS segment override */
				insn->segmentOverride = SEG_OVERRIDE_GS;
				// only accept the last prefix
				insn->isPrefix2e = false;
				insn->isPrefix36 = false;
				insn->isPrefix3e = false;
				insn->isPrefix26 = false;
				insn->isPrefix64 = false;
				insn->isPrefix65 = false;

				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix1 = byte;
				break;
			case 0x66:  /* Operand-size override */
				hasOpSize = true;
				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix2 = byte;
				break;
			case 0x67:  /* Address-size override */
				hasAdSize = true;
				setPrefixPresent(insn, byte, prefixLocation);
				insn->prefix3 = byte;
				break;
			default:    /* Not a prefix byte */
				isPrefix = false;
				break;
		}

		//if (isPrefix)
		//	dbgprintf(insn, "Found prefix 0x%hhx", byte);
	}

	/* Phase 2: `byte` is the first non-legacy-prefix byte and has already
	 * been consumed.  Check whether it starts a vector-extension or REX
	 * prefix; otherwise push it back. */
	insn->vectorExtensionType = TYPE_NO_VEX_XOP;


	if (byte == 0x62) {
		/* Candidate EVEX prefix (four bytes); 0x62 is otherwise BOUND. */
		uint8_t byte1, byte2;

		if (consumeByte(insn, &byte1)) {
			//dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
			return -1;
		}

		/* Outside 64-bit mode the top two bits of byte1 must both be set.
		 * In every mode, bits 2 and 3 of byte1 must both be clear. */
		if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
				((~byte1 & 0xc) == 0xc)) {
			if (lookAtByte(insn, &byte2)) {
				//dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
				return -1;
			}

			/* Bit 2 of the third byte must be set for a valid EVEX prefix. */
			if ((byte2 & 0x4) == 0x4) {
				insn->vectorExtensionType = TYPE_EVEX;
			} else {
				unconsumeByte(insn); /* unconsume byte1 */
				unconsumeByte(insn); /* unconsume byte  */
				insn->necessaryPrefixLocation = insn->readerCursor - 2;
			}

			if (insn->vectorExtensionType == TYPE_EVEX) {
				insn->vectorExtensionPrefix[0] = byte;
				insn->vectorExtensionPrefix[1] = byte1;

				if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
					//dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
					return -1;
				}

				if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
					//dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
					return -1;
				}

				/* We simulate the REX prefix for simplicity's sake */
				if (insn->mode == MODE_64BIT) {
					insn->rexPrefix = 0x40
						| (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
						| (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
						| (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
						| (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
				}
				/* An embedded 66 prefix counts as an operand-size override. */
				switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
					default:
						break;
					case VEX_PREFIX_66:
						hasOpSize = true;
						break;
				}
				//dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
				//		insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
				//		insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
			}
		} else {
			// BOUND instruction
			unconsumeByte(insn); /* unconsume byte1 */
			unconsumeByte(insn); /* unconsume byte */
		}
	} else if (byte == 0xc4) {
		/* Candidate three-byte VEX prefix. */
		uint8_t byte1;

		if (lookAtByte(insn, &byte1)) {
			//dbgprintf(insn, "Couldn't read second byte of VEX");
			return -1;
		}

		/* Outside 64-bit mode, 0xc4 is only VEX when the top two bits of the
		 * following byte are both set; otherwise 0xc4 is pushed back. */
		if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
			insn->vectorExtensionType = TYPE_VEX_3B;
			insn->necessaryPrefixLocation = insn->readerCursor - 1;
		} else {
			unconsumeByte(insn);
			insn->necessaryPrefixLocation = insn->readerCursor - 1;
		}

		if (insn->vectorExtensionType == TYPE_VEX_3B) {
			insn->vectorExtensionPrefix[0] = byte;
			if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
				return -1;
			if (consumeByte(insn, &insn->vectorExtensionPrefix[2]))
				return -1;

			/* We simulate the REX prefix for simplicity's sake */
			if (insn->mode == MODE_64BIT) {
				insn->rexPrefix = 0x40
					| (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
					| (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
					| (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
					| (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);

			}
			/* An embedded 66 prefix counts as an operand-size override. */
			switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
				default:
					break;
				case VEX_PREFIX_66:
					hasOpSize = true;
					break;
			}
		}
	} else if (byte == 0xc5) {
		/* Candidate two-byte VEX prefix.  Unlike the other branches, this one
		 * leaves necessaryPrefixLocation untouched. */
		uint8_t byte1;

		if (lookAtByte(insn, &byte1)) {
			//dbgprintf(insn, "Couldn't read second byte of VEX");
			return -1;
		}

		if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
			insn->vectorExtensionType = TYPE_VEX_2B;
		} else {
			unconsumeByte(insn);
		}

		if (insn->vectorExtensionType == TYPE_VEX_2B) {
			insn->vectorExtensionPrefix[0] = byte;
			if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
				return -1;

			/* Only the R bit is carried over into the simulated REX prefix. */
			if (insn->mode == MODE_64BIT) {
				insn->rexPrefix = 0x40
					| (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
			}

			switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
				default:
					break;
				case VEX_PREFIX_66:
					hasOpSize = true;
					break;
			}
		}
	} else if (byte == 0x8f) {
		/* Candidate XOP prefix (three bytes); 0x8f is otherwise POP. */
		uint8_t byte1;

		if (lookAtByte(insn, &byte1)) {
			// dbgprintf(insn, "Couldn't read second byte of XOP");
			return -1;
		}

		if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
			insn->vectorExtensionType = TYPE_XOP;
			insn->necessaryPrefixLocation = insn->readerCursor - 1;
		} else {
			unconsumeByte(insn);
			insn->necessaryPrefixLocation = insn->readerCursor - 1;
		}

		if (insn->vectorExtensionType == TYPE_XOP) {
			insn->vectorExtensionPrefix[0] = byte;
			if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
				return -1;
			if (consumeByte(insn, &insn->vectorExtensionPrefix[2]))
				return -1;

			/* We simulate the REX prefix for simplicity's sake */
			if (insn->mode == MODE_64BIT) {
				insn->rexPrefix = 0x40
					| (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
					| (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
					| (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
					| (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
			}

			switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
				default:
					break;
				case VEX_PREFIX_66:
					hasOpSize = true;
					break;
			}
		}
	} else {
		/* No vector extension.  In 64-bit mode the byte may be a REX prefix;
		 * in every other case it belongs to the opcode and is pushed back. */
		if (insn->mode == MODE_64BIT) {
			if ((byte & 0xf0) == 0x40) {
				uint8_t opcodeByte;

				/* Collapse a run of REX bytes; `byte` ends up holding the
				 * last one, while opcodeByte is only peeked at. */
				while(true) {
					if (lookAtByte(insn, &opcodeByte))	// out of input code
						return -1;
					if ((opcodeByte & 0xf0) == 0x40) {
						// another REX prefix, but we only remember the last one
						if (consumeByte(insn, &byte))
							return -1;
					} else
						break;
				}

				insn->rexPrefix = byte;
				insn->necessaryPrefixLocation = insn->readerCursor - 2;
				// dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
			} else {
				unconsumeByte(insn);
				insn->necessaryPrefixLocation = insn->readerCursor - 1;
			}
		} else {
			unconsumeByte(insn);
			insn->necessaryPrefixLocation = insn->readerCursor - 1;
		}
	}

	/* Phase 3: derive the data sizes (in bytes) from the mode and the
	 * operand-size / address-size overrides collected above. */
	if (insn->mode == MODE_16BIT) {
		/* Defaults are 2 bytes; an override selects 4. */
		insn->registerSize       = (hasOpSize ? 4 : 2);
		insn->addressSize        = (hasAdSize ? 4 : 2);
		insn->displacementSize   = (hasAdSize ? 4 : 2);
		insn->immediateSize      = (hasOpSize ? 4 : 2);
		insn->immSize = (hasOpSize ? 4 : 2);
	} else if (insn->mode == MODE_32BIT) {
		/* Defaults are 4 bytes; an override selects 2. */
		insn->registerSize       = (hasOpSize ? 2 : 4);
		insn->addressSize        = (hasAdSize ? 2 : 4);
		insn->displacementSize   = (hasAdSize ? 2 : 4);
		insn->immediateSize      = (hasOpSize ? 2 : 4);
		insn->immSize = (hasOpSize ? 2 : 4);
	} else if (insn->mode == MODE_64BIT) {
		if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
			/* REX.W set: 8-byte registers; the operand-size override is
			 * not consulted in this branch. */
			insn->registerSize       = 8;
			insn->addressSize        = (hasAdSize ? 4 : 8);
			insn->displacementSize   = 4;
			insn->immediateSize      = 4;
			insn->immSize      = 4;
		} else if (insn->rexPrefix) {
			/* REX present without W. */
			insn->registerSize       = (hasOpSize ? 2 : 4);
			insn->addressSize        = (hasAdSize ? 4 : 8);
			insn->displacementSize   = (hasOpSize ? 2 : 4);
			insn->immediateSize      = (hasOpSize ? 2 : 4);
			insn->immSize      = (hasOpSize ? 2 : 4);
		} else {
			/* No REX prefix.  NOTE(review): this branch differs from the
			 * previous one only in immSize (4/8 instead of 2/4) - confirm
			 * that the difference is intentional. */
			insn->registerSize       = (hasOpSize ? 2 : 4);
			insn->addressSize        = (hasAdSize ? 4 : 8);
			insn->displacementSize   = (hasOpSize ? 2 : 4);
			insn->immediateSize      = (hasOpSize ? 2 : 4);
			insn->immSize      = (hasOpSize ? 4 : 8);
		}
	}

	return 0;
}
920 
921 static int readModRM(struct InternalInstruction *insn);
922 
923 /*
924  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
925  *   extended or escape opcodes).
926  *
927  * @param insn  - The instruction whose opcode is to be read.
928  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
929  */
readOpcode(struct InternalInstruction * insn)930 static int readOpcode(struct InternalInstruction *insn)
931 {
932 	/* Determine the length of the primary opcode */
933 	uint8_t current;
934 
935 	// printf(">>> readOpcode() = %x\n", insn->readerCursor);
936 
937 	insn->opcodeType = ONEBYTE;
938 	insn->firstByte = 0x00;
939 
940 	if (insn->vectorExtensionType == TYPE_EVEX) {
941 		switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
942 			default:
943 				// dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
944 				// 		mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
945 				return -1;
946 			case VEX_LOB_0F:
947 				insn->opcodeType = TWOBYTE;
948 				return consumeByte(insn, &insn->opcode);
949 			case VEX_LOB_0F38:
950 				insn->opcodeType = THREEBYTE_38;
951 				return consumeByte(insn, &insn->opcode);
952 			case VEX_LOB_0F3A:
953 				insn->opcodeType = THREEBYTE_3A;
954 				return consumeByte(insn, &insn->opcode);
955 		}
956 	} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
957 		switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
958 			default:
959 				// dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
960 				//		mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
961 				return -1;
962 			case VEX_LOB_0F:
963 				insn->twoByteEscape = 0x0f;
964 				insn->opcodeType = TWOBYTE;
965 				return consumeByte(insn, &insn->opcode);
966 			case VEX_LOB_0F38:
967 				insn->twoByteEscape = 0x0f;
968 				insn->threeByteEscape = 0x38;
969 				insn->opcodeType = THREEBYTE_38;
970 				return consumeByte(insn, &insn->opcode);
971 			case VEX_LOB_0F3A:
972 				insn->twoByteEscape = 0x0f;
973 				insn->threeByteEscape = 0x3a;
974 				insn->opcodeType = THREEBYTE_3A;
975 				return consumeByte(insn, &insn->opcode);
976 		}
977 	} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
978 		insn->twoByteEscape = 0x0f;
979 		insn->opcodeType = TWOBYTE;
980 		return consumeByte(insn, &insn->opcode);
981 	} else if (insn->vectorExtensionType == TYPE_XOP) {
982 		switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
983 			default:
984 				// dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
985 				// 		mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
986 				return -1;
987 			case XOP_MAP_SELECT_8:
988 				// FIXME: twoByteEscape?
989 				insn->opcodeType = XOP8_MAP;
990 				return consumeByte(insn, &insn->opcode);
991 			case XOP_MAP_SELECT_9:
992 				// FIXME: twoByteEscape?
993 				insn->opcodeType = XOP9_MAP;
994 				return consumeByte(insn, &insn->opcode);
995 			case XOP_MAP_SELECT_A:
996 				// FIXME: twoByteEscape?
997 				insn->opcodeType = XOPA_MAP;
998 				return consumeByte(insn, &insn->opcode);
999 		}
1000 	}
1001 
1002 	if (consumeByte(insn, &current))
1003 		return -1;
1004 
1005 	// save this first byte for MOVcr, MOVdr, MOVrc, MOVrd
1006 	insn->firstByte = current;
1007 
1008 	if (current == 0x0f) {
1009 		// dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
1010 
1011 		insn->twoByteEscape = current;
1012 
1013 		if (consumeByte(insn, &current))
1014 			return -1;
1015 
1016 		if (current == 0x38) {
1017 			// dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
1018 
1019 			insn->threeByteEscape = current;
1020 
1021 			if (consumeByte(insn, &current))
1022 				return -1;
1023 
1024 			insn->opcodeType = THREEBYTE_38;
1025 		} else if (current == 0x3a) {
1026 			// dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
1027 
1028 			insn->threeByteEscape = current;
1029 
1030 			if (consumeByte(insn, &current))
1031 				return -1;
1032 
1033 			insn->opcodeType = THREEBYTE_3A;
1034 		} else {
1035 #ifndef CAPSTONE_X86_REDUCE
1036 			switch(current) {
1037 				default:
1038 					// dbgprintf(insn, "Didn't find a three-byte escape prefix");
1039 					insn->opcodeType = TWOBYTE;
1040 					break;
1041 				case 0x0e:	// HACK for femms. to be handled properly in next version 3.x
1042 					insn->opcodeType = T3DNOW_MAP;
1043 					// this encode does not have ModRM
1044 					insn->consumedModRM = true;
1045 					break;
1046 				case 0x0f:
1047 					// 3DNow instruction has weird format: ModRM/SIB/displacement + opcode
1048 					if (readModRM(insn))
1049 						return -1;
1050 					// next is 3DNow opcode
1051 					if (consumeByte(insn, &current))
1052 						return -1;
1053 					insn->opcodeType = T3DNOW_MAP;
1054 					break;
1055 			}
1056 #endif
1057 		}
1058 	}
1059 
1060 	/*
1061 	 * At this point we have consumed the full opcode.
1062 	 * Anything we consume from here on must be unconsumed.
1063 	 */
1064 
1065 	insn->opcode = current;
1066 
1067 	return 0;
1068 }
1069 
// Hacky for FEMMS: getIDWithAttrMask() below refers to X86_FEMMS by name.
// NOTE(review): GET_INSTRINFO_ENUM presumably makes the generated .inc file
// emit the instruction-ID enum that declares it -- confirm against the .inc.
#define GET_INSTRINFO_ENUM
#ifndef CAPSTONE_X86_REDUCE
#include "X86GenInstrInfo.inc"
#else
#include "X86GenInstrInfo_reduce.inc"
#endif
1077 
1078 /*
1079  * getIDWithAttrMask - Determines the ID of an instruction, consuming
1080  *   the ModR/M byte as appropriate for extended and escape opcodes,
1081  *   and using a supplied attribute mask.
1082  *
1083  * @param instructionID - A pointer whose target is filled in with the ID of the
1084  *                        instruction.
1085  * @param insn          - The instruction whose ID is to be determined.
1086  * @param attrMask      - The attribute mask to search.
1087  * @return              - 0 if the ModR/M could be read when needed or was not
1088  *                        needed; nonzero otherwise.
1089  */
getIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint16_t attrMask)1090 static int getIDWithAttrMask(uint16_t *instructionID,
1091 		struct InternalInstruction *insn,
1092 		uint16_t attrMask)
1093 {
1094 	bool hasModRMExtension;
1095 
1096 	InstructionContext instructionClass;
1097 
1098 #ifndef CAPSTONE_X86_REDUCE
1099 	// HACK for femms. to be handled properly in next version 3.x
1100 	if (insn->opcode == 0x0e && insn->opcodeType == T3DNOW_MAP) {
1101 		*instructionID = X86_FEMMS;
1102 		return 0;
1103 	}
1104 #endif
1105 
1106 	if (insn->opcodeType == T3DNOW_MAP)
1107 		instructionClass = IC_OF;
1108 	else
1109 		instructionClass = contextForAttrs(attrMask);
1110 
1111 	hasModRMExtension = modRMRequired(insn->opcodeType,
1112 			instructionClass,
1113 			insn->opcode) != 0;
1114 
1115 	if (hasModRMExtension) {
1116 		if (readModRM(insn))
1117 			return -1;
1118 
1119 		*instructionID = decode(insn->opcodeType,
1120 				instructionClass,
1121 				insn->opcode,
1122 				insn->modRM);
1123 	} else {
1124 		*instructionID = decode(insn->opcodeType,
1125 				instructionClass,
1126 				insn->opcode,
1127 				0);
1128 	}
1129 
1130 	return 0;
1131 }
1132 
1133 /*
1134  * is16BitEquivalent - Determines whether two instruction names refer to
1135  * equivalent instructions but one is 16-bit whereas the other is not.
1136  *
1137  * @param orig  - The instruction ID that is not 16-bit
1138  * @param equiv - The instruction ID that is 16-bit
1139  */
is16BitEquivalent(unsigned orig,unsigned equiv)1140 static bool is16BitEquivalent(unsigned orig, unsigned equiv)
1141 {
1142 	size_t i;
1143 	uint16_t idx;
1144 
1145 	if ((idx = x86_16_bit_eq_lookup[orig]) != 0) {
1146 		for (i = idx - 1; i < ARR_SIZE(x86_16_bit_eq_tbl) && x86_16_bit_eq_tbl[i].first == orig; i++) {
1147 			if (x86_16_bit_eq_tbl[i].second == equiv)
1148 				return true;
1149 		}
1150 	}
1151 
1152 	return false;
1153 }
1154 
/*
 * is64Bit - Determines whether this instruction is a 64-bit instruction.
 *
 * @param id - The instruction ID, used directly as an index into the
 *             is_64bit_insn table.
 * @return   - The table entry for id, converted to bool.
 */
static bool is64Bit(uint16_t id)
{
	return is_64bit_insn[id];
}
1164 
/*
 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
 *   appropriate for extended and escape opcodes.  Determines the attributes and
 *   context for the instruction before doing so.
 *
 *   The attribute mask is assembled from the decoding mode, the fields of the
 *   EVEX/VEX/XOP prefix (or, without one, the legacy prefix found at
 *   insn->necessaryPrefixLocation) and REX.W.  After the primary table lookup
 *   several fix-up passes retry the lookup with an adjusted mask or opcode.
 *   On success insn->instructionID and insn->spec are filled in.
 *
 * @param insn  - The instruction whose ID is to be determined.
 * @return      - 0 if the ModR/M could be read when needed or was not needed;
 *                nonzero otherwise (also when the vector extension type is
 *                not one of the handled kinds).
 */
static int getID(struct InternalInstruction *insn)
{
	uint16_t attrMask;
	uint16_t instructionID;

	attrMask = ATTR_NONE;

	if (insn->mode == MODE_64BIT)
		attrMask |= ATTR_64BIT;

	if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
		attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

		/* Each prefix kind stores its pp and L fields in a different byte. */
		if (insn->vectorExtensionType == TYPE_EVEX) {
			switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXKZ;
			if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXB;
			if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXK;
			if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXL;
			if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
				attrMask |= ATTR_EVEXL2;
		} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
			switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
				attrMask |= ATTR_VEXL;
		} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
			switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
				attrMask |= ATTR_VEXL;
		} else if (insn->vectorExtensionType == TYPE_XOP) {
			switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
				case VEX_PREFIX_66:
					attrMask |= ATTR_OPSIZE;
					break;
				case VEX_PREFIX_F3:
					attrMask |= ATTR_XS;
					break;
				case VEX_PREFIX_F2:
					attrMask |= ATTR_XD;
					break;
			}

			if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
				attrMask |= ATTR_VEXL;
		} else {
			/* Unknown vector extension type. */
			return -1;
		}
	} else {
		/*
		 * Legacy prefixes: this is an else-if chain, so at most one of the
		 * four attributes is taken from the necessary-prefix location.
		 */
		if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) {
			attrMask |= ATTR_OPSIZE;
		} else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) {
			attrMask |= ATTR_ADSIZE;
		} else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) {
			attrMask |= ATTR_XS;
		} else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) {
			attrMask |= ATTR_XD;
		}
	}

	/* Bit 3 of the REX prefix is REX.W. */
	if (insn->rexPrefix & 0x08)
		attrMask |= ATTR_REXW;

	/*
	 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
	 * of the AdSize prefix is inverted w.r.t. 32-bit mode.
	 */
	if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
			insn->opcode == 0xE3)
		attrMask ^= ATTR_ADSIZE;

	/* Primary lookup; the passes below may replace its result. */
	if (getIDWithAttrMask(&instructionID, insn, attrMask))
		return -1;

	/* The following clauses compensate for limitations of the tables. */
	if (insn->mode != MODE_64BIT &&
			insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
		/*
		 * The tables can't distinguish between cases where the W-bit is used to
		 * select register size and cases where it's a required part of the opcode.
		 */
		if ((insn->vectorExtensionType == TYPE_EVEX &&
					wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
				(insn->vectorExtensionType == TYPE_VEX_3B &&
				 wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
				(insn->vectorExtensionType == TYPE_XOP &&
				 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
			uint16_t instructionIDWithREXW;
			if (getIDWithAttrMask(&instructionIDWithREXW,
						insn, attrMask | ATTR_REXW)) {
				/* Lookup with REX.W failed: keep the primary result. */
				insn->instructionID = instructionID;
				insn->spec = specifierForUID(instructionID);

				return 0;
			}

			// If not a 64-bit instruction. Switch the opcode.
			if (!is64Bit(instructionIDWithREXW)) {
				insn->instructionID = instructionIDWithREXW;
				insn->spec = specifierForUID(instructionIDWithREXW);

				return 0;
			}
		}
	}

	/*
	 * Absolute moves (one-byte opcodes 0xA0-0xA3) need special handling.
	 * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes
	 *  is inverted w.r.t. 32-bit mode.
	 * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
	 *  any position.
	 */
	if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
		/* Make sure we observed the prefixes in any position. */
		if (insn->isPrefix67)
			attrMask |= ATTR_ADSIZE;
		if (insn->isPrefix66)
			attrMask |= ATTR_OPSIZE;

		/* In 16-bit, invert the attributes. */
		if (insn->mode == MODE_16BIT)
			attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;

		if (getIDWithAttrMask(&instructionID, insn, attrMask))
			return -1;

		insn->instructionID = instructionID;
		insn->spec = specifierForUID(instructionID);

		return 0;
	}

	if ((insn->mode == MODE_16BIT || insn->isPrefix66) &&
			!(attrMask & ATTR_OPSIZE)) {
		/*
		 * The instruction tables make no distinction between instructions that
		 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
		 * particular spot (i.e., many MMX operations).  In general we're
		 * conservative, but in the specific case where OpSize is present but not
		 * in the right place we check if there's a 16-bit operation.
		 */

		const struct InstructionSpecifier *spec;
		uint16_t instructionIDWithOpsize;

		spec = specifierForUID(instructionID);

		if (getIDWithAttrMask(&instructionIDWithOpsize,
					insn, attrMask | ATTR_OPSIZE)) {
			/*
			 * ModRM required with OpSize but not present; give up and return version
			 * without OpSize set
			 */

			insn->instructionID = instructionID;
			insn->spec = spec;
			return 0;
		}

		/*
		 * Switch to the OpSize variant only when it is the 16-bit equivalent
		 * and exactly one of "16-bit mode" / "0x66 prefix seen" holds.
		 */
		if (is16BitEquivalent(instructionID, instructionIDWithOpsize) &&
				(insn->mode == MODE_16BIT) ^ insn->isPrefix66) {
			insn->instructionID = instructionIDWithOpsize;
			insn->spec = specifierForUID(instructionIDWithOpsize);
		} else {
			insn->instructionID = instructionID;
			insn->spec = spec;
		}
		return 0;
	}

	if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
			insn->rexPrefix & 0x01) {
		/*
		 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
		 * it should decode as XCHG %r8, %eax.
		 */

		const struct InstructionSpecifier *spec;
		uint16_t instructionIDWithNewOpcode;
		const struct InstructionSpecifier *specWithNewOpcode;

		spec = specifierForUID(instructionID);

		/* Borrow opcode from one of the other XCHGar opcodes */
		insn->opcode = 0x91;

		if (getIDWithAttrMask(&instructionIDWithNewOpcode,
					insn,
					attrMask)) {
			/* Lookup failed: restore the opcode and keep the NOOP result. */
			insn->opcode = 0x90;

			insn->instructionID = instructionID;
			insn->spec = spec;
			return 0;
		}

		specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);

		/* Change back */
		insn->opcode = 0x90;

		insn->instructionID = instructionIDWithNewOpcode;
		insn->spec = specWithNewOpcode;

		return 0;
	}

	/* No fix-up applied: use the primary lookup result. */
	insn->instructionID = instructionID;
	insn->spec = specifierForUID(insn->instructionID);

	return 0;
}
1424 
1425 /*
1426  * readSIB - Consumes the SIB byte to determine addressing information for an
1427  *   instruction.
1428  *
1429  * @param insn  - The instruction whose SIB byte is to be read.
1430  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
1431  */
readSIB(struct InternalInstruction * insn)1432 static int readSIB(struct InternalInstruction *insn)
1433 {
1434 	SIBIndex sibIndexBase = SIB_INDEX_NONE;
1435 	SIBBase sibBaseBase = SIB_BASE_NONE;
1436 	uint8_t index, base;
1437 
1438 	// dbgprintf(insn, "readSIB()");
1439 
1440 	if (insn->consumedSIB)
1441 		return 0;
1442 
1443 	insn->consumedSIB = true;
1444 
1445 	switch (insn->addressSize) {
1446 		case 2:
1447 			// dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1448 			return -1;
1449 		case 4:
1450 			sibIndexBase = SIB_INDEX_EAX;
1451 			sibBaseBase = SIB_BASE_EAX;
1452 			break;
1453 		case 8:
1454 			sibIndexBase = SIB_INDEX_RAX;
1455 			sibBaseBase = SIB_BASE_RAX;
1456 			break;
1457 	}
1458 
1459 	if (consumeByte(insn, &insn->sib))
1460 		return -1;
1461 
1462 	index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1463 	if (insn->vectorExtensionType == TYPE_EVEX)
1464 		index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
1465 
1466 	switch (index) {
1467 		case 0x4:
1468 			insn->sibIndex = SIB_INDEX_NONE;
1469 			break;
1470 		default:
1471 			insn->sibIndex = (SIBIndex)(sibIndexBase + index);
1472 			if (insn->sibIndex == SIB_INDEX_sib ||
1473 					insn->sibIndex == SIB_INDEX_sib64)
1474 				insn->sibIndex = SIB_INDEX_NONE;
1475 			break;
1476 	}
1477 
1478 	switch (scaleFromSIB(insn->sib)) {
1479 		case 0:
1480 			insn->sibScale = 1;
1481 			break;
1482 		case 1:
1483 			insn->sibScale = 2;
1484 			break;
1485 		case 2:
1486 			insn->sibScale = 4;
1487 			break;
1488 		case 3:
1489 			insn->sibScale = 8;
1490 			break;
1491 	}
1492 
1493 	base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1494 
1495 	switch (base) {
1496 		case 0x5:
1497 		case 0xd:
1498 			switch (modFromModRM(insn->modRM)) {
1499 				case 0x0:
1500 					insn->eaDisplacement = EA_DISP_32;
1501 					insn->sibBase = SIB_BASE_NONE;
1502 					break;
1503 				case 0x1:
1504 					insn->eaDisplacement = EA_DISP_8;
1505 					insn->sibBase = (SIBBase)(sibBaseBase + base);
1506 					break;
1507 				case 0x2:
1508 					insn->eaDisplacement = EA_DISP_32;
1509 					insn->sibBase = (SIBBase)(sibBaseBase + base);
1510 					break;
1511 				case 0x3:
1512 					//debug("Cannot have Mod = 0b11 and a SIB byte");
1513 					return -1;
1514 			}
1515 			break;
1516 		default:
1517 			insn->sibBase = (SIBBase)(sibBaseBase + base);
1518 			break;
1519 	}
1520 
1521 	return 0;
1522 }
1523 
1524 /*
1525  * readDisplacement - Consumes the displacement of an instruction.
1526  *
1527  * @param insn  - The instruction whose displacement is to be read.
1528  * @return      - 0 if the displacement byte was successfully read; nonzero
1529  *                otherwise.
1530  */
readDisplacement(struct InternalInstruction * insn)1531 static int readDisplacement(struct InternalInstruction *insn)
1532 {
1533 	int8_t d8;
1534 	int16_t d16;
1535 	int32_t d32;
1536 
1537 	// dbgprintf(insn, "readDisplacement()");
1538 
1539 	if (insn->consumedDisplacement)
1540 		return 0;
1541 
1542 	insn->consumedDisplacement = true;
1543 	insn->displacementOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1544 
1545 	switch (insn->eaDisplacement) {
1546 		case EA_DISP_NONE:
1547 			insn->consumedDisplacement = false;
1548 			break;
1549 		case EA_DISP_8:
1550 			if (consumeInt8(insn, &d8))
1551 				return -1;
1552 			insn->displacement = d8;
1553 			break;
1554 		case EA_DISP_16:
1555 			if (consumeInt16(insn, &d16))
1556 				return -1;
1557 			insn->displacement = d16;
1558 			break;
1559 		case EA_DISP_32:
1560 			if (consumeInt32(insn, &d32))
1561 				return -1;
1562 			insn->displacement = d32;
1563 			break;
1564 	}
1565 
1566 	return 0;
1567 }
1568 
1569 /*
1570  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1571  *   displacement) for an instruction and interprets it.
1572  *
1573  * @param insn  - The instruction whose addressing information is to be read.
1574  * @return      - 0 if the information was successfully read; nonzero otherwise.
1575  */
readModRM(struct InternalInstruction * insn)1576 static int readModRM(struct InternalInstruction *insn)
1577 {
1578 	uint8_t mod, rm, reg;
1579 
1580 	// dbgprintf(insn, "readModRM()");
1581 
1582 	// already got ModRM byte?
1583 	if (insn->consumedModRM)
1584 		return 0;
1585 
1586 	insn->modRMOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1587 
1588 	if (consumeByte(insn, &insn->modRM))
1589 		return -1;
1590 
1591 	// mark that we already got ModRM
1592 	insn->consumedModRM = true;
1593 
1594 	// save original ModRM for later reference
1595 	insn->orgModRM = insn->modRM;
1596 
1597 	// handle MOVcr, MOVdr, MOVrc, MOVrd by pretending they have MRM.mod = 3
1598 	if ((insn->firstByte == 0x0f && insn->opcodeType == TWOBYTE) &&
1599 			(insn->opcode >= 0x20 && insn->opcode <= 0x23 ))
1600 		insn->modRM |= 0xC0;
1601 
1602 	mod     = modFromModRM(insn->modRM);
1603 	rm      = rmFromModRM(insn->modRM);
1604 	reg     = regFromModRM(insn->modRM);
1605 
1606 	/*
1607 	 * This goes by insn->registerSize to pick the correct register, which messes
1608 	 * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
1609 	 * fixupReg().
1610 	 */
1611 	switch (insn->registerSize) {
1612 		case 2:
1613 			insn->regBase = MODRM_REG_AX;
1614 			insn->eaRegBase = EA_REG_AX;
1615 			break;
1616 		case 4:
1617 			insn->regBase = MODRM_REG_EAX;
1618 			insn->eaRegBase = EA_REG_EAX;
1619 			break;
1620 		case 8:
1621 			insn->regBase = MODRM_REG_RAX;
1622 			insn->eaRegBase = EA_REG_RAX;
1623 			break;
1624 	}
1625 
1626 	reg |= rFromREX(insn->rexPrefix) << 3;
1627 	rm  |= bFromREX(insn->rexPrefix) << 3;
1628 	if (insn->vectorExtensionType == TYPE_EVEX) {
1629 		reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1630 		rm  |=  xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1631 	}
1632 
1633 	insn->reg = (Reg)(insn->regBase + reg);
1634 
1635 	switch (insn->addressSize) {
1636 		case 2:
1637 			insn->eaBaseBase = EA_BASE_BX_SI;
1638 
1639 			switch (mod) {
1640 				case 0x0:
1641 					if (rm == 0x6) {
1642 						insn->eaBase = EA_BASE_NONE;
1643 						insn->eaDisplacement = EA_DISP_16;
1644 						if (readDisplacement(insn))
1645 							return -1;
1646 					} else {
1647 						insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1648 						insn->eaDisplacement = EA_DISP_NONE;
1649 					}
1650 					break;
1651 				case 0x1:
1652 					insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1653 					insn->eaDisplacement = EA_DISP_8;
1654 					insn->displacementSize = 1;
1655 					if (readDisplacement(insn))
1656 						return -1;
1657 					break;
1658 				case 0x2:
1659 					insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1660 					insn->eaDisplacement = EA_DISP_16;
1661 					if (readDisplacement(insn))
1662 						return -1;
1663 					break;
1664 				case 0x3:
1665 					insn->eaBase = (EABase)(insn->eaRegBase + rm);
1666 					insn->eaDisplacement = EA_DISP_NONE;
1667 					if (readDisplacement(insn))
1668 						return -1;
1669 					break;
1670 			}
1671 			break;
1672 		case 4:
1673 		case 8:
1674 			insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1675 
1676 			switch (mod) {
1677 				case 0x0:
1678 					insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1679 					switch (rm) {
1680 						case 0x14:
1681 						case 0x4:
1682 						case 0xc:   /* in case REXW.b is set */
1683 							insn->eaBase = (insn->addressSize == 4 ?
1684 									EA_BASE_sib : EA_BASE_sib64);
1685 							if (readSIB(insn) || readDisplacement(insn))
1686 								return -1;
1687 							break;
1688 						case 0x5:
1689 						case 0xd:
1690 							insn->eaBase = EA_BASE_NONE;
1691 							insn->eaDisplacement = EA_DISP_32;
1692 							if (readDisplacement(insn))
1693 								return -1;
1694 							break;
1695 						default:
1696 							insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1697 							break;
1698 					}
1699 
1700 					break;
1701 				case 0x1:
1702 					insn->displacementSize = 1;
1703 					/* FALLTHROUGH */
1704 				case 0x2:
1705 					insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1706 					switch (rm) {
1707 						case 0x14:
1708 						case 0x4:
1709 						case 0xc:   /* in case REXW.b is set */
1710 							insn->eaBase = EA_BASE_sib;
1711 							if (readSIB(insn) || readDisplacement(insn))
1712 								return -1;
1713 							break;
1714 						default:
1715 							insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1716 							if (readDisplacement(insn))
1717 								return -1;
1718 							break;
1719 					}
1720 					break;
1721 				case 0x3:
1722 					insn->eaDisplacement = EA_DISP_NONE;
1723 					insn->eaBase = (EABase)(insn->eaRegBase + rm);
1724 					break;
1725 			}
1726 			break;
1727 	} /* switch (insn->addressSize) */
1728 
1729 	return 0;
1730 }
1731 
/*
 * GENERIC_FIXUP_FUNC - Generates a static function `name` that maps a
 *   register index to a register value according to the operand type.
 *
 * name   - Name of the generated function.
 * base   - Expression used as the base for TYPE_Rv operands (the result is
 *          base + index).
 * prefix - Token pasted in front of the register names (_AL, _AX, _EAX,
 *          _XMM0, ...) for every other operand type.
 *
 * The generated function sets *valid to 1 on entry and clears it for an
 * unknown operand type (returning 0), for mask registers with index > 7 and
 * for segment registers with index > 5.  For TYPE_R8, indices 4..7 select
 * prefix##_SPL onwards when a REX prefix is present.  TYPE_MM64 uses only
 * the low three bits of the index.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
  static uint8_t name(struct InternalInstruction *insn,   \
                      OperandType type,                   \
                      uint8_t index,                      \
                      uint8_t *valid) {                   \
    *valid = 1;                                           \
    switch (type) {                                       \
    default:                                              \
      *valid = 0;                                         \
      return 0;                                           \
    case TYPE_Rv:                                         \
      return base + index;                                \
    case TYPE_R8:                                         \
      if (insn->rexPrefix &&                              \
         index >= 4 && index <= 7) {                      \
        return prefix##_SPL + (index - 4);                \
      } else {                                            \
        return prefix##_AL + index;                       \
      }                                                   \
    case TYPE_R16:                                        \
      return prefix##_AX + index;                         \
    case TYPE_R32:                                        \
      return prefix##_EAX + index;                        \
    case TYPE_R64:                                        \
      return prefix##_RAX + index;                        \
    case TYPE_XMM512:                                     \
      return prefix##_ZMM0 + index;                       \
    case TYPE_XMM256:                                     \
      return prefix##_YMM0 + index;                       \
    case TYPE_XMM128:                                     \
    case TYPE_XMM64:                                      \
    case TYPE_XMM32:                                      \
    case TYPE_XMM:                                        \
      return prefix##_XMM0 + index;                       \
    case TYPE_VK1:                                        \
    case TYPE_VK8:                                        \
    case TYPE_VK16:                                       \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_K0 + index;                         \
    case TYPE_MM64:                                       \
      return prefix##_MM0 + (index & 0x7);                \
    case TYPE_SEGMENTREG:                                 \
      if (index > 5)                                      \
        *valid = 0;                                       \
      return prefix##_ES + index;                         \
    case TYPE_DEBUGREG:                                   \
      return prefix##_DR0 + index;                        \
    case TYPE_CONTROLREG:                                 \
      return prefix##_CR0 + index;                        \
    }                                                     \
  }
1784 
1785 
/*
 * fixup*Value - Consults an operand type to determine the meaning of the
 *   reg or R/M field.  readModRM() picks registers by insn->registerSize
 *   alone, so an XMM operand, for example, would be reported as AX where
 *   XMM0 is meant; these functions return the register that matches the
 *   operand type.
 *
 *   fixupRegValue works on the reg field (MODRM_REG_* names, TYPE_Rv based
 *   on insn->regBase); fixupRMValue works on the R/M field (EA_REG_* names,
 *   TYPE_Rv based on insn->eaRegBase).
 *
 * @param insn  - The instruction containing the operand.
 * @param type  - The operand type.
 * @param index - The existing value of the field as reported by readModRM().
 * @param valid - The address of a uint8_t.  The target is set to 1 if the
 *                field is valid for the register class; 0 if not.
 * @return      - The proper value.
 */
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
1801 
1802 /*
1803  * fixupReg - Consults an operand specifier to determine which of the
1804  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
1805  *
1806  * @param insn  - See fixup*Value().
1807  * @param op    - The operand specifier.
1808  * @return      - 0 if fixup was successful; -1 if the register returned was
1809  *                invalid for its class.
1810  */
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)1811 static int fixupReg(struct InternalInstruction *insn,
1812 		const struct OperandSpecifier *op)
1813 {
1814 	uint8_t valid;
1815 
1816 	// dbgprintf(insn, "fixupReg()");
1817 
1818 	switch ((OperandEncoding)op->encoding) {
1819 		default:
1820 			//debug("Expected a REG or R/M encoding in fixupReg");
1821 			return -1;
1822 		case ENCODING_VVVV:
1823 			insn->vvvv = (Reg)fixupRegValue(insn,
1824 					(OperandType)op->type,
1825 					insn->vvvv,
1826 					&valid);
1827 			if (!valid)
1828 				return -1;
1829 			break;
1830 		case ENCODING_REG:
1831 			insn->reg = (Reg)fixupRegValue(insn,
1832 					(OperandType)op->type,
1833 					(uint8_t)(insn->reg - insn->regBase),
1834 					&valid);
1835 			if (!valid)
1836 				return -1;
1837 			break;
1838 		CASE_ENCODING_RM:
1839 			if (insn->eaBase >= insn->eaRegBase) {
1840 				insn->eaBase = (EABase)fixupRMValue(insn,
1841 						(OperandType)op->type,
1842 						(uint8_t)(insn->eaBase - insn->eaRegBase),
1843 						&valid);
1844 				if (!valid)
1845 					return -1;
1846 			}
1847 			break;
1848 	}
1849 
1850 	return 0;
1851 }
1852 
1853 /*
1854  * readOpcodeRegister - Reads an operand from the opcode field of an
1855  *   instruction and interprets it appropriately given the operand width.
1856  *   Handles AddRegFrm instructions.
1857  *
1858  * @param insn  - the instruction whose opcode field is to be read.
1859  * @param size  - The width (in bytes) of the register being specified.
1860  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1861  *                RAX.
1862  * @return      - 0 on success; nonzero otherwise.
1863  */
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)1864 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
1865 {
1866 	// dbgprintf(insn, "readOpcodeRegister()");
1867 
1868 	if (size == 0)
1869 		size = insn->registerSize;
1870 
1871 	insn->operandSize = size;
1872 
1873 	switch (size) {
1874 		case 1:
1875 			insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1876 						| (insn->opcode & 7)));
1877 			if (insn->rexPrefix &&
1878 					insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1879 					insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1880 				insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1881 						+ (insn->opcodeRegister - MODRM_REG_AL - 4));
1882 			}
1883 
1884 			break;
1885 		case 2:
1886 			insn->opcodeRegister = (Reg)(MODRM_REG_AX
1887 					+ ((bFromREX(insn->rexPrefix) << 3)
1888 						| (insn->opcode & 7)));
1889 			break;
1890 		case 4:
1891 			insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1892 					+ ((bFromREX(insn->rexPrefix) << 3)
1893 						| (insn->opcode & 7)));
1894 			break;
1895 		case 8:
1896 			insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1897 					+ ((bFromREX(insn->rexPrefix) << 3)
1898 						| (insn->opcode & 7)));
1899 			break;
1900 	}
1901 
1902 	return 0;
1903 }
1904 
1905 /*
1906  * readImmediate - Consumes an immediate operand from an instruction, given the
1907  *   desired operand size.
1908  *
1909  * @param insn  - The instruction whose operand is to be read.
1910  * @param size  - The width (in bytes) of the operand.
1911  * @return      - 0 if the immediate was successfully consumed; nonzero
1912  *                otherwise.
1913  */
readImmediate(struct InternalInstruction * insn,uint8_t size)1914 static int readImmediate(struct InternalInstruction *insn, uint8_t size)
1915 {
1916 	uint8_t imm8;
1917 	uint16_t imm16;
1918 	uint32_t imm32;
1919 	uint64_t imm64;
1920 
1921 	// dbgprintf(insn, "readImmediate()");
1922 
1923 	if (insn->numImmediatesConsumed == 2) {
1924 		//debug("Already consumed two immediates");
1925 		return -1;
1926 	}
1927 
1928 	if (size == 0)
1929 		size = insn->immediateSize;
1930 	else
1931 		insn->immediateSize = size;
1932 	insn->immediateOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1933 
1934 	switch (size) {
1935 		case 1:
1936 			if (consumeByte(insn, &imm8))
1937 				return -1;
1938 			insn->immediates[insn->numImmediatesConsumed] = imm8;
1939 			break;
1940 		case 2:
1941 			if (consumeUInt16(insn, &imm16))
1942 				return -1;
1943 			insn->immediates[insn->numImmediatesConsumed] = imm16;
1944 			break;
1945 		case 4:
1946 			if (consumeUInt32(insn, &imm32))
1947 				return -1;
1948 			insn->immediates[insn->numImmediatesConsumed] = imm32;
1949 			break;
1950 		case 8:
1951 			if (consumeUInt64(insn, &imm64))
1952 				return -1;
1953 			insn->immediates[insn->numImmediatesConsumed] = imm64;
1954 			break;
1955 	}
1956 
1957 	insn->numImmediatesConsumed++;
1958 
1959 	return 0;
1960 }
1961 
1962 /*
1963  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1964  *
1965  * @param insn  - The instruction whose operand is to be read.
1966  * @return      - 0 if the vvvv was successfully consumed; nonzero
1967  *                otherwise.
1968  */
readVVVV(struct InternalInstruction * insn)1969 static int readVVVV(struct InternalInstruction *insn)
1970 {
1971 	int vvvv;
1972 	// dbgprintf(insn, "readVVVV()");
1973 
1974 	if (insn->vectorExtensionType == TYPE_EVEX)
1975 		vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1976 				vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1977 	else if (insn->vectorExtensionType == TYPE_VEX_3B)
1978 		vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1979 	else if (insn->vectorExtensionType == TYPE_VEX_2B)
1980 		vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1981 	else if (insn->vectorExtensionType == TYPE_XOP)
1982 		vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1983 	else
1984 		return -1;
1985 
1986 	if (insn->mode != MODE_64BIT)
1987 		vvvv &= 0x7;
1988 
1989 	insn->vvvv = vvvv;
1990 
1991 	return 0;
1992 }
1993 
1994 /*
1995  * readMaskRegister - Reads an mask register from the opcode field of an
1996  *   instruction.
1997  *
1998  * @param insn    - The instruction whose opcode field is to be read.
1999  * @return        - 0 on success; nonzero otherwise.
2000  */
readMaskRegister(struct InternalInstruction * insn)2001 static int readMaskRegister(struct InternalInstruction *insn)
2002 {
2003 	// dbgprintf(insn, "readMaskRegister()");
2004 
2005 	if (insn->vectorExtensionType != TYPE_EVEX)
2006 		return -1;
2007 
2008 	insn->writemask = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]);
2009 
2010 	return 0;
2011 }
2012 
2013 /*
2014  * readOperands - Consults the specifier for an instruction and consumes all
2015  *   operands for that instruction, interpreting them as it goes.
2016  *
2017  * @param insn  - The instruction whose operands are to be read and interpreted.
2018  * @return      - 0 if all operands could be read; nonzero otherwise.
2019  */
readOperands(struct InternalInstruction * insn)2020 static int readOperands(struct InternalInstruction *insn)
2021 {
2022 	int index;
2023 	int hasVVVV, needVVVV;
2024 	int sawRegImm = 0;
2025 
2026 	// printf(">>> readOperands(): ID = %u\n", insn->instructionID);
2027 	/* If non-zero vvvv specified, need to make sure one of the operands
2028 	   uses it. */
2029 	hasVVVV = !readVVVV(insn);
2030 	needVVVV = hasVVVV && (insn->vvvv != 0);
2031 
2032 	for (index = 0; index < X86_MAX_OPERANDS; ++index) {
2033 		//printf(">>> encoding[%u] = %u\n", index, x86OperandSets[insn->spec->operands][index].encoding);
2034 		switch (x86OperandSets[insn->spec->operands][index].encoding) {
2035 			case ENCODING_NONE:
2036 			case ENCODING_SI:
2037 			case ENCODING_DI:
2038 				break;
2039 			case ENCODING_REG:
2040 			CASE_ENCODING_RM:
2041 				if (readModRM(insn))
2042 					return -1;
2043 				if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
2044 					return -1;
2045 				// Apply the AVX512 compressed displacement scaling factor.
2046 				if (x86OperandSets[insn->spec->operands][index].encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
2047 					insn->displacement *= (int64_t)1 << (x86OperandSets[insn->spec->operands][index].encoding - ENCODING_RM);
2048 				break;
2049 			case ENCODING_CB:
2050 			case ENCODING_CW:
2051 			case ENCODING_CD:
2052 			case ENCODING_CP:
2053 			case ENCODING_CO:
2054 			case ENCODING_CT:
2055 				// dbgprintf(insn, "We currently don't hande code-offset encodings");
2056 				return -1;
2057 			case ENCODING_IB:
2058 				if (sawRegImm) {
2059 					/* Saw a register immediate so don't read again and instead split the
2060 					   previous immediate.  FIXME: This is a hack. */
2061 					insn->immediates[insn->numImmediatesConsumed] =
2062 						insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
2063 					++insn->numImmediatesConsumed;
2064 					break;
2065 				}
2066 				if (readImmediate(insn, 1))
2067 					return -1;
2068 				if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
2069 						x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
2070 					sawRegImm = 1;
2071 				break;
2072 			case ENCODING_IW:
2073 				if (readImmediate(insn, 2))
2074 					return -1;
2075 				break;
2076 			case ENCODING_ID:
2077 				if (readImmediate(insn, 4))
2078 					return -1;
2079 				break;
2080 			case ENCODING_IO:
2081 				if (readImmediate(insn, 8))
2082 					return -1;
2083 				break;
2084 			case ENCODING_Iv:
2085 				if (readImmediate(insn, insn->immediateSize))
2086 					return -1;
2087 				break;
2088 			case ENCODING_Ia:
2089 				if (readImmediate(insn, insn->addressSize))
2090 					return -1;
2091 				/* Direct memory-offset (moffset) immediate will get mapped
2092 				   to memory operand later. We want the encoding info to
2093 				   reflect that as well. */
2094 				insn->displacementOffset = insn->immediateOffset;
2095 				insn->consumedDisplacement = true;
2096 				insn->displacementSize = insn->immediateSize;
2097 				insn->displacement = insn->immediates[insn->numImmediatesConsumed - 1];
2098 				insn->immediateOffset = 0;
2099 				insn->immediateSize = 0;
2100 				break;
2101 			case ENCODING_RB:
2102 				if (readOpcodeRegister(insn, 1))
2103 					return -1;
2104 				break;
2105 			case ENCODING_RW:
2106 				if (readOpcodeRegister(insn, 2))
2107 					return -1;
2108 				break;
2109 			case ENCODING_RD:
2110 				if (readOpcodeRegister(insn, 4))
2111 					return -1;
2112 				break;
2113 			case ENCODING_RO:
2114 				if (readOpcodeRegister(insn, 8))
2115 					return -1;
2116 				break;
2117 			case ENCODING_Rv:
2118 				if (readOpcodeRegister(insn, 0))
2119 					return -1;
2120 				break;
2121 			case ENCODING_FP:
2122 				break;
2123 			case ENCODING_VVVV:
2124 				needVVVV = 0; /* Mark that we have found a VVVV operand. */
2125 				if (!hasVVVV)
2126 					return -1;
2127 				if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
2128 					return -1;
2129 				break;
2130 			case ENCODING_WRITEMASK:
2131 				if (readMaskRegister(insn))
2132 					return -1;
2133 				break;
2134 			case ENCODING_DUP:
2135 				break;
2136 			default:
2137 				// dbgprintf(insn, "Encountered an operand with an unknown encoding.");
2138 				return -1;
2139 		}
2140 	}
2141 
2142 	/* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
2143 	if (needVVVV) return -1;
2144 
2145 	return 0;
2146 }
2147 
2148 // return True if instruction is illegal to use with prefixes
2149 // This also check & fix the isPrefixNN when a prefix is irrelevant.
checkPrefix(struct InternalInstruction * insn)2150 static bool checkPrefix(struct InternalInstruction *insn)
2151 {
2152 	// LOCK prefix
2153 	if (insn->isPrefixf0) {
2154 		switch(insn->instructionID) {
2155 			default:
2156 				// invalid LOCK
2157 				return true;
2158 
2159 			// nop dword [rax]
2160 			case X86_NOOPL:
2161 
2162 			// DEC
2163 			case X86_DEC16m:
2164 			case X86_DEC32m:
2165 			case X86_DEC64m:
2166 			case X86_DEC8m:
2167 
2168 			// ADC
2169 			case X86_ADC16mi:
2170 			case X86_ADC16mi8:
2171 			case X86_ADC16mr:
2172 			case X86_ADC32mi:
2173 			case X86_ADC32mi8:
2174 			case X86_ADC32mr:
2175 			case X86_ADC64mi32:
2176 			case X86_ADC64mi8:
2177 			case X86_ADC64mr:
2178 			case X86_ADC8mi:
2179 			case X86_ADC8mi8:
2180 			case X86_ADC8mr:
2181 			case X86_ADC8rm:
2182 			case X86_ADC16rm:
2183 			case X86_ADC32rm:
2184 			case X86_ADC64rm:
2185 
2186 			// ADD
2187 			case X86_ADD16mi:
2188 			case X86_ADD16mi8:
2189 			case X86_ADD16mr:
2190 			case X86_ADD32mi:
2191 			case X86_ADD32mi8:
2192 			case X86_ADD32mr:
2193 			case X86_ADD64mi32:
2194 			case X86_ADD64mi8:
2195 			case X86_ADD64mr:
2196 			case X86_ADD8mi:
2197 			case X86_ADD8mi8:
2198 			case X86_ADD8mr:
2199 			case X86_ADD8rm:
2200 			case X86_ADD16rm:
2201 			case X86_ADD32rm:
2202 			case X86_ADD64rm:
2203 
2204 			// AND
2205 			case X86_AND16mi:
2206 			case X86_AND16mi8:
2207 			case X86_AND16mr:
2208 			case X86_AND32mi:
2209 			case X86_AND32mi8:
2210 			case X86_AND32mr:
2211 			case X86_AND64mi32:
2212 			case X86_AND64mi8:
2213 			case X86_AND64mr:
2214 			case X86_AND8mi:
2215 			case X86_AND8mi8:
2216 			case X86_AND8mr:
2217 			case X86_AND8rm:
2218 			case X86_AND16rm:
2219 			case X86_AND32rm:
2220 			case X86_AND64rm:
2221 
2222 
2223 			// BTC
2224 			case X86_BTC16mi8:
2225 			case X86_BTC16mr:
2226 			case X86_BTC32mi8:
2227 			case X86_BTC32mr:
2228 			case X86_BTC64mi8:
2229 			case X86_BTC64mr:
2230 
2231 			// BTR
2232 			case X86_BTR16mi8:
2233 			case X86_BTR16mr:
2234 			case X86_BTR32mi8:
2235 			case X86_BTR32mr:
2236 			case X86_BTR64mi8:
2237 			case X86_BTR64mr:
2238 
2239 			// BTS
2240 			case X86_BTS16mi8:
2241 			case X86_BTS16mr:
2242 			case X86_BTS32mi8:
2243 			case X86_BTS32mr:
2244 			case X86_BTS64mi8:
2245 			case X86_BTS64mr:
2246 
2247 			// CMPXCHG
2248 			case X86_CMPXCHG16B:
2249 			case X86_CMPXCHG16rm:
2250 			case X86_CMPXCHG32rm:
2251 			case X86_CMPXCHG64rm:
2252 			case X86_CMPXCHG8rm:
2253 			case X86_CMPXCHG8B:
2254 
2255 			// INC
2256 			case X86_INC16m:
2257 			case X86_INC32m:
2258 			case X86_INC64m:
2259 			case X86_INC8m:
2260 
2261 			// NEG
2262 			case X86_NEG16m:
2263 			case X86_NEG32m:
2264 			case X86_NEG64m:
2265 			case X86_NEG8m:
2266 
2267 			// NOT
2268 			case X86_NOT16m:
2269 			case X86_NOT32m:
2270 			case X86_NOT64m:
2271 			case X86_NOT8m:
2272 
2273 			// OR
2274 			case X86_OR16mi:
2275 			case X86_OR16mi8:
2276 			case X86_OR16mr:
2277 			case X86_OR32mi:
2278 			case X86_OR32mi8:
2279 			case X86_OR32mr:
2280 			case X86_OR32mrLocked:
2281 			case X86_OR64mi32:
2282 			case X86_OR64mi8:
2283 			case X86_OR64mr:
2284 			case X86_OR8mi8:
2285 			case X86_OR8mi:
2286 			case X86_OR8mr:
2287 			case X86_OR8rm:
2288 			case X86_OR16rm:
2289 			case X86_OR32rm:
2290 			case X86_OR64rm:
2291 
2292 			// SBB
2293 			case X86_SBB16mi:
2294 			case X86_SBB16mi8:
2295 			case X86_SBB16mr:
2296 			case X86_SBB32mi:
2297 			case X86_SBB32mi8:
2298 			case X86_SBB32mr:
2299 			case X86_SBB64mi32:
2300 			case X86_SBB64mi8:
2301 			case X86_SBB64mr:
2302 			case X86_SBB8mi:
2303 			case X86_SBB8mi8:
2304 			case X86_SBB8mr:
2305 
2306 			// SUB
2307 			case X86_SUB16mi:
2308 			case X86_SUB16mi8:
2309 			case X86_SUB16mr:
2310 			case X86_SUB32mi:
2311 			case X86_SUB32mi8:
2312 			case X86_SUB32mr:
2313 			case X86_SUB64mi32:
2314 			case X86_SUB64mi8:
2315 			case X86_SUB64mr:
2316 			case X86_SUB8mi8:
2317 			case X86_SUB8mi:
2318 			case X86_SUB8mr:
2319 			case X86_SUB8rm:
2320 			case X86_SUB16rm:
2321 			case X86_SUB32rm:
2322 			case X86_SUB64rm:
2323 
2324 			// XADD
2325 			case X86_XADD16rm:
2326 			case X86_XADD32rm:
2327 			case X86_XADD64rm:
2328 			case X86_XADD8rm:
2329 
2330 			// XCHG
2331 			case X86_XCHG16rm:
2332 			case X86_XCHG32rm:
2333 			case X86_XCHG64rm:
2334 			case X86_XCHG8rm:
2335 
2336 			// XOR
2337 			case X86_XOR16mi:
2338 			case X86_XOR16mi8:
2339 			case X86_XOR16mr:
2340 			case X86_XOR32mi:
2341 			case X86_XOR32mi8:
2342 			case X86_XOR32mr:
2343 			case X86_XOR64mi32:
2344 			case X86_XOR64mi8:
2345 			case X86_XOR64mr:
2346 			case X86_XOR8mi8:
2347 			case X86_XOR8mi:
2348 			case X86_XOR8mr:
2349 			case X86_XOR8rm:
2350 			case X86_XOR16rm:
2351 			case X86_XOR32rm:
2352 			case X86_XOR64rm:
2353 
2354 				// this instruction can be used with LOCK prefix
2355 				return false;
2356 		}
2357 	}
2358 
2359 	// REPNE prefix
2360 	if (insn->isPrefixf2) {
2361 		// 0xf2 can be a part of instruction encoding, but not really a prefix.
2362 		// In such a case, clear it.
2363 		if (insn->twoByteEscape == 0x0f) {
2364 			insn->prefix0 = 0;
2365 		}
2366 	}
2367 
2368 	// no invalid prefixes
2369 	return false;
2370 }
2371 
2372 /*
2373  * decodeInstruction - Reads and interprets a full instruction provided by the
2374  *   user.
2375  *
2376  * @param insn      - A pointer to the instruction to be populated.  Must be
2377  *                    pre-allocated.
2378  * @param reader    - The function to be used to read the instruction's bytes.
2379  * @param readerArg - A generic argument to be passed to the reader to store
2380  *                    any internal state.
2381  * @param startLoc  - The address (in the reader's address space) of the first
2382  *                    byte in the instruction.
2383  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
2384  *                    decode the instruction in.
2385  * @return          - 0 if instruction is valid; nonzero if not.
2386  */
decodeInstruction(struct InternalInstruction * insn,byteReader_t reader,const void * readerArg,uint64_t startLoc,DisassemblerMode mode)2387 int decodeInstruction(struct InternalInstruction *insn,
2388 		byteReader_t reader,
2389 		const void *readerArg,
2390 		uint64_t startLoc,
2391 		DisassemblerMode mode)
2392 {
2393 	insn->reader = reader;
2394 	insn->readerArg = readerArg;
2395 	insn->startLocation = startLoc;
2396 	insn->readerCursor = startLoc;
2397 	insn->mode = mode;
2398 
2399 	if (readPrefixes(insn)       ||
2400 			readOpcode(insn)         ||
2401 			getID(insn)      ||
2402 			insn->instructionID == 0 ||
2403 			checkPrefix(insn) ||
2404 			readOperands(insn))
2405 		return -1;
2406 
2407 	insn->length = (size_t)(insn->readerCursor - insn->startLocation);
2408 
2409 	// instruction length must be <= 15 to be valid
2410 	if (insn->length > 15)
2411 		return -1;
2412 
2413 	if (insn->operandSize == 0)
2414 		insn->operandSize = insn->registerSize;
2415 
2416 	insn->operands = &x86OperandSets[insn->spec->operands][0];
2417 
2418 	return 0;
2419 }
2420 
2421 #endif
2422