1 /*
2 instructions.h
3 
4 diStorm3 - Powerful disassembler for X86/AMD64
5 http://ragestorm.net/distorm/
6 distorm at gmail dot com
7 Copyright (C) 2003-2012 Gil Dabah
8 
9 This program is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with this program.  If not, see <http://www.gnu.org/licenses/>
21 */
22 
23 
24 #ifndef INSTRUCTIONS_H
25 #define INSTRUCTIONS_H
26 
27 #include "config.h"
28 #include "prefix.h"
29 
30 
/*
 * Operand type possibilities.
 *
 * A "_FULL" suffix means the operand is decoded as 16 or 32 bits (or 64 bits where noted),
 * according to the decoding mode, unless an operand/address size prefix overrides it.
 * For example, the code: 33 c0 could be decoded/executed as XOR AX, AX or XOR EAX, EAX.
 *
 * The enumerators rely on implicit numbering, so their order defines their values.
 * The instruction info structures in this file keep operand types in uint8_t fields,
 * therefore every value must stay below 256.
 * NOTE(review): presumably the pre-generated instruction tables store these ordinals,
 * so new types should only be appended - confirm against the table generator.
 */
typedef enum OpType {
	/* No operand is set */
	OT_NONE = 0,

	/* Read a byte (8 bits) immediate */
	OT_IMM8,
	/* Force a read of a word (16 bits) immediate, used by ret only */
	OT_IMM16,
	/* Read a word/dword immediate */
	OT_IMM_FULL,
	/* Read a double-word (32 bits) immediate */
	OT_IMM32,

	/* Read a sign-extended byte (8 bits) immediate */
	OT_SEIMM8,

	/*
	 * Special immediates for instructions which have more than one immediate,
	 * which is an exception from the standard instruction format.
	 * As of version v1.0: ENTER, INSERTQ, EXTRQ are the only problematic ones.
	 */
	/* 16 bits immediate using the first imm-slot */
	OT_IMM16_1,
	/* 8 bits immediate using the first imm-slot */
	OT_IMM8_1,
	/* 8 bits immediate using the second imm-slot */
	OT_IMM8_2,

	/* Use an 8bit register */
	OT_REG8,
	/* Use a 16bit register */
	OT_REG16,
	/* Use a 16/32/64bit register */
	OT_REG_FULL,
	/* Use a 32bit register */
	OT_REG32,
	/*
	 * If used with REX the reg operand size becomes 64 bits, otherwise 32 bits.
	 * VMX instructions are promoted automatically without a REX prefix.
	 */
	OT_REG32_64,
	/* Used only by MOV CR/DR(n). Promoted with REX only. */
	OT_FREG32_64_RM,

	/* Use an 8bit register or read (indirection) a byte from memory */
	OT_RM8,
	/* Some instructions force 16 bits (mov sreg, rm16) */
	OT_RM16,
	/* Use a 16/32/64bit register or read (indirection) a word/dword/qword from memory */
	OT_RM_FULL,
	/*
	 * 32 or 64 bits (with REX) operand size indirection memory operand.
	 * Some instructions are promoted automatically without a REX prefix.
	 */
	OT_RM32_64,
	/*
	 * 16 or 32 bits RM. This is used only with MOVZXD instruction in 64bits.
	 * NOTE(review): mnemonic kept as originally written; it likely refers to MOVSXD - confirm.
	 */
	OT_RM16_32,
	/* Same as OT_RMXX but POINTS to 16 bits [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM16,
	/* Same as OT_RMXX but POINTS to 32 bits (single precision) [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM32,
	/* Same as OT_RMXX but POINTS to 64 bits (double precision) [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM64,
	/* Same as OT_RMXX but POINTS to 80 bits (extended precision) [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM80,

	/*
	 * Special operand type for SSE4 where the ModR/M might
	 * be a 32 bits register or 8 bits memory indirection operand.
	 */
	OT_R32_M8,
	/*
	 * Special ModR/M for PINSRW, which needs a 16 bits memory operand or 32 bits register.
	 * In 16 bits decoding mode R32 becomes R16, operand size cannot affect this.
	 */
	OT_R32_M16,
	/*
	 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or
	 * an 8 bits memory indirection operand.
	 */
	OT_R32_64_M8,
	/*
	 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or
	 * a 16 bits memory indirection operand.
	 */
	OT_R32_64_M16,
	/*
	 * Special operand type for MOV reg16/32/64/mem16, segReg 8C /r. and SMSW.
	 * It supports all decoding modes, but if used as a memory indirection it's a 16 bit ModR/M indirection.
	 */
	OT_RFULL_M16,

	/* Use a control register */
	OT_CREG,
	/* Use a debug register */
	OT_DREG,
	/* Use a segment register */
	OT_SREG,
	/*
	 * SEG is encoded in the flags of the opcode itself!
	 * This is used for specific instructions such as "push SS", where every
	 * segment register has an absolutely different opcode byte.
	 * We need this to detect whether an operand size prefix is used.
	 */
	OT_SEG,

	/* Use AL */
	OT_ACC8,
	/* Use AX (FSTSW) */
	OT_ACC16,
	/* Use AX/EAX/RAX */
	OT_ACC_FULL,
	/* Use AX/EAX, no REX is possible for RAX, used only with IN/OUT which don't support 64 bit registers */
	OT_ACC_FULL_NOT64,

	/*
	 * Read one word (seg), and a word/dword/qword (depends on operand size) from memory.
	 * JMP FAR [EBX] means EBX points to a 16:32 ptr.
	 */
	OT_MEM16_FULL,
	/* Read one word (seg) and a word/dword/qword (depends on operand size), usually SEG:OFF, JMP 1234:1234 */
	OT_PTR16_FULL,
	/* Read one word (limit) and a dword/qword (base) (depends on operand size), used by SGDT, SIDT, LGDT, LIDT. */
	OT_MEM16_3264,

	/* Read a byte (8 bits) immediate and calculate it relatively to the current offset of the instruction being decoded */
	OT_RELCB,
	/* Read a word/dword immediate and calculate it relatively to the current offset of the instruction being decoded */
	OT_RELC_FULL,

	/* Use general memory indirection, with varying sizes: */
	OT_MEM,
	/* Used when a memory indirection is required, but if the mod field is 11, this operand will be ignored. */
	OT_MEM_OPT,
	OT_MEM32,
	/* Memory dereference for MOVNTI, either 32 or 64 bits (with REX). */
	OT_MEM32_64,
	OT_MEM64,
	OT_MEM128,
	/* Used for cmpxchg8b/16b. */
	OT_MEM64_128,

	/* Read an immediate as an absolute address, size is known by instruction, used by MOV (memory offset) only */
	OT_MOFFS8,
	OT_MOFFS_FULL,
	/* Use an immediate of 1, as for SHR R/M, 1 */
	OT_CONST1,
	/* Use CL, as for SHR R/M, CL */
	OT_REGCL,

	/*
	 * Instruction-Block for one byte long instructions, used by INC/DEC/PUSH/POP/XCHG,
	 * REG is extracted from the value of the opcode.
	 */
	/* Use an 8bit register */
	OT_IB_RB,
	/* Use a 16/32/64bit register */
	OT_IB_R_FULL,

	/* Use [(r)SI] as INDIRECTION, for repeatable instructions */
	OT_REGI_ESI,
	/* Use [(r)DI] as INDIRECTION, for repeatable instructions */
	OT_REGI_EDI,
	/* Use [(r)BX + AL] as INDIRECTION, used by XLAT only */
	OT_REGI_EBXAL,
	/* Use [(r)AX] as INDIRECTION, used by AMD's SVM instructions */
	OT_REGI_EAX,
	/* Use DX, as for OUTS DX, BYTE [SI] */
	OT_REGDX,
	/* Use ECX in INVLPGA instruction */
	OT_REGECX,

	/* FPU registers: */
	OT_FPU_SI, /* ST(i) */
	OT_FPU_SSI, /* ST(0), ST(i) */
	OT_FPU_SIS, /* ST(i), ST(0) */

	/* MMX registers: */
	OT_MM,
	/* Extract the MMX register from the RM bits this time (used when the REG bits are used for opcode extension) */
	OT_MM_RM,
	/* ModR/M points to 32 bits MMX variable */
	OT_MM32,
	/* ModR/M points to 64 bits MMX variable */
	OT_MM64,

	/* SSE registers: */
	OT_XMM,
	/* Extract the SSE register from the RM bits this time (used when the REG bits are used for opcode extension) */
	OT_XMM_RM,
	/* ModR/M points to 16 bits SSE variable */
	OT_XMM16,
	/* ModR/M points to 32 bits SSE variable */
	OT_XMM32,
	/* ModR/M points to 64 bits SSE variable */
	OT_XMM64,
	/* ModR/M points to 128 bits SSE variable */
	OT_XMM128,
	/* Implied XMM0 register as operand, used in SSE4. */
	OT_REGXMM0,

	/* AVX operands: */

	/* ModR/M for 32 bits. */
	OT_RM32,
	/* Reg32/Reg64 (prefix width) or Mem8. */
	OT_REG32_64_M8,
	/* Reg32/Reg64 (prefix width) or Mem16. */
	OT_REG32_64_M16,
	/* Reg32/Reg64 depends on prefix width only. */
	OT_WREG32_64,
	/* RM32/RM64 depends on prefix width only. */
	OT_WRM32_64,
	/* XMM or Mem32/Mem64 depends on prefix width only. */
	OT_WXMM32_64,
	/* XMM is encoded in VEX.VVVV. */
	OT_VXMM,
	/* XMM is encoded in the high nibble of an immediate byte. */
	OT_XMM_IMM,
	/* YMM/XMM is dependent on VEX.L. */
	OT_YXMM,
	/* YMM/XMM (depends on prefix length) is encoded in the high nibble of an immediate byte. */
	OT_YXMM_IMM,
	/* YMM is encoded in reg. */
	OT_YMM,
	/* YMM or Mem256. */
	OT_YMM256,
	/* YMM is encoded in VEX.VVVV. */
	OT_VYMM,
	/* YMM/XMM is dependent on VEX.L, and encoded in VEX.VVVV. */
	OT_VYXMM,
	/* YMM/XMM or Mem64/Mem256 is dependent on VEX.L. */
	OT_YXMM64_256,
	/* YMM/XMM or Mem128/Mem256 is dependent on VEX.L. */
	OT_YXMM128_256,
	/* XMM or Mem64/Mem128 is dependent on VEX.L. */
	OT_LXMM64_128,
	/* Mem128/Mem256 is dependent on VEX.L. */
	OT_LMEM128_256
} _OpType;
277 
/*
 * Flags for instruction.
 * Each flag occupies a single bit of a 32 bits mask; the composite masks at the end
 * of this section are OR-combinations of the single-bit flags.
 */

/* Empty flags indicator: */
#define INST_FLAGS_NONE (0)
/* The instruction we are going to decode requires ModR/M encoding. */
#define INST_MODRM_REQUIRED (1)
/* Special treatment for instructions which are in the divided-category but still need the whole byte for ModR/M... */
#define INST_NOT_DIVIDED (1 << 1)
/*
 * Used explicitly in repeatable instructions,
 * which need a suffix letter in their mnemonic to specify operation-size (depends on operands).
 */
#define INST_16BITS (1 << 2)
/* If the opcode is supported by 80286 and upper models (16/32 bits). */
#define INST_32BITS (1 << 3)
/*
 * Prefix flags (6 types: lock/rep, seg override, addr-size, oper-size, REX, VEX)
 * There are several specific instructions that can follow LOCK prefix,
 * note that they must be using a memory operand form, otherwise they generate an exception.
 */
#define INST_PRE_LOCK (1 << 4)
/* REPNZ prefix for string instructions only - means an instruction can follow it. */
#define INST_PRE_REPNZ (1 << 5)
/* REP prefix for string instructions only - means an instruction can follow it. */
#define INST_PRE_REP (1 << 6)
/* CS override prefix. */
#define INST_PRE_CS (1 << 7)
/* SS override prefix. */
#define INST_PRE_SS (1 << 8)
/* DS override prefix. */
#define INST_PRE_DS (1 << 9)
/* ES override prefix. */
#define INST_PRE_ES (1 << 10)
/* FS override prefix. Funky Segment :) */
#define INST_PRE_FS (1 << 11)
/* GS override prefix. Groovy Segment, of course not, duh ! */
#define INST_PRE_GS (1 << 12)
/* Switch operand size from 32 to 16 and vice versa. */
#define INST_PRE_OP_SIZE (1 << 13)
/* Switch address size from 32 to 16 and vice versa. */
#define INST_PRE_ADDR_SIZE (1 << 14)
/* Native instructions which need a suffix letter to indicate their operation-size (and don't depend on operands). */
#define INST_NATIVE (1 << 15)
/* Use extended mnemonic, means it's an _InstInfoEx structure, which contains another mnemonic for 32 bits specifically. */
#define INST_USE_EXMNEMONIC (1 << 16)
/* Use third operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */
#define INST_USE_OP3 (1 << 17)
/* Use fourth operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */
#define INST_USE_OP4 (1 << 18)
/* The instruction's mnemonic depends on the mod value of the ModR/M byte (mod=11, mod!=11). */
#define INST_MNEMONIC_MODRM_BASED (1 << 19)
/* The instruction uses a ModR/M byte in which the MOD must be 11 (for register operands only). */
#define INST_MODRR_REQUIRED (1 << 20)
/* The way 3DNow! instructions are built, we have to handle their locating specially. Suffix imm8 tells which instruction it is. */
#define INST_3DNOW_FETCH (1 << 21)
/* The instruction needs two suffixes, one for the comparison type (imm8) and the second for its operation size indication (second mnemonic). */
#define INST_PSEUDO_OPCODE (1 << 22)
/* Invalid instruction at 64 bits decoding mode. */
#define INST_INVALID_64BITS (1 << 23)
/* Specific instruction can be promoted to 64 bits (without REX, it is promoted automatically). */
#define INST_64BITS (1 << 24)
/* Indicates the instruction must be REX prefixed in order to use 64 bits operands. */
#define INST_PRE_REX (1 << 25)
/* Third mnemonic is set. */
#define INST_USE_EXMNEMONIC2 (1 << 26)
/* Instruction is only valid in 64 bits decoding mode. */
#define INST_64BITS_FETCH (1 << 27)
/* Forces that the ModRM-REG/Opcode field will be 0. (For EXTRQ). */
#define INST_FORCE_REG0 (1 << 28)
/* Indicates that instruction is encoded with a VEX prefix. */
#define INST_PRE_VEX (1 << 29)
/* Indicates that the instruction is encoded with a ModRM byte (REG field specifically). */
#define INST_MODRM_INCLUDED (1 << 30)
/*
 * Indicates that the first (/destination) operand of the instruction is writable.
 * The constant is unsigned on purpose: shifting a signed 1 into bit 31 overflows int,
 * which is undefined behavior, and the resulting negative value would sign-extend
 * when mixed with wider integer types.
 */
#define INST_DST_WR (1U << 31)

/* Any of the repeat prefixes. */
#define INST_PRE_REPS (INST_PRE_REPNZ | INST_PRE_REP)
/* Lock and repeat prefixes. */
#define INST_PRE_LOKREP_MASK (INST_PRE_LOCK | INST_PRE_REPNZ | INST_PRE_REP)
/* CS/SS/DS/ES segment override prefixes. */
#define INST_PRE_SEGOVRD_MASK32 (INST_PRE_CS | INST_PRE_SS | INST_PRE_DS | INST_PRE_ES)
/* FS/GS segment override prefixes. */
#define INST_PRE_SEGOVRD_MASK64 (INST_PRE_FS | INST_PRE_GS)
/* All segment override prefixes. */
#define INST_PRE_SEGOVRD_MASK (INST_PRE_SEGOVRD_MASK32 | INST_PRE_SEGOVRD_MASK64)
359 
/*
 * Extended flags for VEX.
 * These live in the 8 bits wide flagsEx field of _InstInfoEx, so only bits 0..7 are available.
 */
/* Indicates that the instruction might have VEX.L encoded. */
#define INST_VEX_L (1 << 0)
/* Indicates that the instruction might have VEX.W encoded. */
#define INST_VEX_W (1 << 1)
/* Indicates that the mnemonic of the instruction is based on the VEX.W bit. */
#define INST_MNEMONIC_VEXW_BASED (1 << 2)
/* Indicates that the mnemonic of the instruction is based on the VEX.L bit. */
#define INST_MNEMONIC_VEXL_BASED (1 << 3)
/* Forces the instruction to be encoded with VEX.L, otherwise it's undefined. */
#define INST_FORCE_VEXL (1 << 4)
/*
 * Indicates that the instruction is based on the MOD field of the ModRM byte.
 * (MOD==11: got the right instruction, else skip +4 in prefixed table for the correct instruction).
 */
#define INST_MODRR_BASED (1 << 5)
/* Indicates that the instruction doesn't use the VVVV field of the VEX prefix, if it does then it's undecodable. */
#define INST_VEX_V_UNUSED (1 << 6)
378 
/*
 * Indication that the instruction is privileged (Ring 0).
 * It is the most significant bit of the 16 bits opcodeId field and should be tested against that field.
 */
#define OPCODE_ID_PRIVILEGED ((uint16_t)0x8000)
381 
/*
 * Indicates which operand is being decoded.
 * Destination (1st), Source (2nd), op3 (3rd), op4 (4th).
 * Used to set the operands' fields in the _DInst structure!
 */
typedef enum {
	ONT_NONE = -1, /* No operand. */
	ONT_1 = 0, /* 1st operand (destination). */
	ONT_2 = 1, /* 2nd operand (source). */
	ONT_3 = 2, /* 3rd operand. */
	ONT_4 = 3 /* 4th operand. */
} _OperandNumberType;
388 
/*
 * In order to save more space for storing the DB statically,
 * there is another level of shared info,
 * because most of the information that instructions use repeats itself.
 *
 * Info about the instruction, source/dest types, meta and flags.
 * _InstInfo points to a table of _InstSharedInfo.
 */
typedef struct {
	uint8_t flagsIndex; /* An index into FlagsTables */
	uint8_t s; /* OpType of the source operand. */
	uint8_t d; /* OpType of the destination operand. */
	uint8_t meta; /* Hi 5 bits = Instruction set class | Lo 3 bits = flow control flags. */
	/* The following are CPU flag masks, one per way the instruction relates to the flags. */
	uint8_t modifiedFlags; /* Flags the instruction modifies. */
	uint8_t testedFlags; /* Flags the instruction tests. */
	uint8_t undefinedFlags; /* Flags left undefined by the instruction. */
} _InstSharedInfo;
406 
/*
 * This structure is used for the instructions DB and NOT for the disassembled result code!
 * This is the BASE structure, there are extensions to this structure below.
 */
typedef struct {
	/* An index into the SharedInfoTable. */
	uint16_t sharedIndex;
	/* The opcodeId is really a byte-offset into the mnemonics table. MSB is a privileged indication. */
	uint16_t opcodeId;
} _InstInfo;
415 
416 /*
417  * There are merely few instructions which need a second mnemonic for 32 bits.
418  * Or a third for 64 bits. Therefore sometimes the second mnemonic is empty but not the third.
419  * In all decoding modes the first mnemonic is the default.
420  * A flag will indicate it uses another mnemonic.
421  *
422  * There are a couple of (SSE4) instructions in the whole DB which need both op3 and 3rd mnemonic for 64bits,
423  * therefore, I decided to make the extended structure contain all extra info in the same structure.
424  * There are a few instructions (SHLD/SHRD/IMUL and SSE too) which use third operand (or a fourth).
425  * A flag will indicate it uses a third/fourth operand.
426  */
427 typedef struct {
428 	/* Base structure (doesn't get accessed directly from code). */
429 	_InstInfo BASE;
430 
431 	/* Extended starts here. */
432 	uint8_t flagsEx; /* 8 bits are enough, in the future we might make it a bigger integer. */
433 	uint8_t op3, op4; /* OpType. */
434 	uint16_t opcodeId2, opcodeId3;
435 } _InstInfoEx;
436 
/*
 * Trie data structure node type.
 * The values are kept small enough to fit in a 3 bits wide type field.
 */
typedef enum {
	INT_NOTEXISTS = 0, /* Not exists. */
	INT_INFO = 1, /* It's an instruction info. */
	INT_INFOEX = 2,
	INT_LIST_GROUP = 3,
	INT_LIST_FULL = 4,
	INT_LIST_DIVIDED = 5,
	INT_LIST_PREFIXED = 6
} _InstNodeType;

/*
 * Used to check instType < INT_INFOS, means we got an inst-info. Cause it has to be only one of them.
 * Note that INT_NOTEXISTS compares below INT_INFOS as well.
 */
#define INT_INFOS (INT_LIST_GROUP)
450 
/*
 * Instruction node of the trie, packed into 16 bits.
 * It is treated as { int index:13;  int type:3; }
 */
typedef uint16_t _InstNode;
453 
454 _InstInfo* inst_lookup(_CodeInfo* ci, _PrefixState* ps);
455 _InstInfo* inst_lookup_3dnow(_CodeInfo* ci);
456 
457 #endif /* INSTRUCTIONS_H */
458