1 /* diStorm3 3.3 */
2 
3 /*
4 distorm.h
5 
6 diStorm3 - Powerful disassembler for X86/AMD64
7 http://ragestorm.net/distorm/
8 distorm at gmail dot com
9 Copyright (C) 2003-2012 Gil Dabah
10 
11 This program is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License for more details.
20 
21 You should have received a copy of the GNU General Public License
22 along with this program.  If not, see <http://www.gnu.org/licenses/>
23 */
24 
25 
26 #ifndef DISTORM_H
27 #define DISTORM_H
28 
/*
 * 64 bit offsets support:
 * If the diStorm library you use was compiled with 64 bits offsets,
 * make sure you compile your own code with the following macro set:
 * SUPPORT_64BIT_OFFSET
 * Otherwise comment it out, or you will get a linker error of an unresolved symbol...
 * Turned on by default!
 */

#if !(defined(DISTORM_STATIC) || defined(DISTORM_DYNAMIC))
	/* Define this macro for outer projects by default. */
	#define SUPPORT_64BIT_OFFSET
#endif

/* TINYC has a problem with some 64bits library functions, so ignore 64 bit offsets. */
#ifdef __TINYC__
	#undef SUPPORT_64BIT_OFFSET
#endif

/*
 * Fixed-width integer types (int8_t ... uint64_t) are used by the structures in this
 * header no matter which offset width is selected, so they must always be available.
 * MSVC older than Visual Studio 2010 (_MSC_VER < 1600) isn't shipped with stdint.h,
 * so we have our own definitions there; every other compiler gets <stdint.h>.
 * If your compiler doesn't support stdint.h, define your own types here.
 */
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef signed __int64		int64_t;
typedef unsigned __int64	uint64_t;
typedef signed __int32		int32_t;
typedef unsigned __int32	uint32_t;
typedef signed __int16		int16_t;
typedef unsigned __int16	uint16_t;
typedef signed __int8		int8_t;
typedef unsigned __int8		uint8_t;
#else
	#include <stdint.h>
#endif

/* OFFSET_INTEGER is the integer type that _OffsetType is defined as. */
#ifdef SUPPORT_64BIT_OFFSET
	#define OFFSET_INTEGER uint64_t
#else
	/* 32 bit offsets are used. */
	#define OFFSET_INTEGER unsigned long
#endif
72 
73 /* Support C++ compilers */
74 #ifdef __cplusplus
75  extern "C" {
76 #endif
77 
78 
/* ***  Helper Macros  *** */

/* Instruction-set-class part of a 'meta' byte (5 bits, starting at bit 3); compare with the ISC_XXX definitions below. */
#define META_GET_ISC(m) (((m) >> 3) & 0x1F)
/* OR an instruction-set-class value into inst->meta; bits that are already set stay set. */
#define META_SET_ISC(inst, cls) (((inst)->meta) |= ((cls) << 3))
/* Flow-control part of a 'meta' byte (low 3 bits), see the FC_XXX definitions below. */
#define META_GET_FC(m) ((m) & 0x07)

/* Target address of a branching instruction (O_PC operand type): addr + imm.addr + size. */
#define INSTRUCTION_GET_TARGET(inst) ((_OffsetType)((inst)->addr + (inst)->imm.addr + (inst)->size))
/* Target address of a RIP-relative memory indirection: addr + disp + size. */
#define INSTRUCTION_GET_RIP_TARGET(inst) ((_OffsetType)((inst)->addr + (inst)->disp + (inst)->size))
91 
/*
 * Operand Size or Address size are stored inside the flags:
 * 00 - 16 bits
 * 01 - 32 bits
 * 10 - 64 bits
 * 11 - reserved
 *
 * Operand size occupies bits 8-9 and address size bits 10-11 of 'flags'.
 * The set-macros only OR the new bits in, so if you call them more than once,
 * you will have to clean the bits before doing so.
 * 'di' may be any pointer expression (e.g. &inst); it is parenthesized inside the macros.
 */
#define FLAG_SET_OPSIZE(di, size) (((di)->flags) |= (((size) & 3) << 8))
#define FLAG_SET_ADDRSIZE(di, size) (((di)->flags) |= (((size) & 3) << 10))
#define FLAG_GET_OPSIZE(flags) (((flags) >> 8) & 3)
#define FLAG_GET_ADDRSIZE(flags) (((flags) >> 10) & 3)
/* To get the LOCK/REPNZ/REP prefixes (the low 3 bits of 'flags'). */
#define FLAG_GET_PREFIX(flags) ((flags) & 7)
107 
/*
 * Macros to extract segment registers from 'segment':
 * SEGMENT_DEFAULT is a marker bit (0x80) kept next to the register value in the low 7 bits.
 * SEGMENT_SET ORs 'seg' into di->segment; 'di' may be any pointer expression (e.g. &inst).
 * SEGMENT_GET yields R_NONE when no segment is set, otherwise the value without the marker bit.
 * SEGMENT_IS_DEFAULT tells whether the marker bit is set.
 */
#define SEGMENT_DEFAULT 0x80
#define SEGMENT_SET(di, seg) (((di)->segment) |= (seg))
#define SEGMENT_GET(segment) (((segment) == R_NONE) ? R_NONE : ((segment) & 0x7f))
#define SEGMENT_IS_DEFAULT(segment) (((segment) & SEGMENT_DEFAULT) == SEGMENT_DEFAULT)
115 
116 
/*
 * Decoding mode of the disassembler:
 * 16 bits, 32 bits, or 64 bits (AMD64, x86-64).
 */
typedef enum {
	Decode16Bits = 0,
	Decode32Bits = 1,
	Decode64Bits = 2
} _DecodeType;
119 
120 typedef OFFSET_INTEGER _OffsetType;
121 
122 typedef struct {
123 	_OffsetType codeOffset, nextOffset; /* nextOffset is OUT only. */
124 	const uint8_t* code;
125 	int codeLen; /* Using signed integer makes it easier to detect an underflow. */
126 	_DecodeType dt;
127 	unsigned int features;
128 } _CodeInfo;
129 
/* Kind of an operand, stored in _Operand.type; each kind is described at the _Operand structure. */
typedef enum {
	O_NONE,
	O_REG,
	O_IMM,
	O_IMM1,
	O_IMM2,
	O_DISP,
	O_SMEM,
	O_MEM,
	O_PC,
	O_PTR
} _OperandType;
131 
/*
 * Immediate value of an instruction. All members share the same storage;
 * which member is meaningful depends on the type of the operand that refers to it.
 */
typedef union {
	/* Used by O_IMM: */
	int8_t sbyte;
	uint8_t byte;
	int16_t sword;
	uint16_t word;
	int32_t sdword;
	uint32_t dword;
	int64_t sqword; /* All immediates are SIGN-EXTENDED to 64 bits! */
	uint64_t qword;

	/* Used by O_PC: (Use the INSTRUCTION_GET_TARGET macro to get the target address). */
	_OffsetType addr; /* It's a relative offset as for now. */

	/* Used by O_PTR: */
	struct {
		uint16_t seg;
		/* Can be 16 or 32 bits, size is in ops[n].size. */
		uint32_t off;
	} ptr;

	/* Used by O_IMM1 (i1) and O_IMM2 (i2). ENTER instruction only. */
	struct {
		uint32_t i1;
		uint32_t i2;
	} ex;
} _Value;
159 
/*
 * A single operand of a decomposed instruction.
 * The operand itself only holds a type, a register index and a size; the actual
 * immediate/displacement values live in the instruction structure, as listed below.
 */
typedef struct {
	/* Type of operand:
		O_NONE: operand is to be ignored.
		O_REG: index holds global register index.
		O_IMM: instruction.imm.
		O_IMM1: instruction.imm.ex.i1.
		O_IMM2: instruction.imm.ex.i2.
		O_DISP: memory dereference with displacement only, instruction.disp.
		O_SMEM: simple memory dereference with optional displacement (a single register memory dereference).
		O_MEM: complex memory dereference (optional fields: s/i/b/disp).
		O_PC: the relative address of a branch instruction (instruction.imm.addr).
		O_PTR: the absolute target address of a far branch instruction (instruction.imm.ptr.seg/off).
	*/
	uint8_t type; /* _OperandType */

	/* Index of:
		O_REG: holds global register index
		O_SMEM: holds the 'base' register. E.G: [ECX], [EBX+0x1234] are both in operand.index.
		O_MEM: holds the 'index' register. E.G: [EAX*4] is in operand.index.
	*/
	uint8_t index;

	/* Size of:
		O_REG: register
		O_IMM: instruction.imm
		O_IMM1: instruction.imm.ex.i1
		O_IMM2: instruction.imm.ex.i2
		O_DISP: instruction.disp
		O_SMEM: size of indirection.
		O_MEM: size of indirection.
		O_PC: size of the relative offset
		O_PTR: size of instruction.imm.ptr.off (16 or 32)
	*/
	uint16_t size;
} _Operand;
195 
/* Opcode ID that stands for 'none' (going by its name). */
#define OPCODE_ID_NONE 0

/*
 * Values of the 'flags' field of an instruction.
 */
/* Instruction could not be disassembled (all 16 bits set). */
#define FLAG_NOT_DECODABLE ((uint16_t)-1)
/* Bit 0: the instruction locks memory access. */
#define FLAG_LOCK 0x0001
/* Bit 1: the instruction is prefixed with a REPNZ. */
#define FLAG_REPNZ 0x0002
/* Bit 2: the instruction is prefixed with a REP, this can be a REPZ, it depends on the specific instruction. */
#define FLAG_REP 0x0004
/* Bit 3: there is a hint taken, for Jcc instructions only. */
#define FLAG_HINT_TAKEN 0x0008
/* Bit 4: there is a hint non-taken, for Jcc instructions only. */
#define FLAG_HINT_NOT_TAKEN 0x0010
/* Bit 5: the Imm value is signed extended. */
#define FLAG_IMM_SIGNED 0x0020
/* Bit 6: the destination operand is writable. */
#define FLAG_DST_WR 0x0040
/* Bit 7: the instruction uses RIP-relative indirection. */
#define FLAG_RIP_RELATIVE 0x0080

/* Bits 8-11 hold the operand/address sizes, see the FLAG_GET_XXX macros above. */

/* Bit 15: the instruction is privileged and can only be used from Ring0. */
#define FLAG_PRIVILEGED_INSTRUCTION 0x8000
220 
/* Register index that means: no register was defined. */
#define R_NONE ((uint8_t)-1)

/* First global register index of each register group. */
#define REGS64_BASE     0
#define REGS32_BASE     16
#define REGS16_BASE     32
#define REGS8_BASE      48
#define REGS8_REX_BASE  64
#define SREGS_BASE      68
#define FPUREGS_BASE    75
#define MMXREGS_BASE    83
#define SSEREGS_BASE    91
#define AVXREGS_BASE    107
#define CREGS_BASE      123
#define DREGS_BASE      132

/* Number of operand slots in an instruction. */
#define OPERANDS_NO (4)
238 
239 typedef struct {
240 	/* Used by ops[n].type == O_IMM/O_IMM1&O_IMM2/O_PTR/O_PC. Its size is ops[n].size. */
241 	_Value imm;
242 	/* Used by ops[n].type == O_SMEM/O_MEM/O_DISP. Its size is dispSize. */
243 	uint64_t disp;
244 	/* Virtual address of first byte of instruction. */
245 	_OffsetType addr;
246 	/* General flags of instruction, holds prefixes and more, if FLAG_NOT_DECODABLE, instruction is invalid. */
247 	uint16_t flags;
248 	/* Unused prefixes mask, for each bit that is set that prefix is not used (LSB is byte [addr + 0]). */
249 	uint16_t unusedPrefixesMask;
250 	/* Mask of registers that were used in the operands, only used for quick look up, in order to know *some* operand uses that register class. */
251 	uint16_t usedRegistersMask;
252 	/* ID of opcode in the global opcode table. Use for mnemonic look up. */
253 	uint16_t opcode;
254 	/* Up to four operands per instruction, ignored if ops[n].type == O_NONE. */
255 	_Operand ops[OPERANDS_NO];
256 	/* Size of the whole instruction. */
257 	uint8_t size;
258 	/* Segment information of memory indirection, default segment, or overriden one, can be -1. Use SEGMENT macros. */
259 	uint8_t segment;
260 	/* Used by ops[n].type == O_MEM. Base global register index (might be R_NONE), scale size (2/4/8), ignored for 0 or 1. */
261 	uint8_t base, scale;
262 	uint8_t dispSize;
263 	/* Meta defines the instruction set class, and the flow control flags. Use META macros. */
264 	uint8_t meta;
265 	/* The CPU flags that the instruction operates upon. */
266 	uint8_t modifiedFlagsMask, testedFlagsMask, undefinedFlagsMask;
267 } _DInst;
268 
269 #ifndef DISTORM_LIGHT
270 
/* Fixed capacity of the text buffers. Do not change this value. Keep Python wrapper in sync. */
#define MAX_TEXT_SIZE (48)

/* Fixed-capacity text buffer together with its length. */
typedef struct {
	unsigned length;
	unsigned char p[MAX_TEXT_SIZE]; /* p is a null terminated string. */
} _WString;
277 
278 /*
279  * Old decoded instruction structure in text format.
280  * Used only for backward compatibility with diStorm64.
281  * This structure holds all information the disassembler generates per instruction.
282  */
283 typedef struct {
284 	_WString mnemonic; /* Mnemonic of decoded instruction, prefixed if required by REP, LOCK etc. */
285 	_WString operands; /* Operands of the decoded instruction, up to 3 operands, comma-seperated. */
286 	_WString instructionHex; /* Hex dump - little endian, including prefixes. */
287 	unsigned int size; /* Size of decoded instruction. */
288 	_OffsetType offset; /* Start offset of the decoded instruction. */
289 } _DecodedInst;
290 
291 #endif /* DISTORM_LIGHT */
292 
/*
 * Register masks for quick look up: each mask stands for one register-class
 * that is being used in some operand.
 */
#define RM_AX  0x0001 /* AL, AH, AX, EAX, RAX */
#define RM_CX  0x0002 /* CL, CH, CX, ECX, RCX */
#define RM_DX  0x0004 /* DL, DH, DX, EDX, RDX */
#define RM_BX  0x0008 /* BL, BH, BX, EBX, RBX */
#define RM_SP  0x0010 /* SPL, SP, ESP, RSP */
#define RM_BP  0x0020 /* BPL, BP, EBP, RBP */
#define RM_SI  0x0040 /* SIL, SI, ESI, RSI */
#define RM_DI  0x0080 /* DIL, DI, EDI, RDI */
#define RM_FPU 0x0100 /* ST(0) - ST(7) */
#define RM_MMX 0x0200 /* MM0 - MM7 */
#define RM_SSE 0x0400 /* XMM0 - XMM15 */
#define RM_AVX 0x0800 /* YMM0 - YMM15 */
#define RM_CR  0x1000 /* CR0, CR2, CR3, CR4, CR8 */
#define RM_DR  0x2000 /* DR0, DR1, DR2, DR3, DR6, DR7 */
/*
 * RIP should be checked using the 'flags' field and FLAG_RIP_RELATIVE.
 * Segments should be checked using the segment macros.
 * For now R8 - R15 are not supported and non general purpose registers map into same RM.
 */
312 
/* CPU flags that an instruction may modify, test or leave undefined. */
#define D_ZF 0x01 /* Zero */
#define D_SF 0x02 /* Sign */
#define D_CF 0x04 /* Carry */
#define D_OF 0x08 /* Overflow */
#define D_PF 0x10 /* Parity */
#define D_AF 0x20 /* Auxiliary */
#define D_DF 0x40 /* Direction */
#define D_IF 0x80 /* Interrupt */
322 
/*
 * Instructions Set classes:
 * each value indicates the set an instruction belongs to.
 * If you want a better understanding of the available classes, look at disOps project, file: x86sets.py.
 */
#define ISC_INTEGER   1  /* General Integer set. */
#define ISC_FPU       2  /* 387 FPU set. */
#define ISC_P6        3  /* P6 set. */
#define ISC_MMX       4  /* MMX set. */
#define ISC_SSE       5  /* SSE set. */
#define ISC_SSE2      6  /* SSE2 set. */
#define ISC_SSE3      7  /* SSE3 set. */
#define ISC_SSSE3     8  /* SSSE3 set. */
#define ISC_SSE4_1    9  /* SSE4.1 set. */
#define ISC_SSE4_2    10 /* SSE4.2 set. */
#define ISC_SSE4_A    11 /* AMD's SSE4.A set. */
#define ISC_3DNOW     12 /* 3DNow! set. */
#define ISC_3DNOWEXT  13 /* 3DNow! Extensions set. */
#define ISC_VMX       14 /* VMX (Intel) set. */
#define ISC_SVM       15 /* SVM (AMD) set. */
#define ISC_AVX       16 /* AVX (Intel) set. */
#define ISC_FMA       17 /* FMA (Intel) set. */
#define ISC_AES       18 /* AES/AVX (Intel) set. */
#define ISC_CLMUL     19 /* CLMUL (Intel) set. */
365 
/*
 * Features for decompose.
 * Apart from DF_NONE every value is a distinct bit, so values can be OR'ed together.
 */
#define DF_NONE 0x000
/* Limit addresses to a maximum of 16 bits. */
#define DF_MAXIMUM_ADDR16 0x001
/* Limit addresses to a maximum of 32 bits. */
#define DF_MAXIMUM_ADDR32 0x002
/* Return only flow control instructions (the others are filtered internally). */
#define DF_RETURN_FC_ONLY 0x004
/* Stop and return to the caller when the instruction 'CALL' (near and far) was decoded. */
#define DF_STOP_ON_CALL 0x008
/* Stop and return to the caller when the instruction 'RET' (near and far) was decoded. */
#define DF_STOP_ON_RET 0x010
/* Stop and return to the caller when the instruction system-call/ret was decoded. */
#define DF_STOP_ON_SYS 0x020
/* Stop and return to the caller when any of the branch 'JMP' (near and far) instructions were decoded. */
#define DF_STOP_ON_UNC_BRANCH 0x040
/* Stop and return to the caller when any of the conditional branch instructions were decoded. */
#define DF_STOP_ON_CND_BRANCH 0x080
/* Stop and return to the caller when the instruction 'INT' (INT, INT1, INTO, INT 3) was decoded. */
#define DF_STOP_ON_INT 0x100
/* Stop and return to the caller when any of the 'CMOVxx' instructions was decoded. */
#define DF_STOP_ON_CMOV 0x200
/* Stop and return to the caller when any flow control instruction was decoded. */
#define DF_STOP_ON_FLOW_CONTROL \
	(DF_STOP_ON_CALL | DF_STOP_ON_RET | DF_STOP_ON_SYS | DF_STOP_ON_UNC_BRANCH | \
	 DF_STOP_ON_CND_BRANCH | DF_STOP_ON_INT | DF_STOP_ON_CMOV)
390 
/* Flow control classes of an instruction. */
#define FC_NONE        0 /* Not a flow-control instruction. */
#define FC_CALL        1 /* One of: CALL, CALL FAR. */
#define FC_RET         2 /* One of: RET, IRET, RETF. */
#define FC_SYS         3 /* One of: SYSCALL, SYSRET, SYSENTER, SYSEXIT. */
#define FC_UNC_BRANCH  4 /* One of: JMP, JMP FAR. */
/*
 * One of:
 * JCXZ, JO, JNO, JB, JAE, JZ, JNZ, JBE, JA, JS, JNS, JP, JNP, JL, JGE, JLE, JG, LOOP, LOOPZ, LOOPNZ.
 */
#define FC_CND_BRANCH  5
#define FC_INT         6 /* One of: INT, INT1, INT 3, INTO, UD2. */
#define FC_CMOV        7 /* One of: CMOVxx. */
410 
/* Return code of the decoding functions. */
typedef enum {
	DECRES_NONE,
	DECRES_SUCCESS,
	DECRES_MEMORYERR,
	DECRES_INPUTERR,
	DECRES_FILTERED
} _DecodeResult;
413 
414 /* Define the following interface functions only for outer projects. */
415 #if !(defined(DISTORM_STATIC) || defined(DISTORM_DYNAMIC))
416 
/* distorm_decode
 * Input:
 *         codeOffset - Origin of the given code (virtual address that is), NOT an offset in code.
 *         code - Pointer to the code buffer to be disassembled.
 *         codeLen - Amount of bytes that should be decoded from the code buffer.
 *         dt - Decoding mode, 16 bits (Decode16Bits), 32 bits (Decode32Bits) or AMD64 (Decode64Bits).
 *         result - Array of type _DecodedInst which will be used by this function in order to return the disassembled instructions.
 *         maxInstructions - The maximum number of entries in the result array that you pass to this function, so it won't exceed its bound.
 *         usedInstructionsCount - Number of the instruction that successfully were disassembled and written to the result array.
 * Output: usedInstructionsCount will hold the number of entries used in the result array
 *         and the result array itself will be filled with the disassembled instructions.
 * Return: DECRES_SUCCESS on success (no more to disassemble), DECRES_INPUTERR on input error (null code buffer, invalid decoding mode, etc...),
 *         DECRES_MEMORYERR when there are not enough entries to use in the result array, BUT YOU STILL have to check for usedInstructionsCount!
 * Side-Effects: Even if the return code is DECRES_MEMORYERR, there might STILL be data in the
 *               array you passed, this function will try to use as much entries as possible!
 * Notes:  1)The minimal size of maxInstructions is 15.
 *         2)You will have to synchronize the offset,code and length by yourself if you pass code fragments and not a complete code block!
 *
 * The un-suffixed names distorm_decompose, distorm_decode and distorm_format are macros that map to
 * the 64 or the 32 variants below, depending on whether SUPPORT_64BIT_OFFSET is defined.
 * distorm_decompose takes its input through a _CodeInfo structure and writes _DInst entries to the result array.
 * distorm_format takes a _CodeInfo and a _DInst and writes a single _DecodedInst.
 * NOTE(review): the description above is written for distorm_decode; the exact semantics of
 * distorm_decompose and distorm_format are not documented in this header - confirm against the library.
 */
#ifdef SUPPORT_64BIT_OFFSET

	_DecodeResult distorm_decompose64(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	#define distorm_decompose distorm_decompose64

#ifndef DISTORM_LIGHT
	/* If distorm-light is defined, we won't export this text-formatting functionality. */
	_DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result);
	#define distorm_decode distorm_decode64
	#define distorm_format distorm_format64
#endif /*DISTORM_LIGHT*/

#else /*SUPPORT_64BIT_OFFSET*/

	_DecodeResult distorm_decompose32(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	#define distorm_decompose distorm_decompose32

#ifndef DISTORM_LIGHT
	/* If distorm-light is defined, we won't export this text-formatting functionality. */
	_DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result);
	#define distorm_decode distorm_decode32
	#define distorm_format distorm_format32
#endif /*DISTORM_LIGHT*/

#endif /*SUPPORT_64BIT_OFFSET*/
462 
/*
 * distorm_version
 * Input:
 *        none
 *
 * Output: unsigned int - version of compiled library.
 *
 * Declared with (void) so that it is a real prototype in C as well;
 * an empty parameter list in C leaves the parameters unspecified.
 */
unsigned int distorm_version(void);
471 
472 #endif /* DISTORM_STATIC */
473 
474 #ifdef __cplusplus
475 } /* End Of Extern */
476 #endif
477 
478 #endif /* DISTORM_H */
479