/* diStorm3 3.3 */

/*
distorm.h

diStorm3 - Powerful disassembler for X86/AMD64
http://ragestorm.net/distorm/
distorm at gmail dot com
Copyright (C) 2003-2012  Gil Dabah

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
*/


#ifndef DISTORM_H
#define DISTORM_H

/*
 * 64 bit offsets support:
 * If the diStorm library you use was compiled with 64 bits offsets,
 * make sure you compile your own code with the following macro set:
 * SUPPORT_64BIT_OFFSET
 * Otherwise comment it out, or you will get a linker error of an unresolved symbol...
 * Turned on by default!
 */

#if !(defined(DISTORM_STATIC) || defined(DISTORM_DYNAMIC))
	/* Define this macro for outer projects by default. */
	#define SUPPORT_64BIT_OFFSET
#endif

/* TINYC has a problem with some 64bits library functions, so ignore 64 bit offsets. */
#ifdef __TINYC__
	#undef SUPPORT_64BIT_OFFSET
#endif

/* If your compiler doesn't support stdint.h, define your own 64 bits type. */
#ifdef SUPPORT_64BIT_OFFSET
	#ifdef _MSC_VER
		#define OFFSET_INTEGER unsigned __int64
	#else
		#include <stdint.h>
		#define OFFSET_INTEGER uint64_t
	#endif
#else
	/* 32 bit offsets are used. */
	#define OFFSET_INTEGER unsigned long
#endif

#ifdef _MSC_VER
/* Since MSVC isn't shipped with stdint.h, we will have our own: */
typedef signed __int64		int64_t;
typedef unsigned __int64	uint64_t;
typedef signed __int32		int32_t;
typedef unsigned __int32	uint32_t;
typedef signed __int16		int16_t;
typedef unsigned __int16	uint16_t;
typedef signed __int8		int8_t;
typedef unsigned __int8		uint8_t;
#else
/*
 * The structures below use the fixed-width types regardless of the offset size,
 * so stdint.h is required even when 32 bit offsets are selected.
 */
#include <stdint.h>
#endif

/* Support C++ compilers */
#ifdef __cplusplus
 extern "C" {
#endif


/* ***  Helper Macros  *** */

/* Get the ISC of the instruction, used with the definitions below. */
#define META_GET_ISC(meta) (((meta) >> 3) & 0x1f)
#define META_SET_ISC(di, isc) (((di)->meta) |= ((isc) << 3))
/* Get the flow control flags of the instruction, see 'features for decompose' below. */
#define META_GET_FC(meta) ((meta) & 0x7)

/* Get the target address of a branching instruction. O_PC operand type. */
#define INSTRUCTION_GET_TARGET(di) ((_OffsetType)(((di)->addr + (di)->imm.addr + (di)->size)))
/* Get the target address of a RIP-relative memory indirection. */
#define INSTRUCTION_GET_RIP_TARGET(di) ((_OffsetType)(((di)->addr + (di)->disp + (di)->size)))

/*
 * Operand Size or Address size are stored inside the flags:
 * 00 - 16 bits
 * 01 - 32 bits
 * 10 - 64 bits
 * 11 - reserved
 *
 * If you call these set-macros more than once, you will have to clean the bits before doing so.
 *
 * 'di' is a pointer to a _DInst; it is parenthesized so that any pointer
 * expression (e.g. &inst) can be passed, not only a plain identifier.
 */
#define FLAG_SET_OPSIZE(di, size) (((di)->flags) |= (((size) & 3) << 8))
#define FLAG_SET_ADDRSIZE(di, size) (((di)->flags) |= (((size) & 3) << 10))
#define FLAG_GET_OPSIZE(flags) (((flags) >> 8) & 3)
#define FLAG_GET_ADDRSIZE(flags) (((flags) >> 10) & 3)
/* To get the LOCK/REPNZ/REP prefixes. */
#define FLAG_GET_PREFIX(flags) ((flags) & 7)

/*
 * Macros to extract segment registers from 'segment':
 * SEGMENT_DEFAULT is a marker bit OR'ed into 'segment'; SEGMENT_GET strips it
 * (unless the value is R_NONE) and SEGMENT_IS_DEFAULT tests for it.
 */
#define SEGMENT_DEFAULT 0x80
#define SEGMENT_SET(di, seg) (((di)->segment) |= (seg))
#define SEGMENT_GET(segment) (((segment) == R_NONE) ? R_NONE : ((segment) & 0x7f))
#define SEGMENT_IS_DEFAULT(segment) (((segment) & SEGMENT_DEFAULT) == SEGMENT_DEFAULT)


/* Decodes modes of the disassembler, 16 bits or 32 bits or 64 bits for AMD64, x86-64. */
typedef enum { Decode16Bits = 0, Decode32Bits = 1, Decode64Bits = 2 } _DecodeType;

typedef OFFSET_INTEGER _OffsetType;

typedef struct {
	_OffsetType codeOffset, nextOffset; /* nextOffset is OUT only. */
	const uint8_t* code;
	int codeLen; /* Using signed integer makes it easier to detect an underflow. */
	_DecodeType dt;
	unsigned int features;
} _CodeInfo;

typedef enum { O_NONE, O_REG, O_IMM, O_IMM1, O_IMM2, O_DISP, O_SMEM, O_MEM, O_PC, O_PTR } _OperandType;

typedef union {
	/* Used by O_IMM: */
	int8_t sbyte;
	uint8_t byte;
	int16_t sword;
	uint16_t word;
	int32_t sdword;
	uint32_t dword;
	int64_t sqword; /* All immediates are SIGN-EXTENDED to 64 bits! */
	uint64_t qword;

	/* Used by O_PC: (Use INSTRUCTION_GET_TARGET). */
	_OffsetType addr; /* It's a relative offset as for now. */

	/* Used by O_PTR: */
	struct {
		uint16_t seg;
		/* Can be 16 or 32 bits, size is in ops[n].size. */
		uint32_t off;
	} ptr;

	/* Used by O_IMM1 (i1) and O_IMM2 (i2). ENTER instruction only. */
	struct {
		uint32_t i1;
		uint32_t i2;
	} ex;
} _Value;

typedef struct {
	/* Type of operand:
		O_NONE: operand is to be ignored.
		O_REG: index holds global register index.
		O_IMM: instruction.imm.
		O_IMM1: instruction.imm.ex.i1.
		O_IMM2: instruction.imm.ex.i2.
		O_DISP: memory dereference with displacement only, instruction.disp.
		O_SMEM: simple memory dereference with optional displacement (a single register memory dereference).
		O_MEM: complex memory dereference (optional fields: s/i/b/disp).
		O_PC: the relative address of a branch instruction (instruction.imm.addr).
		O_PTR: the absolute target address of a far branch instruction (instruction.imm.ptr.seg/off).
	*/
	uint8_t type; /* _OperandType */

	/* Index of:
		O_REG: holds global register index
		O_SMEM: holds the 'base' register. E.G: [ECX], [EBX+0x1234] are both in operand.index.
		O_MEM: holds the 'index' register. E.G: [EAX*4] is in operand.index.
	*/
	uint8_t index;

	/* Size of:
		O_REG: register
		O_IMM: instruction.imm
		O_IMM1: instruction.imm.ex.i1
		O_IMM2: instruction.imm.ex.i2
		O_DISP: instruction.disp
		O_SMEM: size of indirection.
		O_MEM: size of indirection.
		O_PC: size of the relative offset
		O_PTR: size of instruction.imm.ptr.off (16 or 32)
	*/
	uint16_t size;
} _Operand;

#define OPCODE_ID_NONE 0
/* Instruction could not be disassembled. */
#define FLAG_NOT_DECODABLE ((uint16_t)-1)
/* The instruction locks memory access. */
#define FLAG_LOCK (1 << 0)
/* The instruction is prefixed with a REPNZ. */
#define FLAG_REPNZ (1 << 1)
/* The instruction is prefixed with a REP, this can be a REPZ, it depends on the specific instruction. */
#define FLAG_REP (1 << 2)
/* Indicates there is a hint taken for Jcc instructions only. */
#define FLAG_HINT_TAKEN (1 << 3)
/* Indicates there is a hint non-taken for Jcc instructions only. */
#define FLAG_HINT_NOT_TAKEN (1 << 4)
/* The Imm value is signed extended. */
#define FLAG_IMM_SIGNED (1 << 5)
/* The destination operand is writable. */
#define FLAG_DST_WR (1 << 6)
/* The instruction uses RIP-relative indirection. */
#define FLAG_RIP_RELATIVE (1 << 7)

/* See flag FLAG_GET_XXX macros above. */

/* The instruction is privileged and can only be used from Ring0. */
#define FLAG_PRIVILEGED_INSTRUCTION (1 << 15)

/* No register was defined. */
#define R_NONE ((uint8_t)-1)

#define REGS64_BASE 0
#define REGS32_BASE 16
#define REGS16_BASE 32
#define REGS8_BASE 48
#define REGS8_REX_BASE 64
#define SREGS_BASE 68
#define FPUREGS_BASE 75
#define MMXREGS_BASE 83
#define SSEREGS_BASE 91
#define AVXREGS_BASE 107
#define CREGS_BASE 123
#define DREGS_BASE 132

#define OPERANDS_NO (4)

typedef struct {
	/* Used by ops[n].type == O_IMM/O_IMM1&O_IMM2/O_PTR/O_PC. Its size is ops[n].size. */
	_Value imm;
	/* Used by ops[n].type == O_SMEM/O_MEM/O_DISP. Its size is dispSize. */
	uint64_t disp;
	/* Virtual address of first byte of instruction. */
	_OffsetType addr;
	/* General flags of instruction, holds prefixes and more, if FLAG_NOT_DECODABLE, instruction is invalid. */
	uint16_t flags;
	/* Unused prefixes mask, for each bit that is set that prefix is not used (LSB is byte [addr + 0]). */
	uint16_t unusedPrefixesMask;
	/* Mask of registers that were used in the operands, only used for quick look up, in order to know *some* operand uses that register class. */
	uint16_t usedRegistersMask;
	/* ID of opcode in the global opcode table. Use for mnemonic look up. */
	uint16_t opcode;
	/* Up to four operands per instruction, ignored if ops[n].type == O_NONE. */
	_Operand ops[OPERANDS_NO];
	/* Size of the whole instruction. */
	uint8_t size;
	/* Segment information of memory indirection, default segment, or overridden one, can be -1. Use SEGMENT macros. */
	uint8_t segment;
	/* Used by ops[n].type == O_MEM. Base global register index (might be R_NONE), scale size (2/4/8), ignored for 0 or 1. */
	uint8_t base, scale;
	uint8_t dispSize;
	/* Meta defines the instruction set class, and the flow control flags. Use META macros. */
	uint8_t meta;
	/* The CPU flags that the instruction operates upon. */
	uint8_t modifiedFlagsMask, testedFlagsMask, undefinedFlagsMask;
} _DInst;

#ifndef DISTORM_LIGHT

/* Static size of strings. Do not change this value. Keep Python wrapper in sync. */
#define MAX_TEXT_SIZE (48)
typedef struct {
	unsigned int length;
	unsigned char p[MAX_TEXT_SIZE]; /* p is a null terminated string. */
} _WString;

/*
 * Old decoded instruction structure in text format.
 * Used only for backward compatibility with diStorm64.
 * This structure holds all information the disassembler generates per instruction.
 */
typedef struct {
	_WString mnemonic; /* Mnemonic of decoded instruction, prefixed if required by REP, LOCK etc. */
	_WString operands; /* Operands of the decoded instruction, up to 3 operands, comma-separated. */
	_WString instructionHex; /* Hex dump - little endian, including prefixes. */
	unsigned int size; /* Size of decoded instruction. */
	_OffsetType offset; /* Start offset of the decoded instruction. */
} _DecodedInst;

#endif /* DISTORM_LIGHT */

/* Register masks for quick look up, each mask indicates one of a register-class that is being used in some operand. */
#define RM_AX 1 /* AL, AH, AX, EAX, RAX */
#define RM_CX 2 /* CL, CH, CX, ECX, RCX */
#define RM_DX 4 /* DL, DH, DX, EDX, RDX */
#define RM_BX 8 /* BL, BH, BX, EBX, RBX */
#define RM_SP 0x10 /* SPL, SP, ESP, RSP */
#define RM_BP 0x20 /* BPL, BP, EBP, RBP */
#define RM_SI 0x40 /* SIL, SI, ESI, RSI */
#define RM_DI 0x80 /* DIL, DI, EDI, RDI */
#define RM_FPU 0x100 /* ST(0) - ST(7) */
#define RM_MMX 0x200 /* MM0 - MM7 */
#define RM_SSE 0x400 /* XMM0 - XMM15 */
#define RM_AVX 0x800 /* YMM0 - YMM15 */
#define RM_CR 0x1000 /* CR0, CR2, CR3, CR4, CR8 */
#define RM_DR 0x2000 /* DR0, DR1, DR2, DR3, DR6, DR7 */
/* RIP should be checked using the 'flags' field and FLAG_RIP_RELATIVE.
 * Segments should be checked using the segment macros.
 * For now R8 - R15 are not supported and non general purpose registers map into same RM.
 */

/* CPU Flags that instructions modify, test or undefine. */
#define D_ZF 1 /* Zero */
#define D_SF 2 /* Sign */
#define D_CF 4 /* Carry */
#define D_OF 8 /* Overflow */
#define D_PF 0x10 /* Parity */
#define D_AF 0x20 /* Auxiliary */
#define D_DF 0x40 /* Direction */
#define D_IF 0x80 /* Interrupt */

/*
 * Instructions Set classes:
 * if you want a better understanding of the available classes, look at disOps project, file: x86sets.py.
 */
/* Indicates the instruction belongs to the General Integer set. */
#define ISC_INTEGER 1
/* Indicates the instruction belongs to the 387 FPU set. */
#define ISC_FPU 2
/* Indicates the instruction belongs to the P6 set. */
#define ISC_P6 3
/* Indicates the instruction belongs to the MMX set. */
#define ISC_MMX 4
/* Indicates the instruction belongs to the SSE set. */
#define ISC_SSE 5
/* Indicates the instruction belongs to the SSE2 set. */
#define ISC_SSE2 6
/* Indicates the instruction belongs to the SSE3 set. */
#define ISC_SSE3 7
/* Indicates the instruction belongs to the SSSE3 set. */
#define ISC_SSSE3 8
/* Indicates the instruction belongs to the SSE4.1 set. */
#define ISC_SSE4_1 9
/* Indicates the instruction belongs to the SSE4.2 set. */
#define ISC_SSE4_2 10
/* Indicates the instruction belongs to the AMD's SSE4.A set. */
#define ISC_SSE4_A 11
/* Indicates the instruction belongs to the 3DNow! set. */
#define ISC_3DNOW 12
/* Indicates the instruction belongs to the 3DNow! Extensions set. */
#define ISC_3DNOWEXT 13
/* Indicates the instruction belongs to the VMX (Intel) set. */
#define ISC_VMX 14
/* Indicates the instruction belongs to the SVM (AMD) set. */
#define ISC_SVM 15
/* Indicates the instruction belongs to the AVX (Intel) set. */
#define ISC_AVX 16
/* Indicates the instruction belongs to the FMA (Intel) set. */
#define ISC_FMA 17
/* Indicates the instruction belongs to the AES/AVX (Intel) set. */
#define ISC_AES 18
/* Indicates the instruction belongs to the CLMUL (Intel) set. */
#define ISC_CLMUL 19

/* Features for decompose: */
#define DF_NONE 0
/* The decoder will limit addresses to a maximum of 16 bits. */
#define DF_MAXIMUM_ADDR16 1
/* The decoder will limit addresses to a maximum of 32 bits. */
#define DF_MAXIMUM_ADDR32 2
/* The decoder will return only flow control instructions (and filter the others internally). */
#define DF_RETURN_FC_ONLY 4
/* The decoder will stop and return to the caller when the instruction 'CALL' (near and far) was decoded. */
#define DF_STOP_ON_CALL 8
/* The decoder will stop and return to the caller when the instruction 'RET' (near and far) was decoded. */
#define DF_STOP_ON_RET 0x10
/* The decoder will stop and return to the caller when the instruction system-call/ret was decoded. */
#define DF_STOP_ON_SYS 0x20
/* The decoder will stop and return to the caller when any of the branch 'JMP', (near and far) instructions were decoded. */
#define DF_STOP_ON_UNC_BRANCH 0x40
/* The decoder will stop and return to the caller when any of the conditional branch instruction were decoded. */
#define DF_STOP_ON_CND_BRANCH 0x80
/* The decoder will stop and return to the caller when the instruction 'INT' (INT, INT1, INTO, INT 3) was decoded. */
#define DF_STOP_ON_INT 0x100
/* The decoder will stop and return to the caller when any of the 'CMOVxx' instruction was decoded. */
#define DF_STOP_ON_CMOV 0x200
/* The decoder will stop and return to the caller when any flow control instruction was decoded. */
#define DF_STOP_ON_FLOW_CONTROL (DF_STOP_ON_CALL | DF_STOP_ON_RET | DF_STOP_ON_SYS | DF_STOP_ON_UNC_BRANCH | DF_STOP_ON_CND_BRANCH | DF_STOP_ON_INT | DF_STOP_ON_CMOV)

/* Indicates the instruction is not a flow-control instruction. */
#define FC_NONE 0
/* Indicates the instruction is one of: CALL, CALL FAR. */
#define FC_CALL 1
/* Indicates the instruction is one of: RET, IRET, RETF. */
#define FC_RET 2
/* Indicates the instruction is one of: SYSCALL, SYSRET, SYSENTER, SYSEXIT. */
#define FC_SYS 3
/* Indicates the instruction is one of: JMP, JMP FAR. */
#define FC_UNC_BRANCH 4
/*
 * Indicates the instruction is one of:
 * JCXZ, JO, JNO, JB, JAE, JZ, JNZ, JBE, JA, JS, JNS, JP, JNP, JL, JGE, JLE, JG, LOOP, LOOPZ, LOOPNZ.
 */
#define FC_CND_BRANCH 5
/* Indicates the instruction is one of: INT, INT1, INT 3, INTO, UD2. */
#define FC_INT 6
/* Indicates the instruction is one of: CMOVxx. */
#define FC_CMOV 7

/* Return code of the decoding function. */
typedef enum { DECRES_NONE, DECRES_SUCCESS, DECRES_MEMORYERR, DECRES_INPUTERR, DECRES_FILTERED } _DecodeResult;

/* Define the following interface functions only for outer projects. */
#if !(defined(DISTORM_STATIC) || defined(DISTORM_DYNAMIC))

/* distorm_decode
 * Input:
 *         offset - Origin of the given code (virtual address that is), NOT an offset in code.
 *         code - Pointer to the code buffer to be disassembled.
 *         length - Amount of bytes that should be decoded from the code buffer.
 *         dt - Decoding mode, 16 bits (Decode16Bits), 32 bits (Decode32Bits) or AMD64 (Decode64Bits).
 *         result - Array of type _DecodedInst which will be used by this function in order to return the disassembled instructions.
 *         maxInstructions - The maximum number of entries in the result array that you pass to this function, so it won't exceed its bound.
 *         usedInstructionsCount - Number of the instruction that successfully were disassembled and written to the result array.
 * Output: usedInstructionsCount will hold the number of entries used in the result array
 *         and the result array itself will be filled with the disassembled instructions.
 * Return: DECRES_SUCCESS on success (no more to disassemble), DECRES_INPUTERR on input error (null code buffer, invalid decoding mode, etc...),
 *         DECRES_MEMORYERR when there are not enough entries to use in the result array, BUT YOU STILL have to check for usedInstructionsCount!
 * Side-Effects: Even if the return code is DECRES_MEMORYERR, there might STILL be data in the
 *               array you passed, this function will try to use as much entries as possible!
 * Notes:  1)The minimal size of maxInstructions is 15.
 *         2)You will have to synchronize the offset,code and length by yourself if you pass code fragments and not a complete code block!
 */
#ifdef SUPPORT_64BIT_OFFSET

	_DecodeResult distorm_decompose64(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	#define distorm_decompose distorm_decompose64

#ifndef DISTORM_LIGHT
	/* If distorm-light is defined, we won't export these text-formatting functionality. */
	_DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result);
	#define distorm_decode distorm_decode64
	#define distorm_format distorm_format64
#endif /*DISTORM_LIGHT*/

#else /*SUPPORT_64BIT_OFFSET*/

	_DecodeResult distorm_decompose32(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	#define distorm_decompose distorm_decompose32

#ifndef DISTORM_LIGHT
	/* If distorm-light is defined, we won't export these text-formatting functionality. */
	_DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount);
	void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result);
	#define distorm_decode distorm_decode32
	#define distorm_format distorm_format32
#endif /*DISTORM_LIGHT*/

#endif /*SUPPORT_64BIT_OFFSET*/

/*
 * distorm_version
 * Input:
 *        none
 *
 * Output: unsigned int - version of compiled library.
 */
unsigned int distorm_version(void);

#endif /* !(DISTORM_STATIC || DISTORM_DYNAMIC) */

#ifdef __cplusplus
} /* End Of Extern */
#endif

#endif /* DISTORM_H */