1 // Copyright (C) 2004, Matt Conover (mconover@gmail.com) 2 // 3 // WARNING: 4 // I wouldn't recommend changing any flags like OP_*, ITYPE_*, or *_MASK 5 // aside from those marked as UNUSED. This is because the flags parts of 6 // the flags are architecture independent and other are left to specific 7 // architectures to define, so unless you understand the relationships 8 // between them, I would leave them as is. 9 10 #ifndef DISASM_H 11 #define DISASM_H 12 #ifdef __cplusplus 13 extern "C" { 14 #endif 15 #include <windows.h> 16 #include <stdio.h> 17 #include "misc.h" 18 19 typedef signed char S8; 20 typedef unsigned char U8; 21 typedef signed short S16; 22 typedef unsigned short U16; 23 typedef signed long S32; 24 typedef unsigned long U32; 25 typedef LONG64 S64; 26 typedef ULONG64 U64; 27 28 // Forward declarations 29 struct _INSTRUCTION; 30 31 #ifdef SPEEDY 32 // On Visual Studio 6, making the internal functions inline makes compiling take forever 33 #define INTERNAL static _inline 34 #define INLINE _inline 35 #else 36 #define INTERNAL static 37 #define INLINE 38 #endif 39 40 #define VALID_INSTRUCTION(i) ((i) && !((i)->ErrorOccurred)) 41 #define NEXT_INSTRUCTION(i) ((i)->Address + (i)->Length) 42 #define DISASM_ARCH_TYPE(dis) ((dis)->ArchType) 43 #define INS_ARCH_TYPE(ins) DISASM_ARCH_TYPE((ins)->Disassembler) 44 45 // NOTE: these should be as big set to the maximum of the supported architectures 46 #define MAX_PREFIX_LENGTH 15 47 #define MAX_OPERAND_COUNT 3 48 #define MAX_INSTRUCTION_LENGTH 25 49 #define MAX_OPCODE_LENGTH 3 50 #define MAX_OPCODE_DESCRIPTION 256 51 52 ///////////////////////////////////////////////////////////////////// 53 // Code branch 54 ///////////////////////////////////////////////////////////////////// 55 56 #define MAX_CODE_REFERENCE_COUNT 3 57 58 typedef struct _CODE_BRANCH 59 { 60 U64 Addresses[MAX_CODE_REFERENCE_COUNT]; // NULL if multiple to addresses 61 U32 Count; 62 U8 IsLoop : 1; 63 U8 IsCall : 1; // branch if false 64 U8 IsIndirect : 1; // call/jmp [Address] 65 U8 AddressOffset: 5; 66 struct _INSTRUCTION_OPERAND *Operand; // the operand containg the address 67 } CODE_BRANCH; 68 69 ///////////////////////////////////////////////////////////////////// 70 // Data references 71 ///////////////////////////////////////////////////////////////////// 72 73 #define MAX_DATA_REFERENCE_COUNT 3 74 75 typedef struct _DATA_REFERENCE 76 { 77 U64 Addresses[MAX_DATA_REFERENCE_COUNT]; // NULL if multiple to addresses 78 U32 Count; 79 ULONG_PTR DataSize; 80 struct _INSTRUCTION_OPERAND *Operand; // the operand containg the address 81 } DATA_REFERENCE; 82 83 //////////////////////////////////////////////////////////////////// 84 // Instruction 85 ///////////////////////////////////////////////////////////////////// 86 87 // 88 // Instruction types (bits 0-7) 89 // Instruction groups (bits 8-26) 90 // 91 #define ITYPE_EXEC_OFFSET (1<<8) 92 #define ITYPE_ARITH_OFFSET (1<<9) 93 #define ITYPE_LOGIC_OFFSET (1<<10) 94 #define ITYPE_STACK_OFFSET (1<<11) 95 #define ITYPE_TESTCOND_OFFSET (1<<12) 96 #define ITYPE_LOAD_OFFSET (1<<13) 97 #define ITYPE_ARRAY_OFFSET (1<<14) 98 #define ITYPE_BIT_OFFSET (1<<15) 99 #define ITYPE_FLAG_OFFSET (1<<16) 100 #define ITYPE_FPU_OFFSET (1<<17) 101 #define ITYPE_TRAPS_OFFSET (1<<18) 102 #define ITYPE_SYSTEM_OFFSET (1<<19) 103 #define ITYPE_OTHER_OFFSET (1<<20) 104 #define ITYPE_UNUSED1_OFFSET (1<<21) 105 #define ITYPE_UNUSED2_OFFSET (1<<22) 106 #define ITYPE_UNUSED3_OFFSET (1<<23) 107 #define ITYPE_UNUSED4_OFFSET (1<<24) 108 #define ITYPE_UNUSED5_OFFSET (1<<25) 109 #define ITYPE_UNUSED6_OFFSET (1<<26) 110 #define ITYPE_EXT_UNUSED1 (1<<27) 111 #define ITYPE_EXT_UNUSED2 (1<<28) 112 #define ITYPE_EXT_UNUSED3 (1<<29) 113 #define ITYPE_EXT_UNUSED4 (1<<30) 114 #define ITYPE_EXT_UNUSED5 (1<<31) 115 116 // 117 // X86-specific flags (bits 27-31) 118 // 119 120 #define ITYPE_EXT_64 ITYPE_EXT_UNUSED1 // Use index 1 if in 64-bit mode and 0 otherwise 121 #define ITYPE_EXT_MODRM ITYPE_EXT_UNUSED2 // ModRM byte may extend the opcode 122 #define ITYPE_EXT_SUFFIX ITYPE_EXT_UNUSED3 // byte after ModRM/SIB/displacement is the third opcode 123 #define ITYPE_EXT_PREFIX ITYPE_EXT_UNUSED4 // prefix 124 #define ITYPE_EXT_FPU ITYPE_EXT_UNUSED5 // FPU instructions require special handling 125 126 #define ITYPE_3DNOW_OFFSET ITYPE_UNUSED1_OFFSET 127 #define ITYPE_MMX_OFFSET ITYPE_UNUSED2_OFFSET 128 #define ITYPE_SSE_OFFSET ITYPE_UNUSED3_OFFSET 129 #define ITYPE_SSE2_OFFSET ITYPE_UNUSED4_OFFSET 130 #define ITYPE_SSE3_OFFSET ITYPE_UNUSED5_OFFSET 131 132 // 133 // Instruction types 134 // 135 136 #define ITYPE_TYPE_MASK 0x7FFFFFFF 137 #define ITYPE_GROUP_MASK 0x7FFFFF00 138 139 typedef enum _INSTRUCTION_TYPE 140 { 141 // ITYPE_EXEC group 142 ITYPE_EXEC = ITYPE_EXEC_OFFSET, 143 ITYPE_BRANCH, 144 ITYPE_BRANCHCC, // conditional (not necessarily just flags) 145 ITYPE_CALL, 146 ITYPE_CALLCC, // conditional (not necessarily just flags) 147 ITYPE_RET, 148 ITYPE_LOOPCC, 149 150 // ITYPE_ARITH group 151 ITYPE_ARITH = ITYPE_ARITH_OFFSET, 152 ITYPE_XCHGADD, 153 ITYPE_ADD, 154 ITYPE_SUB, 155 ITYPE_MUL, 156 ITYPE_DIV, 157 ITYPE_INC, 158 ITYPE_DEC, 159 ITYPE_SHL, 160 ITYPE_SHR, 161 ITYPE_ROL, 162 ITYPE_ROR, 163 164 // ITYPE_LOGIC group 165 ITYPE_LOGIC=ITYPE_LOGIC_OFFSET, 166 ITYPE_AND, 167 ITYPE_OR, 168 ITYPE_XOR, 169 ITYPE_NOT, 170 ITYPE_NEG, 171 172 // ITYPE_STACK group 173 ITYPE_STACK=ITYPE_STACK_OFFSET, 174 ITYPE_PUSH, 175 ITYPE_POP, 176 ITYPE_PUSHA, 177 ITYPE_POPA, 178 ITYPE_PUSHF, 179 ITYPE_POPF, 180 ITYPE_ENTER, 181 ITYPE_LEAVE, 182 183 // ITYPE_TESTCOND group 184 ITYPE_TESTCOND=ITYPE_TESTCOND_OFFSET, 185 ITYPE_TEST, 186 ITYPE_CMP, 187 188 // ITYPE_LOAD group 189 ITYPE_LOAD=ITYPE_LOAD_OFFSET, 190 ITYPE_MOV, 191 ITYPE_MOVCC, // conditional 192 ITYPE_LEA, 193 ITYPE_XCHG, 194 ITYPE_XCHGCC, // conditional 195 196 // ITYPE_ARRAY group 197 ITYPE_ARRAY=ITYPE_ARRAY_OFFSET, 198 ITYPE_STRCMP, 199 ITYPE_STRLOAD, 200 ITYPE_STRMOV, 201 ITYPE_STRSTOR, 202 ITYPE_XLAT, 203 204 // ITYPE_BIT group 205 ITYPE_BIT=ITYPE_BIT_OFFSET, 206 ITYPE_BITTEST, 207 ITYPE_BITSET, 208 ITYPE_BITCLR, 209 210 // ITYPE_FLAG group 211 // PF = parify flag 212 // ZF = zero flag 213 // OF = overflow flag 214 // DF = direction flag 215 // SF = sign flag 216 ITYPE_FLAG=ITYPE_FLAG_OFFSET, 217 // clear 218 ITYPE_CLEARCF, 219 ITYPE_CLEARZF, 220 ITYPE_CLEAROF, 221 ITYPE_CLEARDF, 222 ITYPE_CLEARSF, 223 ITYPE_CLEARPF, 224 // set 225 ITYPE_SETCF, 226 ITYPE_SETZF, 227 ITYPE_SETOF, 228 ITYPE_SETDF, 229 ITYPE_SETSF, 230 ITYPE_SETPF, 231 // toggle 232 ITYPE_TOGCF, 233 ITYPE_TOGZF, 234 ITYPE_TOGOF, 235 ITYPE_TOGDF, 236 ITYPE_TOGSF, 237 ITYPE_TOGPF, 238 239 // ITYPE_FPU group 240 ITYPE_FPU=ITYPE_FPU_OFFSET, 241 ITYPE_FADD, 242 ITYPE_FSUB, 243 ITYPE_FMUL, 244 ITYPE_FDIV, 245 ITYPE_FCOMP, 246 ITYPE_FEXCH, 247 ITYPE_FLOAD, 248 ITYPE_FLOADENV, 249 ITYPE_FSTORE, 250 ITYPE_FSTOREENV, 251 ITYPE_FSAVE, 252 ITYPE_FRESTORE, 253 ITYPE_FMOVCC, 254 255 ITYPE_UNUSED1=ITYPE_UNUSED1_OFFSET, 256 ITYPE_UNUSED2=ITYPE_UNUSED2_OFFSET, 257 ITYPE_UNUSED3=ITYPE_UNUSED3_OFFSET, 258 259 // ITYPE_MMX group 260 ITYPE_MMX=ITYPE_MMX_OFFSET, 261 ITYPE_MMX_MOV, 262 ITYPE_MMX_ADD, 263 ITYPE_MMX_SUB, 264 ITYPE_MMX_MUL, 265 ITYPE_MMX_DIV, 266 ITYPE_MMX_AND, 267 ITYPE_MMX_OR, 268 ITYPE_MMX_XOR, 269 ITYPE_MMX_CMP, 270 271 // ITYPE_SSE group 272 ITYPE_SSE=ITYPE_SSE_OFFSET, 273 ITYPE_SSE_MOV, 274 ITYPE_SSE_ADD, 275 ITYPE_SSE_SUB, 276 ITYPE_SSE_MUL, 277 ITYPE_SSE_DIV, 278 ITYPE_SSE_AND, 279 ITYPE_SSE_OR, 280 ITYPE_SSE_XOR, 281 ITYPE_SSE_CMP, 282 283 // ITYPE_SSE2 group 284 ITYPE_SSE2=ITYPE_SSE2_OFFSET, 285 ITYPE_SSE2_MOV, 286 ITYPE_SSE2_ADD, 287 ITYPE_SSE2_SUB, 288 ITYPE_SSE2_MUL, 289 ITYPE_SSE2_DIV, 290 ITYPE_SSE2_AND, 291 ITYPE_SSE2_OR, 292 ITYPE_SSE2_XOR, 293 ITYPE_SSE2_CMP, 294 295 // ITYPE_SSE3 group 296 ITYPE_SSE3=ITYPE_SSE3_OFFSET, 297 ITYPE_SSE3_MOV, 298 ITYPE_SSE3_ADD, 299 ITYPE_SSE3_SUB, 300 ITYPE_SSE3_MUL, 301 ITYPE_SSE3_DIV, 302 ITYPE_SSE3_AND, 303 ITYPE_SSE3_OR, 304 ITYPE_SSE3_XOR, 305 ITYPE_SSE3_CMP, 306 307 // ITYPE_3DNOW group 308 ITYPE_3DNOW=ITYPE_3DNOW_OFFSET, 309 ITYPE_3DNOW_ADD, 310 ITYPE_3DNOW_SUB, 311 ITYPE_3DNOW_MUL, 312 ITYPE_3DNOW_DIV, 313 ITYPE_3DNOW_CMP, 314 ITYPE_3DNOW_XCHG, 315 316 // ITYPE_TRAP 317 ITYPE_TRAPS=ITYPE_TRAPS_OFFSET, 318 ITYPE_TRAP, // generate trap 319 ITYPE_TRAPCC, // conditional trap gen 320 ITYPE_TRAPRET, // return from trap 321 ITYPE_BOUNDS, // gen bounds trap 322 ITYPE_DEBUG, // gen breakpoint trap 323 ITYPE_TRACE, // gen single step trap 324 ITYPE_INVALID, // gen invalid instruction 325 ITYPE_OFLOW, // gen overflow trap 326 327 // ITYPE_SYSTEM group 328 ITYPE_SYSTEM=ITYPE_SYSTEM_OFFSET, 329 ITYPE_HALT, // halt machine 330 ITYPE_IN, // input form port 331 ITYPE_OUT, // output to port 332 ITYPE_CPUID, // identify cpu 333 ITYPE_SETIF, // allow interrupts 334 ITYPE_CLEARIF, // block interrupts 335 ITYPE_SYSCALL, 336 ITYPE_SYSCALLRET, 337 338 // ITYPE_OTHER group 339 ITYPE_OTHER = ITYPE_OTHER_OFFSET, 340 ITYPE_NOP, 341 ITYPE_BCDCONV, // convert to/from BCD 342 ITYPE_SZCONV // convert size of operand 343 } INSTRUCTION_TYPE; 344 345 // 346 // Operand flags 347 // 348 349 // Type = bits 0-6 (these are mutually exclusive -- bits 0-6 will always be a power of 2)) 350 #define OPTYPE_NONE 0x00 351 #define OPTYPE_IMM 0x01 // immediate value 352 #define OPTYPE_OFFSET 0x02 // relative offset 353 #define OPTYPE_FLOAT 0x03 // floating point 354 #define OPTYPE_BCD 0x04 355 #define OPTYPE_STRING 0x05 356 #define OPTYPE_SPECIAL 0x06 357 #define OPTYPE_MASK 0x7F 358 359 // Flags = bits 7-23 (these can be combinations) 360 // These are used in the X86 opcode table 361 #define OP_REG (1<<7) // 0x80 362 #define OP_SIGNED (1<<8) 363 #define OP_SYS (1<<9) // parameter is an index into some system structure 364 #define OP_CONDR (1<<10) 365 #define OP_CONDW (1<<11) 366 #define OP_UNUSED (1<<12) 367 #define OP_SRC (1<<13) // operand is source operand 368 #define OP_DST (1<<14) // operand is destination operand 369 #define OP_EXEC (1<<15) // operand is executed 370 371 #define OP_CONDE OP_CONDR 372 #define OP_COND_EXEC (OP_CONDE|OP_EXEC) // executed only if the pre-conditions are met 373 #define OP_COND_SRC (OP_CONDR|OP_SRC) // set only if pre-conditions are met 374 #define OP_COND_DST (OP_CONDW|OP_DST) // set only if pre-conditions are met 375 #define OP_COND (OP_CONDR|OP_CONDW) 376 377 // Bits 16-31 are available for use outside of the opcode table, but they can only 378 // be used in INSTRUCTION_OPERAND.Flags, they may conflit with the architecture specific 379 // operands. For example, bits 16-31 are used in X86 for AMODE_* and OPTYPE_* 380 #define OP_ADDRESS (1<<16) 381 #define OP_LOCAL (1<<17) 382 #define OP_PARAM (1<<18) 383 #define OP_GLOBAL (1<<19) 384 #define OP_FAR (1<<20) 385 #define OP_IPREL (1<<21) 386 387 // 388 // X86-specific flags (bits 27-31) 389 // 390 #define OP_MSR (OP_SYS|OP_UNUSED) 391 392 // 393 // Other architecture flags 394 // 395 #define OP_DELAY OP_UNUSED // delayed instruction (e.g., delayed branch that executes after the next instruction) 396 397 ///////////////////////////////////////////////////////////////////// 398 // Architectures 399 ///////////////////////////////////////////////////////////////////// 400 401 typedef enum _ARCHITECTURE_TYPE 402 { 403 ARCH_UNKNOWN=0, 404 405 // x86-based 406 ARCH_X86, // 32-bit x86 407 ARCH_X86_16, // 16-bit x86 408 ARCH_X64, // AMD64 and Intel EMD64 409 410 // everything else 411 ARCH_ALPHA, 412 ARCH_ARM, 413 ARCH_DOTNET, 414 ARCH_EFI, 415 ARCH_IA64, 416 ARCH_M68K, 417 ARCH_MIPS, 418 ARCH_PPC, 419 ARCH_SH3, 420 ARCH_SH4, 421 ARCH_SPARC, 422 ARCH_THUMB 423 424 } ARCHITECTURE_TYPE; 425 426 typedef BOOL (*INIT_INSTRUCTION)(struct _INSTRUCTION *Instruction); 427 typedef void (*DUMP_INSTRUCTION)(struct _INSTRUCTION *Instruction, BOOL ShowBytes, BOOL Verbose); 428 typedef BOOL (*GET_INSTRUCTION)(struct _INSTRUCTION *Instruction, U8 *Address, U32 Flags); 429 typedef U8 *(*FIND_FUNCTION_BY_PROLOGUE)(struct _INSTRUCTION *Instruction, U8 *StartAddress, U8 *EndAddress, U32 Flags); 430 431 typedef struct _ARCHITECTURE_FORMAT_FUNCTIONS 432 { 433 INIT_INSTRUCTION InitInstruction; 434 DUMP_INSTRUCTION DumpInstruction; 435 GET_INSTRUCTION GetInstruction; 436 FIND_FUNCTION_BY_PROLOGUE FindFunctionByPrologue; 437 } ARCHITECTURE_FORMAT_FUNCTIONS; 438 439 typedef struct _ARCHITECTURE_FORMAT 440 { 441 ARCHITECTURE_TYPE Type; 442 ARCHITECTURE_FORMAT_FUNCTIONS *Functions; 443 } ARCHITECTURE_FORMAT; 444 445 #define DISASSEMBLER_INITIALIZED 0x1234566F 446 #define INSTRUCTION_INITIALIZED 0x1234567F 447 448 #include "disasm_x86.h" 449 450 typedef struct DECLSPEC_ALIGN(16) _S128 451 { 452 U64 Low; 453 S64 High; 454 } S128; 455 typedef struct DECLSPEC_ALIGN(16) _U128 456 { 457 U64 Low; 458 U64 High; 459 } U128; 460 461 typedef struct _INSTRUCTION_OPERAND 462 { 463 U32 Flags; 464 U8 Type : 6; 465 U8 Unused : 2; 466 U16 Length; 467 468 469 // If non-NULL, this indicates the target address of the instruction (e.g., a branch or 470 // a displacement with no base register). However, this address is only reliable if the 471 // image is mapped correctly (e.g., the executable is mapped as an image and fixups have 472 // been applied if it is not at its preferred image base). 473 // 474 // If disassembling a 16-bit DOS application, TargetAddress is in the context of 475 // X86Instruction->Segment. For example, if TargetAddress is the address of a code branch, 476 // it is in the CS segment (unless X86Instruction->HasSegmentOverridePrefix is set). If 477 // TargetAddress is a data pointer, it is in the DS segment (unless 478 // X86Instruction->HasSegmentOverridePrefix is set) 479 U64 TargetAddress; 480 U32 Register; 481 482 union 483 { 484 // All 8/16/32-bit operands are extended to 64-bits automatically 485 // If you want to downcast, check whether Flags & OP_SIGNED is set 486 // Like this: 487 // U32 GetOperand32(OPERAND *Operand) 488 // { 489 // if (Operand->Flags & OP_SIGNED) return (S32)Operand->Value_S64; 490 // else return (U32)Operand->Value_U64; 491 //} 492 U64 Value_U64; 493 S64 Value_S64; 494 U128 Value_U128; 495 U128 Float128; 496 U8 Float80[80]; 497 U8 BCD[10]; 498 }; 499 } INSTRUCTION_OPERAND; 500 501 typedef struct _INSTRUCTION 502 { 503 U32 Initialized; 504 struct _DISASSEMBLER *Disassembler; 505 506 char String[MAX_OPCODE_DESCRIPTION]; 507 U8 StringIndex; 508 U64 VirtualAddressDelta; 509 510 U32 Groups; // ITYPE_EXEC, ITYPE_ARITH, etc. -- NOTE groups can be OR'd together 511 INSTRUCTION_TYPE Type; // ITYPE_ADD, ITYPE_RET, etc. -- NOTE there is only one possible type 512 513 U8 *Address; 514 U8 *OpcodeAddress; 515 U32 Length; 516 517 U8 Prefixes[MAX_PREFIX_LENGTH]; 518 U32 PrefixCount; 519 520 U8 LastOpcode; // last byte of opcode 521 U8 OpcodeBytes[MAX_OPCODE_LENGTH]; 522 U32 OpcodeLength; // excludes any operands and prefixes 523 524 INSTRUCTION_OPERAND Operands[MAX_OPERAND_COUNT]; 525 U32 OperandCount; 526 527 X86_INSTRUCTION X86; 528 529 DATA_REFERENCE DataSrc; 530 DATA_REFERENCE DataDst; 531 CODE_BRANCH CodeBranch; 532 533 // Direction depends on which direction the stack grows 534 // For example, on x86 a push results in StackChange < 0 since the stack grows down 535 // This is only relevant if (Group & ITYPE_STACK) is true 536 // 537 // If Groups & ITYPE_STACK is set but StackChange = 0, it means that the change 538 // couldn't be determined (non-constant) 539 LONG StackChange; 540 541 // Used to assist in debugging 542 // If set, the current instruction is doing something that requires special handling 543 // For example, popf can cause tracing to be disabled 544 545 U8 StringAligned : 1; // internal only 546 U8 NeedsEmulation : 1; // instruction does something that re 547 U8 Repeat : 1; // instruction repeats until some condition is met (e.g., REP prefix on X86) 548 U8 ErrorOccurred : 1; // set if instruction is invalid 549 U8 AnomalyOccurred : 1; // set if instruction is anomalous 550 U8 LastInstruction : 1; // tells the iterator callback it is the last instruction 551 U8 CodeBlockFirst: 1; 552 U8 CodeBlockLast : 1; 553 } INSTRUCTION; 554 555 typedef struct _DISASSEMBLER 556 { 557 U32 Initialized; 558 ARCHITECTURE_TYPE ArchType; 559 ARCHITECTURE_FORMAT_FUNCTIONS *Functions; 560 INSTRUCTION Instruction; 561 U32 Stage1Count; // GetInstruction called 562 U32 Stage2Count; // Opcode fully decoded 563 U32 Stage3CountNoDecode; // made it through all checks when DISASM_DECODE is not set 564 U32 Stage3CountWithDecode; // made it through all checks when DISASM_DECODE is set 565 } DISASSEMBLER; 566 567 #define DISASM_DISASSEMBLE (1<<1) 568 #define DISASM_DECODE (1<<2) 569 #define DISASM_SUPPRESSERRORS (1<<3) 570 #define DISASM_SHOWFLAGS (1<<4) 571 #define DISASM_ALIGNOUTPUT (1<<5) 572 #define DISASM_DISASSEMBLE_MASK (DISASM_ALIGNOUTPUT|DISASM_SHOWBYTES|DISASM_DISASSEMBLE) 573 574 BOOL InitDisassembler(DISASSEMBLER *Disassembler, ARCHITECTURE_TYPE Architecture); 575 void CloseDisassembler(DISASSEMBLER *Disassembler); 576 INSTRUCTION *GetInstruction(DISASSEMBLER *Disassembler, U64 VirtualAddress, U8 *Address, U32 Flags); 577 578 #ifdef __cplusplus 579 } 580 #endif 581 #endif // DISASM_H 582