1 /* 2 instructions.h 3 4 diStorm3 - Powerful disassembler for X86/AMD64 5 http://ragestorm.net/distorm/ 6 distorm at gmail dot com 7 Copyright (C) 2003-2012 Gil Dabah 8 9 This program is free software: you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation, either version 3 of the License, or 12 (at your option) any later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program. If not, see <http://www.gnu.org/licenses/> 21 */ 22 23 24 #ifndef INSTRUCTIONS_H 25 #define INSTRUCTIONS_H 26 27 #include "config.h" 28 #include "prefix.h" 29 30 31 /* 32 * Operand type possibilities: 33 * Note "_FULL" suffix indicates to decode the operand as 16 bits or 32 bits depends on DecodeType - 34 * actually, it depends on the decoding mode, unless there's an operand/address size prefix. 35 * For example, the code: 33 c0 could be decoded/executed as XOR AX, AX or XOR EAX, EAX. 36 */ 37 typedef enum OpType { 38 /* No operand is set */ 39 OT_NONE = 0, 40 41 /* Read a byte(8 bits) immediate */ 42 OT_IMM8, 43 /* Force a read of a word(16 bits) immediate, used by ret only */ 44 OT_IMM16, 45 /* Read a word/dword immediate */ 46 OT_IMM_FULL, 47 /* Read a double-word(32 bits) immediate */ 48 OT_IMM32, 49 50 /* Read a signed extended byte(8 bits) immediate */ 51 OT_SEIMM8, 52 53 /* 54 * Special immediates for instructions which have more than one immediate, 55 * which is an exception from standard instruction format. 56 * As to version v1.0: ENTER, INSERTQ, EXTRQ are the only problematic ones. 57 */ 58 /* 16 bits immediate using the first imm-slot */ 59 OT_IMM16_1, 60 /* 8 bits immediate using the first imm-slot */ 61 OT_IMM8_1, 62 /* 8 bits immediate using the second imm-slot */ 63 OT_IMM8_2, 64 65 /* Use a 8bit register */ 66 OT_REG8, 67 /* Use a 16bit register */ 68 OT_REG16, 69 /* Use a 16/32/64bit register */ 70 OT_REG_FULL, 71 /* Use a 32bit register */ 72 OT_REG32, 73 /* 74 * If used with REX the reg operand size becomes 64 bits, otherwise 32 bits. 75 * VMX instructions are promoted automatically without a REX prefix. 76 */ 77 OT_REG32_64, 78 /* Used only by MOV CR/DR(n). Promoted with REX onlly. */ 79 OT_FREG32_64_RM, 80 81 /* Use or read (indirection) a 8bit register or immediate byte */ 82 OT_RM8, 83 /* Some instructions force 16 bits (mov sreg, rm16) */ 84 OT_RM16, 85 /* Use or read a 16/32/64bit register or immediate word/dword/qword */ 86 OT_RM_FULL, 87 /* 88 * 32 or 64 bits (with REX) operand size indirection memory operand. 89 * Some instructions are promoted automatically without a REX prefix. 90 */ 91 OT_RM32_64, 92 /* 16 or 32 bits RM. This is used only with MOVZXD instruction in 64bits. */ 93 OT_RM16_32, 94 /* Same as OT_RMXX but POINTS to 16 bits [cannot use GENERAL-PURPOSE REG!] */ 95 OT_FPUM16, 96 /* Same as OT_RMXX but POINTS to 32 bits (single precision) [cannot use GENERAL-PURPOSE REG!] */ 97 OT_FPUM32, 98 /* Same as OT_RMXX but POINTS to 64 bits (double precision) [cannot use GENERAL-PURPOSE REG!] */ 99 OT_FPUM64, 100 /* Same as OT_RMXX but POINTS to 80 bits (extended precision) [cannot use GENERAL-PURPOSE REG!] */ 101 OT_FPUM80, 102 103 /* 104 * Special operand type for SSE4 where the ModR/M might 105 * be a 32 bits register or 8 bits memory indirection operand. 106 */ 107 OT_R32_M8, 108 /* 109 * Special ModR/M for PINSRW, which need a 16 bits memory operand or 32 bits register. 110 * In 16 bits decoding mode R32 becomes R16, operand size cannot affect this. 111 */ 112 OT_R32_M16, 113 /* 114 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or 115 * a 8 bits memory indirection operand. 116 */ 117 OT_R32_64_M8, 118 /* 119 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or 120 * a 16 bits memory indirection operand. 121 */ 122 OT_R32_64_M16, 123 /* 124 * Special operand type for MOV reg16/32/64/mem16, segReg 8C /r. and SMSW. 125 * It supports all decoding modes, but if used as a memory indirection it's a 16 bit ModR/M indirection. 126 */ 127 OT_RFULL_M16, 128 129 /* Use a control register */ 130 OT_CREG, 131 /* Use a debug register */ 132 OT_DREG, 133 /* Use a segment register */ 134 OT_SREG, 135 /* 136 * SEG is encoded in the flags of the opcode itself! 137 * This is used for specific "push SS" where SS is a segment where 138 * each "push SS" has an absolutely different opcode byte. 139 * We need this to detect whether an operand size prefix is used. 140 */ 141 OT_SEG, 142 143 /* Use AL */ 144 OT_ACC8, 145 /* Use AX (FSTSW) */ 146 OT_ACC16, 147 /* Use AX/EAX/RAX */ 148 OT_ACC_FULL, 149 /* Use AX/EAX, no REX is possible for RAX, used only with IN/OUT which don't support 64 bit registers */ 150 OT_ACC_FULL_NOT64, 151 152 /* 153 * Read one word (seg), and a word/dword/qword (depends on operand size) from memory. 154 * JMP FAR [EBX] means EBX point to 16:32 ptr. 155 */ 156 OT_MEM16_FULL, 157 /* Read one word (seg) and a word/dword/qword (depends on operand size), usually SEG:OFF, JMP 1234:1234 */ 158 OT_PTR16_FULL, 159 /* Read one word (limit) and a dword/qword (limit) (depends on operand size), used by SGDT, SIDT, LGDT, LIDT. */ 160 OT_MEM16_3264, 161 162 /* Read a byte(8 bits) immediate and calculate it relatively to the current offset of the instruction being decoded */ 163 OT_RELCB, 164 /* Read a word/dword immediate and calculate it relatively to the current offset of the instruction being decoded */ 165 OT_RELC_FULL, 166 167 /* Use general memory indirection, with varying sizes: */ 168 OT_MEM, 169 /* Used when a memory indirection is required, but if the mod field is 11, this operand will be ignored. */ 170 OT_MEM_OPT, 171 OT_MEM32, 172 /* Memory dereference for MOVNTI, either 32 or 64 bits (with REX). */ 173 OT_MEM32_64, 174 OT_MEM64, 175 OT_MEM128, 176 /* Used for cmpxchg8b/16b. */ 177 OT_MEM64_128, 178 179 /* Read an immediate as an absolute address, size is known by instruction, used by MOV (memory offset) only */ 180 OT_MOFFS8, 181 OT_MOFFS_FULL, 182 /* Use an immediate of 1, as for SHR R/M, 1 */ 183 OT_CONST1, 184 /* Use CL, as for SHR R/M, CL */ 185 OT_REGCL, 186 187 /* 188 * Instruction-Block for one byte long instructions, used by INC/DEC/PUSH/POP/XCHG, 189 * REG is extracted from the value of opcode 190 * Use a 8bit register 191 */ 192 OT_IB_RB, 193 /* Use a 16/32/64bit register */ 194 OT_IB_R_FULL, 195 196 /* Use [(r)SI] as INDIRECTION, for repeatable instructions */ 197 OT_REGI_ESI, 198 /* Use [(r)DI] as INDIRECTION, for repeatable instructions */ 199 OT_REGI_EDI, 200 /* Use [(r)BX + AL] as INDIRECTIOM, used by XLAT only */ 201 OT_REGI_EBXAL, 202 /* Use [(r)AX] as INDIRECTION, used by AMD's SVM instructions */ 203 OT_REGI_EAX, 204 /* Use DX, as for OUTS DX, BYTE [SI] */ 205 OT_REGDX, 206 /* Use ECX in INVLPGA instruction */ 207 OT_REGECX, 208 209 /* FPU registers: */ 210 OT_FPU_SI, /* ST(i) */ 211 OT_FPU_SSI, /* ST(0), ST(i) */ 212 OT_FPU_SIS, /* ST(i), ST(0) */ 213 214 /* MMX registers: */ 215 OT_MM, 216 /* Extract the MMX register from the RM bits this time (used when the REG bits are used for opcode extension) */ 217 OT_MM_RM, 218 /* ModR/M points to 32 bits MMX variable */ 219 OT_MM32, 220 /* ModR/M points to 32 bits MMX variable */ 221 OT_MM64, 222 223 /* SSE registers: */ 224 OT_XMM, 225 /* Extract the SSE register from the RM bits this time (used when the REG bits are used for opcode extension) */ 226 OT_XMM_RM, 227 /* ModR/M points to 16 bits SSE variable */ 228 OT_XMM16, 229 /* ModR/M points to 32 bits SSE variable */ 230 OT_XMM32, 231 /* ModR/M points to 64 bits SSE variable */ 232 OT_XMM64, 233 /* ModR/M points to 128 bits SSE variable */ 234 OT_XMM128, 235 /* Implied XMM0 register as operand, used in SSE4. */ 236 OT_REGXMM0, 237 238 /* AVX operands: */ 239 240 /* ModR/M for 32 bits. */ 241 OT_RM32, 242 /* Reg32/Reg64 (prefix width) or Mem8. */ 243 OT_REG32_64_M8, 244 /* Reg32/Reg64 (prefix width) or Mem16. */ 245 OT_REG32_64_M16, 246 /* Reg32/Reg 64 depends on prefix width only. */ 247 OT_WREG32_64, 248 /* RM32/RM64 depends on prefix width only. */ 249 OT_WRM32_64, 250 /* XMM or Mem32/Mem64 depends on perfix width only. */ 251 OT_WXMM32_64, 252 /* XMM is encoded in VEX.VVVV. */ 253 OT_VXMM, 254 /* XMM is encoded in the high nibble of an immediate byte. */ 255 OT_XMM_IMM, 256 /* YMM/XMM is dependent on VEX.L. */ 257 OT_YXMM, 258 /* YMM/XMM (depends on prefix length) is encoded in the high nibble of an immediate byte. */ 259 OT_YXMM_IMM, 260 /* YMM is encoded in reg. */ 261 OT_YMM, 262 /* YMM or Mem256. */ 263 OT_YMM256, 264 /* YMM is encoded in VEX.VVVV. */ 265 OT_VYMM, 266 /* YMM/XMM is dependent on VEX.L, and encoded in VEX.VVVV. */ 267 OT_VYXMM, 268 /* YMM/XMM or Mem64/Mem256 is dependent on VEX.L. */ 269 OT_YXMM64_256, 270 /* YMM/XMM or Mem128/Mem256 is dependent on VEX.L. */ 271 OT_YXMM128_256, 272 /* XMM or Mem64/Mem256 is dependent on VEX.L. */ 273 OT_LXMM64_128, 274 /* Mem128/Mem256 is dependent on VEX.L. */ 275 OT_LMEM128_256 276 } _OpType; 277 278 /* Flags for instruction: */ 279 280 /* Empty flags indicator: */ 281 #define INST_FLAGS_NONE (0) 282 /* The instruction we are going to decode requires ModR/M encoding. */ 283 #define INST_MODRM_REQUIRED (1) 284 /* Special treatment for instructions which are in the divided-category but still needs the whole byte for ModR/M... */ 285 #define INST_NOT_DIVIDED (1 << 1) 286 /* 287 * Used explicitly in repeatable instructions, 288 * which needs a suffix letter in their mnemonic to specify operation-size (depend on operands). 289 */ 290 #define INST_16BITS (1 << 2) 291 /* If the opcode is supported by 80286 and upper models (16/32 bits). */ 292 #define INST_32BITS (1 << 3) 293 /* 294 * Prefix flags (6 types: lock/rep, seg override, addr-size, oper-size, REX, VEX) 295 * There are several specific instructions that can follow LOCK prefix, 296 * note that they must be using a memory operand form, otherwise they generate an exception. 297 */ 298 #define INST_PRE_LOCK (1 << 4) 299 /* REPNZ prefix for string instructions only - means an instruction can follow it. */ 300 #define INST_PRE_REPNZ (1 << 5) 301 /* REP prefix for string instructions only - means an instruction can follow it. */ 302 #define INST_PRE_REP (1 << 6) 303 /* CS override prefix. */ 304 #define INST_PRE_CS (1 << 7) 305 /* SS override prefix. */ 306 #define INST_PRE_SS (1 << 8) 307 /* DS override prefix. */ 308 #define INST_PRE_DS (1 << 9) 309 /* ES override prefix. */ 310 #define INST_PRE_ES (1 << 10) 311 /* FS override prefix. Funky Segment :) */ 312 #define INST_PRE_FS (1 << 11) 313 /* GS override prefix. Groovy Segment, of course not, duh ! */ 314 #define INST_PRE_GS (1 << 12) 315 /* Switch operand size from 32 to 16 and vice versa. */ 316 #define INST_PRE_OP_SIZE (1 << 13) 317 /* Switch address size from 32 to 16 and vice versa. */ 318 #define INST_PRE_ADDR_SIZE (1 << 14) 319 /* Native instructions which needs suffix letter to indicate their operation-size (and don't depend on operands). */ 320 #define INST_NATIVE (1 << 15) 321 /* Use extended mnemonic, means it's an _InstInfoEx structure, which contains another mnemonic for 32 bits specifically. */ 322 #define INST_USE_EXMNEMONIC (1 << 16) 323 /* Use third operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */ 324 #define INST_USE_OP3 (1 << 17) 325 /* Use fourth operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */ 326 #define INST_USE_OP4 (1 << 18) 327 /* The instruction's mnemonic depends on the mod value of the ModR/M byte (mod=11, mod!=11). */ 328 #define INST_MNEMONIC_MODRM_BASED (1 << 19) 329 /* The instruction uses a ModR/M byte which the MOD must be 11 (for registers operands only). */ 330 #define INST_MODRR_REQUIRED (1 << 20) 331 /* The way of 3DNow! instructions are built, we have to handle their locating specially. Suffix imm8 tells which instruction it is. */ 332 #define INST_3DNOW_FETCH (1 << 21) 333 /* The instruction needs two suffixes, one for the comparison type (imm8) and the second for its operation size indication (second mnemonic). */ 334 #define INST_PSEUDO_OPCODE (1 << 22) 335 /* Invalid instruction at 64 bits decoding mode. */ 336 #define INST_INVALID_64BITS (1 << 23) 337 /* Specific instruction can be promoted to 64 bits (without REX, it is promoted automatically). */ 338 #define INST_64BITS (1 << 24) 339 /* Indicates the instruction must be REX prefixed in order to use 64 bits operands. */ 340 #define INST_PRE_REX (1 << 25) 341 /* Third mnemonic is set. */ 342 #define INST_USE_EXMNEMONIC2 (1 << 26) 343 /* Instruction is only valid in 64 bits decoding mode. */ 344 #define INST_64BITS_FETCH (1 << 27) 345 /* Forces that the ModRM-REG/Opcode field will be 0. (For EXTRQ). */ 346 #define INST_FORCE_REG0 (1 << 28) 347 /* Indicates that instruction is encoded with a VEX prefix. */ 348 #define INST_PRE_VEX (1 << 29) 349 /* Indicates that the instruction is encoded with a ModRM byte (REG field specifically). */ 350 #define INST_MODRM_INCLUDED (1 << 30) 351 /* Indicates that the first (/destination) operand of the instruction is writable. */ 352 #define INST_DST_WR (1 << 31) 353 354 #define INST_PRE_REPS (INST_PRE_REPNZ | INST_PRE_REP) 355 #define INST_PRE_LOKREP_MASK (INST_PRE_LOCK | INST_PRE_REPNZ | INST_PRE_REP) 356 #define INST_PRE_SEGOVRD_MASK32 (INST_PRE_CS | INST_PRE_SS | INST_PRE_DS | INST_PRE_ES) 357 #define INST_PRE_SEGOVRD_MASK64 (INST_PRE_FS | INST_PRE_GS) 358 #define INST_PRE_SEGOVRD_MASK (INST_PRE_SEGOVRD_MASK32 | INST_PRE_SEGOVRD_MASK64) 359 360 /* Extended flags for VEX: */ 361 /* Indicates that the instruction might have VEX.L encoded. */ 362 #define INST_VEX_L (1) 363 /* Indicates that the instruction might have VEX.W encoded. */ 364 #define INST_VEX_W (1 << 1) 365 /* Indicates that the mnemonic of the instruction is based on the VEX.W bit. */ 366 #define INST_MNEMONIC_VEXW_BASED (1 << 2) 367 /* Indicates that the mnemonic of the instruction is based on the VEX.L bit. */ 368 #define INST_MNEMONIC_VEXL_BASED (1 << 3) 369 /* Forces the instruction to be encoded with VEX.L, otherwise it's undefined. */ 370 #define INST_FORCE_VEXL (1 << 4) 371 /* 372 * Indicates that the instruction is based on the MOD field of the ModRM byte. 373 * (MOD==11: got the right instruction, else skip +4 in prefixed table for the correct instruction). 374 */ 375 #define INST_MODRR_BASED (1 << 5) 376 /* Indicates that the instruction doesn't use the VVVV field of the VEX prefix, if it does then it's undecodable. */ 377 #define INST_VEX_V_UNUSED (1 << 6) 378 379 /* Indication that the instruction is privileged (Ring 0), this should be checked on the opcodeId field. */ 380 #define OPCODE_ID_PRIVILEGED ((uint16_t)0x8000) 381 382 /* 383 * Indicates which operand is being decoded. 384 * Destination (1st), Source (2nd), op3 (3rd), op4 (4th). 385 * Used to set the operands' fields in the _DInst structure! 386 */ 387 typedef enum {ONT_NONE = -1, ONT_1 = 0, ONT_2 = 1, ONT_3 = 2, ONT_4 = 3} _OperandNumberType; 388 389 /* 390 * In order to save more space for storing the DB statically, 391 * I came up with another level of shared info. 392 * Because I saw that most of the information that instructions use repeats itself. 393 * 394 * Info about the instruction, source/dest types, meta and flags. 395 * _InstInfo points to a table of _InstSharedInfo. 396 */ 397 typedef struct { 398 uint8_t flagsIndex; /* An index into FlagsTables */ 399 uint8_t s, d; /* OpType. */ 400 uint8_t meta; /* Hi 5 bits = Instruction set class | Lo 3 bits = flow control flags. */ 401 /* The following are CPU flag masks that the instruction changes. */ 402 uint8_t modifiedFlags; 403 uint8_t testedFlags; 404 uint8_t undefinedFlags; 405 } _InstSharedInfo; 406 407 /* 408 * This structure is used for the instructions DB and NOT for the disassembled result code! 409 * This is the BASE structure, there are extentions to this structure below. 410 */ 411 typedef struct { 412 uint16_t sharedIndex; /* An index into the SharedInfoTable. */ 413 uint16_t opcodeId; /* The opcodeId is really a byte-offset into the mnemonics table. MSB is a privileged indication. */ 414 } _InstInfo; 415 416 /* 417 * There are merely few instructions which need a second mnemonic for 32 bits. 418 * Or a third for 64 bits. Therefore sometimes the second mnemonic is empty but not the third. 419 * In all decoding modes the first mnemonic is the default. 420 * A flag will indicate it uses another mnemonic. 421 * 422 * There are a couple of (SSE4) instructions in the whole DB which need both op3 and 3rd mnemonic for 64bits, 423 * therefore, I decided to make the extended structure contain all extra info in the same structure. 424 * There are a few instructions (SHLD/SHRD/IMUL and SSE too) which use third operand (or a fourth). 425 * A flag will indicate it uses a third/fourth operand. 426 */ 427 typedef struct { 428 /* Base structure (doesn't get accessed directly from code). */ 429 _InstInfo BASE; 430 431 /* Extended starts here. */ 432 uint8_t flagsEx; /* 8 bits are enough, in the future we might make it a bigger integer. */ 433 uint8_t op3, op4; /* OpType. */ 434 uint16_t opcodeId2, opcodeId3; 435 } _InstInfoEx; 436 437 /* Trie data structure node type: */ 438 typedef enum { 439 INT_NOTEXISTS = 0, /* Not exists. */ 440 INT_INFO = 1, /* It's an instruction info. */ 441 INT_INFOEX, 442 INT_LIST_GROUP, 443 INT_LIST_FULL, 444 INT_LIST_DIVIDED, 445 INT_LIST_PREFIXED 446 } _InstNodeType; 447 448 /* Used to check instType < INT_INFOS, means we got an inst-info. Cause it has to be only one of them. */ 449 #define INT_INFOS (INT_LIST_GROUP) 450 451 /* Instruction node is treated as { int index:13; int type:3; } */ 452 typedef uint16_t _InstNode; 453 454 _InstInfo* inst_lookup(_CodeInfo* ci, _PrefixState* ps); 455 _InstInfo* inst_lookup_3dnow(_CodeInfo* ci); 456 457 #endif /* INSTRUCTIONS_H */ 458