1 /*
2 distorm.c
3
4 diStorm3 C Library Interface
5 diStorm3 - Powerful disassembler for X86/AMD64
6 http://ragestorm.net/distorm/
7 distorm at gmail dot com
8 Copyright (C) 2003-2012 Gil Dabah
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>
22 */
23
24
25 #include "../include/distorm.h"
26 #include "config.h"
27 #include "decoder.h"
28 #include "x86defs.h"
29 #include "textdefs.h"
30 #include "wstring.h"
31 #include "../include/mnemonics.h"
32
33 /* C DLL EXPORTS */
34 #ifdef SUPPORT_64BIT_OFFSET
distorm_decompose64(_CodeInfo * ci,_DInst result[],unsigned int maxInstructions,unsigned int * usedInstructionsCount)35 _DLLEXPORT_ _DecodeResult distorm_decompose64(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
36 #else
37 _DLLEXPORT_ _DecodeResult distorm_decompose32(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
38 #endif
39 {
40 if (usedInstructionsCount == NULL) {
41 return DECRES_SUCCESS;
42 }
43
44 /* DECRES_SUCCESS still may indicate we may have something in the result, so zero it first thing. */
45 *usedInstructionsCount = 0;
46
47 if ((ci == NULL) ||
48 (ci->codeLen < 0) ||
49 ((ci->dt != Decode16Bits) && (ci->dt != Decode32Bits) && (ci->dt != Decode64Bits)) ||
50 (ci->code == NULL) ||
51 (result == NULL) ||
52 ((ci->features & (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) == (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)))
53 {
54 return DECRES_INPUTERR;
55 }
56
57 /* Assume length=0 is success. */
58 if (ci->codeLen == 0) {
59 return DECRES_SUCCESS;
60 }
61
62 return decode_internal(ci, FALSE, result, maxInstructions, usedInstructionsCount);
63 }
64
65 #ifndef DISTORM_LIGHT
66
67 /* Helper function to concatenate an explicit size when it's unknown from the operands. */
distorm_format_size(_WString * str,const _DInst * di,int opNum)68 static void distorm_format_size(_WString* str, const _DInst* di, int opNum)
69 {
70 int isSizingRequired = 0;
71 /*
72 * We only have to output the size explicitly if it's not clear from the operands.
73 * For example:
74 * mov al, [0x1234] -> The size is 8, we know it from the AL register operand.
75 * mov [0x1234], 0x11 -> Now we don't know the size. Pam pam pam
76 *
77 * If given operand number is higher than 2, then output the size anyways.
78 */
79 isSizingRequired = ((opNum >= 2) || ((di->ops[0].type != O_REG) && (di->ops[1].type != O_REG)));
80
81 /* Still not sure? Try some special instructions. */
82 if (!isSizingRequired) {
83 /*
84 * INS/OUTS are exception, because DX is a port specifier and not a real src/dst register.
85 * A few exceptions that always requires sizing:
86 * MOVZX, MOVSX, MOVSXD.
87 * ROL, ROR, RCL, RCR, SHL, SHR, SAL, SAR.
88 * SHLD, SHRD.
89 */
90 switch (di->opcode)
91 {
92 case I_INS:
93 case I_OUTS:
94 case I_MOVZX:
95 case I_MOVSX:
96 case I_MOVSXD:
97 case I_ROL:
98 case I_ROR:
99 case I_RCL:
100 case I_RCR:
101 case I_SHL:
102 case I_SHR:
103 case I_SAL:
104 case I_SAR:
105 case I_SHLD:
106 case I_SHRD:
107 isSizingRequired = 1;
108 break;
109 default: /* Instruction doesn't require sizing. */ break;
110 }
111 }
112
113 if (isSizingRequired)
114 {
115 switch (di->ops[opNum].size)
116 {
117 case 0: break; /* OT_MEM's unknown size. */
118 case 8: strcat_WSN(str, "BYTE "); break;
119 case 16: strcat_WSN(str, "WORD "); break;
120 case 32: strcat_WSN(str, "DWORD "); break;
121 case 64: strcat_WSN(str, "QWORD "); break;
122 case 80: strcat_WSN(str, "TBYTE "); break;
123 case 128: strcat_WSN(str, "DQWORD "); break;
124 case 256: strcat_WSN(str, "YWORD "); break;
125 default: /* Big oh uh if it gets here. */ break;
126 }
127 }
128 }
129
/*
 * Appends the instruction's displacement to 'str' as a sign character followed
 * by the hexadecimal magnitude. Nothing is written when di->dispSize is 0.
 *
 * str      - destination text buffer, appended to.
 * di       - the decomposed instruction; disp is interpreted as a signed 64-bit value.
 * addrMask - mask applied to the magnitude before it is printed.
 */
static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t addrMask)
{
	uint64_t magnitude;
	int isNegative;

	if (!di->dispSize) return;

	isNegative = ((int64_t)di->disp < 0);
	chrcat_WS(str, isNegative ? MINUS_DISP_CHR : PLUS_DISP_CHR);

	/*
	 * Negate in unsigned arithmetic: negating INT64_MIN as a signed value is
	 * undefined behaviour, while unsigned subtraction wraps and gives the same
	 * bit pattern for every other input.
	 */
	magnitude = (uint64_t)di->disp;
	if (isNegative) magnitude = (uint64_t)0 - magnitude;
	magnitude &= addrMask;

	/* str_code_hqw takes the 64-bit value by address. */
	str_code_hqw(str, (uint8_t*)&magnitude);
}
142
143 #ifdef SUPPORT_64BIT_OFFSET
distorm_format64(const _CodeInfo * ci,const _DInst * di,_DecodedInst * result)144 _DLLEXPORT_ void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
145 #else
146 _DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
147 #endif
148 {
149 _WString* str;
150 unsigned int i, isDefault;
151 int64_t tmpDisp64;
152 uint64_t addrMask = (uint64_t)-1;
153 uint8_t segment;
154 const _WMnemonic* mnemonic;
155
156 /* Set address mask, when default is for 64bits addresses. */
157 if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff;
158 else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff;
159
160 /* Copy other fields. */
161 result->size = di->size;
162 result->offset = di->addr & addrMask;
163
164 if (di->flags == FLAG_NOT_DECODABLE) {
165 str = &result->mnemonic;
166 strclear_WS(&result->operands);
167 strcpy_WSN(str, "DB ");
168 str_code_hb(str, di->imm.byte);
169 strclear_WS(&result->instructionHex);
170 str_hex_b(&result->instructionHex, di->imm.byte);
171 return; /* Skip to next instruction. */
172 }
173
174 str = &result->instructionHex;
175 strclear_WS(str);
176 for (i = 0; i < di->size; i++)
177 str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]);
178
179 str = &result->mnemonic;
180 switch (FLAG_GET_PREFIX(di->flags))
181 {
182 case FLAG_LOCK:
183 strcpy_WSN(str, "LOCK ");
184 break;
185 case FLAG_REP:
186 strcpy_WSN(str, "REP ");
187 break;
188 case FLAG_REPNZ:
189 strcpy_WSN(str, "REPNZ ");
190 break;
191 default:
192 /* Init mnemonic string, cause next touch is concatenation. */
193 strclear_WS(str);
194 break;
195 }
196
197 mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode];
198 memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1);
199 str->length += mnemonic->length;
200
201 /* Format operands: */
202 str = &result->operands;
203 strclear_WS(str);
204
205 /* Special treatment for String instructions. */
206 if ((META_GET_ISC(di->meta) == ISC_INTEGER) &&
207 ((di->opcode == I_MOVS) ||
208 (di->opcode == I_CMPS) ||
209 (di->opcode == I_STOS) ||
210 (di->opcode == I_LODS) ||
211 (di->opcode == I_SCAS)))
212 {
213 /*
214 * No operands are needed if the address size is the default one,
215 * and no segment is overridden, so add the suffix letter,
216 * to indicate size of operation and continue to next instruction.
217 */
218 if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) {
219 str = &result->mnemonic;
220 switch (di->ops[0].size)
221 {
222 case 8: chrcat_WS(str, 'B'); break;
223 case 16: chrcat_WS(str, 'W'); break;
224 case 32: chrcat_WS(str, 'D'); break;
225 case 64: chrcat_WS(str, 'Q'); break;
226 }
227 return;
228 }
229 }
230
231 for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) {
232 if (i > 0) strcat_WSN(str, ", ");
233 switch (di->ops[i].type)
234 {
235 case O_REG:
236 strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
237 break;
238 case O_IMM:
239 /* If the instruction is 'push', show explicit size (except byte imm). */
240 if ((di->opcode == I_PUSH) && (di->ops[i].size != 8)) distorm_format_size(str, di, i);
241 /* Special fix for negative sign extended immediates. */
242 if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) {
243 if (di->imm.sbyte < 0) {
244 chrcat_WS(str, MINUS_DISP_CHR);
245 str_code_hb(str, -di->imm.sbyte);
246 break;
247 }
248 }
249 if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword);
250 else str_code_hdw(str, di->imm.dword);
251 break;
252 case O_IMM1:
253 str_code_hdw(str, di->imm.ex.i1);
254 break;
255 case O_IMM2:
256 str_code_hdw(str, di->imm.ex.i2);
257 break;
258 case O_DISP:
259 distorm_format_size(str, di, i);
260 chrcat_WS(str, OPEN_CHR);
261 if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
262 strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
263 chrcat_WS(str, SEG_OFF_CHR);
264 }
265 tmpDisp64 = di->disp & addrMask;
266 str_code_hqw(str, (uint8_t*)&tmpDisp64);
267 chrcat_WS(str, CLOSE_CHR);
268 break;
269 case O_SMEM:
270 distorm_format_size(str, di, i);
271 chrcat_WS(str, OPEN_CHR);
272
273 /*
274 * This is where we need to take special care for String instructions.
275 * If we got here, it means we need to explicitly show their operands.
276 * The problem with CMPS and MOVS is that they have two(!) memory operands.
277 * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden.
278 * And make the rest of the String operations explicit.
279 */
280 segment = SEGMENT_GET(di->segment);
281 isDefault = SEGMENT_IS_DEFAULT(di->segment);
282 switch (di->opcode)
283 {
284 case I_MOVS:
285 isDefault = FALSE;
286 if (i == 0) segment = R_ES;
287 break;
288 case I_CMPS:
289 isDefault = FALSE;
290 if (i == 1) segment = R_ES;
291 break;
292 case I_INS:
293 case I_LODS:
294 case I_STOS:
295 case I_SCAS: isDefault = FALSE; break;
296 }
297 if (!isDefault && (segment != R_NONE)) {
298 strcat_WS(str, (const _WString*)&_REGISTERS[segment]);
299 chrcat_WS(str, SEG_OFF_CHR);
300 }
301
302 strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
303
304 distorm_format_signed_disp(str, di, addrMask);
305 chrcat_WS(str, CLOSE_CHR);
306 break;
307 case O_MEM:
308 distorm_format_size(str, di, i);
309 chrcat_WS(str, OPEN_CHR);
310 if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
311 strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
312 chrcat_WS(str, SEG_OFF_CHR);
313 }
314 if (di->base != R_NONE) {
315 strcat_WS(str, (const _WString*)&_REGISTERS[di->base]);
316 chrcat_WS(str, PLUS_DISP_CHR);
317 }
318 strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
319 if (di->scale != 0) {
320 chrcat_WS(str, '*');
321 if (di->scale == 2) chrcat_WS(str, '2');
322 else if (di->scale == 4) chrcat_WS(str, '4');
323 else /* if (di->scale == 8) */ chrcat_WS(str, '8');
324 }
325
326 distorm_format_signed_disp(str, di, addrMask);
327 chrcat_WS(str, CLOSE_CHR);
328 break;
329 case O_PC:
330 #ifdef SUPPORT_64BIT_OFFSET
331 str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask);
332 #else
333 str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask);
334 #endif
335 break;
336 case O_PTR:
337 str_code_hdw(str, di->imm.ptr.seg);
338 chrcat_WS(str, SEG_OFF_CHR);
339 str_code_hdw(str, di->imm.ptr.off);
340 break;
341 }
342 }
343
344 if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN");
345 else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN");
346 }
347
348 #ifdef SUPPORT_64BIT_OFFSET
distorm_decode64(_OffsetType codeOffset,const unsigned char * code,int codeLen,_DecodeType dt,_DecodedInst result[],unsigned int maxInstructions,unsigned int * usedInstructionsCount)349 _DLLEXPORT_ _DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
350 #else
351 _DLLEXPORT_ _DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
352 #endif
353 {
354 _DecodeResult res;
355 _DInst di;
356 _CodeInfo ci;
357 unsigned int instsCount = 0, i;
358
359 *usedInstructionsCount = 0;
360
361 /* I use codeLen as a signed variable in order to ease detection of underflow... and besides - */
362 if (codeLen < 0) {
363 return DECRES_INPUTERR;
364 }
365
366 if ((dt != Decode16Bits) && (dt != Decode32Bits) && (dt != Decode64Bits)) {
367 return DECRES_INPUTERR;
368 }
369
370 if (code == NULL || result == NULL) {
371 return DECRES_INPUTERR;
372 }
373
374 /* Assume length=0 is success. */
375 if (codeLen == 0) {
376 return DECRES_SUCCESS;
377 }
378
379 /*
380 * We have to format the result into text. But the interal decoder works with the new structure of _DInst.
381 * Therefore, we will pass the result array(!) from the caller and the interal decoder will fill it in with _DInst's.
382 * Then we will copy each result to a temporary structure, and use it to reformat that specific result.
383 *
384 * This is all done to save memory allocation and to work on the same result array in-place!!!
385 * It's a bit ugly, I have to admit, but worth it.
386 */
387
388 ci.codeOffset = codeOffset;
389 ci.code = code;
390 ci.codeLen = codeLen;
391 ci.dt = dt;
392 ci.features = DF_NONE;
393 if (dt == Decode16Bits) ci.features = DF_MAXIMUM_ADDR16;
394 else if (dt == Decode32Bits) ci.features = DF_MAXIMUM_ADDR32;
395
396 res = decode_internal(&ci, TRUE, (_DInst*)result, maxInstructions, &instsCount);
397 for (i = 0; i < instsCount; i++) {
398 if ((*usedInstructionsCount + i) >= maxInstructions) return DECRES_MEMORYERR;
399
400 /* Copy the current decomposed result to a temp structure, so we can override the result with text. */
401 memcpy(&di, (char*)result + (i * sizeof(_DecodedInst)), sizeof(_DInst));
402 #ifdef SUPPORT_64BIT_OFFSET
403 distorm_format64(&ci, &di, &result[i]);
404 #else
405 distorm_format32(&ci, &di, &result[i]);
406 #endif
407 }
408
409 *usedInstructionsCount = instsCount;
410 return res;
411 }
412
413 #endif /* DISTORM_LIGHT */
414
distorm_version()415 _DLLEXPORT_ unsigned int distorm_version()
416 {
417 return __DISTORMV__;
418 }
419