1 /*
2 distorm.c
3 
4 diStorm3 C Library Interface
5 diStorm3 - Powerful disassembler for X86/AMD64
6 http://ragestorm.net/distorm/
7 distorm at gmail dot com
8 Copyright (C) 2003-2012 Gil Dabah
9 
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14 
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with this program.  If not, see <http://www.gnu.org/licenses/>
22 */
23 
24 
25 #include "../include/distorm.h"
26 #include "config.h"
27 #include "decoder.h"
28 #include "x86defs.h"
29 #include "textdefs.h"
30 #include "wstring.h"
31 #include "../include/mnemonics.h"
32 
33 /* C DLL EXPORTS */
34 #ifdef SUPPORT_64BIT_OFFSET
distorm_decompose64(_CodeInfo * ci,_DInst result[],unsigned int maxInstructions,unsigned int * usedInstructionsCount)35 	_DLLEXPORT_ _DecodeResult distorm_decompose64(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
36 #else
37 	_DLLEXPORT_ _DecodeResult distorm_decompose32(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
38 #endif
39 {
40 	if (usedInstructionsCount == NULL) {
41 		return DECRES_SUCCESS;
42 	}
43 
44 	/* DECRES_SUCCESS still may indicate we may have something in the result, so zero it first thing. */
45 	*usedInstructionsCount = 0;
46 
47 	if ((ci == NULL) ||
48 		(ci->codeLen < 0) ||
49 		((ci->dt != Decode16Bits) && (ci->dt != Decode32Bits) && (ci->dt != Decode64Bits)) ||
50 		(ci->code == NULL) ||
51 		(result == NULL) ||
52 		((ci->features & (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) == (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)))
53 	{
54 		return DECRES_INPUTERR;
55 	}
56 
57 	/* Assume length=0 is success. */
58 	if (ci->codeLen == 0) {
59 		return DECRES_SUCCESS;
60 	}
61 
62 	return decode_internal(ci, FALSE, result, maxInstructions, usedInstructionsCount);
63 }
64 
65 #ifndef DISTORM_LIGHT
66 
67 /* Helper function to concatenate an explicit size when it's unknown from the operands. */
distorm_format_size(_WString * str,const _DInst * di,int opNum)68 static void distorm_format_size(_WString* str, const _DInst* di, int opNum)
69 {
70 	int isSizingRequired = 0;
71 	/*
72 	 * We only have to output the size explicitly if it's not clear from the operands.
73 	 * For example:
74 	 * mov al, [0x1234] -> The size is 8, we know it from the AL register operand.
75 	 * mov [0x1234], 0x11 -> Now we don't know the size. Pam pam pam
76 	 *
77 	 * If given operand number is higher than 2, then output the size anyways.
78 	 */
79 	isSizingRequired = ((opNum >= 2) || ((di->ops[0].type != O_REG) && (di->ops[1].type != O_REG)));
80 
81 	/* Still not sure? Try some special instructions. */
82 	if (!isSizingRequired) {
83 		/*
84 		 * INS/OUTS are exception, because DX is a port specifier and not a real src/dst register.
85 		 * A few exceptions that always requires sizing:
86 		 * MOVZX, MOVSX, MOVSXD.
87 		 * ROL, ROR, RCL, RCR, SHL, SHR, SAL, SAR.
88 		 * SHLD, SHRD.
89 		 */
90 		switch (di->opcode)
91 		{
92 			case I_INS:
93 			case I_OUTS:
94 			case I_MOVZX:
95 			case I_MOVSX:
96 			case I_MOVSXD:
97 			case I_ROL:
98 			case I_ROR:
99 			case I_RCL:
100 			case I_RCR:
101 			case I_SHL:
102 			case I_SHR:
103 			case I_SAL:
104 			case I_SAR:
105 			case I_SHLD:
106 			case I_SHRD:
107 				isSizingRequired = 1;
108 			break;
109 			default: /* Instruction doesn't require sizing. */ break;
110 		}
111 	}
112 
113 	if (isSizingRequired)
114 	{
115 		switch (di->ops[opNum].size)
116 		{
117 			case 0: break; /* OT_MEM's unknown size. */
118 			case 8: strcat_WSN(str, "BYTE "); break;
119 			case 16: strcat_WSN(str, "WORD "); break;
120 			case 32: strcat_WSN(str, "DWORD "); break;
121 			case 64: strcat_WSN(str, "QWORD "); break;
122 			case 80: strcat_WSN(str, "TBYTE "); break;
123 			case 128: strcat_WSN(str, "DQWORD "); break;
124 			case 256: strcat_WSN(str, "YWORD "); break;
125 			default: /* Big oh uh if it gets here. */ break;
126 		}
127 	}
128 }
129 
/*
 * Appends the displacement of di to str as a sign character followed by the
 * magnitude in hex, e.g. the "+0x10" / "-0x10" part of a memory operand.
 *
 * str      - output string that is appended to.
 * di       - instruction whose disp/dispSize fields are read.
 * addrMask - mask applied to the magnitude before it is printed.
 *
 * Nothing is appended when di->dispSize is 0.
 */
static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t addrMask)
{
	uint64_t magnitude;

	if (!di->dispSize) return;

	magnitude = (uint64_t)di->disp;
	if ((int64_t)di->disp < 0) {
		chrcat_WS(str, MINUS_DISP_CHR);
		/*
		 * Negate in unsigned arithmetic: negating the signed value would overflow
		 * (undefined behaviour) for the most negative 64-bit displacement, whereas
		 * the unsigned form is well defined and yields the same bits otherwise.
		 */
		magnitude = (uint64_t)0 - magnitude;
	} else {
		chrcat_WS(str, PLUS_DISP_CHR);
	}

	magnitude &= addrMask;
	str_code_hqw(str, (uint8_t*)&magnitude);
}
142 
143 #ifdef SUPPORT_64BIT_OFFSET
distorm_format64(const _CodeInfo * ci,const _DInst * di,_DecodedInst * result)144 	_DLLEXPORT_ void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
145 #else
146 	_DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
147 #endif
148 {
149 	_WString* str;
150 	unsigned int i, isDefault;
151 	int64_t tmpDisp64;
152 	uint64_t addrMask = (uint64_t)-1;
153 	uint8_t segment;
154 	const _WMnemonic* mnemonic;
155 
156 	/* Set address mask, when default is for 64bits addresses. */
157 	if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff;
158 	else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff;
159 
160 	/* Copy other fields. */
161 	result->size = di->size;
162 	result->offset = di->addr & addrMask;
163 
164 	if (di->flags == FLAG_NOT_DECODABLE) {
165 		str = &result->mnemonic;
166 		strclear_WS(&result->operands);
167 		strcpy_WSN(str, "DB ");
168 		str_code_hb(str, di->imm.byte);
169 		strclear_WS(&result->instructionHex);
170 		str_hex_b(&result->instructionHex, di->imm.byte);
171 		return; /* Skip to next instruction. */
172 	}
173 
174 	str = &result->instructionHex;
175 	strclear_WS(str);
176 	for (i = 0; i < di->size; i++)
177 		str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]);
178 
179 	str = &result->mnemonic;
180 	switch (FLAG_GET_PREFIX(di->flags))
181 	{
182 		case FLAG_LOCK:
183 			strcpy_WSN(str, "LOCK ");
184 		break;
185 		case FLAG_REP:
186 			strcpy_WSN(str, "REP ");
187 		break;
188 		case FLAG_REPNZ:
189 			strcpy_WSN(str, "REPNZ ");
190 		break;
191 		default:
192 			/* Init mnemonic string, cause next touch is concatenation. */
193 			strclear_WS(str);
194 		break;
195 	}
196 
197 	mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode];
198 	memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1);
199 	str->length += mnemonic->length;
200 
201 	/* Format operands: */
202 	str = &result->operands;
203 	strclear_WS(str);
204 
205 	/* Special treatment for String instructions. */
206 	if ((META_GET_ISC(di->meta) == ISC_INTEGER) &&
207 		((di->opcode == I_MOVS) ||
208 		 (di->opcode == I_CMPS) ||
209 		 (di->opcode == I_STOS) ||
210 		 (di->opcode == I_LODS) ||
211 		 (di->opcode == I_SCAS)))
212 	{
213 		/*
214 		 * No operands are needed if the address size is the default one,
215 		 * and no segment is overridden, so add the suffix letter,
216 		 * to indicate size of operation and continue to next instruction.
217 		 */
218 		if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) {
219 			str = &result->mnemonic;
220 			switch (di->ops[0].size)
221 			{
222 				case 8: chrcat_WS(str, 'B'); break;
223 				case 16: chrcat_WS(str, 'W'); break;
224 				case 32: chrcat_WS(str, 'D'); break;
225 				case 64: chrcat_WS(str, 'Q'); break;
226 			}
227 			return;
228 		}
229 	}
230 
231 	for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) {
232 		if (i > 0) strcat_WSN(str, ", ");
233 		switch (di->ops[i].type)
234 		{
235 			case O_REG:
236 				strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
237 			break;
238 			case O_IMM:
239 				/* If the instruction is 'push', show explicit size (except byte imm). */
240 				if ((di->opcode == I_PUSH) && (di->ops[i].size != 8)) distorm_format_size(str, di, i);
241 				/* Special fix for negative sign extended immediates. */
242 				if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) {
243 					if (di->imm.sbyte < 0) {
244 						chrcat_WS(str, MINUS_DISP_CHR);
245 						str_code_hb(str, -di->imm.sbyte);
246 						break;
247 					}
248 				}
249 				if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword);
250 				else str_code_hdw(str, di->imm.dword);
251 			break;
252 			case O_IMM1:
253 				str_code_hdw(str, di->imm.ex.i1);
254 			break;
255 			case O_IMM2:
256 				str_code_hdw(str, di->imm.ex.i2);
257 			break;
258 			case O_DISP:
259 				distorm_format_size(str, di, i);
260 				chrcat_WS(str, OPEN_CHR);
261 				if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
262 					strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
263 					chrcat_WS(str, SEG_OFF_CHR);
264 				}
265 				tmpDisp64 = di->disp & addrMask;
266 				str_code_hqw(str, (uint8_t*)&tmpDisp64);
267 				chrcat_WS(str, CLOSE_CHR);
268 			break;
269 			case O_SMEM:
270 				distorm_format_size(str, di, i);
271 				chrcat_WS(str, OPEN_CHR);
272 
273 				/*
274 				 * This is where we need to take special care for String instructions.
275 				 * If we got here, it means we need to explicitly show their operands.
276 				 * The problem with CMPS and MOVS is that they have two(!) memory operands.
277 				 * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden.
278 				 * And make the rest of the String operations explicit.
279 				 */
280 				segment = SEGMENT_GET(di->segment);
281 				isDefault = SEGMENT_IS_DEFAULT(di->segment);
282 				switch (di->opcode)
283 				{
284 					case I_MOVS:
285 						isDefault = FALSE;
286 						if (i == 0) segment = R_ES;
287 					break;
288 					case I_CMPS:
289 						isDefault = FALSE;
290 						if (i == 1) segment = R_ES;
291 					break;
292 					case I_INS:
293 					case I_LODS:
294 					case I_STOS:
295 					case I_SCAS: isDefault = FALSE; break;
296 				}
297 				if (!isDefault && (segment != R_NONE)) {
298 					strcat_WS(str, (const _WString*)&_REGISTERS[segment]);
299 					chrcat_WS(str, SEG_OFF_CHR);
300 				}
301 
302 				strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
303 
304 				distorm_format_signed_disp(str, di, addrMask);
305 				chrcat_WS(str, CLOSE_CHR);
306 			break;
307 			case O_MEM:
308 				distorm_format_size(str, di, i);
309 				chrcat_WS(str, OPEN_CHR);
310 				if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
311 					strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
312 					chrcat_WS(str, SEG_OFF_CHR);
313 				}
314 				if (di->base != R_NONE) {
315 					strcat_WS(str, (const _WString*)&_REGISTERS[di->base]);
316 					chrcat_WS(str, PLUS_DISP_CHR);
317 				}
318 				strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
319 				if (di->scale != 0) {
320 					chrcat_WS(str, '*');
321 					if (di->scale == 2) chrcat_WS(str, '2');
322 					else if (di->scale == 4) chrcat_WS(str, '4');
323 					else /* if (di->scale == 8) */ chrcat_WS(str, '8');
324 				}
325 
326 				distorm_format_signed_disp(str, di, addrMask);
327 				chrcat_WS(str, CLOSE_CHR);
328 			break;
329 			case O_PC:
330 #ifdef SUPPORT_64BIT_OFFSET
331 				str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask);
332 #else
333 				str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask);
334 #endif
335 			break;
336 			case O_PTR:
337 				str_code_hdw(str, di->imm.ptr.seg);
338 				chrcat_WS(str, SEG_OFF_CHR);
339 				str_code_hdw(str, di->imm.ptr.off);
340 			break;
341 		}
342 	}
343 
344 	if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN");
345 	else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN");
346 }
347 
348 #ifdef SUPPORT_64BIT_OFFSET
distorm_decode64(_OffsetType codeOffset,const unsigned char * code,int codeLen,_DecodeType dt,_DecodedInst result[],unsigned int maxInstructions,unsigned int * usedInstructionsCount)349 	_DLLEXPORT_ _DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
350 #else
351 	_DLLEXPORT_ _DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
352 #endif
353 {
354 	_DecodeResult res;
355 	_DInst di;
356 	_CodeInfo ci;
357 	unsigned int instsCount = 0, i;
358 
359 	*usedInstructionsCount = 0;
360 
361 	/* I use codeLen as a signed variable in order to ease detection of underflow... and besides - */
362 	if (codeLen < 0) {
363 		return DECRES_INPUTERR;
364 	}
365 
366 	if ((dt != Decode16Bits) && (dt != Decode32Bits) && (dt != Decode64Bits)) {
367 		return DECRES_INPUTERR;
368 	}
369 
370 	if (code == NULL || result == NULL) {
371 		return DECRES_INPUTERR;
372 	}
373 
374 	/* Assume length=0 is success. */
375 	if (codeLen == 0) {
376 		return DECRES_SUCCESS;
377 	}
378 
379 	/*
380 	 * We have to format the result into text. But the interal decoder works with the new structure of _DInst.
381 	 * Therefore, we will pass the result array(!) from the caller and the interal decoder will fill it in with _DInst's.
382 	 * Then we will copy each result to a temporary structure, and use it to reformat that specific result.
383 	 *
384 	 * This is all done to save memory allocation and to work on the same result array in-place!!!
385 	 * It's a bit ugly, I have to admit, but worth it.
386 	 */
387 
388 	ci.codeOffset = codeOffset;
389 	ci.code = code;
390 	ci.codeLen = codeLen;
391 	ci.dt = dt;
392 	ci.features = DF_NONE;
393 	if (dt == Decode16Bits) ci.features = DF_MAXIMUM_ADDR16;
394 	else if (dt == Decode32Bits) ci.features = DF_MAXIMUM_ADDR32;
395 
396 	res = decode_internal(&ci, TRUE, (_DInst*)result, maxInstructions, &instsCount);
397 	for (i = 0; i < instsCount; i++) {
398 		if ((*usedInstructionsCount + i) >= maxInstructions) return DECRES_MEMORYERR;
399 
400 		/* Copy the current decomposed result to a temp structure, so we can override the result with text. */
401 		memcpy(&di, (char*)result + (i * sizeof(_DecodedInst)), sizeof(_DInst));
402 #ifdef SUPPORT_64BIT_OFFSET
403 		distorm_format64(&ci, &di, &result[i]);
404 #else
405 		distorm_format32(&ci, &di, &result[i]);
406 #endif
407 	}
408 
409 	*usedInstructionsCount = instsCount;
410 	return res;
411 }
412 
413 #endif /* DISTORM_LIGHT */
414 
distorm_version()415 _DLLEXPORT_ unsigned int distorm_version()
416 {
417 	return __DISTORMV__;
418 }
419