1 /* radare - LGPL - Copyright 2007-2013 - pancake */
2 
3 #include <string.h>
4 #include <r_types.h>
5 #include <r_lib.h>
6 #include <r_asm.h>
7 #include <r_anal.h>
8 
9 /* DEPRECATE ?? */
10 #include "wine-arm.h"
11 #include "../asm/arch/arm/asm-arm.h"
12 #include "../asm/arch/arm/winedbg/be_arm.h"
13 #include "./anal_arm_hacks.inc"
14 
/*
 * Compute the destination of an ARM-mode immediate branch.
 *
 * pc:     address of the branch instruction.
 * insoff: word offset field extracted from the opcode.
 *
 * The field is scaled to bytes (<< 2). When bit 25 of the scaled value is
 * set the displacement is negative, so the upper six bits are filled with
 * ones (sign extension). The result is pc + 8 + displacement, computed with
 * wrapping unsigned arithmetic.
 */
static unsigned int disarm_branch_offset(unsigned int pc, unsigned int insoff) {
	const unsigned int sign_bit = 0x02000000;
	unsigned int disp = insoff << 2;

	if (disp & sign_bit) {
		/* negative displacement: propagate the sign into the top bits */
		disp |= 0xFC000000;
	}
	return pc + 8 + disp;
}
23 
/*
 * Predicates over a 32-bit ARM opcode word `x`. The ARM_* masks and values
 * are provided by the ARM headers included above.
 */

/* opcode matches the ARM_BRANCH_I pattern */
#define IS_BRANCH(x)  (ARM_BRANCH_I == ((x) & ARM_BRANCH_I_MASK))
/* branch that also has the ARM_BRANCH_LINK bit(s) set */
#define IS_BRANCHL(x) (IS_BRANCH (x) && (ARM_BRANCH_LINK == ((x) & ARM_BRANCH_LINK)))
/* ARM_DTM_I pattern with the load flag and bit 15 set */
#define IS_RETURN(x)  ((ARM_DTM_I | ARM_DTM_LOAD | (1 << 15)) == ((x) & (ARM_DTM_I_MASK | ARM_DTM_LOAD | (1 << 15))))
// if ( (inst & ( ARM_DTX_I_MASK | ARM_DTX_LOAD  | ( ARM_DTX_RD_MASK ) ) ) == ( ARM_DTX_LOAD | ARM_DTX_I | ( ARM_PC << 12 ) ) )
/*
 * NOTE(review): this macro never looks at `x`; it compares two constants, so
 * its value is the same for every opcode. The commented-out expression above
 * looks like the intended test -- confirm before changing, since
 * IS_EXITPOINT() users currently rely on the constant result.
 */
#define IS_UNKJMP(x)  ((ARM_DTX_LOAD | ARM_DTX_I | (ARM_PC << 12)) == (ARM_DTX_RD_MASK))
/* ARM_DTX_LOAD flag is set */
#define IS_LOAD(x)    ((ARM_DTX_LOAD) == ((x) & ARM_DTX_LOAD))
/* condition field equals ARM_COND_AL */
#define IS_CONDAL(x)  (ARM_COND_AL == ((x) & ARM_COND_MASK))
/* any opcode class that may leave the current flow */
#define IS_EXITPOINT(x) (IS_BRANCH (x) || IS_RETURN (x) || IS_UNKJMP (x))

#define API static
34 
/*
 * Analyze one Thumb instruction.
 *
 * anal: analysis context (not read by this function).
 * op:   output; delay/size/jump/fail are seeded from the winedbg
 *       disassembler, then type/jump/fail/val are refined for the
 *       instruction classes recognized below.
 * addr: address of the instruction.
 * data: instruction bytes; halfwords are loaded in host byte order.
 * len:  number of valid bytes in data.
 *
 * Returns op->size as reported by arm_disasm_one_insn(), or -1 when data is
 * NULL, shorter than one halfword, or no disassembler context is available.
 */
static int op_thumb(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *data, int len) {
	ut16 ins = 0;
	struct winedbg_arm_insn *arminsn;

	if (!data || len < 2) {
		return -1;
	}
	/* memcpy instead of a pointer cast: no alignment or aliasing assumptions */
	memcpy (&ins, data, sizeof (ins));

	arminsn = arm_new ();
	if (!arminsn) {
		return -1;
	}
	arm_set_thumb (arminsn, true);
	arm_set_input_buffer (arminsn, data);
	arm_set_pc (arminsn, addr);
	op->delay = 0;
	op->size = arm_disasm_one_insn (arminsn);
	op->jump = arminsn->jmp;
	op->fail = arminsn->fail;
	arm_free (arminsn);

	// TODO: handle 32bit instructions (branches are not correctly decoded)

	/* CMP Rn, #imm8 (dp3) */
	if ((ins & B4 (B1110, 0, 0, 0)) == B4 (B0010, 0, 0, 0)
	    && ((ins & B4 (1, B1000, 0, 0)) >> 11) == 1) {
		op->type = R_ANAL_OP_TYPE_CMP;
		return op->size;
	}
	/* register data processing (dp5): opcodes 8 and 10 only set flags */
	if ((ins & B4 (B1111, B1100, 0, 0)) == B4 (B0100, 0, 0, 0)) {
		const int dp5 = (ins & B4 (0, B0011, B1100, 0)) >> 6;
		if (dp5 == 8 || dp5 == 10) {
			op->type = R_ANAL_OP_TYPE_CMP;
			return op->size;
		}
	}
	/* special data processing (dp8): opcode 1 is the compare */
	if ((ins & B4 (B1111, B1100, 0, 0)) == B4 (B0100, B0100, 0, 0)) {
		const int dp8 = (ins & B4 (0, B0011, 0, 0)) >> 8;
		if (dp8 == 1) {
			op->type = R_ANAL_OP_TYPE_CMP;
			return op->size;
		}
	}

	/* top five bits select the immediate-offset load/store group */
	const int top5 = (ins & B4 (B1111, B1000, 0, 0)) >> 11;

	if (ins == B4 (B1011, B1111, 0, 0)) {
		// NOP hint is encoded as 0xbf00
		// TODO: add support for more NOP instructions
		op->type = R_ANAL_OP_TYPE_NOP;
	} else if (top5 >= 12 && top5 <= 17) {
		/* STR/LDR, STRB/LDRB, STRH/LDRH with immediate: odd codes load */
		op->type = (top5 % 2)? R_ANAL_OP_TYPE_LOAD: R_ANAL_OP_TYPE_STORE;
	} else if ((ins & B4 (B1111, 0, 0, 0)) == B4 (B0101, 0, 0, 0)) {
		/*
		 * Register-offset group, opB in bits 11..9:
		 * 0 STR, 1 STRH, 2 STRB, 3 LDRSB, 4 LDR, 5 LDRH, 6 LDRB, 7 LDRSH
		 */
		const int opb = (ins & B4 (0, B1110, 0, 0)) >> 9;
		op->type = (opb >= 3)? R_ANAL_OP_TYPE_LOAD: R_ANAL_OP_TYPE_STORE;
	} else if ((ins & B4 (B1111, B1111, 0, 0)) == B4 (B1101, B1111, 0, 0)) {
		/* SVC #imm8: must be tested before Bcc, which shares the 1101 prefix */
		op->type = R_ANAL_OP_TYPE_SWI;
		op->val = (ut64) (ins & B4 (0, 0, B1111, B1111));
	} else if ((ins & B4 (B1111, 0, 0, 0)) == B4 (B1101, 0, 0, 0)) {
		// BNE.. : signed 8-bit halfword offset relative to addr + 4
		int delta = ins & B4 (0, 0, B1111, B1111);
		if (delta & B4 (0, 0, B1000, 0)) {
			delta -= 0x100;
		}
		op->type = R_ANAL_OP_TYPE_CJMP;
		op->jump = addr + 4 + delta * 2;
		op->fail = addr + 2; // 16-bit instruction: fall through to the next halfword
	} else if ((ins & B4 (B1111, B1000, 0, 0)) == B4 (B1110, 0, 0, 0)) {
		// B : signed 11-bit halfword offset relative to addr + 4
		int delta = ins & B4 (0, B0111, B1111, B1111);
		if (delta & B4 (0, B0100, 0, 0)) {
			delta -= 0x800;
		}
		op->type = R_ANAL_OP_TYPE_JMP;
		op->jump = addr + 4 + delta * 2;
		op->fail = addr + 2;
	} else if ((ins & B4 (B1111, B1111, B1000, 0)) ==
	           B4 (B0100, B0111, B1000, 0)) {
		// BLX Rm
		op->type = R_ANAL_OP_TYPE_UCALL;
		op->fail = addr + 2;
	} else if ((ins & B4 (B1111, B1111, B1000, 0)) ==
	           B4 (B0100, B0111, 0, 0)) {
		// BX Rm
		op->type = R_ANAL_OP_TYPE_UJMP;
		op->fail = addr + 2;
	} else if ((ins & B4 (B1111, B1000, 0, 0)) == B4 (B1111, 0, 0, 0)) {
		// BL The long branch with link, it's in 2 instructions:
		// prefix: 11110[offset]
		// suffix: 11111[offset] (11101[offset] for blx)
		op->type = R_ANAL_OP_TYPE_CALL;
		op->fail = addr + 4;
		if (len >= 4) {
			ut16 nextins = 0;
			memcpy (&nextins, data + 2, sizeof (nextins));
			/* prefix carries the signed upper 11 bits of the offset */
			int high = ins & B4 (0, B0111, B1111, B1111);
			if (high & B4 (0, B0100, 0, 0)) {
				high -= 0x800;
			}
			/* multiply instead of shifting: high may be negative */
			const int delta = high * 4096 +
				(nextins & B4 (0, B0111, B1111, B1111)) * 2;
			op->jump = addr + 4 + delta;
		}
		/* with fewer than 4 bytes the disassembler's jump value is kept */
	} else if ((ins & B4 (B1111, B1111, 0, 0)) == B4 (B1011, B1110, 0, 0)) {
		/* BKPT #imm8 */
		op->type = R_ANAL_OP_TYPE_TRAP;
		op->val = (ut64) (ins & B4 (0, 0, B1111, B1111));
	}
	return op->size;
}
135 
136 #if 0
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv",
139 #endif
140 static int iconds[] = {
141 	R_ANAL_COND_EQ,
142 	R_ANAL_COND_NE,
143 	0, // cs
144 	0, // cc
145 	0, // mi
146 	0, // pl
147 	0, // vs
148 	0, // vc
149 
150 	0, // hi
151 	0, // ls
152 	R_ANAL_COND_GE,
153 	R_ANAL_COND_LT,
154 	R_ANAL_COND_GT,
155 	R_ANAL_COND_LE,
156 	R_ANAL_COND_AL,
157 	R_ANAL_COND_NV,
158 };
159 
op_cond(const ut8 * data)160 static int op_cond(const ut8 *data) {
161 	ut8 b = data[3] >> 4;
162 	if (b == 0xf) {
163 		return 0;
164 	}
165 	return iconds[b];
166 }
167 
/*
 * Analyze one instruction in ARM (32-bit) mode, or hand it to op_thumb()
 * when anal->bits is 16.
 *
 * anal: analysis context; bits and big_endian are read.
 * op:   output; addr, type, size, cond and, depending on the opcode,
 *       ptr/val/jump/fail/stackop/refptr are filled in.
 * addr: address of the instruction.
 * data: instruction bytes in target byte order.
 * len:  number of valid bytes in data.
 *
 * For big-endian targets the bytes are first reordered into the little-endian
 * layout the byte-pattern tests below expect (whole word in ARM mode, each
 * halfword separately in Thumb mode).
 *
 * Returns the instruction size (4 in ARM mode, op_thumb()'s result in Thumb
 * mode), 0 when data is NULL, or -1 when the buffer is too short.
 */
static int arm_op32(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *data, int len) {
	ut8 ndata[4] = { 0 };
	const ut8 *b;
	ut32 insn;
	int avail = len;

	if (!data) {
		return 0;
	}
	op->addr = addr;
	op->type = R_ANAL_OP_TYPE_UNK;

	const bool is_thumb = (anal->bits == 16);
	if (len < (is_thumb? 2: 4)) {
		return -1;
	}
	if (anal->big_endian) {
		/* fill the scratch buffer from the input BEFORE redirecting data */
		if (is_thumb) {
			ndata[0] = data[1];
			ndata[1] = data[0];
			if (len >= 4) {
				ndata[2] = data[3];
				ndata[3] = data[2];
			}
		} else {
			ndata[0] = data[3];
			ndata[1] = data[2];
			ndata[2] = data[1];
			ndata[3] = data[0];
		}
		data = ndata;
		if (avail > 4) {
			avail = 4; // the scratch buffer only holds 4 bytes
		}
	}
	if (is_thumb) {
		return op_thumb (anal, op, addr, data, avail);
	}

	b = data;
	/* assemble the opcode word from bytes: no unaligned or type-punned load */
	insn = b[0] | (b[1] << 8) | (b[2] << 16) | ((ut32) b[3] << 24);
	op->size = 4;
	op->cond = op_cond (data);

	if (b[2] == 0x8f && b[3] == 0xe2) {
		/* add rX, pc, #imm: 8-bit value rotated right by 2 * rotate field */
		const ut32 imm8 = b[0];
		const unsigned int rot = (b[1] & 0xf) << 1;
		/* rot == 0 must not shift by 32 */
		const ut32 imm = rot? ((imm8 >> rot) | (imm8 << (32 - rot))): imm8;
		op->type = R_ANAL_OP_TYPE_ADD;
		op->ptr = addr + (int) imm + 8;
	} else if (b[2] >= 0x9c && b[2] <= 0x9f) {  // load instruction
		switch (b[3] & 0xf) {
		case 5:
			op->ptr = 8 + addr + b[0] + ((b[1] & 0xf) << 8);
			// XXX: if set it breaks the visual disasm wtf
			// op->refptr = true;
			/* fallthrough */
		case 4:
		case 6:
		case 7:
		case 8:
		case 9:
			op->type = R_ANAL_OP_TYPE_LOAD;
			break;
		default:
			break;
		}
	} else if (b[2] == 0xa0 && b[3] == 0xe1) {
		/* mov rX, rY; "mov rN, rN" (low halfword 0xN00N) is a nop */
		const int n = (b[0] << 8) | b[1];
		op->type = R_ANAL_OP_TYPE_MOV;
		switch (n) {
		case 0:
		case 0x0110: case 0x0220: case 0x0330: case 0x0440:
		case 0x0550: case 0x0660: case 0x0770: case 0x0880:
		case 0x0990: case 0x0aa0: case 0x0bb0: case 0x0cc0:
			op->type = R_ANAL_OP_TYPE_NOP;
			break;
		default:
			break;
		}
	} else if (b[3] == 0xef) {
		// 0x000037b8  800000ef  svc 0x00000080
		/* 24-bit immediate in the three low bytes */
		op->type = R_ANAL_OP_TYPE_SWI;
		op->val = (b[0] | (b[1] << 8) | (b[2] << 16));
	} else if ((b[3] & 0xf) == 5) {  // [reg,0xa4]
		/*
		 * 0x00000000      a4a09fa4 ldrge sl, [pc], 0xa4
		 * 0x00000000      a4a09fa5 ldrge sl, [pc, 0xa4]
		 * 0x00000000      a4a09fa6 ldrge sl, [pc], r4, lsr 1
		 * 0x00000000      a4a09fa7 ldrge sl, [pc, r4, lsr 1]
		 */
		if ((b[1] & 0xf0) == 0xf0) {
			// ldr pc, [pc, #1] ;
			// NOTE(review): reported as RET with a placeholder jump value
			op->type = R_ANAL_OP_TYPE_RET; // FAKE FOR FUN
			op->jump = 1234;
		}
	} else if (b[3] == 0xe2 && b[2] == 0x8d && b[1] == 0xd0) {
		// ADD SP, SP, ...
		op->type = R_ANAL_OP_TYPE_ADD;
		op->stackop = R_ANAL_STACK_INC;
		op->val = -b[0];
	} else if (b[3] == 0xe2 && b[2] == 0x4d && b[1] == 0xd0) {
		// SUB SP, SP, ..
		op->type = R_ANAL_OP_TYPE_SUB;
		op->stackop = R_ANAL_STACK_INC;
		op->val = b[0];
	} else if (b[3] == 0xe2 && b[2] == 0x4c && b[1] == 0xb0) {
		// SUB FP, IP, .. (Rn = r12, Rd = r11)
		op->type = R_ANAL_OP_TYPE_SUB;
		op->stackop = R_ANAL_STACK_INC;
		op->val = -b[0];
	} else if (b[3] == 0xe2 && b[2] == 0x4b && b[1] == 0xd0) {
		// SUB SP, FP, .. (Rn = r11, Rd = r13)
		op->type = R_ANAL_OP_TYPE_SUB;
		op->stackop = R_ANAL_STACK_INC;
		op->val = -b[0];
	} else if (insn == 0x1eff2fe1 || insn == 0xe12fff1e) {  // bx lr
		/* both byte orders are accepted */
		op->type = R_ANAL_OP_TYPE_RET;
	} else if (insn & ARM_DTX_LOAD) {
		op->type = R_ANAL_OP_TYPE_MOV;
		if (b[2] == 0x1b) {
			/* XXX pretty incomplete */
			op->stackop = R_ANAL_STACK_GET;
			op->ptr = b[0];
			// var_add_access(addr, -b[0], 1, 0); // TODO: set/get (the last 0)
		} else {
			// TODO: read the pointed value at addr+8+b[0] and record a data xref;
			// until then no pointer is reported
			op->ptr = 0;
		}
	}

	if (IS_LOAD (insn)) {
		op->type = R_ANAL_OP_TYPE_LOAD;
		op->refptr = 4;
	}

	/* 0xe12fff1N: bx rN */
	const bool is_bx_reg = ((insn & 0xffffff00) == 0xe12fff00) &&
		(insn & 0xff) >= 0x10 && (insn & 0xff) < 0x20;

	if (is_bx_reg || IS_EXITPOINT (insn)) {
		const ut32 branch_dst_addr = disarm_branch_offset (addr, insn & 0x00FFFFFF);
		op->ptr = 0;
		if (is_bx_reg) {
			op->type = R_ANAL_OP_TYPE_UJMP;
		} else if (IS_BRANCHL (insn)) {
			op->type = R_ANAL_OP_TYPE_CALL;
			op->jump = branch_dst_addr;
			op->fail = addr + 4;
		} else if (IS_BRANCH (insn)) {
			op->jump = branch_dst_addr;
			if (IS_CONDAL (insn)) {
				op->type = R_ANAL_OP_TYPE_JMP;
				op->fail = UT64_MAX;
			} else {
				op->type = R_ANAL_OP_TYPE_CJMP;
				op->fail = addr + 4;
			}
		}
		/* otherwise: unknown jump or return, type is left as decoded above */
	}
	return op->size;
}
343 
344 
/*
 * Resolve the destination of an AArch64 immediate branch (B / BL).
 *
 * addr: address of the instruction.
 * d:    the 4 instruction bytes, least significant byte first.
 *
 * The offset is the 26-bit field in bits 0..25 of the opcode (its two top
 * bits live in the low bits of d[3]). It is sign-extended from bit 25,
 * scaled by 4 and added to addr using wrapping 64-bit arithmetic.
 */
static ut64 getaddr(ut64 addr, const ut8 *d) {
	const ut32 imm26 = d[0] | (d[1] << 8) | (d[2] << 16) | ((ut32) (d[3] & 0x03) << 24);
	ut64 offset = imm26;

	if (imm26 & 0x02000000) {
		/* negative offset: fill everything above the 26-bit field with ones */
		offset |= ~(ut64) 0x03ffffff;
	}
	return addr + (offset << 2);
}
354 
/*
 * Analyze one AArch64 instruction.
 *
 * anal: analysis context, forwarded to hackyArmAnal().
 * op:   output; size and type are set, plus jump/fail for branches.
 * addr: address of the instruction.
 * d:    instruction bytes, least significant byte first.
 * len:  number of valid bytes in d.
 *
 * hackyArmAnal() gets the first chance; when it returns a positive value
 * that value is returned unchanged. Otherwise the opcode is classified by
 * its top byte d[3].
 *
 * Returns 4, hackyArmAnal()'s positive result, or -1 for invalid input
 * (NULL or short buffer, or a zero top byte).
 */
static int arm_op64(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *d, int len) {
	if (!d || len < 4 || d[3] == 0) {
		return -1;      // invalid
	}
	int haa = hackyArmAnal (anal, op, d, len);
	if (haa > 0) {
		return haa;
	}
	op->size = 4;
	op->type = R_ANAL_OP_TYPE_NULL;
	if (d[0] == 0xc0 && d[3] == 0xd6) {
		// defaults to x30 reg. but can be different
		op->type = R_ANAL_OP_TYPE_RET;
	}
	switch (d[3]) {
	case 0x71:
	case 0xeb:
		op->type = R_ANAL_OP_TYPE_CMP;
		break;
	case 0xb8:
	case 0xb9:
	case 0xf8:
	case 0xa9: // ldp/stp
	case 0xf9: // ldr/str
		op->type = R_ANAL_OP_TYPE_LOAD;
		break;
	case 0x91: // mov
	case 0x52: // mov
		/* must not fall into the call case below */
		op->type = R_ANAL_OP_TYPE_MOV;
		break;
	case 0x94: // bl A
	case 0x97: // bl A
		op->type = R_ANAL_OP_TYPE_CALL;
		op->jump = getaddr (addr, d);
		op->fail = addr + 4;
		break;
	case 0x54: // beq A
	{
		/* imm19 sits in bits 5..23 of the opcode; signed, scaled by 4 */
		ut64 offset = ((ut32) d[0] >> 5) | ((ut32) d[1] << 3) | ((ut32) d[2] << 11);
		if (offset & 0x40000) {
			offset |= ~(ut64) 0x7ffff;
		}
		op->type = R_ANAL_OP_TYPE_CJMP;
		op->jump = addr + (offset << 2);
		op->fail = addr + 4;
		break;
	}
	case 0x17: // b A
	case 0x14: // b A
		op->type = R_ANAL_OP_TYPE_JMP;
		op->jump = getaddr (addr, d);
		op->fail = addr + 4;
		break;
	default:
		break;
	}
	return op->size;
}
403 
/*
 * Plugin entry point: route the request by anal->bits.
 * 64 goes to arm_op64(); every other width goes to arm_op32().
 * mask is accepted for the plugin callback signature and is not used.
 */
static int arm_op(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *data, int len, RAnalOpMask mask) {
	return (anal->bits == 64)
		? arm_op64 (anal, op, addr, data, len)
		: arm_op32 (anal, op, addr, data, len);
}
410 
/*
 * Install the register profile on anal->reg.
 * Only the 32-bit layout is provided; the same text is used for every mode.
 * Returns whatever r_reg_set_profile_string() reports.
 */
static bool set_reg_profile(RAnal *anal) {
	// TODO: support 64bit profile
	static const char profile32[] =
		/* role aliases */
		"=PC	r15\n"
		"=SP	r13\n"
		"=BP	r14\n" // XXX
		"=A0	r0\n"
		"=A1	r1\n"
		"=A2	r2\n"
		"=A3	r3\n"
		/* named views of r14 / r15 */
		"gpr	lr	.32	56	0\n" // r14
		"gpr	pc	.32	60	0\n" // r15

		/* 32-bit registers, 4 bytes apart */
		"gpr	r0	.32	0	0\n"
		"gpr	r1	.32	4	0\n"
		"gpr	r2	.32	8	0\n"
		"gpr	r3	.32	12	0\n"
		"gpr	r4	.32	16	0\n"
		"gpr	r5	.32	20	0\n"
		"gpr	r6	.32	24	0\n"
		"gpr	r7	.32	28	0\n"
		"gpr	r8	.32	32	0\n"
		"gpr	r9	.32	36	0\n"
		"gpr	r10	.32	40	0\n"
		"gpr	r11	.32	44	0\n"
		"gpr	r12	.32	48	0\n"
		"gpr	r13	.32	52	0\n"
		"gpr	r14	.32	56	0\n"
		"gpr	r15	.32	60	0\n"
		"gpr	r16	.32	64	0\n"
		"gpr	r17	.32	68	0\n"
		"gpr	cpsr	.32	72	0\n";

	return r_reg_set_profile_string (anal->reg, profile32);
}
445 
/*
 * Answer an architecture-info query.
 *
 * anal: analysis context; may be NULL.
 * q:    one of the R_ANAL_ARCHINFO_* selectors.
 *
 * Alignment and minimum opcode size are 2 when anal->bits is 16 and 4
 * otherwise; the maximum opcode size and any unrecognized query yield 4.
 */
static int archinfo(RAnal *anal, int q) {
	const int unit = (anal && anal->bits == 16)? 2: 4;

	if (q == R_ANAL_ARCHINFO_ALIGN) {
		return unit;
	}
	if (q != R_ANAL_ARCHINFO_MAX_OP_SIZE && q == R_ANAL_ARCHINFO_MIN_OP_SIZE) {
		return unit;
	}
	return 4; // XXX
}
464 
465 RAnalPlugin r_anal_plugin_arm_gnu = {
466 	.name = "arm.gnu",
467 	.arch = "arm",
468 	.license = "LGPL3",
469 	.bits = 16 | 32 | 64,
470 	.desc = "ARM code analysis plugin",
471 	.archinfo = archinfo,
472 	.op = &arm_op,
473 	.set_reg_profile = set_reg_profile,
474 };
475 
476 #ifndef R2_PLUGIN_INCORE
477 R_API RLibStruct radare_plugin = {
478 	.type = R_LIB_TYPE_ANAL,
479 	.data = &r_anal_plugin_arm_gnu,
480 	.version = R2_VERSION
481 };
482 #endif
483