1 /* disarm -- a simple disassembler for ARM instructions
2 * (c) 2000 Gareth McCaughan
3 *
4 * This file may be distributed and used freely provided:
5 * 1. You do not distribute any version that lacks this
6 * copyright notice (exactly as it appears here, extending
7 * from the start to the end of the C-language comment
8 * containing these words)); and,
9 * 2. If you distribute any modified version, its source
10 * contains a clear description of the ways in which
11 * it differs from the original version, and a clear
12 * indication that the changes are not mine.
13 * There is no restriction on your permission to use and
14 * distribute object code or executable code derived from
15 * this.
16 *
17 * The original version of this file (or perhaps a later
18 * version by the original author) may or may not be
19 * available at http://web.ukonline.co.uk/g.mccaughan/g/software.html .
20 *
21 * Share and enjoy! -- g
22 */
23
24 /* (*This* comment is NOT part of the notice mentioned in the
25 * distribution conditions above.)
26 *
27 * The bulk of this code was ripped brutally from the middle
28 * of a much more interesting piece of software whose purpose
29 * is to disassemble object files in the format known as AOF;
30 * it's quite clever at spotting blocks of non-code embedded
31 * in code, identifying labels, and so on.
32 *
33 * This program, on the other hand, is very much simpler.
34 * It simply disassembles one instruction at a time. Some
35 * traces of the original purpose can be seen here and there.
36 * You might want to make this do a two-phase disassembly,
37 * adding labels etc the second time around. I've made this
38 * work by loading the whole file into memory first, partly
39 * because that makes a two-pass approach easier.
40 *
41 * One word of warning: I believe that the syntax this program
42 * uses for the MSR instruction is now obsolete.
43 *
44 * Usage:
45 * disarm <filename> <base-address>
46 * will disassemble every word in <filename>.
47 *
48 * <base-address> should be something understood by strtol.
49 * So you can get hex (which is probably what you want)
50 * by prefixing "0x".
51 *
52 * The -r option will byte-reverse each word before it's
53 * disassembled.
54 *
55 * The code is rather unmaintainable. I'm sorry.
56 *
57 * Changes since original release:
58 * ????-??-?? v0.00 Initial release.
59 * 2007-09-02 v0.11 Change %X to %lX in a format string.
60 * (Thanks to Vincent Zweije for reporting this.)
61 */
62
63 #ifdef __clang__
64 #pragma GCC diagnostic push
65 #pragma GCC diagnostic ignored "-Wtautological-compare" //used to avoid warning, force compiler to accept it.
66 #pragma GCC diagnostic ignored "-Wstring-plus-int"
67 #endif
68
69 #include "ppsspp_config.h"
70 #include <cstdio>
71 #include <cstdlib>
72 #include <cstring>
73
74 #include "Common/ArmEmitter.h"
75 #include "ext/disarm.h"
76
// Condition-code suffixes, indexed by the 4-bit condition field in bits
// 31..28 of an instruction word. The table covers all 16 possible field
// values so that indexing it with the raw field can never read past the end.
static const char *CCFlagsStr[] = {
	"EQ", // Equal
	"NEQ", // Not equal
	"CS", // Carry Set
	"CC", // Carry Clear
	"MI", // Minus (Negative)
	"PL", // Plus
	"VS", // Overflow
	"VC", // No Overflow
	"HI", // Unsigned higher
	"LS", // Unsigned lower or same
	"GE", // Signed greater than or equal
	"LT", // Signed less than
	"GT", // Signed greater than
	"LE", // Signed less than or equal
	"", // Always (unconditional) 14
	"", // Field value 15: printed without a condition suffix
};
94
// Extracts the destination floating point / SIMD register number from op.
//   quad == dbl == false: bits 15..12 form the upper four bits of the result
//                         and bit 22 the lowest bit.
//   dbl or quad:          bit 22 becomes bit 4, placed above bits 15..12.
//   quad:                 that number is additionally halved.
int GetVd(uint32_t op, bool quad = false, bool dbl = false) {
	if (quad || dbl) {
		const int wide = (int)(((op >> 18) & 0x10) | ((op >> 12) & 0xF));
		return quad ? (wide >> 1) : wide;
	}
	// quad is false on this path, so no halving is needed.
	return (int)(((op >> 22) & 1) | ((op >> 11) & 0x1E));
}
106
// Extracts the first-operand floating point / SIMD register number from op.
//   quad == dbl == false: bits 19..16 form the upper four bits of the result
//                         and bit 7 the lowest bit.
//   dbl or quad:          bit 7 becomes bit 4, placed above bits 19..16.
//   quad:                 that number is additionally halved.
int GetVn(uint32_t op, bool quad = false, bool dbl = false) {
	const bool wide = quad || dbl;
	uint32_t number = wide
		? (((op >> 16) & 0xF) | ((op >> 3) & 0x10))
		: (((op >> 7) & 1) | ((op >> 15) & 0x1E));
	if (quad)
		number >>= 1;
	return (int)number;
}
118
// Extracts the second-operand floating point / SIMD register number from op.
//   quad == dbl == false: bits 3..0 form the upper four bits of the result
//                         and bit 5 the lowest bit.
//   dbl or quad:          bit 5 becomes bit 4, placed above bits 3..0.
//   quad:                 that number is additionally halved.
int GetVm(uint32_t op, bool quad = false, bool dbl = false) {
	int number;
	if (dbl || quad)
		number = (int)(((op >> 1) & 0x10) | (op & 0xF));
	else
		number = (int)(((op >> 5) & 1) | ((op << 1) & 0x1E));
	return quad ? number >> 1 : number;
}
130
131
132 // Modern VFP disassembler, written entirely separately because I can't figure out the old stuff :P
133 // Horrible array of hacks but hey. Can be cleaned up later.
134
DisasmVFP(uint32_t op,char * text)135 bool DisasmVFP(uint32_t op, char *text) {
136 #if defined(__ANDROID__) && PPSSPP_ARCH(X86)
137 // Prevent linking errors with ArmEmitter which I've excluded on x86 android.
138 strcpy(text, "ARM disasm not available");
139 #else
140 const char *cond = CCFlagsStr[op >> 28];
141 switch ((op >> 24) & 0xF) {
142 case 0xC:
143 // VLDMIA/VSTMIA
144 {
145 bool single_reg = ((op >> 8) & 0xF) == 10;
146 int freg = ((op >> 11) & 0x1E) | ((op >> 22) & 1);
147 int base = (op >> 16) & 0xF;
148 bool load = (op >> 20) & 1;
149 bool writeback = (op >> 21) & 1;
150 int numregs = op & 0xF;
151 bool add = (op >> 23) & 1;
152 if (add && writeback && load && base == 13) {
153 if (single_reg)
154 sprintf(text, "VPOP%s {s%i-s%i}", cond, freg, freg-1+numregs);
155 else
156 sprintf(text, "VPOP%s {d%i-d%i}", cond, freg, freg-1+(numregs/2));
157
158 return true;
159 }
160 if (single_reg)
161 sprintf(text, "%s%s r%i%s, {s%i-s%i}", load ? "VLDMIA" : "VSTMIA", cond, base, writeback ? "!":"", freg, freg-1+numregs);
162 else
163 sprintf(text, "%s%s r%i%s, {d%i-d%i}", load ? "VLDMIA" : "VSTMIA", cond, base, writeback ? "!":"", freg, freg-1+(numregs/2));
164
165 return true;
166 }
167 case 0xD:
168 // VLDR/VSTR/VLDMDB/VSTMDB
169 {
170 bool single_reg = ((op >> 8) & 0xF) == 10;
171 int freg = ((op >> 11) & 0x1E) | ((op >> 22) & 1);
172 int base = (op >> 16) & 0xF;
173 bool load = (op >> 20) & 1;
174 bool add = (op >> 23) & 1;
175 bool writeback = (op >> 21) & 1;
176 if (writeback) { // Multiple
177 int numregs = op & 0xF;
178 if (!add && !load && base == 13) {
179 if (single_reg)
180 sprintf(text, "VPUSH%s {s%i-s%i}", cond, freg, freg-1+numregs);
181 else
182 sprintf(text, "VPUSH%s {d%i-d%i}", cond, freg, freg-1+(numregs/2));
183
184 return true;
185 }
186
187 if (single_reg)
188 sprintf(text, "%s%s r%i, {s%i-s%i}", load ? "VLDMDB" : "VSTMDB", cond, base, freg, freg-1+numregs);
189 else
190 sprintf(text, "%s%s r%i, {d%i-d%i}", load ? "VLDMDB" : "VSTMDB", cond, base, freg, freg-1+(numregs/2));
191 } else {
192 int offset = (op & 0xFF) << 2;
193 if (!add) offset = -offset;
194 sprintf(text, "%s%s s%i, [r%i, #%i]", load ? "VLDR" : "VSTR", cond, freg, base, offset);
195 }
196
197 return true;
198 }
199
200 case 0xE:
201 {
202 switch ((op >> 20) & 0xF) {
203 case 0xE: // VMSR
204 if ((op & 0xFFF) != 0xA10)
205 break;
206 sprintf(text, "VMSR%s r%i", cond, (op >> 12) & 0xF);
207 return true;
208 case 0xF: // VMRS
209 if ((op & 0xFFF) != 0xA10)
210 break;
211 if (op == 0xEEF1FA10) {
212 sprintf(text, "VMRS%s APSR", cond);
213 } else {
214 sprintf(text, "VMRS%s r%i", cond, (op >> 12) & 0xF);
215 }
216 return true;
217 default:
218 break;
219 }
220
221 if (((op >> 19) & 0x7) == 0x7) {
222 // VCVT
223 sprintf(text, "VCVT ...");
224 return true;
225 }
226
227 bool quad_reg = (op >> 6) & 1;
228 bool double_reg = (op >> 8) & 1;
229 char c = double_reg ? 'd' : 's';
230
231 int part1 = ((op >> 23) & 0x1F);
232 int part2 = ((op >> 9) & 0x7) ;
233 int part3 = ((op >> 20) & 0x3) ;
234 if (part3 == 3 && part2 == 5 && part1 == 0x1D) {
235 // VMOV, VCMP
236 int vn = GetVn(op);
237 if (vn != 1 && vn != 2 && vn != 3) {
238 int vm = GetVm(op, false, double_reg);
239 int vd = GetVd(op, false, double_reg);
240
241 const char *name = "VMOV";
242 if (op & 0x40000)
243 name = (op & 0x80) ? "VCMPE" : "VCMP";
244 sprintf(text, "%s%s %c%i, %c%i", name, cond, c, vd, c, vm);
245 return true;
246 }
247 }
248
249 // Moves between single precision registers and GPRs
250 if (((op >> 20) & 0xFFE) == 0xEE0) {
251 int vd = ((op >> 15) & 0x1E) | ((op >> 7) & 0x1);
252 int src = (op >> 12) & 0xF;
253
254 if (op & (1 << 20))
255 sprintf(text, "VMOV r%i, s%i", src, vd);
256 else
257 sprintf(text, "VMOV s%i, r%i", vd, src);
258 return true;
259 }
260
261 // Arithmetic
262
263 int opnum = -1;
264 int opc1 = (op >> 20) & 0xFB;
265 int opc2 = (op >> 4) & 0xAC;
266 for (int i = 0; i < 16; i++) {
267 // What the hell?
268 int fixed_opc2 = opc2;
269 if (!(ArmGen::VFPOps[i][0].opc2 & 0x8))
270 fixed_opc2 &= 0xA7;
271 if (ArmGen::VFPOps[i][0].opc1 == opc1 && ArmGen::VFPOps[i][0].opc2 == fixed_opc2) {
272 opnum = i;
273 break;
274 }
275 }
276 if (opnum < 0)
277 return false;
278 switch (opnum) {
279 case 8:
280 case 10:
281 case 11:
282 case 12:
283 case 13:
284 case 14:
285 {
286 quad_reg = false;
287 int vd = GetVd(op, quad_reg, double_reg);
288 int vn = GetVn(op, quad_reg, true);
289 int vm = GetVm(op, quad_reg, double_reg);
290 if (opnum == 8 && vn == 0x11)
291 opnum += 3;
292 sprintf(text, "%s%s %c%i, %c%i", ArmGen::VFPOpNames[opnum], cond, c, vd, c, vm);
293 return true;
294 }
295 default:
296 {
297 quad_reg = false;
298 int vd = GetVd(op, quad_reg, double_reg);
299 int vn = GetVn(op, quad_reg, double_reg);
300 int vm = GetVm(op, quad_reg, double_reg);
301 sprintf(text, "%s%s %c%i, %c%i, %c%i", ArmGen::VFPOpNames[opnum], cond, c, vd, c, vn, c, vm);
302 return true;
303 }
304 }
305 return true;
306 }
307 break;
308 }
309 #endif
310 return false;
311 }
312
// Maps a 2-bit element size code (0..3) to the element width in bits as a
// string: "8", "16", "32" or "64". Any other value yields "(err)".
static const char *GetSizeString(int sz) {
	static const char *const kWidths[] = { "8", "16", "32", "64" };
	if (sz < 0 || sz > 3)
		return "(err)";
	return kWidths[sz];
}
327
// Maps a 2-bit element size code (0..3) to an integer element type name:
// "i8", "i16", "i32" or "i64". Any other value yields "(err)".
static const char *GetISizeString(int sz) {
	static const char *const kTypes[] = { "i8", "i16", "i32", "i64" };
	if (sz < 0 || sz > 3)
		return "(err)";
	return kTypes[sz];
}
342
// Translates the 4-bit "type" field of a NEON load/store into the number of
// registers transferred. Unrecognised types yield 0.
// NOTE(review): the value 4 -> 4 registers is kept as found; confirm against
// the matching encoder, the architecture manual may use a different type
// value for the four-register VLD1/VST1 form.
static int GetRegCount(int type) {
	if (type == 7)
		return 1;
	if (type == 10)
		return 2;
	if (type == 6)
		return 3;
	if (type == 4)
		return 4;
	return 0;
}
353
354 // VLD1 / VST1
DisasmNeonLDST(uint32_t op,char * text)355 static bool DisasmNeonLDST(uint32_t op, char *text) {
356 bool load = (op >> 21) & 1;
357 int Rn = (op >> 16) & 0xF;
358 int Rm = (op & 0xF);
359 int Vd = GetVd(op, false, true);
360
361 const char *name = load ? "LD" : "ST";
362 const char *suffix = "";
363 if (Rm == 13)
364 suffix = "!";
365
366 if ((op & (1 << 23)) == 0) {
367 int sz = (op >> 6) & 3;
368 int regCount = GetRegCount((op >> 8) & 0xF);
369
370 int startReg = Vd;
371 int endReg = Vd + regCount - 1;
372
373 if (Rm != 15 && Rm != 13) {
374 sprintf(text, "V%s1 - regsum", name);
375 } else {
376 if (startReg == endReg)
377 sprintf(text, "V%s1.%s {d%i}, [r%i]%s", name, GetSizeString(sz), startReg, Rn, suffix);
378 else
379 sprintf(text, "V%s1.%s {d%i-d%i}, [r%i]%s", name, GetSizeString(sz), startReg, endReg, Rn, suffix);
380 }
381 } else {
382 int reg = Vd;
383 int sz = (op >> 10) & 3;
384 int index_align = (op >> 4) & 0xF;
385 int lane = 0;
386 switch (sz) {
387 case 0: lane = index_align >> 1; break;
388 case 1: lane = index_align >> 2; break;
389 case 2: lane = index_align >> 3; break;
390 }
391 if (Rm != 15) {
392 sprintf(text, "V%s1 d[0] - regsum", name);
393 } else {
394 sprintf(text, "V%s1.%s {d%i[%i]}, [r%i]%s", name, sz == 2 ? GetSizeString(sz) : GetISizeString(sz), reg, lane, Rn, suffix);
395 }
396 }
397
398 return true;
399 }
400
DisasmArithNeon(uint32_t op,const char * opname,char * text,bool includeSuffix=true)401 static bool DisasmArithNeon(uint32_t op, const char *opname, char *text, bool includeSuffix = true) {
402 bool quad = ((op >> 6) & 1);
403 int size = (op >> 20) & 3;
404 int type = (op >> 8) & 0xF;
405 char r = quad ? 'q' : 'd';
406 const char *szname = GetISizeString(size);
407 if (type == 0xD || type == 0xF)
408 szname = "f32";
409
410 int Vd = GetVd(op, quad, true);
411 int Vn = GetVn(op, quad, true);
412 int Vm = GetVm(op, quad, true);
413 sprintf(text, "V%s%s%s %c%i, %c%i, %c%i", opname, includeSuffix ? "." : "", includeSuffix ? szname : "", r, Vd, r, Vn, r, Vm);
414 return true;
415 }
416
DisasmNeonImmVal(uint32_t op,char * text)417 static bool DisasmNeonImmVal(uint32_t op, char *text) {
418 using namespace ArmGen;
419 int opcode = (op >> 5) & 1;
420 int cmode = (op >> 8) & 0xF;
421 int imm = ((op >> 17) & 0x80) | ((op >> 12) & 0x70) | (op & 0xF);
422 int quad = (op >> 6) & 1;
423 const char *operation = "MOV";
424 const char *size = "(unk)";
425 char temp[256] = "(unk)";
426 switch (cmode) {
427 case VIMM___x___x:
428 case VIMM___x___x + 1:
429 sprintf(temp, "000000%02x_000000%02x", imm, imm);
430 size = ".i32";
431 break;
432 case VIMM__x___x_:
433 case VIMM__x___x_ + 1:
434 sprintf(temp, "0000%02x00_0000%02x00", imm, imm);
435 size = ".i32";
436 break;
437 case VIMM_x___x__:
438 case VIMM_x___x__ + 1:
439 sprintf(temp, "00%02x0000_00%02x0000", imm, imm);
440 size = ".i32";
441 break;
442 case VIMMx___x___:
443 case VIMMx___x___ + 1:
444 sprintf(temp, "%02x000000_%02x000000", imm, imm);
445 size = ".i32";
446 break;
447
448 // TODO: More
449
450 case VIMMf000f000:
451 if (opcode == 0) {
452 // TODO: Do this properly
453 float f = 1337;
454 switch (imm) {
455 case 0: f = 0.0f; break;
456 case 0x78: f = 1.5; break;
457 case 0x70: f = 1.0; break;
458 case 0xF0: f = -1.0; break;
459 }
460 sprintf(temp, "%1.1f", f);
461 size = "";
462 break;
463 }
464 }
465 char c = quad ? 'q' : 'd';
466 sprintf(text, "V%s%s %c%i, %s", operation, size, c, GetVd(op, false, false), temp);
467 return true;
468 }
469
DisasmNeon2Op(uint32_t op,char * text)470 static bool DisasmNeon2Op(uint32_t op, char *text) {
471 const char *opname = "(unk2op)";
472
473 bool quad = (op >> 6) & 1;
474 bool quadD = quad;
475 bool doubleD = false;
476 // VNEG, VABS
477 if (op & (1 << 16))
478 opname = "NEG";
479
480 int opcode = (op >> 6) & 0xF;
481 int sz = (op >> 18) & 3;
482 const char *size = "f32";
483 switch (opcode) {
484 case 0xE:
485 opname = "NEG";
486 size = GetISizeString(sz);
487 break;
488 case 0xD:
489 opname = "ABS";
490 size = GetISizeString(sz);
491 break;
492 case 0x7:
493 opname = "MVN";
494 size = ""; // MVN surely has no "size"?
495 break;
496 case 0x8:
497 opname = "MOVN"; // narrow, not negate
498 size = GetISizeString(sz + 1);
499 quad = true;
500 quadD = false;
501 doubleD = true;
502 break;
503 case 0xC:
504 opname = "SHLL"; // widen and shift
505 size = GetISizeString(sz);
506 quad = false;
507 quadD = true;
508 doubleD = true;
509 break;
510 }
511
512 int Vd = GetVd(op, quadD, doubleD);
513 int Vm = GetVm(op, quad, false);
514 char cD = quadD ? 'q' : 'd';
515 char c = quad ? 'q' : 'd';
516 if (opcode == 0xC) {
517 sprintf(text, "V%s%s%s %c%i, %c%i, #%d", opname, strlen(size) ? "." : "", size, cD, Vd, c, Vm, 8 << sz);
518 } else {
519 sprintf(text, "V%s%s%s %c%i, %c%i", opname, strlen(size) ? "." : "", size, cD, Vd, c, Vm);
520 }
521 return true;
522 }
523
DisasmVdup(uint32_t op,char * text)524 static bool DisasmVdup(uint32_t op, char *text) {
525 bool quad = (op >> 6) & 1;
526 int imm4 = (op >> 16) & 0xF;
527 int Vd = GetVd(op, quad, false);
528 int Vm = GetVm(op, false, true);
529 char c = quad ? 'q' : 'd';
530 int index = 0;
531 int size = 0;
532 if (imm4 & 1) {
533 index = imm4 >> 1;
534 size = 0;
535 } else if (imm4 & 2) {
536 index = imm4 >> 2;
537 size = 1;
538 } else if (imm4 & 4) {
539 index = imm4 >> 3;
540 size = 2;
541 }
542
543 sprintf(text, "VDUP.%s %c%i, d%i[%i]", GetSizeString(size), c, Vd, Vm, index);
544 return true;
545 }
546
DisasmNeonVecScalar(uint32_t op,char * text)547 static bool DisasmNeonVecScalar(uint32_t op, char *text) {
548 bool quad = (op >> 24) & 1;
549
550 int Vd = GetVd(op, quad, true);
551 int Vn = GetVn(op, quad, true);
552 int Vm = GetVm(op, false, false);
553
554 char c = quad ? 'q' : 'd';
555
556 const char *opname = "(unk)";
557 const char *size = "f32";
558
559 switch ((op >> 4) & 0xFF) {
560 case 0x94:
561 case 0x9C:
562 opname = "VMUL";
563 break;
564 case 0x14:
565 case 0x1C:
566 case 0x1E: // Hmmm.. Should look this up :P
567 opname = "VMLA";
568 break;
569 }
570
571 int part = Vm & 1;
572 int reg = Vm >> 1;
573 sprintf(text, "%s.%s %c%i, %c%i, d%i[%i]", opname, size, c, Vd, c, Vn, reg, part);
574 return true;
575 }
576
// Decodes the 7-bit "size and shift" immediate of a NEON shift instruction.
//
// The highest set bit among bits 6..3 of imm7 selects the element width
// (64, 32, 16 or 8); the bits below it hold the shift amount.
//
// U, sign: choose the type prefix: 'u' if U, else 's' if sign, else 'i'.
// inverse: when set, the stored value is (width - shift) and is converted back.
// incSize: when set, the reported type is one step wider than the element
//          width (it stays at 64 for 64-bit elements).
// shift:   receives the decoded shift amount, or -1 for an invalid encoding
//          (none of bits 6..3 set).
// Returns the type name, e.g. "u16"; "i32" for an invalid encoding.
const char *DecodeSizeAndShiftImm7(bool U, bool sign, bool inverse, int imm7, bool incSize, int *shift) {
	// Indexed by [log2(width / 8)][0 = unsigned, 1 = signed, 2 = plain integer].
	static const char *const kTypeNames[4][3] = {
		{ "u8", "s8", "i8" },
		{ "u16", "s16", "i16" },
		{ "u32", "s32", "i32" },
		{ "u64", "s64", "i64" },
	};

	int level;
	if (imm7 & 64) {
		level = 3;
	} else if (imm7 & 32) {
		level = 2;
	} else if (imm7 & 16) {
		level = 1;
	} else if (imm7 & 8) {
		level = 0;
	} else {
		// Invalid encoding
		*shift = -1;
		return "i32";
	}

	const int width = 8 << level;
	const int stored = imm7 & (width - 1);
	*shift = inverse ? width - stored : stored;

	if (incSize && level < 3)
		++level;

	const int kind = U ? 0 : (sign ? 1 : 2);
	return kTypeNames[level][kind];
}
619
620 // What a horror show!
DisasmNeon2RegShiftImm(uint32_t op,char * text)621 static bool DisasmNeon2RegShiftImm(uint32_t op, char *text) {
622 bool U = (op >> 24) & 1;
623 bool quadDest = false;
624 bool quadSrc = false;
625 bool incSize = false;
626
627 const char *opname = "(unk)";
628 int opcode = (op >> 8) & 0xF;
629 bool inverse = false;
630 bool sign = false;
631 switch (opcode) {
632 case 0x5: opname = "VSHL"; quadDest = quadSrc = ((op >> 6) & 1); break;
633 case 0xA: opname = "VSHLL"; quadDest = true; quadSrc = false; sign = true; break;
634 case 0x0: opname = "VSHR"; sign = true; quadDest = quadSrc = ((op >> 6) & 1); inverse = true; break;
635 case 0x8: opname = "VSHRN"; quadDest = false; quadSrc = true; inverse = true; incSize = true; break;
636 default:
637 // Immediate value ops!
638 return DisasmNeonImmVal(op, text);
639 }
640
641 int Vd = GetVd(op, quadDest, true);
642 int Vm = GetVm(op, quadSrc, true);
643
644 char c1 = quadDest ? 'q' : 'd';
645 char c2 = quadSrc ? 'q' : 'd';
646 int imm7 = ((op >> 16) & 0x3f) | ((op & 0x80) >> 1);
647 int shift;
648
649 const char *size;
650 if (opcode == 0xA) {
651 if (imm7 & 0x40) {
652 sprintf(text, "neon2regshiftimm undefined %08x", op);
653 return true;
654 }
655 }
656
657 size = DecodeSizeAndShiftImm7(U, sign, inverse, imm7, incSize, &shift);
658
659 if (opcode == 0xA && shift == 0) {
660 opname = "VMOVL";
661 sprintf(text, "%s.%s %c%i, %c%i", opname, size, c1, Vd, c2, Vm);
662 } else {
663 sprintf(text, "%s.%s %c%i, %c%i, #%i", opname, size, c1, Vd, c2, Vm, shift);
664 }
665 return true;
666 }
667
DisasmNeonF2F3(uint32_t op,char * text)668 static bool DisasmNeonF2F3(uint32_t op, char *text) {
669 sprintf(text, "NEON F2");
670 if (((op >> 20) & 0xFF8) == 0xF20 || ((op >> 20) & 0xFF8) == 0xF30) {
671 const char *opname = "(unk)";
672 bool includeSuffix = true;
673 int temp;
674 switch ((op >> 20) & 0xFF) {
675 case 0x20:
676 temp = (op >> 4) & 0xF1;
677 switch (temp) {
678 case 0x11:
679 opname = "AND";
680 includeSuffix = false;
681 break;
682 case 0xd1:
683 opname = "MLA";
684 break;
685 case 0x80:
686 case 0xd0:
687 opname = "ADD";
688 break;
689 case 0xF0:
690 opname = "MAX";
691 break;
692 }
693 return DisasmArithNeon(op, opname, text, includeSuffix);
694 case 0x22:
695 case 0x24:
696 temp = (op >> 4) & 0xF1;
697 switch (temp) {
698 case 0xF0:
699 opname = "MIN";
700 break;
701 case 0x11:
702 opname = "ORR";
703 includeSuffix = false;
704 break;
705 case 0x80:
706 case 0xd0:
707 opname = "ADD";
708 break;
709 case 0xd1:
710 opname = "MLS";
711 break;
712 default:
713 opname = "???";
714 break;
715 }
716 return DisasmArithNeon(op, opname, text, includeSuffix);
717 case 0x31:
718 if (op & 0x100)
719 opname = "MLS";
720 else
721 opname = "SUB";
722 return DisasmArithNeon(op, opname, text);
723 case 0x30:
724 case 0x34:
725 temp = (op >> 4) & 0xF1;
726 switch (temp) {
727 case 0x11:
728 opname = "EOR";
729 includeSuffix = false;
730 break;
731 case 0xd0:
732 opname = "PADD";
733 break;
734 default:
735 opname = "MUL";
736 }
737 return DisasmArithNeon(op, opname, text, includeSuffix);
738 }
739 } else if ((op & 0xFE800010) == 0xF2800010) {
740 // Two regs and a shift amount
741 return DisasmNeon2RegShiftImm(op, text);
742 } else if ((op >> 20) == 0xF3E || (op >> 20) == 0xF2E || (op >> 20) == 0xF3A || (op >> 20) == 0xF2A) {
743 return DisasmNeonVecScalar(op, text);
744 } else if ((op >> 20) == 0xF3B && ((op >> 4) & 1) == 0) {
745 return DisasmNeon2Op(op, text);
746 } else if ((op >> 20) == 0xF3F) {
747 return DisasmVdup(op, text);
748 }
749 return true;
750 }
751
DisasmNeon(uint32_t op,char * text)752 static bool DisasmNeon(uint32_t op, char *text) {
753 switch (op >> 24) {
754 case 0xF4:
755 return DisasmNeonLDST(op, text);
756 case 0xF2:
757 case 0xF3:
758 return DisasmNeonF2F3(op, text);
759 }
760 return false;
761 }
762
ArmAnalyzeLoadStore(uint32_t addr,uint32_t op,ArmLSInstructionInfo * info)763 bool ArmAnalyzeLoadStore(uint32_t addr, uint32_t op, ArmLSInstructionInfo *info) {
764 *info = {};
765 info->instructionSize = 4;
766
767 // TODO
768
769 return false;
770 }
771
772
/* Integer aliases used by the original disarm code below; all three are
 * plain unsigned int. */
typedef unsigned int word;
typedef unsigned int address;
typedef unsigned int addrdiff;
/* Reinterpret a pointer as a pointer to |word|. */
#define W(x) ((word*)(x))

/* declstruct(X): declare struct X together with the typedefs sX (the struct
 * itself) and pX (a pointer to it). */
#define declstruct(name) typedef struct name s##name, * p##name
/* defstruct(X): introduces the definition of struct X. */
#define defstruct(name) struct name
/* defequiv(New,Old): make sNew / pNew aliases for struct Old and a pointer
 * to it. */
#define defequiv(new,old) typedef struct old s##new, * p##new

declstruct(DisOptions);
declstruct(Instruction);
784
/* What kind of thing, if anything, an instruction's address operand is
 * expected to point at. Stored in Instruction::target_type. */
typedef enum {
	target_None, /* instruction doesn't refer to an address */
	target_Data, /* instruction refers to address of data */
	target_FloatS, /* instruction refers to address of single-float */
	target_FloatD, /* instruction refers to address of double-float */
	target_FloatE, /* blah blah extended-float */
	target_FloatP, /* blah blah packed decimal float */
	target_Code, /* instruction refers to address of code */
	target_Unknown /* instruction refers to address of *something* */
} eTargetType;
795
/* Result of disassembling one instruction: the text plus some facts about
 * the instruction. instr_disassemble returns a pointer to one of these. */
defstruct(Instruction) {
	char text[128]; /* the disassembled instruction */
	int undefined; /* non-0 iff it's an undefined instr */
	int badbits; /* non-0 iff something reserved has the wrong value */
	int oddbits; /* non-0 iff something unspecified isn't 0 */
	int is_SWI; /* non-0 iff it's a SWI */
	word swinum; /* only set for SWIs */
	address target; /* address instr refers to */
	eTargetType target_type; /* and what we expect to be there */
	int offset; /* offset from register in LDR or STR or similar */
	char * addrstart; /* start of address part of instruction, or 0 */
};
808
/* Bit flags for DisOptions::flags. */
#define disopt_SWInames 1 /* use names, not &nnnn */
#define disopt_CommaSpace 2 /* put spaces after commas */
#define disopt_FIXS 4 /* bogus FIX syntax for ObjAsm */
#define disopt_ReverseBytes 8 /* byte-reverse words first */

/* Cosmetic preferences passed to instr_disassemble. */
defstruct(DisOptions) {
	word flags; /* combination of the disopt_* bits above */
	const char * * regnames; /* pointer to 16 |char *|s: register names */
};
818
819 static pInstruction instr_disassemble(word, address, pDisOptions);
820
821 #define INSTR_grok_v4
822
823 /* Preprocessor defs you can give to affect this stuff:
824 * INSTR_grok_v4 understand ARMv4 instructions (halfword & sign-ext LDR/STR)
825 * INSTR_new_msr be prepared to produce new MSR syntax if asked
826 * The first of these is supported; the second isn't.
827 */
828
/* Some important single-bit fields. */
/* Several names share a bit position because the meaning of a bit depends on
 * the instruction class, as noted per line. */

#define Sbit (1<<20) /* set condition codes (data processing) */
#define Lbit (1<<20) /* load, not store (data transfer) */
#define Wbit (1<<21) /* writeback (data transfer) */
#define Bbit (1<<22) /* single byte (data transfer, SWP) */
#define Ubit (1<<23) /* up, not down (data transfer) */
#define Pbit (1<<24) /* pre-, not post-, indexed (data transfer) */
#define Ibit (1<<25) /* non-immediate (data transfer) */
/* immediate (data processing) */
#define SPSRbit (1<<22) /* SPSR, not CPSR (MRS, MSR) */

/* Some important 4-bit fields. */
/* RD/RN/CP shift a 4-bit value into its field; the *bits macros are the
 * corresponding field masks; the *_is macros test the field of a variable
 * named |instr|, which must be in scope where they are used. */

#define RD(x) ((x)<<12) /* destination register */
#define RN(x) ((x)<<16) /* operand/base register */
#define CP(x) ((x)<<8) /* coprocessor number */
#define RDbits RD(15)
#define RNbits RN(15)
#define CPbits CP(15)
#define RD_is(x) ((instr&RDbits)==RD(x))
#define RN_is(x) ((instr&RNbits)==RN(x))
#define CP_is(x) ((instr&CPbits)==CP(x))
852
/* A slightly efficient way of telling whether two bits are the same
 * or not. It's assumed that a<b.
 * Evaluates to non-zero iff bits a and b of |instr| differ; a variable
 * named |instr| must be in scope. The arguments are parenthesised so that
 * expressions such as BitsDiffer(n+1,3) expand correctly.
 */
#define BitsDiffer(a,b) ((instr^(instr>>((b)-(a))))&(1<<(a)))
857
858 /* op = append(op,ip) === op += sprintf(op,"%s",ip),
859 * except that it's faster.
860 */
/* Copies the characters of |ip| (without its terminating NUL) to |op| and
 * returns the position just past the last character written. The output is
 * not NUL-terminated here.
 */
static char * append(char * op, const char *ip) {
	for (; *ip != 0; ++ip, ++op)
		*op = *ip;
	return op;
}
866
867 /* op = hex8(op,w) === op += sprintf(op,"&%08lX",w), but faster.
868 */
/* Writes '&' followed by the eight upper-case hex digits of |w|, most
 * significant first, and returns the position after the last digit
 * (op + 9). No NUL terminator is written.
 */
static char * hex8(char * op, word w) {
	static const char digits[] = "0123456789ABCDEF";
	op[0] = '&';
	for (int nibble = 7; nibble >= 0; --nibble)
		op[8 - nibble] = digits[(w >> (nibble * 4)) & 15];
	return op + 9;
}
875
876 /* op = reg(op,'x',n) === op += sprintf(op,"x%lu",n&15).
877 */
reg(char * op,char c,word n)878 static char * reg(char * op, char c, word n) {
879 *op++=c;
880 n&=15;
881 if (n>=10) { *op++='1'; n+='0'-10; } else n+='0';
882 *op++=(char)n;
883 return op;
884 }
885
886 /* op = num(op,n) appends n in decimal or &n in hex
887 * depending on whether n<100. It's assumed that n>=0.
888 */
num(char * op,word w)889 static char * num(char * op, word w) {
890 if (w>=100) {
891 int i;
892 word t;
893 *op++='&';
894 for (i=28; (t=(w>>i)&15)==0; i-=4) ;
895 for (; i>=0; i-=4) *op++ = "0123456789ABCDEF"[(w>>i)&15];
896 }
897 else {
898 /* divide by 10. You can prove this works by exhaustive search. :-) */
899 word t = w-(w>>2); t=(t+(t>>4)) >> 3;
900 { word u = w-10*t;
901 if (u==10) { u=0; ++t; }
902 if (t) *op++=(char)(t+'0');
903 *op++=(char)(u+'0');
904 }
905 }
906 return op;
907 }
908
909 /* instr_disassemble
910 * Disassemble a single instruction.
911 *
912 * args: instr a single ARM instruction
913 * addr the address it's presumed to have come from
914 * opts cosmetic preferences for our output
915 *
916 * reqs: opts must be filled in right. In particular, it must contain
917 * a list of register names.
918 *
919 * return: a pointer to a structure containing the disassembled instruction
920 * and some other information about it.
921 *
922 * This is basically a replacement for the SWI Debugger_Disassemble,
923 * but it has the following advantages:
924 *
925 * + it's 3-4 times as fast
926 * + it's better at identifying undefined instructions,
927 * and instructions not invariant under { disassemble; ObjAsm; }
928 * + it provides some other useful information as well
929 * + its output syntax is the same as ObjAsm's input syntax
930 * (where possible)
931 * + it doesn't disassemble FIX incorrectly unless you ask it to
932 * + it's more configurable in some respects
933 *
934 * It also has the following disadvantages:
935 *
936 * - it increases the size of ObjDism
937 * - it doesn't provide so many `helpful' usage comments etc
938 * - it's less configurable in some respects
939 * - it doesn't (yet) know about ARMv4 instructions
940 *
941 * This function proceeds in two phases. The first is very simple:
942 * it works out what sort of instruction it's looking at and sets up
943 * three strings:
944 * - |mnemonic| (the basic mnemonic: LDR or whatever)
945 * - |flagchars| (things to go after the cond code: B or whatever)
946 * - |format| (a string describing how to display the instruction)
947 * The second phase consists of interpreting |format|, character by
948 * character. Some characters (e.g., letters) just mean `append this
949 * character to the output string'; some mean more complicated things
950 * like `append the name of the register whose number is in bits 12..15'
951 * or, worse, `append a description of the <op2> field'.
952 *
953 * I'm afraid the magic characters in |format| are rather arbitrary.
954 * One criterion in choosing them was that they should form a contiguous
955 * subrange of the character set! Sorry.
956 *
957 * Things I still want to do:
958 *
959 * - more configurability?
960 * - make it much faster, if possible
961 * - make it much smaller, if possible
962 *
963 * Format characters:
964 *
965 * \01..\05 copro register number from nybble (\001 == nybble 0, sorry)
966 * $ SWI number
967 * % register set for LDM/STM (takes note of bit 22 for ^)
968 * & address for B/BL
969 * ' ! if bit 21 set, else nothing (mnemonic: half a !)
970 * ( #regs for SFM (bits 22,15 = fpn, assumed already tweaked)
971 * ) copro opcode in bits 20..23 (for CDP)
972 * * op2 (takes note of bottom 12 bits, and bit 25)
973 * + FP register or immediate value: bits 0..3
974 * , comma or comma-space
975 * - copro extra info in bits 5..7 preceded by , omitted if 0
976 * . address in ADR instruction
977 * / address for LDR/STR (takes note of bit 23 & reg in bits 16..19)
978 * 0..4 register number from nybble
979 * 5..9 FP register number from nybble
980 * : copro opcode in bits 21..23 (for MRC/MCR)
981 * ; copro number in bits 8..11
982 *
983 * ADDED BY HRYDGARD:
984 * ^ 16-bit immediate
985 * > 5-bit immediate at 11..7 (lsb)
986 * < 5-bit immediate at 20..16 with +1 or -lsb if bit 6 set
987 *
988 * NB that / takes note of bit 22, too, and does its own ! when
989 * appropriate.
990 *
991 * On typical instructions this seems to take about 100us on my ARM6;
992 * that's about 3000 cycles, which seems grossly excessive. I'm not
993 * sure where all those cycles are being spent. Perhaps it's possible
994 * to make it much, much faster. Most of this time is spent on phase 2.
995 */
996
997 extern pInstruction
instr_disassemble(word instr,address addr,pDisOptions opts)998 instr_disassemble(word instr, address addr, pDisOptions opts) {
999 static char flagchars[4];
1000 static sInstruction result;
1001 const char * mnemonic = 0;
1002 char * flagp = flagchars;
1003 const char * format = 0;
1004 word fpn;
1005 eTargetType poss_tt = target_None;
1006 #ifdef INSTR_grok_v4
1007 int is_v4 = 0;
1008 #endif
1009
1010 /* PHASE 0. Set up default values for |result|. */
1011
1012 if (opts->flags & disopt_ReverseBytes) {
1013 instr = ((instr & 0xFF00FF00) >> 8) | ((instr & 0x00FF00FF) << 8);
1014 instr = (instr >> 16) | (instr << 16);
1015 }
1016
1017 fpn = ((instr>>15)&1) + ((instr>>21)&2);
1018
1019 result.undefined = 0;
1020 result.badbits = 0;
1021 result.oddbits = 0;
1022 result.is_SWI = 0;
1023 result.target_type = target_None;
1024 result.offset = 0x80000000;
1025 result.addrstart = 0;
1026
1027 /* PHASE 1. Decode and classify instruction. */
1028
1029 switch ((instr>>24)&15) {
1030 case 0:
1031 /* multiply or data processing, or LDRH etc */
1032 if ((instr&(15<<4))!=(9<<4)) goto lMaybeLDRHetc;
1033 /* multiply */
1034 if (instr&(1<<23)) {
1035 /* int multiply */
1036 mnemonic = "UMULL\0UMLAL\0SMULL\0SMLAL" + 6*((instr>>21)&3);
1037 format = "3,4,0,2";
1038 }
1039 else {
1040 if (instr&(1<<22)) goto lUndefined; /* "class C" */
1041 /* short multiply */
1042 if (instr&(1<<21)) {
1043 mnemonic = "MLA";
1044 format = "4,0,2,3";
1045 }
1046 else {
1047 mnemonic = "MUL";
1048 format = "4,0,2";
1049 }
1050 }
1051 if (instr&Sbit) *flagp++='S';
1052 break;
1053 case 1:
1054 if ((instr & 0x0FFFFFF0) == ((18 << 20) | (0xFFF << 8) | (1 << 4))) {
1055 mnemonic = "B";
1056 format = "0";
1057 break;
1058 } else if ((instr & 0x0FFFFFF0) == 0x012FFF30) {
1059 mnemonic = "BL";
1060 format = "0";
1061 break;
1062 } else if ((instr & 0x0FF000F0) == 0x01200070) {
1063 int imm = ((instr & 0xFFF00) >> 4) | (instr & 0xF);
1064 snprintf(result.text, sizeof(result.text), "BKPT %d", imm);
1065 result.undefined = 0;
1066 return &result;
1067 }
1068 case 3:
1069 if (instr >> 24 == 0xF3) {
1070 if (!DisasmNeon(instr, result.text)) {
1071 goto lUndefined;
1072 }
1073 result.undefined = 0;
1074 return &result;
1075 }
1076 /* SWP or MRS/MSR or data processing */
1077 // hrydgard addition: MOVW/MOVT
1078 if ((instr & 0x0FF00000) == 0x03000000) {
1079 mnemonic = "MOVW";
1080 format = "3,^";
1081 break;
1082 }
1083 else if ((instr & 0x0FF00000) == 0x03400000) {
1084 mnemonic = "MOVT";
1085 format = "3,^";
1086 break;
1087 }
1088 else if ((instr&0x02B00FF0)==0x00000090) {
1089 /* SWP */
1090 mnemonic = "SWP";
1091 format = "3,0,[4]";
1092 if (instr&Bbit) *flagp++='B';
1093 break;
1094 }
1095 else if ((instr&0x02BF0FFF)==0x000F0000) {
1096 /* MRS */
1097 mnemonic = "MRS";
1098 format = (instr&SPSRbit) ? "3,SPSR" : "3,CPSR";
1099 break;
1100 }
1101 else if ((instr&0x02BFFFF0)==0x0029F000) {
1102 /* MSR psr<P=0/1...>,Rs */
1103 mnemonic = "MSR";
1104 format = (instr&SPSRbit) ? "SPSR,0" : "CPSR,0";
1105 break;
1106 }
1107 else if ((instr&0x00BFF000)==0x0028F000) {
1108 /* MSR {C,S}PSR_flag,op2 */
1109 mnemonic = "MSR";
1110 format = (instr&SPSRbit) ? "SPSR_flg,*" : "CPSR_flg,*";
1111 if (!(instr&Ibit) && (instr&(15<<4)))
1112 #ifdef INSTR_grok_v4
1113 goto lMaybeLDRHetc;
1114 #else
1115 goto lUndefined; /* shifted reg in MSR illegal */
1116 #endif
1117 break;
1118 }
1119 /* fall through here */
1120 lMaybeLDRHetc:
1121 #ifdef INSTR_grok_v4
1122 if ((instr&(14<<24))==0
1123 && ((instr&(9<<4))==(9<<4))) {
1124 /* Might well be LDRH or similar. */
1125 if ((instr&(Wbit+Pbit))==Wbit) goto lUndefined; /* "class E", case 1 */
1126 if ((instr&(Lbit+(1<<6)))==(1<<6)) goto lUndefined; /* STRSH etc */
1127 mnemonic = "STR\0LDR" + ((instr&Lbit) >> 18);
1128 if (instr&(1<<6)) *flagp++='S';
1129 *flagp++ = (instr&(1<<5)) ? 'B' : 'H';
1130 format = "3,/";
1131 /* aargh: */
1132 if (!(instr&(1<<22))) instr |= Ibit;
1133 is_v4=1;
1134 break;
1135 }
1136 #endif
1137 case 2:
1138 if (instr >> 24 == 0xF2) {
1139 if (!DisasmNeon(instr, result.text)) {
1140 goto lUndefined;
1141 }
1142 result.undefined = 0;
1143 return &result;
1144 }
1145 /* data processing */
1146 { word op21 = instr&(15<<21);
1147 if ((op21==(2<<21) || (op21==(4<<21))) /* ADD or SUB */
1148 && ((instr&(RNbits+Ibit+Sbit))==RN(15)+Ibit) /* imm, no S */
1149 /*&& ((instr&(30<<7))==0 || (instr&3))*/) { /* normal rot */
1150 /* ADD ...,pc,#... or SUB ...,pc,#...: turn into ADR */
1151 mnemonic = "ADR";
1152 format = "3,.";
1153 if ((instr&(30<<7))!=0 && !(instr&3)) result.oddbits=1;
1154 break;
1155 }
1156 mnemonic = "AND\0EOR\0SUB\0RSB\0ADD\0ADC\0SBC\0RSC\0"
1157 "TST\0TEQ\0CMP\0CMN\0ORR\0MOV\0BIC\0MVN" /* \0 */
1158 + (op21 >> 19);
1159 /* Rd needed for all but TST,TEQ,CMP,CMN (8..11) */
1160 /* Rn needed for all but MOV,MVN (13,15) */
1161 if (op21 < ( 8<<21)) format = "3,4,*";
1162 else if (op21 < (12<<21)) {
1163 format = "4,*";
1164 if (instr&RDbits) {
1165 if ((instr&Sbit) && RD_is(15))
1166 *flagp++='P';
1167 else result.oddbits=1;
1168 }
1169 if (!(instr&Sbit)) goto lUndefined; /* CMP etc, no S bit */
1170 }
1171 else if (op21 & (1<<21)) {
1172 format = "3,*";
1173 if (instr&RNbits) result.oddbits=1;
1174 }
1175 else format = "3,4,*";
1176 if (instr&Sbit && (op21<(8<<21) || op21>=(12<<21))) *flagp++='S';
1177 }
1178 break;
1179 case 4:
1180 if ((instr >> 24) == 0xF4) {
1181 if (!DisasmNeon(instr, result.text)) {
1182 goto lUndefined;
1183 }
1184 result.undefined = 0;
1185 return &result;
1186 }
1187 case 5:
1188 case 6:
1189 case 7:
1190 /* STR/LDR/BFI/BFC/UBFX/SBFX or undefined */
1191 if ((instr&Ibit) && (instr&(1<<4))) {
1192 switch ((instr >> 21) & 7) {
1193 case 5:
1194 case 7:
1195 /* SBFX/UBFX */
1196 if (((instr>>4) & 7) != 5) {
1197 goto lUndefined;
1198 }
1199 mnemonic = (instr & (1 << 22)) ? "UBFX" : "SBFX";
1200 format = "3,0,>,<";
1201 break;
1202 case 6:
1203 /* BFI/BFC */
1204 if (((instr>>4) & 7) != 1) {
1205 goto lUndefined;
1206 }
1207 if ((instr & 15) == 15) {
1208 mnemonic = "BFC";
1209 format = "3,>,<";
1210 } else {
1211 mnemonic = "BFI";
1212 format = "3,0,>,<";
1213 }
1214 break;
1215 default:
1216 goto lUndefined; /* "class A" */
1217 }
1218 } else {
1219 mnemonic = "STR\0LDR" + ((instr&Lbit) >> 18);
1220 format = "3,/";
1221 if (instr&Bbit) *flagp++='B';
1222 if ((instr&(Wbit+Pbit))==Wbit) *flagp++='T';
1223 poss_tt = target_Data;
1224 }
1225 break;
1226 case 8:
1227 case 9:
1228 /* STM/LDM */
1229 mnemonic = "STM\0LDM" + ((instr&Lbit) >> 18);
1230 if (RN_is(13)) {
1231 /* r13, so treat as stack */
1232 word x = (instr&(3<<23)) >> 22;
1233 if (instr&Lbit) x^=6;
1234 { const char * foo = "EDEAFDFA"+x;
1235 *flagp++ = *foo++;
1236 *flagp++ = *foo;
1237 }
1238 }
1239 else {
1240 /* not r13, so don't treat as stack */
1241 *flagp++ = (instr&Ubit) ? 'I' : 'D';
1242 *flagp++ = (instr&Pbit) ? 'B' : 'A';
1243 }
1244 format = "4',%";
1245 break;
1246 case 10:
1247 case 11:
1248 /* B or BL */
1249 mnemonic = "B\0BL"+((instr&(1<<24))>>23);
1250 format = "&";
1251 break;
1252 case 12:
1253 case 13:
1254 case 14: // FPU
1255 {
1256 if (!DisasmVFP(instr, result.text)) {
1257 goto lUndefined;
1258 }
1259 result.undefined = 0;
1260 return &result;
1261 }
1262 break;
1263 case 15:
1264 /* SWI */
1265 mnemonic = "SWI";
1266 format = "$";
1267 break;
1268 /* Nasty hack: this is code that won't be reached in the normal
1269 * course of events, and after the last case of the switch is a
1270 * convenient place for it.
1271 */
1272 lUndefined:
1273 strcpy(result.text, "Undefined instruction");
1274 result.undefined = 1;
1275 return &result;
1276 }
1277 *flagp=0;
1278
1279 /* PHASE 2. Produce string. */
1280
1281 { char * op = result.text;
1282
1283 /* 2a. Mnemonic. */
1284
1285 op = append(op,mnemonic);
1286
1287 /* 2b. Condition code. */
1288
1289 { word cond = instr>>28;
1290 if (cond!=14) {
1291 const char * ip = "EQNECSCCMIPLVSVCHILSGELTGTLEALNV"+2*cond;
1292 *op++ = *ip++;
1293 *op++ = *ip;
1294 }
1295 }
1296
1297 /* 2c. Flags. */
1298
1299 { const char * ip = flagchars;
1300 while (*ip) *op++ = *ip++;
1301 }
1302
1303 /* 2d. A tab character. */
1304
1305 *op++ = '\t';
1306
1307 /* 2e. Other stuff, determined by format string. */
1308
1309 { const char * ip = format;
1310 char c;
1311
1312 const char * * regnames = opts->regnames;
1313 word oflags = opts->flags;
1314
1315 while ((c=*ip++) != 0) {
1316 switch(c) {
1317 case '^': // hrydgard addition
1318 {
1319 unsigned short imm16 = ((instr & 0x000F0000) >> 4) | (instr & 0x0FFF);
1320 op += sprintf(op, "%04x", imm16);
1321 }
1322 break;
1323 case '$':
1324 result.is_SWI = 1;
1325 result.swinum = instr&0x00FFFFFF;
1326 result.addrstart = op;
1327 op += sprintf(op, "&%X", result.swinum);
1328 break;
1329 case '%':
1330 *op++='{';
1331 { word w = instr&0xFFFF;
1332 int i=0;
1333 while (w) {
1334 int j;
1335 while (!(w&(1ul<<i))) ++i;
1336 for (j=i+1; w&(1ul<<j); ++j) ;
1337 --j;
1338 /* registers [i..j] */
1339 op = append(op, regnames[i]);
1340 if (j-i) {
1341 *op++ = (j-i>1) ? '-' : ',';
1342 op = append(op, regnames[j]);
1343 }
1344 i=j; w=(w>>(j+1))<<(j+1);
1345 if (w) *op++=',';
1346 }
1347 }
1348 *op++='}';
1349 if (instr&(1<<22)) *op++='^';
1350 break;
1351 case '&':
1352 { address target = (addr+8 + ((((int)instr)<<8)>>6)) & 0x03FFFFFC;
1353 result.addrstart = op;
1354 op = hex8(op, target);
1355 result.target_type = target_Code;
1356 result.target = target;
1357 }
1358 break;
1359 case '\'':
1360 lPling:
1361 if (instr&Wbit) *op++='!';
1362 break;
1363 case '(':
1364 *op++ = (char)('0'+fpn);
1365 break;
1366 case ')':
1367 { word w = (instr>>20)&15;
1368 if (w>=10) { *op++='1'; *op++=(char)('0'-10+w); }
1369 else *op++=(char)(w+'0');
1370 }
1371 break;
1372 case '*':
1373 case '.':
1374 if (instr&Ibit) {
1375 /* immediate constant */
1376 word imm8 = (instr&255);
1377 word rot = (instr>>7)&30;
1378 if (rot && !(imm8&3) && c=='*') {
1379 /* Funny immediate const. Guaranteed not '.', btw */
1380 *op++='#'; *op++='&';
1381 *op++="0123456789ABCDEF"[imm8>>4];
1382 *op++="0123456789ABCDEF"[imm8&15];
1383 *op++=',';
1384 op = num(op, rot);
1385 }
1386 else {
1387 if (rot != 0) {
1388 imm8 = (imm8>>rot) | (imm8<<(32-rot));
1389 }
1390 if (c=='*') {
1391 *op++='#';
1392 if (imm8>256 && ((imm8&(imm8-1))==0)) {
1393 /* only one bit set, and that later than bit 8.
1394 * Represent as 1<<... .
1395 */
1396 op = append(op,"1<<");
1397 { int n=0;
1398 while (!(imm8&15)) { n+=4; imm8=imm8>>4; }
1399 /* Now imm8 is 1, 2, 4 or 8. */
1400 n += (0x30002010 >> 4*(imm8-1))&15;
1401 op = num(op, n);
1402 }
1403 }
1404 else {
1405 if (((int)imm8)<0 && ((int)imm8)>-100) {
1406 *op++='-'; imm8=-(int)imm8;
1407 }
1408 op = num(op, imm8);
1409 }
1410 }
1411 else {
1412 address a = addr+8;
1413 if (instr&(1<<22)) a-=imm8; else a+=imm8;
1414 result.addrstart=op;
1415 op = hex8(op, a);
1416 result.target=a; result.target_type=target_Unknown;
1417 }
1418 }
1419 }
1420 else {
1421 /* rotated register */
1422 const char * rot = "LSL\0LSR\0ASR\0ROR" + ((instr&(3<<5)) >> 3);
1423 op = append(op, regnames[instr&15]);
1424 if (instr&(1<<4)) {
1425 /* register rotation */
1426 if (instr&(1<<7)) goto lUndefined;
1427 *op++=','; if (oflags&disopt_CommaSpace) *op++=' ';
1428 op = append(op,rot); *op++=' ';
1429 op = append(op,regnames[(instr&(15<<8))>>8]);
1430 }
1431 else {
1432 /* constant rotation */
1433 word n = instr&(31<<7);
1434 if (!n) {
1435 if (!(instr&(3<<5))) break;
1436 else if ((instr&(3<<5))==(3<<5)) {
1437 op = append(op, ",RRX");
1438 break;
1439 }
1440 else n=32<<7;
1441 }
1442 *op++ = ','; if (oflags&disopt_CommaSpace) *op++=' ';
1443 op = num(append(append(op,rot)," #"),n>>7);
1444 }
1445 }
1446 break;
1447 case '+':
1448 if (instr&(1<<3)) {
1449 word w = instr&7;
1450 *op++='#';
1451 if (w<6) *op++=(char)('0'+w);
1452 else op = append(op, w==6 ? "0.5" : "10");
1453 }
1454 else {
1455 *op++='f';
1456 *op++=(char)('0'+(instr&7));
1457 }
1458 break;
1459 case ',':
1460 *op++=',';
1461 if (oflags&disopt_CommaSpace) *op++=' ';
1462 break;
1463 case '-':
1464 { word w = instr&(7<<5);
1465 if (w) {
1466 *op++=',';
1467 if (oflags&disopt_CommaSpace) *op++=' ';
1468 *op++ = (char)('0'+(w>>5));
1469 }
1470 }
1471 break;
1472 case '/':
1473 result.addrstart = op;
1474 *op++='[';
1475 op = append(op, regnames[(instr&RNbits)>>16]);
1476 if (!(instr&Pbit)) *op++=']';
1477 *op++=','; if (oflags&disopt_CommaSpace) *op++=' ';
1478 /* For following, NB that bit 25 is always 0 for LDC, SFM etc */
1479 if (instr&Ibit) {
1480 /* shifted offset */
1481 if (!(instr&Ubit)) *op++='-';
1482 /* We're going to transfer to '*', basically. The stupid
1483 * thing is that the meaning of bit 25 is reversed there;
1484 * I don't know why the designers of the ARM did that.
1485 */
1486 instr ^= Ibit;
1487 if (instr&(1<<4)) {
1488 #ifdef INSTR_grok_v4
1489 if (is_v4 && !(instr&(15<<8))) {
1490 ip = (instr&Pbit) ? "0]" : "0";
1491 break;
1492 }
1493 #else
1494 goto lUndefined; /* LSL r3 forbidden */
1495 #endif
1496 }
1497 /* Need a ] iff it was pre-indexed; and an optional ! iff
1498 * it's pre-indexed *or* a copro instruction,
1499 * except that FPU operations don't need the !. Bletch.
1500 */
1501 if (instr&Pbit) ip="*]'";
1502 else if (instr&(1<<27)) {
1503 if (CP_is(1) || CP_is(2)) {
1504 if (!(instr&Wbit)) goto lUndefined;
1505 ip="*";
1506 }
1507 else ip="*'";
1508 }
1509 else ip="*";
1510 }
1511 else {
1512 /* immediate offset */
1513 word offset;
1514 if (instr&(1<<27)) {
1515 /* LDF or LFM or similar */
1516 offset = (instr&255)<<2;
1517 }
1518 #ifdef INSTR_grok_v4
1519 else if (is_v4) offset = (instr&15) + ((instr&(15<<8))>>4);
1520 #endif
1521 else {
1522 /* LDR or STR */
1523 offset = instr&0xFFF;
1524 }
1525 *op++='#';
1526 if (!(instr&Ubit)) {
1527 if (offset) *op++='-';
1528 else result.oddbits=1;
1529 result.offset = -(int)offset;
1530 }
1531 else result.offset = offset;
1532 op = num(op, offset);
1533 if (RN_is(15) && (instr&Pbit)) {
1534 /* Immediate, pre-indexed and PC-relative. Set target. */
1535 result.target_type = poss_tt;
1536 result.target = (instr&Ubit) ? addr+8 + offset
1537 : addr+8 - offset;
1538 if (!(instr&Wbit)) {
1539 /* no writeback, either. Use friendly form. */
1540 op = hex8(result.addrstart, result.target);
1541 break;
1542 }
1543 }
1544 if (instr&Pbit) { *op++=']'; goto lPling; }
1545 else if (instr&(1<<27)) {
1546 if (CP_is(1) || CP_is(2)) {
1547 if (!(instr&Wbit)) goto lUndefined;
1548 }
1549 else goto lPling;
1550 }
1551 }
1552 break;
1553 case '0': case '1': case '2': case '3': case '4':
1554 op = append(op, regnames[(instr>>(4*(c-'0')))&15]);
1555 break;
1556 case '5': case '6': case '7': case '8': case '9':
1557 *op++='f';
1558 *op++=(char)('0' + ((instr>>(4*(c-'5')))&7));
1559 break;
1560 case ':':
1561 *op++ = (char)('0' + ((instr>>21)&7));
1562 break;
1563 case ';':
1564 op = reg(op, 'p', instr>>8);
1565 break;
1566 case '>':
1567 *op++='#';
1568 op = num(op, (instr >> 7) & 31);
1569 break;
1570 case '<':
1571 *op++='#';
1572 if (instr & (1 << 6)) {
1573 op = num(op, ((instr >> 16) & 31) + 1);
1574 } else {
1575 op = num(op, ((instr >> 16) & 31) + 1 - ((instr >> 7) & 31));
1576 }
1577 break;
1578 default:
1579 if (c<=5)
1580 op = reg(op, 'c', instr >> (4*(c-1)));
1581 else *op++ = c;
1582 }
1583 }
1584 *op=0;
1585 }
1586 }
1587
1588 /* DONE! */
1589
1590 return &result;
1591 }
1592
/* Default register names, indexed by register number 0..15.
 * r12..r15 are printed under their conventional aliases.
 */
static const char * reg_names[16] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "r11",
  "ip",  "sp",  "lr",  "pc"
};
1597
1598 static sDisOptions options = {
1599 disopt_CommaSpace,
1600 reg_names
1601 };
1602
ArmRegName(int r)1603 const char *ArmRegName(int r) {
1604 return reg_names[r];
1605 }
1606
ArmDis(unsigned int addr,unsigned int w,char * output,int bufsize,bool includeWord)1607 void ArmDis(unsigned int addr, unsigned int w, char *output, int bufsize, bool includeWord) {
1608 pInstruction instr = instr_disassemble(w, addr, &options);
1609 char temp[256];
1610 if (includeWord) {
1611 snprintf(output, bufsize, "%08x\t%s", w, instr->text);
1612 } else {
1613 snprintf(output, bufsize, "%s", instr->text);
1614 }
1615 if (instr->undefined || instr->badbits || instr->oddbits) {
1616 if (instr->undefined) snprintf(output, bufsize, "%08x\t[undefined instr]", w);
1617 if (instr->badbits) snprintf(output, bufsize, "%08x\t[illegal bits]", w);
1618
1619 // HUH? LDR and STR gets this a lot
1620 // strcat(output, " ? (extra bits)");
1621 if (instr->oddbits) {
1622 snprintf(temp, sizeof(temp), " [unexpected bits %08x]", w);
1623 strcat(output, temp);
1624 }
1625 }
1626 // zap tabs
1627 while (*output) {
1628 if (*output == '\t')
1629 *output = ' ';
1630 output++;
1631 }
1632 }
1633
1634 #ifdef __clang__
1635 #pragma GCC diagnostic pop
1636 #endif
1637