#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "armv5te/emu.h"
#include "armv5te/mem.h"
#include "armv5te/cpu.h"
#include "armv5te/asmcode.h"
#include "armv5te/translate.h"
#include "armv5te/debug.h"
#include "armv5te/os/os.h"
11
12 extern void translation_enter() __asm__("translation_enter");
13 extern void translation_next() __asm__("translation_next");
14 extern void translation_next_bx() __asm__("translation_next_bx");
15 extern uintptr_t arm_shift_proc[2][4] __asm__("arm_shift_proc");
16 void **in_translation_rsp __asm__("in_translation_rsp");
17 void *in_translation_pc_ptr __asm__("in_translation_pc_ptr");
18
19 #define MAX_TRANSLATIONS 262144
20 struct translation translation_table[MAX_TRANSLATIONS];
21
22 static int next_index = 0;
23 uint8_t *insn_buffer = NULL;
24 uint8_t *insn_bufptr = NULL;
25 static uint8_t *jtbl_buffer[500000];
26 static uint8_t **jtbl_bufptr = jtbl_buffer;
27 static uint8_t *out;
28 static uint8_t **outj;
29
30 #define REG_ARG1 EDI
31 #define REG_ARG2 ESI
32
33 enum x86_reg { EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI };
34 enum x86_reg8 { AL, CL, DL, BL, AH, CH, DH, BH };
35 enum group1 { ADD, OR, ADC, SBB, AND, SUB, XOR, CMP };
36 enum group2 { ROL, ROR, RCL, RCR, SHL, SHR, SAL, SAR };
37 enum group3 { NOT = 2, NEG, MUL, IMUL, DIV, IDIV };
38
39 /* x86 conditional jump instructions */
40 enum { JO = 0x70, JNO, JB, JAE, JZ, JNZ, JBE, JA,
41 JS = 0x78, JNS, JPE, JPO, JL, JGE, JLE, JG };
42
emit_byte(uint8_t b)43 static inline void emit_byte(uint8_t b) { *out++ = b; }
emit_word(uint16_t w)44 static inline void emit_word(uint16_t w) { *(uint16_t *)out = w; out += 2; }
emit_dword(uint32_t dw)45 static inline void emit_dword(uint32_t dw) { *(uint32_t *)out = dw; out += 4; }
46
47 /*This is a hack:
48 * -regs not saved
49 * -stack not aligned */
emit_call_nosave(uintptr_t target)50 static inline void emit_call_nosave(uintptr_t target) {
51 emit_byte(0xE8);
52 int64_t diff = target - ((uintptr_t) out + 4);
53 if(diff > INT32_MAX || diff < INT32_MIN)
54 assert(false); //Distance doesn't fit into immediate
55
56 emit_dword(diff);
57 }
58
59 //The AMD64 ABI says that most regs have to be saved by the caller
emit_call(uintptr_t target)60 static inline void emit_call(uintptr_t target) {
61 //If you change the stack layout, change the usage of in_translation_rsp in translate_fix_pc below as well!
62
63 //The call instruction pushes 8 bytes on the stack, which would violate
64 //the 16 bytes stack alignment. Push an uneven count of regs to counter it.
65
66 //TODO: Verify that %rdi isn't that important to save (it's the first arg)
67 //emit_byte(0x57); // push %rdi
68 emit_byte(0x56); // push %rsi
69 emit_byte(0x52); // push %rdx
70 emit_byte(0x51); // push %rcx
71
72 emit_call_nosave(target);
73
74 emit_byte(0x59);
75 emit_byte(0x5a);
76 emit_byte(0x5e);
77 //emit_byte(0x5f);
78 }
79
emit_jump(uintptr_t target)80 static inline void emit_jump(uintptr_t target) {
81 emit_byte(0xE9);
82 int64_t diff = target - ((uintptr_t) out + 4);
83 if(diff > INT32_MAX || diff < INT32_MIN)
84 assert(false);
85
86 emit_dword(diff);
87 }
88
89 // ----------------------------------------------------------------------
90
emit_modrm_x86reg(int r,int x86reg)91 static inline void emit_modrm_x86reg(int r, int x86reg) {
92 emit_byte(0xC0 | r << 3 | x86reg);
93 }
94
emit_modrm_base_offset(int r,int basex86reg,int offset)95 static void emit_modrm_base_offset(int r, int basex86reg, int offset) {
96 if (offset == 0) {
97 emit_byte(basex86reg | r << 3);
98 } else if (offset >= -0x80 && offset < 0x80) {
99 emit_byte(0x40 | basex86reg | r << 3);
100 emit_byte(offset);
101 } else {
102 emit_byte(0x80 | basex86reg | r << 3);
103 emit_dword(offset);
104 }
105 }
106
emit_modrm_armreg(int r,int armreg)107 static void emit_modrm_armreg(int r, int armreg) {
108 if (armreg < 0 || armreg > 14) error("translation f***up");
109 emit_modrm_base_offset(r, EBX, (uint8_t *)&arm.reg[armreg] - (uint8_t *)&arm);
110 }
111
112 // ----------------------------------------------------------------------
113
emit_mov_x86reg_immediate(int x86reg,int imm)114 static void emit_mov_x86reg_immediate(int x86reg, int imm) {
115 emit_byte(0xB8 | x86reg);
116 emit_dword(imm);
117 }
118
emit_alu_x86reg_immediate(int aluop,int x86reg,int imm)119 static void emit_alu_x86reg_immediate(int aluop, int x86reg, int imm) {
120 if (imm >= -0x80 && imm < 0x80) {
121 emit_byte(0x83);
122 emit_modrm_x86reg(aluop, x86reg);
123 emit_byte(imm);
124 } else if (x86reg == EAX) {
125 emit_byte(0x05 | aluop << 3);
126 emit_dword(imm);
127 } else {
128 emit_byte(0x81);
129 emit_modrm_x86reg(aluop, x86reg);
130 emit_dword(imm);
131 }
132 }
133
emit_mov_armreg_immediate(int armreg,int imm)134 static void emit_mov_armreg_immediate(int armreg, int imm) {
135 emit_byte(0xC7);
136 emit_modrm_armreg(0, armreg);
137 emit_dword(imm);
138 }
139
emit_alu_armreg_immediate(int aluop,int armreg,int imm)140 static void emit_alu_armreg_immediate(int aluop, int armreg, int imm) {
141 if (imm >= -0x80 && imm < 0x80) {
142 emit_byte(0x83);
143 emit_modrm_armreg(aluop, armreg);
144 emit_byte(imm);
145 } else {
146 emit_byte(0x81);
147 emit_modrm_armreg(aluop, armreg);
148 emit_dword(imm);
149 }
150 }
151
emit_mov_x86reg_x86reg(int dest,int src)152 static inline void emit_mov_x86reg_x86reg(int dest, int src) {
153 emit_byte(0x8B);
154 emit_modrm_x86reg(dest, src);
155 }
156
emit_alu_x86reg_x86reg(int aluop,int dest,int src)157 static inline void emit_alu_x86reg_x86reg(int aluop, int dest, int src) {
158 emit_byte(0x03 | aluop << 3);
159 emit_modrm_x86reg(dest, src);
160 }
161
emit_mov_x86reg_armreg(int x86reg,int armreg)162 static inline void emit_mov_x86reg_armreg(int x86reg, int armreg) {
163 emit_byte(0x8B);
164 emit_modrm_armreg(x86reg, armreg);
165 }
166
emit_alu_x86reg_armreg(int aluop,int x86reg,int armreg)167 static inline void emit_alu_x86reg_armreg(int aluop, int x86reg, int armreg) {
168 emit_byte(0x03 | aluop << 3);
169 emit_modrm_armreg(x86reg, armreg);
170 }
171
emit_mov_armreg_x86reg(int armreg,int x86reg)172 static inline void emit_mov_armreg_x86reg(int armreg, int x86reg) {
173 emit_byte(0x89);
174 emit_modrm_armreg(x86reg, armreg);
175 }
176
emit_alu_armreg_x86reg(int aluop,int armreg,int x86reg)177 static inline void emit_alu_armreg_x86reg(int aluop, int armreg, int x86reg) {
178 emit_byte(0x01 | aluop << 3);
179 emit_modrm_armreg(x86reg, armreg);
180 }
181
emit_unary_x86reg(int unop,int x86reg)182 static inline void emit_unary_x86reg(int unop, int x86reg) {
183 emit_byte(0xF7);
184 emit_modrm_x86reg(unop, x86reg);
185 }
186
emit_unary_armreg(int unop,int armreg)187 static inline void emit_unary_armreg(int unop, int armreg) {
188 emit_byte(0xF7);
189 emit_modrm_armreg(unop, armreg);
190 }
191
emit_test_armreg_immediate(int armreg,int imm)192 static inline void emit_test_armreg_immediate(int armreg, int imm) {
193 emit_byte(0xF7);
194 emit_modrm_armreg(0, armreg);
195 emit_dword(imm);
196 }
197
emit_test_armreg_x86reg(int armreg,int x86reg)198 static inline void emit_test_armreg_x86reg(int armreg, int x86reg) {
199 emit_byte(0x85);
200 emit_modrm_armreg(x86reg, armreg);
201 }
202
emit_test_x86reg_x86reg(int reg1,int reg2)203 static inline void emit_test_x86reg_x86reg(int reg1, int reg2) {
204 emit_byte(0x85);
205 emit_modrm_x86reg(reg1, reg2);
206 }
207
208 #define SHIFT_BY_CL -1
emit_shift_x86reg(int shiftop,int x86reg,int count)209 static void emit_shift_x86reg(int shiftop, int x86reg, int count) {
210 if (count == SHIFT_BY_CL) {
211 emit_byte(0xD3);
212 emit_modrm_x86reg(shiftop, x86reg);
213 } else if (count == 0) {
214 /* no-op */
215 } else if (count == 1) {
216 emit_byte(0xD1);
217 emit_modrm_x86reg(shiftop, x86reg);
218 } else {
219 emit_byte(0xC1);
220 emit_modrm_x86reg(shiftop, x86reg);
221 emit_byte(count);
222 }
223 }
224
emit_shift_armreg(int shiftop,int armreg,int count)225 static void emit_shift_armreg(int shiftop, int armreg, int count) {
226 if (count == SHIFT_BY_CL) {
227 emit_byte(0xD3);
228 emit_modrm_armreg(shiftop, armreg);
229 } else if (count == 0) {
230 /* no-op */
231 } else if (count == 1) {
232 emit_byte(0xD1);
233 emit_modrm_armreg(shiftop, armreg);
234 } else {
235 emit_byte(0xC1);
236 emit_modrm_armreg(shiftop, armreg);
237 emit_byte(count);
238 }
239 }
240
emit_mov_x86reg8_immediate(int x86reg,int immediate)241 static inline void emit_mov_x86reg8_immediate(int x86reg, int immediate) {
242 emit_byte(0xB0 | x86reg);
243 emit_byte(immediate);
244 }
emit_cmp_flag_immediate(void * flagptr,int immediate)245 static inline void emit_cmp_flag_immediate(void *flagptr, int immediate) {
246 emit_byte(0x80);
247 emit_modrm_base_offset(CMP, EBX, (uint8_t *)flagptr - (uint8_t *)&arm);
248 emit_byte(immediate);
249 }
emit_mov_x86reg8_flag(int x86reg,void * flagptr)250 static inline void emit_mov_x86reg8_flag(int x86reg, void *flagptr) {
251 emit_byte(0x8A);
252 emit_modrm_base_offset(x86reg, EBX, (uint8_t *)flagptr - (uint8_t *)&arm);
253 }
emit_alu_x86reg8_flag(int aluop,int x86reg,void * flagptr)254 static inline void emit_alu_x86reg8_flag(int aluop, int x86reg, void *flagptr) {
255 emit_byte(0x02 | aluop << 3);
256 emit_modrm_base_offset(x86reg, EBX, (uint8_t *)flagptr - (uint8_t *)&arm);
257 }
emit_mov_flag_immediate(void * flagptr,int imm)258 static inline void emit_mov_flag_immediate(void *flagptr, int imm) {
259 emit_byte(0xC6);
260 emit_modrm_base_offset(0, EBX, (uint8_t *)flagptr - (uint8_t *)&arm);
261 emit_byte(imm);
262 }
263 enum { SETO = 0x90, SETNO, SETB, SETAE, SETZ, SETNZ, SETBE, SETA,
264 SETS, SETNS, SETPE, SETPO, SETL, SETGE, SETLE, SETG };
emit_setcc_flag(int setcc,void * flagptr)265 static inline void emit_setcc_flag(int setcc, void *flagptr) {
266 emit_byte(0x0F);
267 emit_byte(setcc);
268 emit_modrm_base_offset(0, EBX, (uint8_t *)flagptr - (uint8_t *)&arm);
269 }
270
translate_init()271 bool translate_init()
272 {
273 if(!insn_buffer)
274 {
275 insn_buffer = os_alloc_executable(INSN_BUFFER_SIZE);
276 insn_bufptr = insn_buffer;
277 }
278
279 return !!insn_buffer;
280 }
281
translate_deinit()282 void translate_deinit()
283 {
284 if(!insn_buffer)
285 return;
286
287 os_free(insn_buffer, INSN_BUFFER_SIZE);
288 insn_buffer = NULL;
289 }
290
translate(uint32_t start_pc,uint32_t * start_insnp)291 void translate(uint32_t start_pc, uint32_t *start_insnp) {
292 out = insn_bufptr;
293 outj = jtbl_bufptr;
294 uint32_t pc = start_pc;
295 uint32_t *insnp = start_insnp;
296
297 if (next_index >= MAX_TRANSLATIONS)
298 error("too many translations");
299
300 uint8_t *insn_start;
301 int stop_here = 0;
302 while (1) {
303 if (out >= &insn_buffer[INSN_BUFFER_SIZE - 1000])
304 error("Out of instruction space");
305 if (outj >= &jtbl_buffer[sizeof jtbl_buffer / sizeof *jtbl_buffer])
306 error("Out of jump table space");
307
308 insn_start = out;
309
310 if ((pc ^ start_pc) & ~0x3FF) {
311 //printf("stopping translation - end of page\n");
312 goto branch_conditional;
313 }
314 if (RAM_FLAGS(insnp) & DONT_TRANSLATE) {
315 //printf("stopping translation - at breakpoint %x (%x)\n", pc);
316 goto branch_conditional;
317 }
318 uint32_t insn = *insnp;
319
320 /* Condition code */
321 int cond = insn >> 28;
322 int jcc = JZ;
323 uint8_t *cond_jmp_offset = NULL;
324 switch (cond >> 1) {
325 case 0: /* EQ (Z), NE (!Z) */
326 emit_cmp_flag_immediate(&arm.cpsr_z, 0);
327 break;
328 case 1: /* CS (C), CC (!C) */
329 emit_cmp_flag_immediate(&arm.cpsr_c, 0);
330 break;
331 case 2: /* MI (N), PL (!N) */
332 emit_cmp_flag_immediate(&arm.cpsr_n, 0);
333 break;
334 case 3: /* VS (V), VC (!V) */
335 emit_cmp_flag_immediate(&arm.cpsr_v, 0);
336 break;
337 case 4: /* HI (!Z & C), LS (Z | !C) */
338 emit_mov_x86reg8_flag(AL, &arm.cpsr_z);
339 emit_alu_x86reg8_flag(CMP, AL, &arm.cpsr_c);
340 jcc = JAE; // execute if Z is less than C
341 break;
342 case 5: /* GE (N = V), LT (N != V) */
343 emit_mov_x86reg8_flag(AL, &arm.cpsr_n);
344 emit_alu_x86reg8_flag(CMP, AL, &arm.cpsr_v);
345 jcc = JNZ;
346 break;
347 case 6: /* GT (!Z & N = V), LE (Z | N != V) */
348 emit_mov_x86reg8_flag(AL, &arm.cpsr_n);
349 emit_alu_x86reg8_flag(XOR, AL, &arm.cpsr_v);
350 emit_alu_x86reg8_flag(OR, AL, &arm.cpsr_z);
351 jcc = JNZ;
352 break;
353 case 7: /* AL */
354 if (cond & 1) goto unimpl;
355 goto no_condition;
356 }
357 /* If condition not met, jump around code.
358 * (If ARM condition code is inverted, invert x86 code too) */
359 emit_byte(jcc ^ (cond & 1));
360 emit_byte(0);
361 cond_jmp_offset = out;
362 no_condition:
363
364 if ((insn & 0xE000090) == 0x0000090) {
365 if ((insn & 0xFC000F0) == 0x0000090) {
366 /* MUL, MLA - 32x32->32 multiplications */
367 int left_reg = insn & 15;
368 int right_reg = insn >> 8 & 15;
369 int acc_reg = insn >> 12 & 15;
370 int dest_reg = insn >> 16 & 15;
371 if (left_reg == 15 || right_reg == 15 || acc_reg == 15 || dest_reg == 15)
372 goto unimpl;
373
374 emit_mov_x86reg_armreg(EAX, left_reg);
375 emit_unary_armreg(MUL, right_reg);
376 if (insn & 0x0200000)
377 emit_alu_x86reg_armreg(ADD, EAX, acc_reg);
378 emit_mov_armreg_x86reg(dest_reg, EAX);
379
380 if (insn & 0x0100000) {
381 if (!(insn & 0x0200000))
382 emit_test_x86reg_x86reg(EAX, EAX);
383 emit_setcc_flag(SETS, &arm.cpsr_n);
384 emit_setcc_flag(SETZ, &arm.cpsr_z);
385 }
386 } else if ((insn & 0xF8000F0) == 0x0800090) {
387 /* UMULL, UMLAL, SMULL, SMLAL: 32x32 to 64 multiplications */
388 uint32_t left_reg = insn & 15;
389 uint32_t right_reg = insn >> 8 & 15;
390 uint32_t reg_lo = insn >> 12 & 15;
391 uint32_t reg_hi = insn >> 16 & 15;
392
393 if (left_reg == 15 || right_reg == 15 || reg_lo == 15 || reg_hi == 15)
394 goto unimpl;
395 if (reg_lo == reg_hi)
396 goto unimpl;
397 if (insn & 0x0100000) // set flags
398 goto unimpl;
399
400 emit_mov_x86reg_armreg(EAX, left_reg);
401 emit_unary_armreg((insn & 0x0400000) ? IMUL : MUL, right_reg);
402 if (insn & 0x0200000) {
403 /* Accumulate */
404 emit_alu_armreg_x86reg(ADD, reg_lo, EAX);
405 emit_alu_armreg_x86reg(ADC, reg_hi, EDX);
406 } else {
407 emit_mov_armreg_x86reg(reg_lo, EAX);
408 emit_mov_armreg_x86reg(reg_hi, EDX);
409 }
410 } else {
411 enum { INVALID, H, SB, SH } type;
412 int is_load = insn & (1 << 20);
413 type = insn >> 5 & 3;
414 if (type == INVALID || (!is_load && type != H))
415 // multiply, SWP, or doubleword access
416 goto unimpl;
417
418 int post_index = !(insn & (1 << 24));
419 int offset_op = (insn & (1 << 23)) ? ADD : SUB;
420 int pre_index = insn & (1 << 21);
421 int base_reg = insn >> 16 & 15;
422 int data_reg = insn >> 12 & 15;
423
424 if (base_reg == 15 || data_reg == 15)
425 goto unimpl;
426
427 if (pre_index || post_index) {
428 if (pre_index && post_index) goto unimpl;
429 if (base_reg == 15) goto unimpl;
430 if (is_load && base_reg == data_reg) goto unimpl;
431 }
432
433 if (insn & (1 << 22)) {
434 // Offset is immediate
435 int offset = (insn & 0x0F) | (insn >> 4 & 0xF0);
436 emit_mov_x86reg_armreg(REG_ARG1, base_reg);
437 if (!post_index && offset != 0)
438 emit_alu_x86reg_immediate(offset_op, REG_ARG1, offset);
439 } else {
440 // Offset is register
441 int offset_reg = insn & 0x0F;
442 if (offset_reg == 15)
443 goto unimpl;
444 if (post_index || pre_index)
445 goto unimpl;
446 emit_mov_x86reg_armreg(REG_ARG1, base_reg);
447 emit_alu_x86reg_armreg(offset_op, REG_ARG1, offset_reg);
448 }
449
450 if (is_load) {
451 if (type == SB) {
452 emit_call_nosave((uintptr_t)read_byte_asm);
453 // movsx eax,al
454 emit_word(0xBE0F);
455 emit_byte(0xC0);
456 } else {
457 emit_call_nosave((uintptr_t)read_half_asm);
458 if (type == SH) {
459 // cwde
460 emit_byte(0x98);
461 }
462 }
463 emit_mov_armreg_x86reg(data_reg, EAX);
464 } else {
465 emit_mov_x86reg_armreg(REG_ARG2, data_reg);
466 emit_call_nosave((uintptr_t)write_half_asm);
467 }
468
469 if (post_index || pre_index)
470 emit_alu_armreg_immediate(offset_op, base_reg, ((insn & 0x0F) | (insn >> 4 & 0xF0)));
471 }
472 } else if ((insn & 0xD900000) == 0x1000000) {
473 if ((insn & 0xFFFFFD0) == 0x12FFF10) {
474 /* BX/BLX */
475 int target_reg = insn & 15;
476 if (target_reg == 15)
477 break;
478 emit_mov_x86reg_armreg(EAX, target_reg);
479 if (insn & 0x20)
480 emit_mov_armreg_immediate(14, pc + 4);
481 emit_jump((uintptr_t)translation_next_bx);
482 stop_here = 1;
483 } else if ((insn & 0xFBF0FFF) == 0x10F0000) {
484 /* MRS - move reg <- status */
485 int target_reg = insn >> 12 & 15;
486 if (target_reg == 15)
487 break;
488 emit_call((insn & 0x0400000) ? (uintptr_t)get_spsr : (uintptr_t)get_cpsr);
489 emit_mov_armreg_x86reg(target_reg, EAX);
490 } else if ((insn & 0xFB0FFF0) == 0x120F000 ||
491 (insn & 0xFB0F000) == 0x320F000) {
492 /* MSR - move status <- reg/imm */
493 uint32_t mask = 0;
494 if (insn & 0x2000000) {
495 uint32_t imm = insn & 0xFF;
496 int rotate = insn >> 7 & 30;
497 imm = imm >> rotate | imm << (32 - rotate);
498 emit_mov_x86reg_immediate(REG_ARG1, imm);
499 } else {
500 int reg = insn & 15;
501 if (reg == 15)
502 break;
503 emit_mov_x86reg_armreg(REG_ARG1, reg);
504 }
505 if (insn & 0x0080000) mask |= 0xFF000000;
506 if (insn & 0x0040000) mask |= 0x00FF0000;
507 if (insn & 0x0020000) mask |= 0x0000FF00;
508 if (insn & 0x0010000) mask |= 0x000000FF;
509 emit_mov_x86reg_immediate(REG_ARG2, mask);
510 emit_call((insn & 0x0400000) ? (uintptr_t)set_spsr : (uintptr_t)set_cpsr);
511 // If cpsr_c changed, leave translation to check for interrupts
512 if ((insn & 0x0410000) == 0x0010000) {
513 emit_mov_x86reg_immediate(EAX, pc + 4);
514 emit_jump((uintptr_t)translation_next);
515 }
516 } else if ((insn & 0xFFF0FF0) == 0x16F0F10) {
517 /* CLZ: Count leading zeros */
518 int src_reg = insn & 15;
519 int dst_reg = insn >> 12 & 15;
520 if (src_reg == 15 || dst_reg == 15)
521 break;
522 emit_word(0xBD0F); // BSR
523 emit_modrm_armreg(EAX, src_reg);
524 emit_word(5 << 8 | JNZ);
525 emit_mov_x86reg_immediate(EAX, 63);
526 emit_alu_x86reg_immediate(XOR, EAX, 31);
527 emit_mov_armreg_x86reg(dst_reg, EAX);
528 } else {
529 break;
530 }
531 } else if ((insn & 0xC000000) == 0) {
532 /* Data processing instructions */
533 int right_reg = insn & 15;
534 int dest_reg = insn >> 12 & 15;
535 int left_reg = insn >> 16 & 15;
536 int setcc = insn >> 20 & 1;
537 int op = insn >> 21 & 15;
538
539 if (dest_reg == 15 || left_reg == 15)
540 break; // not dealing with this for now
541
542 int set_overflow = -1;
543 int set_carry = -1;
544 int right_is_imm = insn >> 25 & 1;
545 int right_is_reg = 0;
546 uint32_t imm = 0; // value not used, just suppressing uninitialized variable warning
547 if (right_is_imm) {
548 // Right operand is immediate
549 imm = insn & 0xFF;
550 int rotate = insn >> 7 & 30;
551 if (rotate != 0)
552 {
553 imm = (imm >> rotate) | (imm << (32 - rotate));
554 set_carry = imm >> 31;
555 }
556 } else if (right_reg == 15) {
557 if (insn & 0xFF0) // Shifted PC?! Not likely.
558 goto unimpl;
559 imm = pc + 8;
560 right_is_imm = 1;
561 } else {
562 int shift_type = insn >> 5 & 3;
563 static const uint8_t shift_table[] = { SHL, SHR, SAR, ROR };
564 int x86_shift_type = shift_table[shift_type];
565
566 int count = insn >> 7 & 31;
567 int shift_need_carry = setcc & ((0xF303 >> op) & 1);
568 if (insn & (1 << 4)) {
569 if (insn & (1 << 7))
570 goto unimpl;
571 /* Register shifted by register.
572 * ARM's shifts are very different from x86's, unfortunately.
573 * In x86, only 5 bits of the shift count are used.
574 * In ARM, 8 bits are used. To implement ARM shifts on x86,
575 * one must check for the 32-255 cases explicitly.
576 * This is done in asmcode.S */
577
578 int shift_reg = count >> 1;
579 if (shift_reg == 15)
580 goto unimpl;
581
582 emit_mov_x86reg_armreg(ECX, shift_reg);
583 if (shift_type == 3 && !shift_need_carry) {
584 /* Ignoring flags, ARM's ROR is the same as x86's :) */
585 count = SHIFT_BY_CL;
586 goto simple_shift;
587 }
588
589 emit_mov_x86reg_armreg(EAX, right_reg);
590 emit_call_nosave(arm_shift_proc[shift_need_carry][shift_type]);
591
592 shift_need_carry = 0; /* Already set by the function */
593 } else if (count == 0) {
594 if (shift_type == 0) {
595 /* Right operand is just an ARM register */
596 right_is_reg = 1;
597 shift_need_carry = 0;
598 } else if (shift_type == 1) {
599 /* LSR #32 */
600 if (shift_need_carry) {
601 emit_mov_x86reg_armreg(EAX, right_reg);
602 emit_shift_x86reg(SHL, EAX, 1);
603 }
604 imm = 0;
605 right_is_imm = 1;
606 } else if (shift_type == 2) {
607 /* ASR #32 */
608 emit_mov_x86reg_armreg(EAX, right_reg);
609 emit_shift_x86reg(SAR, EAX, 31);
610 if (shift_need_carry)
611 emit_shift_x86reg(SAR, EAX, 1);
612 } else if (shift_type == 3) {
613 /* RRX */
614 emit_mov_x86reg8_immediate(AL, 0);
615 emit_alu_x86reg8_flag(CMP, AL, &arm.cpsr_c);
616 x86_shift_type = RCR;
617 count = 1;
618 goto simple_shift;
619 }
620 } else {
621 simple_shift:
622 if (dest_reg == right_reg && op == 13) {
623 /* MOV of a shifted register to itself. Do shift in-place */
624 emit_shift_armreg(x86_shift_type, dest_reg, count);
625 right_is_reg = 1;
626 } else {
627 emit_mov_x86reg_armreg(EAX, right_reg);
628 emit_shift_x86reg(x86_shift_type, EAX, count);
629 }
630 }
631 if (shift_need_carry)
632 emit_setcc_flag(SETB, &arm.cpsr_c);
633 }
634
635 if (op == 13 || op == 15) {
636 if (right_is_imm) {
637 if (op == 15)
638 imm = ~imm;
639 emit_mov_armreg_immediate(dest_reg, imm);
640 if (setcc)
641 goto unimpl;
642 } else if (right_is_reg && dest_reg == right_reg) {
643 /* MOV/MVN of a register to itself */
644 if (op == 15) {
645 if (setcc)
646 emit_alu_armreg_immediate(XOR, dest_reg, -1);
647 else
648 emit_unary_armreg(NOT, dest_reg);
649 } else {
650 if (setcc)
651 emit_alu_armreg_immediate(CMP, dest_reg, 0);
652 }
653 } else {
654 if (right_is_reg)
655 emit_mov_x86reg_armreg(EAX, right_reg);
656 if (op == 15)
657 emit_unary_x86reg(NOT, EAX);
658 emit_mov_armreg_x86reg(dest_reg, EAX);
659 if (setcc)
660 emit_test_x86reg_x86reg(EAX, EAX);
661 }
662 } else if (op == 8) { // TST
663 if (right_is_imm) {
664 emit_test_armreg_immediate(left_reg, imm);
665 } else {
666 if (right_is_reg)
667 emit_mov_x86reg_armreg(EAX, right_reg);
668 emit_test_armreg_x86reg(left_reg, EAX);
669 }
670 } else if (op == 10) { // CMP
671 if (right_is_imm) {
672 emit_alu_armreg_immediate(CMP, left_reg, imm);
673 } else {
674 if (right_is_reg)
675 emit_mov_x86reg_armreg(EAX, right_reg);
676 emit_alu_armreg_x86reg(CMP, left_reg, EAX);
677 }
678 set_overflow = SETO;
679 set_carry = SETAE;
680 } else if (op == 9 || op == 11) { // TEQ, CMN
681 int aluop;
682 if (op == 9) { aluop = XOR; }
683 else { aluop = ADD; set_overflow = SETO; set_carry = SETB; }
684
685 if (right_is_imm) {
686 emit_mov_x86reg_armreg(EAX, left_reg);
687 emit_alu_x86reg_immediate(aluop, EAX, imm);
688 } else {
689 if (right_is_reg)
690 emit_mov_x86reg_armreg(EAX, right_reg);
691 emit_alu_x86reg_armreg(aluop, EAX, left_reg);
692 }
693 } else {
694 int aluop;
695 enum { LR = 1, RL = 2 } direction;
696
697 if (op == 0) { aluop = AND; direction = LR | RL; }
698 else if (op == 1) { aluop = XOR; direction = LR | RL; }
699 else if (op == 2) { aluop = SUB; direction = LR; set_overflow = SETO; set_carry = SETAE; }
700 else if (op == 3) { aluop = SUB; direction = RL; set_overflow = SETO; set_carry = SETAE; }
701 else if (op == 4) { aluop = ADD; direction = LR | RL; set_overflow = SETO; set_carry = SETB; }
702 else if (op == 5) { aluop = ADC; direction = LR | RL; set_overflow = SETO; set_carry = SETB; }
703 else if (op == 6) { aluop = SBB; direction = LR; set_overflow = SETO; set_carry = SETAE; }
704 else if (op == 7) { aluop = SBB; direction = RL; set_overflow = SETO; set_carry = SETAE; }
705 else if (op == 12) { aluop = OR; direction = LR | RL; }
706 else {
707 // Convert BIC to AND
708 if (right_is_imm) {
709 imm = ~imm;
710 } else {
711 if (right_is_reg) {
712 emit_mov_x86reg_armreg(EAX, right_reg);
713 right_is_reg = 0;
714 }
715 emit_unary_x86reg(NOT, EAX);
716 }
717 aluop = AND; direction = LR | RL;
718 }
719
720 if (aluop == ADC) {
721 emit_mov_x86reg8_immediate(CL, 0);
722 emit_alu_x86reg8_flag(CMP, CL, &arm.cpsr_c);
723 } else if (aluop == SBB) {
724 emit_cmp_flag_immediate(&arm.cpsr_c, 1);
725 }
726
727 int reg_out = EAX;
728 if (dest_reg == left_reg && (direction & LR)) {
729 if (right_is_imm) {
730 emit_alu_armreg_immediate(aluop, dest_reg, imm);
731 } else {
732 if (right_is_reg)
733 emit_mov_x86reg_armreg(EAX, right_reg);
734 emit_alu_armreg_x86reg(aluop, dest_reg, EAX);
735 }
736 } else if (right_is_reg && dest_reg == right_reg && (direction & RL)) {
737 emit_mov_x86reg_armreg(EAX, left_reg);
738 emit_alu_armreg_x86reg(aluop, dest_reg, EAX);
739 } else {
740 if (right_is_imm) {
741 if (direction & LR) {
742 emit_mov_x86reg_armreg(EAX, left_reg);
743 emit_alu_x86reg_immediate(aluop, EAX, imm);
744 } else {
745 if (aluop == SUB && imm == 0) {
746 if (dest_reg == left_reg) {
747 /* RSB reg, reg, 0 is like x86's NEG */
748 emit_unary_armreg(NEG, left_reg);
749 goto data_proc_done;
750 }
751 emit_alu_x86reg_x86reg(XOR, EAX, EAX);
752 } else {
753 emit_mov_x86reg_immediate(EAX, imm);
754 }
755 emit_alu_x86reg_armreg(aluop, EAX, left_reg);
756 }
757 } else if (right_is_reg) {
758 if (direction & LR) {
759 emit_mov_x86reg_armreg(EAX, left_reg);
760 emit_alu_x86reg_armreg(aluop, EAX, right_reg);
761 } else {
762 emit_mov_x86reg_armreg(EAX, right_reg);
763 emit_alu_x86reg_armreg(aluop, EAX, left_reg);
764 }
765 } else {
766 if (direction & RL) {
767 emit_alu_x86reg_armreg(aluop, EAX, left_reg);
768 } else {
769 emit_mov_x86reg_armreg(REG_ARG2, left_reg);
770 emit_alu_x86reg_x86reg(aluop, REG_ARG2, EAX);
771 reg_out = REG_ARG2;
772 }
773 }
774 emit_mov_armreg_x86reg(dest_reg, reg_out);
775 }
776 }
777 data_proc_done:
778 if (setcc) {
779 emit_setcc_flag(SETS, &arm.cpsr_n);
780 emit_setcc_flag(SETZ, &arm.cpsr_z);
781 if (set_carry >= 0) {
782 if (set_carry < 2)
783 emit_mov_flag_immediate(&arm.cpsr_c, set_carry);
784 else
785 emit_setcc_flag(set_carry, &arm.cpsr_c);
786 }
787 if (set_overflow >= 0)
788 emit_setcc_flag(set_overflow, &arm.cpsr_v);
789 }
790 } else if ((insn & 0xC000000) == 0x4000000) {
791 /* Byte/word memory access */
792 int post_index = !(insn & (1 << 24));
793 int offset_op = (insn & (1 << 23)) ? ADD : SUB;
794 int is_byteop = insn & (1 << 22);
795 int pre_index = insn & (1 << 21);
796 int is_load = insn & (1 << 20);
797 int base_reg = insn >> 16 & 15;
798 int data_reg = insn >> 12 & 15;
799
800 if (pre_index || post_index) {
801 // Pre-indexed addressing is broken (maybe data abort issues?)
802 if (pre_index) break;
803 if (pre_index && post_index) break;
804 if (base_reg == 15) break;
805 if (is_load && base_reg == data_reg) break;
806 }
807
808 if (insn & (1 << 25)) {
809 // Offset is register
810
811 int offset_reg = insn & 15;
812 int shift_type = insn >> 5 & 3;
813 static const uint8_t shift_table[] = { SHL, SHR, SAR, ROR };
814 int count;
815
816 if (insn & (1 << 4))
817 // reg shifted by reg
818 break;
819
820 // reg shifted by immediate
821 count = insn >> 7 & 31;
822 if (count == 0 && shift_type != 0)
823 break; // special shift
824
825 if (base_reg == 15)
826 emit_mov_x86reg_immediate(REG_ARG1, pc + 8);
827 else
828 emit_mov_x86reg_armreg(REG_ARG1, base_reg);
829
830 if (count == 0 && !pre_index && !post_index) {
831 emit_alu_x86reg_armreg(offset_op, REG_ARG1, offset_reg);
832 } else {
833 emit_mov_x86reg_armreg(ECX, offset_reg);
834 emit_shift_x86reg(shift_table[shift_type], ECX, count);
835 if (!post_index)
836 emit_alu_x86reg_x86reg(offset_op, REG_ARG1, ECX);
837 }
838 } else {
839 // Offset is immediate
840 int offset = insn & 0xFFF;
841 if (base_reg == 15) {
842 if (offset_op == SUB)
843 offset = -offset;
844 emit_mov_x86reg_immediate(REG_ARG1, pc + 8 + offset);
845 } else {
846 emit_mov_x86reg_armreg(REG_ARG1, base_reg);
847 if (offset != 0 && !post_index)
848 emit_alu_x86reg_immediate(offset_op, REG_ARG1, offset);
849 }
850 }
851
852 if (is_load) {
853 /* LDR/LDRB instruction */
854 emit_call_nosave(is_byteop ? (uintptr_t)read_byte_asm : (uintptr_t)read_word_asm);
855 if (data_reg != 15)
856 emit_mov_armreg_x86reg(data_reg, EAX);
857 } else {
858 /* STR/STRB instruction */
859 if (data_reg == 15)
860 emit_mov_x86reg_immediate(REG_ARG2, pc + 12);
861 else
862 emit_mov_x86reg_armreg(REG_ARG2, data_reg);
863 emit_call_nosave(is_byteop ? (uintptr_t)write_byte_asm : (uintptr_t)write_word_asm);
864 }
865
866 if (pre_index || post_index) { // Writeback
867 if (insn & (1 << 25)) // Register offset
868 emit_alu_armreg_x86reg(offset_op, base_reg, ECX);
869 else // Immediate offset
870 emit_alu_armreg_immediate(offset_op, base_reg, insn & 0xFFF);
871 }
872
873 if (is_load && data_reg == 15) {
874 emit_jump((uintptr_t)translation_next_bx);
875 stop_here = 1;
876 }
877 } else if ((insn & 0xE000000) == 0x8000000) {
878 /* Load/store multiple */
879 int writeback = insn & (1 << 21);
880 int load = insn & (1 << 20);
881 int reg, offset, wb_offset, count;
882 bool loaded_addr_reg = false;
883
884 if (insn & (1 << 22)) // restore CPSR, or use umode regs
885 goto unimpl;
886
887 int addr_reg = insn >> 16 & 15;
888 if (addr_reg == 15)
889 goto unimpl;
890
891 if (writeback && load && insn & (1 << addr_reg))
892 goto unimpl;
893
894 for (reg = count = 0; reg < 16; reg++)
895 count += (insn >> reg & 1);
896
897 if (insn & (1 << 23)) { /* Increasing */
898 wb_offset = count * 4;
899 offset = 0;
900 if (insn & (1 << 24)) // Preincrement
901 offset += 4;
902 } else { /* Decreasing */
903 wb_offset = count * -4;
904 offset = wb_offset;
905 if (!(insn & (1 << 24))) // Postdecrement
906 offset += 4;
907 }
908
909 emit_mov_x86reg_armreg(EDX, addr_reg);
910 for (reg = 0; reg < 16; reg++) {
911 if (!(insn >> reg & 1))
912 continue;
913 emit_byte(0x8D); // LEA
914 emit_modrm_base_offset(REG_ARG1, EDX, offset);
915 if (load) {
916 emit_call_nosave((uintptr_t)read_word_asm);
917 if (reg == addr_reg && (insn & ~0u << reg & 0xFFFF)) {
918 // Loading the address register, but there are still more
919 // registers to go. In case they cause a data abort, don't
920 // write to register yet; save it to ECX
921 emit_mov_x86reg_x86reg(ECX, EAX);
922 loaded_addr_reg = true;
923 } else if (reg != 15)
924 emit_mov_armreg_x86reg(reg, EAX);
925 } else {
926 if (reg == 15)
927 emit_mov_x86reg_immediate(REG_ARG2, pc + 12);
928 else
929 emit_mov_x86reg_armreg(REG_ARG2, reg);
930 emit_call_nosave((uintptr_t)write_word_asm);
931 }
932 offset += 4;
933 }
934
935 if (writeback)
936 emit_alu_armreg_immediate(ADD, addr_reg, wb_offset);
937
938 if (loaded_addr_reg)
939 emit_mov_armreg_x86reg(addr_reg, ECX);
940
941 if (insn & (1 << 15) && load) {
942 // LDM with PC
943 emit_jump((uintptr_t)translation_next_bx);
944 stop_here = 1;
945 }
946 } else if ((insn & 0xE000000) == 0xA000000) {
947 /* Branch, branch-and-link */
948 if (insn & (1 << 24))
949 emit_mov_armreg_immediate(14, pc + 4);
950 emit_mov_x86reg_immediate(EAX, pc + 8 + ((int32_t)(insn << 8) >> 6));
951 emit_jump((uintptr_t)translation_next);
952 stop_here = 1;
953 } else {
954 break;
955 }
956
957 /* Fill in the conditional jump offset */
958 if (cond_jmp_offset) {
959 if (out - cond_jmp_offset > 0x7F)
960 goto unimpl; /* yes, this could happen (with large LDM/STM) */
961 cond_jmp_offset[-1] = out - cond_jmp_offset;
962 }
963
964 RAM_FLAGS(insnp) |= (RF_CODE_TRANSLATED | next_index << RFS_TRANSLATION_INDEX);
965 pc += 4;
966 insnp++;
967 *outj++ = insn_start;
968
969 if (stop_here) {
970 if (cond == 0x0E)
971 goto branch_unconditional;
972 else
973 goto branch_conditional;
974 }
975 }
976 unimpl:
977 out = insn_start;
978 RAM_FLAGS(insnp) |= RF_CODE_NO_TRANSLATE;
979 branch_conditional:
980 emit_mov_x86reg_immediate(EAX, pc);
981 emit_jump((uintptr_t)translation_next);
982 branch_unconditional:
983
984 if (pc == start_pc)
985 return;
986
987 int index = next_index++;
988
989 //jump_table[0] is pointer to code on pc=start_ptr
990 //jump_table[1] is pointer to code on pc=start_ptr+4
991 translation_table[index].jump_table = (void**) jtbl_bufptr;
992 translation_table[index].start_ptr = start_insnp;
993 translation_table[index].end_ptr = insnp;
994
995 insn_bufptr = out;
996 jtbl_bufptr = outj;
997 }
998
flush_translations()999 void flush_translations() {
1000 int index;
1001 for (index = 0; index < next_index; index++) {
1002 uint32_t *start = translation_table[index].start_ptr;
1003 uint32_t *end = translation_table[index].end_ptr;
1004 for (; start < end; start++)
1005 RAM_FLAGS(start) &= ~(RF_CODE_TRANSLATED | (~0u << RFS_TRANSLATION_INDEX));
1006 }
1007 next_index = 0;
1008 insn_bufptr = insn_buffer;
1009 jtbl_bufptr = jtbl_buffer;
1010 }
1011
invalidate_translation(int index)1012 void invalidate_translation(int index) {
1013 if (in_translation_rsp) {
1014 uint32_t flags = RAM_FLAGS(in_translation_pc_ptr);
1015 if ((flags & RF_CODE_TRANSLATED) && (int)(flags >> RFS_TRANSLATION_INDEX) == index)
1016 error("Cannot modify currently executing code block.");
1017 }
1018 flush_translations();
1019 }
1020
translate_fix_pc()1021 void translate_fix_pc() {
1022 if (!in_translation_rsp)
1023 return;
1024
1025 uint32_t *insnp = in_translation_pc_ptr;
1026 void *ret_eip = in_translation_rsp[-1];
1027 uint32_t flags = RAM_FLAGS(insnp);
1028 if (!(flags & RF_CODE_TRANSLATED))
1029 error("Couldn't get PC for fault");
1030 int index = flags >> RFS_TRANSLATION_INDEX;
1031
1032 assert(insnp >= translation_table[index].start_ptr);
1033 assert(insnp < translation_table[index].end_ptr);
1034 // We may have jumped into the middle of a translation
1035 arm.reg[15] -= (uint8_t*) insnp - (uint8_t*) translation_table[index].start_ptr;
1036
1037 unsigned int translation_insts = translation_table[index].end_ptr - translation_table[index].start_ptr;
1038 for(unsigned int i = 0; ret_eip > translation_table[index].jump_table[i] && i < translation_insts; ++i)
1039 arm.reg[15] += 4;
1040
1041 cycle_count_delta -= ((uintptr_t)translation_table[index].end_ptr - (uintptr_t)insnp) >> 2;
1042 in_translation_rsp = NULL;
1043
1044 assert(!(arm.cpsr_low28 & 0x20));
1045 }
1046