/*
 * SH2 recompiler
 * (C) notaz, 2009,2010,2013
 * (C) kub, 2018,2019,2020
 *
 * This work is licensed under the terms of MAME license.
 * See COPYING file in the top-level directory.
 *
 * notes:
 * - tcache, block descriptor, block entry buffer overflows result in oldest
 *   blocks being deleted until enough space is available
 * - link and list element buffer overflows result in failure and exit
 * - jumps between blocks are tracked for SMC handling (in block_entry->links),
 *   except jumps from global to CPU-local tcaches
 *
 * implemented:
 * - static register allocation
 * - remaining register caching and tracking in temporaries
 * - block-local branch linking
 * - block linking
 * - some constant propagation
 * - call stack caching for host block entry address
 * - delay, poll, and idle loop detection and handling
 * - some T/M flag optimizations where the value is known or isn't used
 *
 * TODO:
 * - better constant propagation
 * - bug fixing
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#include "../../pico/pico_int.h"
#include "../../pico/arm_features.h"
#include "sh2.h"
#include "compiler.h"
#include "../drc/cmn.h"
#include "../debug.h"

// features
#define PROPAGATE_CONSTANTS 1
#define LINK_BRANCHES 1
#define BRANCH_CACHE 1
#define CALL_STACK 1
#define ALIAS_REGISTERS 1
#define REMAP_REGISTER 1
#define LOOP_DETECTION 1
#define LOOP_OPTIMIZER 1
#define T_OPTIMIZER 1
#define DIV_OPTIMIZER 0

#define MAX_LITERAL_OFFSET 0x200 // max. MOVA, MOV @(PC) offset
#define MAX_LOCAL_TARGETS (BLOCK_INSN_LIMIT / 4)
#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 2)

// debug stuff
// 01 - warnings/errors
// 02 - block info/smc
// 04 - asm
// 08 - runtime block entry log
// 10 - smc self-check
// 20 - runtime block entry counter
// 40 - rcache checking
// 80 - branch cache statistics
// 100 - write trace
// 200 - compare trace
// 400 - block entry backtrace on exit
// 800 - state dump on exit
// {
#ifndef DRC_DEBUG
#define DRC_DEBUG 0//x847
#endif
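// illustrative use (not a shipped setting): building with -DDRC_DEBUG=0x803
// would combine warnings/errors (01), block info/smc (02) and the state dump
// on exit (800), since the values listed above form a bitmask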

#if DRC_DEBUG
#define dbg(l,...) { \
  if ((l) & DRC_DEBUG) \
    elprintf(EL_STATUS, ##__VA_ARGS__); \
}
#include "mame/sh2dasm.h"
#include <platform/libpicofe/linux/host_dasm.h>
static int insns_compiled, hash_collisions, host_insn_count;
#define COUNT_OP \
  host_insn_count++
#else // !DRC_DEBUG
#define COUNT_OP
#define dbg(...)
#endif


///
#define FETCH_OP(pc) \
  dr_pc_base[(pc) / 2]

#define FETCH32(a) \
  ((dr_pc_base[(a) / 2] << 16) | dr_pc_base[(a) / 2 + 1])

#define CHECK_UNHANDLED_BITS(mask, label) { \
  if ((op & (mask)) != 0) \
    goto label; \
}

#define GET_Fx() \
  ((op >> 4) & 0x0f)

#define GET_Rm GET_Fx

#define GET_Rn() \
  ((op >> 8) & 0x0f)

#define T 0x00000001
#define S 0x00000002
#define I 0x000000f0
#define Q 0x00000100
#define M 0x00000200
#define T_save 0x00000800

#define I_SHIFT 4
#define Q_SHIFT 8
#define M_SHIFT 9
#define T_SHIFT 11
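// the defines above describe the DRC's view of SR: the T flag lives in bit 0
// and its saved copy (T_save) in bit 11, so e.g. (sr >> T_SHIFT) & 1 recovers
// the saved T value (illustrative expression; the emitters below do the
// equivalent in host code)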

static struct op_data {
  u8 op;
  u8 cycles;
  u8 size; // 0, 1, 2 - byte, word, long
  s8 rm; // branch or load/store data reg
  u32 source; // bitmask of src regs
  u32 dest; // bitmask of dest regs
  u32 imm; // immediate/io address/branch target
           // (for literal - address, not value)
} ops[BLOCK_INSN_LIMIT];

enum op_types {
  OP_UNHANDLED = 0,
  OP_BRANCH,
  OP_BRANCH_N, // conditional known not to be taken
  OP_BRANCH_CT, // conditional, branch if T set
  OP_BRANCH_CF, // conditional, branch if T clear
  OP_BRANCH_R, // indirect
  OP_BRANCH_RF, // indirect far (PC + Rm)
  OP_SETCLRT, // T flag set/clear
  OP_MOVE, // register move
  OP_LOAD_CONST,// load const to register
  OP_LOAD_POOL, // literal pool load, imm is address
  OP_MOVA, // MOVA instruction
  OP_SLEEP, // SLEEP instruction
  OP_RTE, // RTE instruction
  OP_TRAPA, // TRAPA instruction
  OP_LDC, // LDC instruction
  OP_DIV0, // DIV0[US] instruction
  OP_UNDEFINED,
};

struct div {
  u32 state:1; // 0: expect DIV1/ROTCL, 1: expect DIV1
  u32 rn:5, rm:5, ro:5; // rn and rm for DIV1, ro for ROTCL
  u32 div1:8, rotcl:8; // DIV1 count, ROTCL count
};
union _div { u32 imm; struct div div; }; // XXX tut-tut type punning...
#define div(opd) ((union _div *)&((opd)->imm))->div
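// illustrative use (opd being a hypothetical struct op_data pointer):
// div(opd).div1 reads the DIV1 count the decoder packed into opd->imm; the
// union view avoids manual shifting/masking, at the cost of the type punning
// noted above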

// XXX consider trap insns: OP_TRAPA, OP_UNDEFINED?
#define OP_ISBRANCH(op) ((BITRANGE(OP_BRANCH, OP_BRANCH_RF)| BITMASK1(OP_RTE)) \
                                & BITMASK1(op))
#define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \
                                & BITMASK1(op))
#define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) \
                                & BITMASK1(op))
#define OP_ISBRAIMM(op) (BITMASK3(OP_BRANCH, OP_BRANCH_CT, OP_BRANCH_CF) \
                                & BITMASK1(op))
#define OP_ISBRAIND(op) (BITMASK3(OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \
                                & BITMASK1(op))
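// e.g. OP_ISBRACND(OP_BRANCH_CT) evaluates to nonzero while
// OP_ISBRACND(OP_BRANCH) is 0: each macro indexes a constant bitmask of op
// types with BITMASK1(op), so the classification reduces to a shift and an AND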

#ifdef DRC_SH2

#if (DRC_DEBUG & 4)
static u8 *tcache_dsm_ptrs[3];
static char sh2dasm_buff[64];
#define do_host_disasm(tcid) \
  host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \
  tcache_dsm_ptrs[tcid] = emith_insn_ptr()
#else
#define do_host_disasm(x)
#endif

#define SH2_DUMP(sh2, reason) { \
  char ms = (sh2)->is_slave ? 's' : 'm'; \
  printf("%csh2 %s %08x\n", ms, reason, (sh2)->pc); \
  printf("%csh2 r0-7 %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
    (sh2)->r[0], (sh2)->r[1], (sh2)->r[2], (sh2)->r[3], \
    (sh2)->r[4], (sh2)->r[5], (sh2)->r[6], (sh2)->r[7]); \
  printf("%csh2 r8-15 %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
    (sh2)->r[8], (sh2)->r[9], (sh2)->r[10], (sh2)->r[11], \
    (sh2)->r[12], (sh2)->r[13], (sh2)->r[14], (sh2)->r[15]); \
  printf("%csh2 pc-ml %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
    (sh2)->pc, (sh2)->ppc, (sh2)->pr, (sh2)->sr&0xfff, \
    (sh2)->gbr, (sh2)->vbr, (sh2)->mach, (sh2)->macl); \
  printf("%csh2 tmp-p %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
    (sh2)->drc_tmp, (sh2)->irq_cycles, \
    (sh2)->pdb_io_csum[0], (sh2)->pdb_io_csum[1], (sh2)->state, \
    (sh2)->poll_addr, (sh2)->poll_cycles, (sh2)->poll_cnt); \
}

#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB)
#if (DRC_DEBUG & (256|512|1024))
static SH2 csh2[2][8];
static FILE *trace[2];
#endif
static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
{
  if (block != NULL) {
    dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm',
      sh2->pc, block, (signed int)sr >> 12);
#if defined PDB
    pdb_step(sh2, sh2->pc);
#elif (DRC_DEBUG & 256)
  {
    int idx = sh2->is_slave;
    if (!trace[0]) {
      trace[0] = fopen("pico.trace0", "wb");
      trace[1] = fopen("pico.trace1", "wb");
    }
    if (csh2[idx][0].pc != sh2->pc) {
      fwrite(sh2, offsetof(SH2, read8_map), 1, trace[idx]);
      fwrite(&sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx]);
      memcpy(&csh2[idx][0], sh2, offsetof(SH2, poll_cnt)+4);
      csh2[idx][0].is_slave = idx;
    }
  }
#elif (DRC_DEBUG & 512)
  {
    static SH2 fsh2;
    int idx = sh2->is_slave;
    if (!trace[0]) {
      trace[0] = fopen("pico.trace0", "rb");
      trace[1] = fopen("pico.trace1", "rb");
    }
    if (csh2[idx][0].pc != sh2->pc) {
      if (!fread(&fsh2, offsetof(SH2, read8_map), 1, trace[idx]) ||
          !fread(&fsh2.pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx])) {
        printf("trace eof at %08lx\n",ftell(trace[idx]));
        exit(1);
      }
      fsh2.sr = (fsh2.sr & 0xfff) | (sh2->sr & ~0xfff);
      fsh2.is_slave = idx;
      if (memcmp(&fsh2, sh2, offsetof(SH2, read8_map)) ||
          0)//memcmp(&fsh2.pdb_io_csum, &sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum)))
      {
        printf("difference at %08lx!\n",ftell(trace[idx]));
        SH2_DUMP(&fsh2, "file");
        SH2_DUMP(sh2, "current");
        SH2_DUMP(&csh2[idx][0], "previous");
        char *ps = (char *)sh2, *pf = (char *)&fsh2;
        for (idx = 0; idx < offsetof(SH2, read8_map); idx += sizeof(u32))
          if (*(u32 *)(ps+idx) != *(u32 *)(pf+idx))
            printf("diff reg %ld\n",idx/sizeof(u32));
        exit(1);
      }
      csh2[idx][0] = fsh2;
    }
  }
#elif (DRC_DEBUG & 1024)
  {
    int x = sh2->is_slave, i;
    for (i = 0; i < ARRAY_SIZE(csh2[x])-1; i++)
      memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, poll_cnt)+4);
    memcpy(&csh2[x][ARRAY_SIZE(csh2[x])-1], sh2, offsetof(SH2, poll_cnt)+4);
    csh2[x][0].is_slave = x;
  }
#endif
  }
  return block;
}
#endif


// we have 3 translation cache buffers, split from one drc/cmn buffer.
// BIOS shares tcache with data array because it's only used for init
// and can be discarded early
#define TCACHE_BUFFERS 3


struct ring_buffer {
  u8 *base; // ring buffer memory
  unsigned item_sz; // size of one buffer item
  unsigned size; // number of items in ring
  int first, next; // read and write pointers
  int used; // number of used items in ring
};

enum { BL_JMP=1, BL_LDJMP, BL_JCCBLX };
struct block_link {
  short tcache_id;
  short type; // BL_JMP et al
  u32 target_pc;
  void *jump; // insn address
  void *blx; // block link/exit area if any
  u8 jdisp[12]; // jump backup buffer
  struct block_link *next; // either in block_entry->links or unresolved
  struct block_link *o_next; // ...in block_entry->o_links
  struct block_link *prev;
  struct block_link *o_prev;
  struct block_entry *target; // target block this is linked in (be->links)
};

struct block_entry {
  u32 pc;
  u8 *tcache_ptr; // translated block for above PC
  struct block_entry *next; // chain in hash_table with same pc hash
  struct block_entry *prev;
  struct block_link *links; // incoming links to this entry
  struct block_link *o_links; // outgoing links from this entry
#if (DRC_DEBUG & 2)
  struct block_desc *block;
#endif
#if (DRC_DEBUG & 32)
  int entry_count;
#endif
};

struct block_desc {
  u32 addr; // block start SH2 PC address
  u32 addr_lit; // block start SH2 literal pool addr
  int size; // ..of recompiled insns
  int size_lit; // ..of (insns+)literal pool
  u8 *tcache_ptr; // start address of block in cache
  u16 crc; // crc of insns and literals
  u16 active; // actively used or deactivated?
  struct block_list *list;
#if (DRC_DEBUG & 2)
  int refcount;
#endif
  int entry_count;
  struct block_entry *entryp;
};

struct block_list {
  struct block_desc *block; // block reference
  struct block_list *next; // pointers for doubly linked list
  struct block_list *prev;
  struct block_list **head; // list head (for removing from list)
  struct block_list *l_next;
};

static u8 *tcache_ptr; // ptr for code emitters

// XXX: need to tune sizes

static struct ring_buffer tcache_ring[TCACHE_BUFFERS];
static const int tcache_sizes[TCACHE_BUFFERS] = {
  DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM
  DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2
  DRC_TCACHE_SIZE / 32, // ... slave
};

#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 32*256)
static struct ring_buffer block_ring[TCACHE_BUFFERS];
static struct block_desc *block_tables[TCACHE_BUFFERS];

#define ENTRY_MAX_COUNT(tcid) ((tcid) ? 8*512 : 256*512)
static struct ring_buffer entry_ring[TCACHE_BUFFERS];
static struct block_entry *entry_tables[TCACHE_BUFFERS];

// we have block_link_pool to avoid using mallocs
#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 512 : 32*512)
static struct block_link *block_link_pool[TCACHE_BUFFERS];
static int block_link_pool_counts[TCACHE_BUFFERS];
static struct block_link **unresolved_links[TCACHE_BUFFERS];
static struct block_link *blink_free[TCACHE_BUFFERS];

// used for invalidation
#define RAM_SIZE(tcid) ((tcid) ? 0x1000 : 0x40000)
#define INVAL_PAGE_SIZE 0x100

static struct block_list *inactive_blocks[TCACHE_BUFFERS];

// array of pointers to block_lists for RAM and 2 data arrays
// each array has len: sizeof(mem) / INVAL_PAGE_SIZE
static struct block_list **inval_lookup[TCACHE_BUFFERS];

#define HASH_TABLE_SIZE(tcid) ((tcid) ? 512 : 32*512)
static struct block_entry **hash_tables[TCACHE_BUFFERS];

#define HASH_FUNC(hash_tab, addr, mask) \
  (hash_tab)[((addr) >> 1) & (mask)]
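// lookup sketch: the hash drops the lowest pc bit (sh2 insns are 2-byte
// aligned) and masks with the table size - 1, i.e.
//   be = HASH_FUNC(hash_tables[tcid], pc, HASH_TABLE_SIZE(tcid) - 1);
// after which be->next is followed until be->pc == pc (see dr_get_entry below)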

#define BLOCK_LIST_MAX_COUNT (64*1024)
static struct block_list *block_list_pool;
static int block_list_pool_count;
static struct block_list *blist_free;

#if (DRC_DEBUG & 128)
#if BRANCH_CACHE
int bchit, bcmiss;
#endif
#if CALL_STACK
int rchit, rcmiss;
#endif
#endif

// host register tracking
enum cache_reg_htype {
  HRT_TEMP = 1, // is for temps and args
  HRT_REG = 2, // is for sh2 regs
};

enum cache_reg_flags {
  HRF_DIRTY = 1 << 0, // has "dirty" value to be written to ctx
  HRF_PINNED = 1 << 1, // has a pinned mapping
  HRF_S16 = 1 << 2, // has a sign extended 16 bit value
  HRF_U16 = 1 << 3, // has a zero extended 16 bit value
};

enum cache_reg_type {
  HR_FREE,
  HR_CACHED, // vreg has sh2_reg_e
  HR_TEMP, // reg used for temp storage
};

typedef struct {
  u8 hreg:6; // "host" reg
  u8 htype:2; // TEMP or REG?
  u8 flags:4; // DIRTY, PINNED?
  u8 type:2; // CACHED or TEMP?
  u8 locked:2; // LOCKED reference counter
  u16 stamp; // kind of a timestamp
  u32 gregs; // "guest" reg mask
} cache_reg_t;

// guest register tracking
enum guest_reg_flags {
  GRF_DIRTY = 1 << 0, // reg has "dirty" value to be written to ctx
  GRF_CONST = 1 << 1, // reg has a constant
  GRF_CDIRTY = 1 << 2, // constant not yet written to ctx
  GRF_STATIC = 1 << 3, // reg has static mapping to vreg
  GRF_PINNED = 1 << 4, // reg has pinned mapping to vreg
};

typedef struct {
  u8 flags; // guest flags: is constant, is dirty?
  s8 sreg; // cache reg for static mapping
  s8 vreg; // cache_reg this is currently mapped to, -1 if not mapped
  s8 cnst; // const index if this is constant
} guest_reg_t;


// possibly needed in code emitter
static int rcache_get_tmp(void);
static void rcache_free_tmp(int hr);

// Note: register assignment goes by ABI convention. Caller-save registers are
// TEMPORARY, callee-save registers are PRESERVED. Unusable regs are omitted.
// There must be at least as many free (i.e. not context or statically mapped)
// PRESERVED/TEMPORARY registers as the handlers use in the worst case
// (currently 4). There must be at least 3 PARAM registers, and PARAM+TEMPORARY
// together must be at least 4. SR must be statically mapped, and R0 should be
// whenever possible.
// XXX the static definition of SR MUST match that in compiler.h

#if defined(__arm__) || defined(_M_ARM)
#include "../drc/emit_arm.c"
#elif defined(__aarch64__) || defined(_M_ARM64)
#include "../drc/emit_arm64.c"
#elif defined(__mips__)
#include "../drc/emit_mips.c"
#elif defined(__riscv__) || defined(__riscv)
#include "../drc/emit_riscv.c"
#elif defined(__powerpc__)
#include "../drc/emit_ppc.c"
#elif defined(__i386__) || defined(_M_X86)
#include "../drc/emit_x86.c"
#elif defined(__x86_64__) || defined(_M_X64)
#include "../drc/emit_x86.c"
#else
#error unsupported arch
#endif

static const signed char hregs_param[] = PARAM_REGS;
static const signed char hregs_temp [] = TEMPORARY_REGS;
static const signed char hregs_saved[] = PRESERVED_REGS;
static const signed char regs_static[] = STATIC_SH2_REGS;

#define CACHE_REGS \
  (ARRAY_SIZE(hregs_param)+ARRAY_SIZE(hregs_temp)+ARRAY_SIZE(hregs_saved)-1)
static cache_reg_t cache_regs[CACHE_REGS];

static signed char reg_map_host[HOST_REGS];

static guest_reg_t guest_regs[SH2_REGS];

static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2);
static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc);
#if CALL_STACK
static u32 REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc);
static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc);
#endif
static void REGPARM(1) (*sh2_drc_exit)(u32 pc);
static void (*sh2_drc_test_irq)(void);

static u32 REGPARM(1) (*sh2_drc_read8)(u32 a);
static u32 REGPARM(1) (*sh2_drc_read16)(u32 a);
static u32 REGPARM(1) (*sh2_drc_read32)(u32 a);
static u32 REGPARM(1) (*sh2_drc_read8_poll)(u32 a);
static u32 REGPARM(1) (*sh2_drc_read16_poll)(u32 a);
static u32 REGPARM(1) (*sh2_drc_read32_poll)(u32 a);
static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d);
static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d);
static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d);

#ifdef DRC_SR_REG
void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2);
void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2);
#endif

// flags for memory access
#define MF_SIZEMASK 0x03 // size of access
#define MF_POSTINCR 0x10 // post increment (for read_rr)
#define MF_PREDECR MF_POSTINCR // pre decrement (for write_rr)
#define MF_POLLING 0x20 // include polling check in read

// address space stuff
static int dr_is_rom(u32 a)
{
  // tweak for WWF Raw which writes data to some high ROM addresses
  return (a & 0xc6000000) == 0x02000000 && (a & 0x3f0000) < 0x3e0000;
}
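// a worked example of the check above: (a & 0xc6000000) == 0x02000000 matches
// the ROM window at 0x02000000 including its mirrors, while
// (a & 0x3f0000) < 0x3e0000 excludes the last 128 KiB of the 4 MiB bank, so
// WWF Raw's high-address writes land in "not ROM"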

static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask)
{
  void *memptr;
  int poffs = -1;

  // check if region is mapped memory
  memptr = p32x_sh2_get_mem_ptr(a, mask, sh2);
  if (memptr == NULL)
    return poffs;

  if (memptr == sh2->p_bios) // BIOS
    poffs = offsetof(SH2, p_bios);
  else if (memptr == sh2->p_da) // data array
    poffs = offsetof(SH2, p_da);
  else if (memptr == sh2->p_sdram) // SDRAM
    poffs = offsetof(SH2, p_sdram);
  else if (memptr == sh2->p_rom) // ROM
    poffs = offsetof(SH2, p_rom);

  return poffs;
}

static int dr_get_tcache_id(u32 pc, int is_slave)
{
  u32 tcid = 0;

  if ((pc & 0xe0000000) == 0xc0000000)
    tcid = 1 + is_slave; // data array
  if ((pc & ~0xfff) == 0)
    tcid = 1 + is_slave; // BIOS
  return tcid;
}
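// e.g. pc 0xc0000004 (data array) or pc 0x00000100 (BIOS) map to tcache 1 on
// the master sh2 and tcache 2 on the slave; all other addresses (ROM, SDRAM)
// share tcache 0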

static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id)
{
  struct block_entry *be;

  *tcache_id = dr_get_tcache_id(pc, is_slave);

  be = HASH_FUNC(hash_tables[*tcache_id], pc, HASH_TABLE_SIZE(*tcache_id) - 1);
  if (be != NULL) // don't ask... gcc code generation hint
    for (; be != NULL; be = be->next)
      if (be->pc == pc)
        return be;

  return NULL;
}

// ---------------------------------------------------------------

// ring buffer management
#define RING_INIT(r,m,n) *(r) = (struct ring_buffer) { .base = (u8 *)m, \
          .item_sz = sizeof(*(m)), .size = n };
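// illustrative use, assuming `mem` is an array of n block_desc items:
//   RING_INIT(&block_ring[tcid], mem, n);
// item_sz is derived via sizeof(*(m)), so the macro works for any item type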

static void *ring_alloc(struct ring_buffer *rb, int count)
{
  // allocate space in ring buffer
  void *p;

  p = rb->base + rb->next * rb->item_sz;
  if (rb->next+count > rb->size) {
    rb->used += rb->size - rb->next;
    p = rb->base; // wrap if overflow at end
    rb->next = count;
  } else {
    rb->next += count;
    if (rb->next == rb->size) rb->next = 0;
  }

  rb->used += count;
  return p;
}

static void ring_wrap(struct ring_buffer *rb)
{
  // insufficient space at end of buffer memory, wrap around
  rb->used += rb->size - rb->next;
  rb->next = 0;
}

static void ring_free(struct ring_buffer *rb, int count)
{
  // free oldest space in ring buffer
  rb->first += count;
  if (rb->first >= rb->size) rb->first -= rb->size;

  rb->used -= count;
}

static void ring_free_p(struct ring_buffer *rb, void *p)
{
  // free ring buffer space up to given pointer
  rb->first = ((u8 *)p - rb->base) / rb->item_sz;

  rb->used = rb->next - rb->first;
  if (rb->used < 0) rb->used += rb->size;
}

static void *ring_reset(struct ring_buffer *rb)
{
  // reset to initial state
  rb->first = rb->next = rb->used = 0;
  return rb->base + rb->next * rb->item_sz;
}

static void *ring_first(struct ring_buffer *rb)
{
  return rb->base + rb->first * rb->item_sz;
}

static void *ring_next(struct ring_buffer *rb)
{
  return rb->base + rb->next * rb->item_sz;
}

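// a short worked example of the wrap logic, assuming size=8, first=next=6,
// used=0: ring_alloc(rb, 3) doesn't fit at the end (6+3 > 8), so the 2
// trailing items are written off as used, p points at base, next becomes 3 and
// used becomes 5 until ring_free()/ring_free_p() releases the oldest entries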

// block management
static void add_to_block_list(struct block_list **blist, struct block_desc *block)
{
  struct block_list *added;

  if (blist_free) {
    added = blist_free;
    blist_free = added->next;
  } else if (block_list_pool_count >= BLOCK_LIST_MAX_COUNT) {
    printf("block list overflow\n");
    exit(1);
  } else {
    added = block_list_pool + block_list_pool_count;
    block_list_pool_count++;
  }

  added->block = block;
  added->l_next = block->list;
  block->list = added;
  added->head = blist;

  added->prev = NULL;
  if (*blist)
    (*blist)->prev = added;
  added->next = *blist;
  *blist = added;
}

static void rm_from_block_lists(struct block_desc *block)
{
  struct block_list *entry;

  entry = block->list;
  while (entry != NULL) {
    if (entry->prev != NULL)
      entry->prev->next = entry->next;
    else
      *(entry->head) = entry->next;
    if (entry->next != NULL)
      entry->next->prev = entry->prev;

    entry->next = blist_free;
    blist_free = entry;

    entry = entry->l_next;
  }
  block->list = NULL;
}

static void discard_block_list(struct block_list **blist)
{
  struct block_list *next, *current = *blist;
  while (current != NULL) {
    next = current->next;
    current->next = blist_free;
    blist_free = current;
    current = next;
  }
  *blist = NULL;
}

static void add_to_hashlist(struct block_entry *be, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);

  be->prev = NULL;
  if (*head)
    (*head)->prev = be;
  be->next = *head;
  *head = be;

#if (DRC_DEBUG & 2)
  if (be->next != NULL) {
    printf(" %08x@%p: entry hash collision with %08x@%p\n",
      be->pc, be->tcache_ptr, be->next->pc, be->next->tcache_ptr);
    hash_collisions++;
  }
#endif
}

static void rm_from_hashlist(struct block_entry *be, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);

#if DRC_DEBUG & 1
  struct block_entry *current = be;
  while (current->prev != NULL)
    current = current->prev;
  if (current != *head)
    dbg(1, "rm_from_hashlist @%p: be %p %08x missing?", head, be, be->pc);
#endif

  if (be->prev != NULL)
    be->prev->next = be->next;
  else
    *head = be->next;
  if (be->next != NULL)
    be->next->prev = be->prev;
}


static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask);

#if DRC_DEBUG & 1
  struct block_link *current = *head;
  while (current != NULL && current != bl)
    current = current->next;
  if (current == bl)
    dbg(1, "add_to_hashlist_unresolved @%p: bl %p %p %08x already in?", head, bl, bl->target, bl->target_pc);
#endif

  bl->target = NULL; // marker for not resolved
  bl->prev = NULL;
  if (*head)
    (*head)->prev = bl;
  bl->next = *head;
  *head = bl;
}

static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask);

#if DRC_DEBUG & 1
  struct block_link *current = bl;
  while (current->prev != NULL)
    current = current->prev;
  if (current != *head)
    dbg(1, "rm_from_hashlist_unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc);
#endif

  if (bl->prev != NULL)
    bl->prev->next = bl->next;
  else
    *head = bl->next;
  if (bl->next != NULL)
    bl->next->prev = bl->prev;
}

#if LINK_BRANCHES
static void dr_block_link(struct block_entry *be, struct block_link *bl, int emit_jump)
{
  dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? "":"early ",
    bl->jump, bl->target_pc, be->tcache_ptr);

  if (emit_jump) {
    u8 *jump = bl->jump;
    int jsz = emith_jump_patch_size();
    if (bl->type == BL_JMP) { // patch: jump @entry
      // inlined: @jump far jump to target
      emith_jump_patch(jump, be->tcache_ptr, &jump);
    } else if (bl->type == BL_LDJMP) { // write: jump @entry
      // inlined: @jump far jump to target
      emith_jump_at(jump, be->tcache_ptr);
      jsz = emith_jump_at_size();
    } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry
      if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) {
        // inlined: @jump near jumpcc to target
        emith_jump_patch(jump, be->tcache_ptr, &jump);
      } else { // dispatcher cond immediate
        // via blx: @jump near jumpcc to blx; @blx far jump
        emith_jump_patch(jump, bl->blx, &jump);
        emith_jump_at(bl->blx, be->tcache_ptr);
        host_instructions_updated(bl->blx, (char *)bl->blx + emith_jump_at_size(),
          ((uintptr_t)bl->blx & 0x1f) + emith_jump_at_size()-1 > 0x1f);
      }
    } else {
      printf("unknown BL type %d\n", bl->type);
      exit(1);
    }
    host_instructions_updated(jump, jump + jsz, ((uintptr_t)jump & 0x1f) + jsz-1 > 0x1f);
  }

  // move bl to block_entry
  bl->target = be;
  bl->prev = NULL;
  if (be->links)
    be->links->prev = bl;
  bl->next = be->links;
  be->links = bl;
}

static void dr_block_unlink(struct block_link *bl, int emit_jump)
{
  dbg(2,"- unlink from %p to pc %08x", bl->jump, bl->target_pc);

  if (bl->target) {
    if (emit_jump) {
      u8 *jump = bl->jump;
      int jsz = emith_jump_patch_size();
      if (bl->type == BL_JMP) { // jump_patch @dispatcher
        // inlined: @jump far jump to dispatcher
        emith_jump_patch(jump, sh2_drc_dispatcher, &jump);
      } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher
        // inlined: @jump load target_pc, far jump to dispatcher
        memcpy(jump, bl->jdisp, emith_jump_at_size());
        jsz = emith_jump_at_size();
      } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump
        // via blx: @jump near jumpcc to blx; @blx load target_pc, far jump
        emith_jump_patch(bl->jump, bl->blx, &jump);
        memcpy(bl->blx, bl->jdisp, emith_jump_at_size());
        host_instructions_updated(bl->blx, (char *)bl->blx + emith_jump_at_size(), 1);
      } else {
        printf("unknown BL type %d\n", bl->type);
        exit(1);
      }
      // update cpu caches since the previous jump target doesn't exist anymore
      host_instructions_updated(jump, jump + jsz, 1);
    }

    if (bl->prev)
      bl->prev->next = bl->next;
    else
      bl->target->links = bl->next;
    if (bl->next)
      bl->next->prev = bl->prev;
    bl->target = NULL;
  }
}
#endif

static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id)
{
#if LINK_BRANCHES
  struct block_link *bl = block_link_pool[tcache_id];
  int cnt = block_link_pool_counts[tcache_id];
  int target_tcache_id;

  // get the target block entry
  target_tcache_id = dr_get_tcache_id(pc, is_slave);
  if (target_tcache_id && target_tcache_id != tcache_id)
    return NULL;

  // get a block link
  if (blink_free[tcache_id] != NULL) {
    bl = blink_free[tcache_id];
    blink_free[tcache_id] = bl->next;
  } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) {
    dbg(1, "bl overflow for tcache %d", tcache_id);
    return NULL;
  } else {
    bl += cnt;
    block_link_pool_counts[tcache_id] = cnt+1;
  }

  // prepare link and add to outgoing list of owner
  bl->tcache_id = tcache_id;
  bl->target_pc = pc;
  bl->jump = tcache_ptr;
  bl->blx = NULL;
  bl->o_next = owner->o_links;
  owner->o_links = bl;

  add_to_hashlist_unresolved(bl, tcache_id);
  return bl;
#else
  return NULL;
#endif
}

static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit)
{
  u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL;
  u32 addr, end, mask = 0, shift = 0, idx;

  // mark memory blocks as containing compiled code
  if ((block->addr & 0xc7fc0000) == 0x06000000
      || (block->addr & 0xfffff000) == 0xc0000000)
  {
    if (tcache_id != 0) {
      // data array
      drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1];
      lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1];
      shift = SH2_DRCBLK_DA_SHIFT;
    }
    else {
      // SDRAM
      drc_ram_blk = Pico32xMem->drcblk_ram;
      lit_ram_blk = Pico32xMem->drclit_ram;
      shift = SH2_DRCBLK_RAM_SHIFT;
    }
    mask = RAM_SIZE(tcache_id) - 1;

    // mark recompiled insns
    addr = block->addr & ~((1 << shift) - 1);
    end = block->addr + block->size;
    for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
      drc_ram_blk[idx++] += mark;

    // mark literal pool
    if (addr < (block->addr_lit & ~((1 << shift) - 1)))
      addr = block->addr_lit & ~((1 << shift) - 1);
    end = block->addr_lit + block->size_lit;
    for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
      drc_ram_blk[idx++] += mark;

    // mark for literals disabled
    if (nolit) {
      addr = nolit & ~((1 << shift) - 1);
      end = block->addr_lit + block->size_lit;
      for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
        lit_ram_blk[idx++] = 1;
    }

    if (mark < 0)
      rm_from_block_lists(block);
    else {
      // add to invalidation lookup lists
      addr = block->addr & ~(INVAL_PAGE_SIZE - 1);
      end = block->addr + block->size;
      for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE)
        add_to_block_list(&inval_lookup[tcache_id][idx++], block);

      if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1)))
        addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1);
      end = block->addr_lit + block->size_lit;
      for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE)
        add_to_block_list(&inval_lookup[tcache_id][idx++], block);
    }
  }
}

static u32 dr_check_nolit(u32 start, u32 end, int tcache_id)
{
  u8 *lit_ram_blk = NULL;
  u32 mask = 0, shift = 0, addr, idx;

  if ((start & 0xc7fc0000) == 0x06000000
      || (start & 0xfffff000) == 0xc0000000)
  {
    if (tcache_id != 0) {
      // data array
      lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1];
      shift = SH2_DRCBLK_DA_SHIFT;
    }
    else {
      // SDRAM
      lit_ram_blk = Pico32xMem->drclit_ram;
      shift = SH2_DRCBLK_RAM_SHIFT;
    }
    mask = RAM_SIZE(tcache_id) - 1;

    addr = start & ~((1 << shift) - 1);
    for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
      if (lit_ram_blk[idx++])
        break;

    return (addr < start ? start : addr > end ? end : addr);
  }

  return end;
}

static void dr_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free)
{
  struct block_link *bl;
  u32 i;

  free = free || nolit; // block is invalid if literals are overwritten
  dbg(2, " %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl",
    bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit,
    tcache_id, bd - block_tables[tcache_id]);
  if (bd->addr == 0 || bd->entry_count == 0) {
    dbg(1, " killing dead block!? %08x", bd->addr);
    return;
  }

#if LINK_BRANCHES
  // remove from hash table, make incoming links unresolved
  if (bd->active) {
    for (i = 0; i < bd->entry_count; i++) {
      rm_from_hashlist(&bd->entryp[i], tcache_id);

      while ((bl = bd->entryp[i].links) != NULL) {
        dr_block_unlink(bl, 1);
        add_to_hashlist_unresolved(bl, tcache_id);
      }
    }

    dr_mark_memory(-1, bd, tcache_id, nolit);
    add_to_block_list(&inactive_blocks[tcache_id], bd);
  }
  bd->active = 0;
#endif

  if (free) {
#if LINK_BRANCHES
    // revoke outgoing links
    for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) {
      if (bl->target)
        dr_block_unlink(bl, 0);
      else
        rm_from_hashlist_unresolved(bl, tcache_id);
      bl->jump = NULL;
      bl->next = blink_free[bl->tcache_id];
      blink_free[bl->tcache_id] = bl;
    }
    bd->entryp[0].o_links = NULL;
#endif
    // invalidate block
    rm_from_block_lists(bd);
    bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0;
    bd->entry_count = 0;
    bd->entryp = NULL;
  }
  emith_update_cache();
}

static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc,
  u32 addr, int size, u32 addr_lit, int size_lit)
{
  struct block_list **head = &inactive_blocks[tcache_id];
  struct block_list *current;

  for (current = *head; current != NULL; current = current->next) {
    struct block_desc *block = current->block;
    if (block->crc == crc && block->addr == addr && block->size == size &&
        block->addr_lit == addr_lit && block->size_lit == size_lit)
    {
      rm_from_block_lists(block);
      return block;
    }
  }
  return NULL;
}

static struct block_desc *dr_add_block(int entries, u32 addr, int size,
  u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id)
{
  struct block_entry *be;
  struct block_desc *bd;
  int tcache_id;

  // do a lookup to get tcache_id and override check
  be = dr_get_entry(addr, is_slave, &tcache_id);
  if (be != NULL)
    dbg(1, "block override for %08x", addr);

  if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size ||
      entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) {
    dbg(1, "bd overflow for tcache %d", tcache_id);
    return NULL;
  }

  *blk_id = block_ring[tcache_id].next;
  bd = ring_alloc(&block_ring[tcache_id], 1);
  bd->entryp = ring_alloc(&entry_ring[tcache_id], entries);

  bd->addr = addr;
  bd->size = size;
  bd->addr_lit = addr_lit;
  bd->size_lit = size_lit;
  bd->tcache_ptr = tcache_ptr;
  bd->crc = crc;
  bd->active = 0;
  bd->list = NULL;
  bd->entry_count = 0;
#if (DRC_DEBUG & 2)
  bd->refcount = 0;
#endif

  return bd;
}

static void dr_link_blocks(struct block_entry *be, int tcache_id)
{
#if LINK_BRANCHES
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  u32 pc = be->pc;
  struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], pc, tcmask);
  struct block_link *bl = *head, *next;

  while (bl != NULL) {
    next = bl->next;
    if (bl->target_pc == pc && (!bl->tcache_id || bl->tcache_id == tcache_id)) {
      rm_from_hashlist_unresolved(bl, bl->tcache_id);
      dr_block_link(be, bl, 1);
    }
    bl = next;
  }
#endif
}

static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave)
{
#if LINK_BRANCHES
  struct block_link *bl;
  int target_tcache_id;

  for (bl = be->o_links; bl; bl = bl->o_next) {
    if (bl->target == NULL) {
      be = dr_get_entry(bl->target_pc, is_slave, &target_tcache_id);
      if (be != NULL && (!target_tcache_id || target_tcache_id == tcache_id)) {
        // remove bl from unresolved_links (must've been since target was NULL)
        rm_from_hashlist_unresolved(bl, bl->tcache_id);
        dr_block_link(be, bl, 1);
      }
    }
  }
#endif
}

static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave)
{
  int i;

  // connect branches
  for (i = 0; i < bd->entry_count; i++) {
    struct block_entry *entry = &bd->entryp[i];
    add_to_hashlist(entry, tcache_id);
    // incoming branches
    dr_link_blocks(entry, tcache_id);
    if (!tcache_id)
      dr_link_blocks(entry, is_slave?2:1);
    // outgoing branches
    dr_link_outgoing(entry, tcache_id, is_slave);
  }

  // mark memory for overwrite detection
  dr_mark_memory(1, bd, tcache_id, 0);
  bd->active = 1;
}

static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id)
{
  struct block_entry *be = NULL;
  void *block = NULL;

  be = dr_get_entry(pc, sh2->is_slave, tcache_id);
  if (be != NULL)
    block = be->tcache_ptr;

#if (DRC_DEBUG & 2)
  if (be != NULL)
    be->block->refcount++;
#endif
  return block;
}

static void dr_free_oldest_block(int tcache_id)
{
  struct block_desc *bf;

  bf = ring_first(&block_ring[tcache_id]);
  if (bf->addr && bf->entry_count)
    dr_rm_block_entry(bf, tcache_id, 0, 1);
  ring_free(&block_ring[tcache_id], 1);

  if (block_ring[tcache_id].used) {
    bf = ring_first(&block_ring[tcache_id]);
    ring_free_p(&entry_ring[tcache_id], bf->entryp);
    ring_free_p(&tcache_ring[tcache_id], bf->tcache_ptr);
  } else {
    // reset since size of code block isn't known if no successor block exists
    ring_reset(&block_ring[tcache_id]);
    ring_reset(&entry_ring[tcache_id]);
    ring_reset(&tcache_ring[tcache_id]);
  }
}

static inline void dr_reserve_cache(int tcache_id, struct ring_buffer *rb, int count)
{
  // while not enough space available
  if (rb->next + count >= rb->size) {
    // not enough space in rest of buffer -> wrap around
    while (rb->first >= rb->next && rb->used)
      dr_free_oldest_block(tcache_id);
    if (rb->first == 0 && rb->used)
      dr_free_oldest_block(tcache_id);
    ring_wrap(rb);
  }
  while (rb->first >= rb->next && rb->next + count > rb->first && rb->used)
    dr_free_oldest_block(tcache_id);
}

static u8 *dr_prepare_cache(int tcache_id, int insn_count, int entry_count)
{
  int bf = block_ring[tcache_id].first;

  // reserve one block desc
  if (block_ring[tcache_id].used >= block_ring[tcache_id].size)
    dr_free_oldest_block(tcache_id);
  // reserve block entries
  dr_reserve_cache(tcache_id, &entry_ring[tcache_id], entry_count);
  // reserve cache space
  dr_reserve_cache(tcache_id, &tcache_ring[tcache_id], insn_count*128);

  if (bf != block_ring[tcache_id].first) {
    // deleted some block(s), clear branch cache and return stack
#if BRANCH_CACHE
    if (tcache_id)
      memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
    else {
      memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
      memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4);
    }
#endif
#if CALL_STACK
    if (tcache_id) {
      memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4);
      sh2s[tcache_id-1].rts_cache_idx = 0;
    } else {
      memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4);
      memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4);
      sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0;
    }
#endif
  }

  return ring_next(&tcache_ring[tcache_id]);
}

static void dr_flush_tcache(int tcid)
{
  int i;
#if (DRC_DEBUG & 1)
  elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d bes %d/%d)", tcid,
    tcache_ring[tcid].used, tcache_ring[tcid].size, block_ring[tcid].used,
    block_ring[tcid].size, entry_ring[tcid].used, entry_ring[tcid].size);
#endif

  ring_reset(&tcache_ring[tcid]);
  ring_reset(&block_ring[tcid]);
  ring_reset(&entry_ring[tcid]);

  block_link_pool_counts[tcid] = 0;
  blink_free[tcid] = NULL;
  memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid));
  memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid));

  if (tcid == 0) { // ROM, RAM
    memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram));
    memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram));
    memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache));
    memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache));
    memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache));
    memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache));
    sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0;
  } else {
    memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram));
    memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram));
    memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1]));
    memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1]));
    memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache));
    memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache));
    sh2s[tcid - 1].rts_cache_idx = 0;
  }
#if (DRC_DEBUG & 4)
  tcache_dsm_ptrs[tcid] = tcache_ring[tcid].base;
#endif

  for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++)
    discard_block_list(&inval_lookup[tcid][i]);
  discard_block_list(&inactive_blocks[tcid]);
}

static void *dr_failure(void)
{
  printf("recompilation failed\n");
  exit(1);
}

// ---------------------------------------------------------------

// NB rcache allocation dependencies:
// - get_reg_arg/get_tmp_arg first (might evict other regs just allocated)
// - get_reg(..., NULL) before get_reg(..., &hr) if it might get the same reg
// - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg)
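// illustrative consequence of the notes above (hypothetical sequence): for an
// insn reading Rm and writing Rn, map Rm with RC_GR_READ before mapping Rn
// with RC_GR_WRITE; doing it the other way around may evict the vreg still
// holding Rm in order to satisfy the WRITE, forcing a pointless reload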
1319
1320 // register cache / constant propagation stuff
1321 typedef enum {
1322 RC_GR_READ,
1323 RC_GR_WRITE,
1324 RC_GR_RMW,
1325 } rc_gr_mode;
1326
1327 typedef struct {
1328 u32 gregs;
1329 u32 val;
1330 } gconst_t;
1331
1332 gconst_t gconsts[ARRAY_SIZE(guest_regs)];
1333
1334 static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr);
1335 static inline int rcache_is_cached(sh2_reg_e r);
1336 static void rcache_add_vreg_alias(int x, sh2_reg_e r);
1337 static void rcache_remove_vreg_alias(int x, sh2_reg_e r);
1338 static void rcache_evict_vreg(int x);
1339 static void rcache_remap_vreg(int x);
1340
rcache_set_x16(int hr,int s16_,int u16_)1341 static void rcache_set_x16(int hr, int s16_, int u16_)
1342 {
1343 int x = reg_map_host[hr];
1344 if (x >= 0) {
1345 cache_regs[x].flags &= ~(HRF_S16|HRF_U16);
1346 if (s16_) cache_regs[x].flags |= HRF_S16;
1347 if (u16_) cache_regs[x].flags |= HRF_U16;
1348 }
1349 }
1350
rcache_copy_x16(int hr,int hr2)1351 static void rcache_copy_x16(int hr, int hr2)
1352 {
1353 int x = reg_map_host[hr], y = reg_map_host[hr2];
1354 if (x >= 0 && y >= 0) {
1355 cache_regs[x].flags = (cache_regs[x].flags & ~(HRF_S16|HRF_U16)) |
1356 (cache_regs[y].flags & (HRF_S16|HRF_U16));
1357 }
1358 }
1359
rcache_is_s16(int hr)1360 static int rcache_is_s16(int hr)
1361 {
1362 int x = reg_map_host[hr];
1363 return (x >= 0 ? cache_regs[x].flags & HRF_S16 : 0);
1364 }
1365
rcache_is_u16(int hr)1366 static int rcache_is_u16(int hr)
1367 {
1368 int x = reg_map_host[hr];
1369 return (x >= 0 ? cache_regs[x].flags & HRF_U16 : 0);
1370 }
1371
1372 #define RCACHE_DUMP(msg) { \
1373 cache_reg_t *cp; \
1374 guest_reg_t *gp; \
1375 int i; \
1376 printf("cache dump %s:\n",msg); \
1377 printf(" cache_regs:\n"); \
1378 for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \
1379 cp = &cache_regs[i]; \
1380 if (cp->type != HR_FREE || cp->gregs || cp->locked || cp->flags) \
1381 printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->locked, cp->gregs); \
1382 } \
1383 printf(" guest_regs:\n"); \
1384 for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \
1385 gp = &guest_regs[i]; \
1386 if (gp->vreg != -1 || gp->sreg >= 0 || gp->flags) \
1387 printf(" %d: v=%d f=%x s=%d c=%d\n", i, gp->vreg, gp->flags, gp->sreg, gp->cnst); \
1388 } \
1389 printf(" gconsts:\n"); \
1390 for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \
1391 if (gconsts[i].gregs) \
1392 printf(" %d: m=%x v=%x\n", i, gconsts[i].gregs, gconsts[i].val); \
1393 } \
1394 }
1395
1396 #define RCACHE_CHECK(msg) { \
1397 cache_reg_t *cp; \
1398 guest_reg_t *gp; \
1399 int i, x, m = 0, d = 0; \
1400 for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \
1401 cp = &cache_regs[i]; \
1402 if (cp->flags & HRF_PINNED) m |= (1 << i); \
1403 if (cp->type == HR_FREE || cp->type == HR_TEMP) continue; \
1404 /* check connectivity greg->vreg */ \
1405 FOR_ALL_BITS_SET_DO(cp->gregs, x, \
1406 if (guest_regs[x].vreg != i) \
1407 { d = 1; printf("cache check v=%d r=%d not connected?\n",i,x); } \
1408 ) \
1409 } \
1410 for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \
1411 gp = &guest_regs[i]; \
1412 if (gp->vreg != -1 && !(cache_regs[gp->vreg].gregs & (1 << i))) \
1413 { d = 1; printf("cache check r=%d v=%d not connected?\n", i, gp->vreg); }\
1414 if (gp->vreg != -1 && cache_regs[gp->vreg].type != HR_CACHED) \
1415 { d = 1; printf("cache check r=%d v=%d wrong type?\n", i, gp->vreg); }\
1416 if ((gp->flags & GRF_CONST) && !(gconsts[gp->cnst].gregs & (1 << i))) \
1417 { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\
1418 if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\
1419 { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \
1420 if (gp->flags & (GRF_STATIC|GRF_PINNED)) { \
1421 if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\
1422 { d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \
1423 else m &= ~(1 << gp->sreg); \
1424 } \
1425 } \
1426 for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \
1427 FOR_ALL_BITS_SET_DO(gconsts[i].gregs, x, \
1428 if (guest_regs[x].cnst != i || !(guest_regs[x].flags & GRF_CONST)) \
1429 { d = 1; printf("cache check c=%d v=%d not connected?\n",i,x); } \
1430 ) \
1431 } \
1432 if (m) \
1433 { d = 1; printf("cache check m=%x pinning wrong?\n",m); } \
1434 if (d) RCACHE_DUMP(msg) \
1435 /* else { \
1436 printf("locked regs %s:\n",msg); \
1437 for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \
1438 cp = &cache_regs[i]; \
1439 if (cp->locked) \
1440 printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->locked, cp->gregs); \
1441 } \
1442 } */ \
1443 }
1444
1445 #if PROPAGATE_CONSTANTS
gconst_alloc(sh2_reg_e r)1446 static inline int gconst_alloc(sh2_reg_e r)
1447 {
1448 int i, n = -1;
1449
1450 for (i = 0; i < ARRAY_SIZE(gconsts); i++) {
1451 gconsts[i].gregs &= ~(1 << r);
1452 if (gconsts[i].gregs == 0 && n < 0)
1453 n = i;
1454 }
1455 if (n >= 0)
1456 gconsts[n].gregs = (1 << r);
1457 else {
1458 printf("all gconst buffers in use, aborting\n");
1459 exit(1); // cannot happen - more constants than guest regs?
1460 }
1461 return n;
1462 }
1463
gconst_set(sh2_reg_e r,u32 val)1464 static void gconst_set(sh2_reg_e r, u32 val)
1465 {
1466 int i = gconst_alloc(r);
1467
1468 guest_regs[r].flags |= GRF_CONST;
1469 guest_regs[r].cnst = i;
1470 gconsts[i].val = val;
1471 }
1472
gconst_new(sh2_reg_e r,u32 val)1473 static void gconst_new(sh2_reg_e r, u32 val)
1474 {
1475 gconst_set(r, val);
1476 guest_regs[r].flags |= GRF_CDIRTY;
1477
1478 // throw away old r that we might have cached
1479 if (guest_regs[r].vreg >= 0)
1480 rcache_remove_vreg_alias(guest_regs[r].vreg, r);
1481 }
1482 #endif
1483
gconst_get(sh2_reg_e r,u32 * val)1484 static int gconst_get(sh2_reg_e r, u32 *val)
1485 {
1486 if (guest_regs[r].flags & GRF_CONST) {
1487 *val = gconsts[guest_regs[r].cnst].val;
1488 return 1;
1489 }
1490 *val = 0;
1491 return 0;
1492 }
1493
gconst_check(sh2_reg_e r)1494 static int gconst_check(sh2_reg_e r)
1495 {
1496 if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY))
1497 return 1;
1498 return 0;
1499 }
1500
1501 // update hr if dirty, else do nothing
gconst_try_read(int vreg,sh2_reg_e r)1502 static int gconst_try_read(int vreg, sh2_reg_e r)
1503 {
1504 int i, x;
1505 u32 v;
1506
1507 if (guest_regs[r].flags & GRF_CDIRTY) {
1508 x = guest_regs[r].cnst;
1509 v = gconsts[x].val;
1510 emith_move_r_imm(cache_regs[vreg].hreg, v);
1511 rcache_set_x16(cache_regs[vreg].hreg, v == (s16)v, v == (u16)v);
1512 FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i,
1513 {
1514 if (guest_regs[i].vreg >= 0 && guest_regs[i].vreg != vreg)
1515 rcache_remove_vreg_alias(guest_regs[i].vreg, i);
1516 if (guest_regs[i].vreg < 0)
1517 rcache_add_vreg_alias(vreg, i);
1518 guest_regs[i].flags &= ~GRF_CDIRTY;
1519 guest_regs[i].flags |= GRF_DIRTY;
1520 });
1521 cache_regs[vreg].type = HR_CACHED;
1522 cache_regs[vreg].flags |= HRF_DIRTY;
1523 return 1;
1524 }
1525 return 0;
1526 }
1527
gconst_dirty_mask(void)1528 static u32 gconst_dirty_mask(void)
1529 {
1530 u32 mask = 0;
1531 int i;
1532
1533 for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
1534 if (guest_regs[i].flags & GRF_CDIRTY)
1535 mask |= (1 << i);
1536 return mask;
1537 }
1538
gconst_kill(sh2_reg_e r)1539 static void gconst_kill(sh2_reg_e r)
1540 {
1541 if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY))
1542 gconsts[guest_regs[r].cnst].gregs &= ~(1 << r);
1543 guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY);
1544 }
1545
gconst_copy(sh2_reg_e rd,sh2_reg_e rs)1546 static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs)
1547 {
1548 gconst_kill(rd);
1549 if (guest_regs[rs].flags & GRF_CONST) {
1550 guest_regs[rd].flags |= GRF_CONST;
1551 if (guest_regs[rd].vreg < 0)
1552 guest_regs[rd].flags |= GRF_CDIRTY;
1553 guest_regs[rd].cnst = guest_regs[rs].cnst;
1554 gconsts[guest_regs[rd].cnst].gregs |= (1 << rd);
1555 }
1556 }
1557
gconst_clean(void)1558 static void gconst_clean(void)
1559 {
1560 int i;
1561
1562 for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
1563 if (guest_regs[i].flags & GRF_CDIRTY) {
1564 // using RC_GR_READ here: it will call gconst_try_read,
1565 // cache the reg and mark it dirty.
1566 rcache_get_reg_(i, RC_GR_READ, 0, NULL);
1567 }
1568 }
1569
gconst_invalidate(void)1570 static void gconst_invalidate(void)
1571 {
1572 int i;
1573
1574 for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
1575 if (guest_regs[i].flags & (GRF_CONST|GRF_CDIRTY))
1576 gconsts[guest_regs[i].cnst].gregs &= ~(1 << i);
1577 guest_regs[i].flags &= ~(GRF_CONST|GRF_CDIRTY);
1578 }
1579 }
1580
1581
1582 static u16 rcache_counter;
1583 // SH2 register usage bitmasks
1584 static u32 rcache_vregs_reg; // regs of type HRT_REG (for pinning)
1585 static u32 rcache_regs_static; // statically allocated regs
1586 static u32 rcache_regs_pinned; // pinned regs
1587 static u32 rcache_regs_now; // regs used in current insn
1588 static u32 rcache_regs_soon; // regs used in the next few insns
1589 static u32 rcache_regs_late; // regs used in later insns
1590 static u32 rcache_regs_discard; // regs overwritten without being used
1591 static u32 rcache_regs_clean; // regs needing cleaning
1592
rcache_lock_vreg(int x)1593 static void rcache_lock_vreg(int x)
1594 {
1595 if (x >= 0) {
1596 cache_regs[x].locked ++;
1597 #if DRC_DEBUG & 64
1598 if (cache_regs[x].type == HR_FREE) {
1599 printf("locking free vreg %x, aborting\n", x);
1600 exit(1);
1601 }
1602 if (!cache_regs[x].locked) {
1603 printf("locking overflow vreg %x, aborting\n", x);
1604 exit(1);
1605 }
1606 #endif
1607 }
1608 }
1609
rcache_unlock_vreg(int x)1610 static void rcache_unlock_vreg(int x)
1611 {
1612 if (x >= 0) {
1613 #if DRC_DEBUG & 64
1614 if (cache_regs[x].type == HR_FREE) {
1615 printf("unlocking free vreg %x, aborting\n", x);
1616 exit(1);
1617 }
1618 #endif
1619 if (cache_regs[x].locked)
1620 cache_regs[x].locked --;
1621 }
1622 }

static void rcache_free_vreg(int x)
{
  cache_regs[x].type = cache_regs[x].locked ? HR_TEMP : HR_FREE;
  cache_regs[x].flags &= HRF_PINNED;
  cache_regs[x].gregs = 0;
}

static void rcache_unmap_vreg(int x)
{
  int i;

  FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i,
      if (guest_regs[i].flags & GRF_DIRTY) {
        // if a dirty reg is unmapped save its value to context
        if ((~rcache_regs_discard | rcache_regs_now) & (1 << i))
          emith_ctx_write(cache_regs[x].hreg, i * 4);
        guest_regs[i].flags &= ~GRF_DIRTY;
      }
      guest_regs[i].vreg = -1);
  rcache_free_vreg(x);
}

static void rcache_move_vreg(int d, int x)
{
  int i;

  cache_regs[d].type = HR_CACHED;
  cache_regs[d].gregs = cache_regs[x].gregs;
  cache_regs[d].flags &= HRF_PINNED;
  cache_regs[d].flags |= cache_regs[x].flags & ~HRF_PINNED;
  cache_regs[d].locked = 0;
  cache_regs[d].stamp = cache_regs[x].stamp;
  emith_move_r_r(cache_regs[d].hreg, cache_regs[x].hreg);
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
    if (guest_regs[i].vreg == x)
      guest_regs[i].vreg = d;
  rcache_free_vreg(x);
}

static void rcache_clean_vreg(int x)
{
  u32 rns = rcache_regs_now | rcache_regs_soon;
  int r;

  if (cache_regs[x].flags & HRF_DIRTY) { // writeback
    cache_regs[x].flags &= ~HRF_DIRTY;
    rcache_lock_vreg(x);
    FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, r,
        if (guest_regs[r].flags & GRF_DIRTY) {
          if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) {
            if (guest_regs[r].vreg != guest_regs[r].sreg &&
                !cache_regs[guest_regs[r].sreg].locked &&
                ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) &&
                !(rns & cache_regs[guest_regs[r].sreg].gregs)) {
              // statically mapped reg not in its sreg. move back to sreg
              rcache_evict_vreg(guest_regs[r].sreg);
              emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg,
                             cache_regs[guest_regs[r].vreg].hreg);
              rcache_copy_x16(cache_regs[guest_regs[r].sreg].hreg,
                              cache_regs[guest_regs[r].vreg].hreg);
              rcache_remove_vreg_alias(x, r);
              rcache_add_vreg_alias(guest_regs[r].sreg, r);
              cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY;
            } else
              // cannot remap. keep dirty for writeback in unmap
              cache_regs[x].flags |= HRF_DIRTY;
          } else {
            if ((~rcache_regs_discard | rcache_regs_now) & (1 << r))
              emith_ctx_write(cache_regs[x].hreg, r * 4);
            guest_regs[r].flags &= ~GRF_DIRTY;
          }
          rcache_regs_clean &= ~(1 << r);
        })
    rcache_unlock_vreg(x);
  }

#if DRC_DEBUG & 64
  RCACHE_CHECK("after clean");
#endif
}

static void rcache_add_vreg_alias(int x, sh2_reg_e r)
{
  cache_regs[x].gregs |= (1 << r);
  guest_regs[r].vreg = x;
  cache_regs[x].type = HR_CACHED;
}

static void rcache_remove_vreg_alias(int x, sh2_reg_e r)
{
  cache_regs[x].gregs &= ~(1 << r);
  if (!cache_regs[x].gregs) {
    // no reg mapped -> free vreg
    if (cache_regs[x].locked)
      cache_regs[x].type = HR_TEMP;
    else
      rcache_free_vreg(x);
  }
  guest_regs[r].vreg = -1;
}

static void rcache_evict_vreg(int x)
{
  rcache_remap_vreg(x);
  rcache_unmap_vreg(x);
}
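// Note: eviction first tries to relocate still-needed guest regs to another
// host reg (remap); whatever then remains mapped is written back to the
// context and the vreg is freed (unmap).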

static void rcache_evict_vreg_aliases(int x, sh2_reg_e r)
{
  rcache_remove_vreg_alias(x, r);
  rcache_evict_vreg(x);
  rcache_add_vreg_alias(x, r);
}

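// allocate a vreg; what: >0 = REG only, 0 = non-TEMP only, <0 = TEMP only.
// returns the vreg index, or -1 if nothing of at least minprio priority
// could be found or evicted.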
static int rcache_allocate(int what, int minprio)
{
  // evict reg with oldest stamp (only for HRT_REG, no temps)
  int i, i_prio, oldest = -1, prio = 0;
  u16 min_stamp = (u16)-1;

  for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) {
    // consider only non-static, unpinned, unlocked REG or TEMP
    if ((cache_regs[i].flags & HRF_PINNED) || cache_regs[i].locked)
      continue;
    if ((what > 0 && !(cache_regs[i].htype & HRT_REG)) ||   // get a REG
        (what == 0 && (cache_regs[i].htype & HRT_TEMP)) ||  // get a non-TEMP
        (what < 0 && !(cache_regs[i].htype & HRT_TEMP)))    // get a TEMP
      continue;
    if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) {
      // REG is free
      prio = 10;
      oldest = i;
      break;
    }
    if (cache_regs[i].type == HR_CACHED) {
      if (rcache_regs_now & cache_regs[i].gregs)
        // REGs needed for the current insn
        i_prio = 0;
      else if (rcache_regs_soon & cache_regs[i].gregs)
        // REGs needed in the next insns
        i_prio = 2;
      else if (rcache_regs_late & cache_regs[i].gregs)
        // REGs needed in some future insn
        i_prio = 4;
      else if (~rcache_regs_discard & cache_regs[i].gregs)
        // REGs not needed in the foreseeable future
        i_prio = 6;
      else
        // REGs soon overwritten anyway
        i_prio = 8;
      if (!(cache_regs[i].flags & HRF_DIRTY)) i_prio++;

      if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) {
        min_stamp = cache_regs[i].stamp;
        oldest = i;
        prio = i_prio;
      }
    }
  }

  if (prio < minprio || oldest == -1)
    return -1;

  if (cache_regs[oldest].type == HR_CACHED)
    rcache_evict_vreg(oldest);
  else
    rcache_free_vreg(oldest);

  return oldest;
}

static int rcache_allocate_vreg(int needed)
{
  int x;

  x = rcache_allocate(1, needed ? 0 : 4);
  if (x < 0)
    x = rcache_allocate(-1, 0);
  return x;
}

static int rcache_allocate_nontemp(void)
{
  int x = rcache_allocate(0, 4);
  return x;
}

static int rcache_allocate_temp(void)
{
  int x = rcache_allocate(-1, 0);
  if (x < 0)
    x = rcache_allocate(0, 0);
  return x;
}

// maps a host register to a REG
static int rcache_map_reg(sh2_reg_e r, int hr)
{
#if REMAP_REGISTER
  int i;

  gconst_kill(r);

  // look up the TEMP hr maps to
  i = reg_map_host[hr];
  if (i < 0) {
    // must not happen
    printf("invalid host register %d\n", hr);
    exit(1);
  }

  // remove old mappings of r and i if one exists
  if (guest_regs[r].vreg >= 0)
    rcache_remove_vreg_alias(guest_regs[r].vreg, r);
  if (cache_regs[i].type == HR_CACHED)
    rcache_evict_vreg(i);
  // set new mapping
  cache_regs[i].type = HR_CACHED;
  cache_regs[i].gregs = 1 << r;
  cache_regs[i].locked = 0;
  cache_regs[i].stamp = ++rcache_counter;
  cache_regs[i].flags |= HRF_DIRTY;
  rcache_lock_vreg(i);
  guest_regs[r].flags |= GRF_DIRTY;
  guest_regs[r].vreg = i;
#if DRC_DEBUG & 64
  RCACHE_CHECK("after map");
#endif
  return cache_regs[i].hreg;
#else
  return rcache_get_reg(r, RC_GR_WRITE, NULL);
#endif
}

// remap vreg from a TEMP to a REG if it will be used (upcoming TEMP invalidation)
static void rcache_remap_vreg(int x)
{
#if REMAP_REGISTER
  u32 rsl_d = rcache_regs_soon | rcache_regs_late;
  int d;

  // x must be a cached vreg
  if (cache_regs[x].type != HR_CACHED || cache_regs[x].locked)
    return;
  // don't do it if x isn't used
  if (!(rsl_d & cache_regs[x].gregs)) {
    // clean here to avoid data loss on invalidation
    rcache_clean_vreg(x);
    return;
  }

  FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, d,
      if ((guest_regs[d].flags & (GRF_STATIC|GRF_PINNED)) &&
          !cache_regs[guest_regs[d].sreg].locked &&
          !((rsl_d|rcache_regs_now) & cache_regs[guest_regs[d].sreg].gregs)) {
        // STATIC not in its sreg and sreg is available
        rcache_evict_vreg(guest_regs[d].sreg);
        rcache_move_vreg(guest_regs[d].sreg, x);
        return;
      }
  )

  // allocate a non-TEMP vreg
  rcache_lock_vreg(x); // lock to avoid evicting x
  d = rcache_allocate_nontemp();
  rcache_unlock_vreg(x);
  if (d < 0) {
    rcache_clean_vreg(x);
    return;
  }

  // move vreg to new location
  rcache_move_vreg(d, x);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after remap");
#endif
#else
  rcache_clean_vreg(x);
#endif
}

static void rcache_alias_vreg(sh2_reg_e rd, sh2_reg_e rs)
{
#if ALIAS_REGISTERS
  int x;

  // if s isn't constant, it must be in cache for aliasing
  if (!gconst_check(rs))
    rcache_get_reg_(rs, RC_GR_READ, 0, NULL);

  // if d and s are not already aliased
  x = guest_regs[rs].vreg;
  if (guest_regs[rd].vreg != x) {
    // remove possible old mapping of dst
    if (guest_regs[rd].vreg >= 0)
      rcache_remove_vreg_alias(guest_regs[rd].vreg, rd);
    // make dst an alias of src
    if (x >= 0)
      rcache_add_vreg_alias(x, rd);
    // if d is now in cache, it must be dirty
    if (guest_regs[rd].vreg >= 0) {
      x = guest_regs[rd].vreg;
      cache_regs[x].flags |= HRF_DIRTY;
      guest_regs[rd].flags |= GRF_DIRTY;
    }
  }

  gconst_copy(rd, rs);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after alias");
#endif
#else
  int hr_s = rcache_get_reg(rs, RC_GR_READ, NULL);
  int hr_d = rcache_get_reg(rd, RC_GR_WRITE, NULL);

  emith_move_r_r(hr_d, hr_s);
  gconst_copy(rd, rs);
#endif
}

// note: must not be called when doing conditional code
static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr)
{
  int src, dst, ali;
  cache_reg_t *tr;
  u32 rsp_d = (rcache_regs_soon | rcache_regs_static | rcache_regs_pinned) &
              ~rcache_regs_discard;

  dst = src = guest_regs[r].vreg;

  rcache_lock_vreg(src); // lock to avoid evicting src
  // good opportunity to relocate a remapped STATIC?
  if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
      src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) &&
      !cache_regs[guest_regs[r].sreg].locked &&
      !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[r].sreg].gregs)) {
    dst = guest_regs[r].sreg;
    rcache_evict_vreg(dst);
  } else if (dst < 0) {
    // allocate a cache register
    if ((dst = rcache_allocate_vreg(rsp_d & (1 << r))) < 0) {
      printf("no registers to evict, aborting\n");
      exit(1);
    }
  }
  tr = &cache_regs[dst];
  tr->stamp = rcache_counter;
  // remove r from src
  if (src >= 0 && src != dst)
    rcache_remove_vreg_alias(src, r);
  rcache_unlock_vreg(src);

  // if r has a constant it may have aliases
  if (mode != RC_GR_WRITE && gconst_try_read(dst, r))
    src = dst;

  // if r will be modified, check for aliases being needed soon
  ali = tr->gregs & ~(1 << r);
  if (mode != RC_GR_READ && src == dst && ali) {
    int x = -1;
    if ((rsp_d|rcache_regs_now) & ali) {
      if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
          guest_regs[r].sreg == dst && !tr->locked) {
        // split aliases if r is STATIC in sreg and dst isn't already locked
        int t;
        FOR_ALL_BITS_SET_DO(ali, t,
            if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) &&
                !(ali & ~(1 << t)) &&
                !cache_regs[guest_regs[t].sreg].locked &&
                !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[t].sreg].gregs)) {
              // alias is a single STATIC and its sreg is available
              x = guest_regs[t].sreg;
              rcache_evict_vreg(x);
            } else {
              rcache_lock_vreg(dst); // lock to avoid evicting dst
              x = rcache_allocate_vreg(rsp_d & ali);
              rcache_unlock_vreg(dst);
            }
            break;
        )
        if (x >= 0) {
          rcache_remove_vreg_alias(src, r);
          src = dst;
          rcache_move_vreg(x, dst);
        }
      } else {
        // split r
        rcache_lock_vreg(src); // lock to avoid evicting src
        x = rcache_allocate_vreg(rsp_d & (1 << r));
        rcache_unlock_vreg(src);
        if (x >= 0) {
          rcache_remove_vreg_alias(src, r);
          dst = x;
          tr = &cache_regs[dst];
          tr->stamp = rcache_counter;
        }
      }
    }
    if (x < 0)
      // aliases not needed or no vreg available, remove them
      rcache_evict_vreg_aliases(dst, r);
  }

  // assign r to dst
  rcache_add_vreg_alias(dst, r);

  // handle dst register transfer
  if (src < 0 && mode != RC_GR_WRITE)
    emith_ctx_read(tr->hreg, r * 4);
  if (hr) {
    *hr = (src >= 0 ? cache_regs[src].hreg : tr->hreg);
    rcache_lock_vreg(src >= 0 ? src : dst);
  } else if (src >= 0 && mode != RC_GR_WRITE && cache_regs[src].hreg != tr->hreg)
    emith_move_r_r(tr->hreg, cache_regs[src].hreg);

  // housekeeping
  if (do_locking)
    rcache_lock_vreg(dst);
  if (mode != RC_GR_READ) {
    tr->flags |= HRF_DIRTY;
    guest_regs[r].flags |= GRF_DIRTY;
    gconst_kill(r);
    rcache_set_x16(tr->hreg, 0, 0);
  } else if (src >= 0 && cache_regs[src].hreg != tr->hreg)
    rcache_copy_x16(tr->hreg, cache_regs[src].hreg);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after getreg");
#endif
  return tr->hreg;
}

static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode, int *hr)
{
  return rcache_get_reg_(r, mode, 1, hr);
}

static void rcache_pin_reg(sh2_reg_e r)
{
  int hr, x;

  // don't pin if static or already pinned
  if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED))
    return;

  rcache_regs_soon |= (1 << r); // kludge to prevent allocation of a temp
  hr = rcache_get_reg_(r, RC_GR_RMW, 0, NULL);
  x = reg_map_host[hr];

  // can only pin non-TEMPs
  if (!(cache_regs[x].htype & HRT_TEMP)) {
    guest_regs[r].flags |= GRF_PINNED;
    cache_regs[x].flags |= HRF_PINNED;
    guest_regs[r].sreg = x;
    rcache_regs_pinned |= (1 << r);
  }
#if DRC_DEBUG & 64
  RCACHE_CHECK("after pin");
#endif
}

static int rcache_get_tmp(void)
{
  int i;

  i = rcache_allocate_temp();
  if (i < 0) {
    printf("cannot allocate temp\n");
    exit(1);
  }

  cache_regs[i].type = HR_TEMP;
  rcache_lock_vreg(i);

  return cache_regs[i].hreg;
}

static int rcache_get_vreg_hr(int hr)
{
  int i;

  i = reg_map_host[hr];
  if (i < 0 || cache_regs[i].locked) {
    printf("host register %d is unmapped or locked\n", hr);
    exit(1);
  }

  if (cache_regs[i].type == HR_CACHED)
    rcache_evict_vreg(i);
  else if (cache_regs[i].type == HR_TEMP && cache_regs[i].locked) {
    printf("host reg %d already used, aborting\n", hr);
    exit(1);
  }

  return i;
}

static int rcache_get_vreg_arg(int arg)
{
  int hr = 0;

  host_arg2reg(hr, arg);
  return rcache_get_vreg_hr(hr);
}

// get a reg to be used as function arg
static int rcache_get_tmp_arg(int arg)
{
  int x = rcache_get_vreg_arg(arg);
  cache_regs[x].type = HR_TEMP;
  rcache_lock_vreg(x);

  return cache_regs[x].hreg;
}

// ... as return value after a call
static int rcache_get_tmp_ret(void)
{
  int x = rcache_get_vreg_hr(RET_REG);
  cache_regs[x].type = HR_TEMP;
  rcache_lock_vreg(x);

  return cache_regs[x].hreg;
}
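// Illustrative use of the arg/ret temp helpers (a sketch, not code from this
// file; "addr" and "helper" are placeholder names):
//   tmp = rcache_get_tmp_arg(0);      // claim the arg0 host reg as a TEMP
//   emith_move_r_imm(tmp, addr);      // set up the argument
//   rcache_invalidate_tmp();          // caller-saved regs die at the call
//   emith_call(helper);
//   tmp = rcache_get_tmp_ret();       // result is in RET_REG
//   ...                               // use tmp
//   rcache_free_tmp(tmp);             // release the TEMP when done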

// same but caches a reg if access is readonly (announced by hr being NULL)
static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr)
{
  int i, srcr, dstr, dstid, keep;
  u32 val;
  host_arg2reg(dstr, arg);

  i = guest_regs[r].vreg;
  if (i >= 0 && cache_regs[i].type == HR_CACHED && cache_regs[i].hreg == dstr)
    // r is already in arg, avoid evicting
    dstid = i;
  else
    dstid = rcache_get_vreg_arg(arg);
  dstr = cache_regs[dstid].hreg;

  if (rcache_is_cached(r)) {
    // r is needed later on anyway
    srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
    keep = 1;
  } else if ((guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) {
    // r has an uncommitted const - load into arg, but keep constant uncommitted
    srcr = dstr;
    emith_move_r_imm(srcr, val);
    keep = 0;
  } else {
    // must read from ctx
    srcr = dstr;
    emith_ctx_read(srcr, r * 4);
    keep = 1;
  }

  if (cache_regs[dstid].type == HR_CACHED)
    rcache_evict_vreg(dstid);

  cache_regs[dstid].type = HR_TEMP;
  if (hr == NULL) {
    if (dstr != srcr)
      // arg is a copy of cached r
      emith_move_r_r(dstr, srcr);
    else if (keep && guest_regs[r].vreg < 0)
      // keep arg as vreg for r
      rcache_add_vreg_alias(dstid, r);
  } else {
    *hr = srcr;
    if (dstr != srcr) // must lock srcr if not copied here
      rcache_lock_vreg(reg_map_host[srcr]);
  }

  cache_regs[dstid].stamp = ++rcache_counter;
  rcache_lock_vreg(dstid);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after getarg");
#endif
  return dstr;
}

static void rcache_free_tmp(int hr)
{
  int i = reg_map_host[hr];

  if (i < 0 || cache_regs[i].type != HR_TEMP) {
    printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr,
      i >= 0 ? cache_regs[i].type : -1);
    exit(1);
  }

  rcache_unlock_vreg(i);
}

// saves temporary result either in REG or in drctmp
static int rcache_save_tmp(int hr)
{
  int i;

  // find REG, either free or unlocked temp or oldest non-hinted cached
  i = rcache_allocate_nontemp();
  if (i < 0) {
    // if none is available, store in drctmp
    emith_ctx_write(hr, offsetof(SH2, drc_tmp));
    rcache_free_tmp(hr);
    return -1;
  }

  cache_regs[i].type = HR_CACHED;
  cache_regs[i].gregs = 0; // not storing any guest register
  cache_regs[i].flags &= HRF_PINNED;
  cache_regs[i].locked = 0;
  cache_regs[i].stamp = ++rcache_counter;
  rcache_lock_vreg(i);
  emith_move_r_r(cache_regs[i].hreg, hr);
  rcache_free_tmp(hr);
  return i;
}

static int rcache_restore_tmp(int x)
{
  int hr;

  // find REG with tmp store: cached but with no gregs
  if (x >= 0) {
    if (cache_regs[x].type != HR_CACHED || cache_regs[x].gregs) {
      printf("invalid tmp storage %d\n", x);
      exit(1);
    }
    // found, transform to a TEMP
    cache_regs[x].type = HR_TEMP;
    return cache_regs[x].hreg;
  }

  // if not available, create a TEMP store and fetch from drctmp
  hr = rcache_get_tmp();
  emith_ctx_read(hr, offsetof(SH2, drc_tmp));

  return hr;
}
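// Note: rcache_save_tmp/rcache_restore_tmp bracket operations which may
// invalidate TEMPs; emit_indirect_read_double below uses them so the first
// value read survives the second memhandler call.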

static void rcache_free(int hr)
{
  int x = reg_map_host[hr];
  rcache_unlock_vreg(x);
}

static void rcache_unlock(int x)
{
  if (x >= 0)
    cache_regs[x].locked = 0;
}

static void rcache_unlock_all(void)
{
  int i;
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    cache_regs[i].locked = 0;
}

static void rcache_unpin_all(void)
{
  int i;

  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    if (guest_regs[i].flags & GRF_PINNED) {
      guest_regs[i].flags &= ~GRF_PINNED;
      cache_regs[guest_regs[i].sreg].flags &= ~HRF_PINNED;
      guest_regs[i].sreg = -1;
      rcache_regs_pinned &= ~(1 << i);
    }
  }
#if DRC_DEBUG & 64
  RCACHE_CHECK("after unpin");
#endif
}

static void rcache_save_pinned(void)
{
  int i;

  // save pinned regs to context
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
    if ((guest_regs[i].flags & GRF_PINNED) && guest_regs[i].vreg >= 0)
      emith_ctx_write(cache_regs[guest_regs[i].vreg].hreg, i * 4);
}

static inline void rcache_set_usage_now(u32 mask)
{
  rcache_regs_now = mask;
}

static inline void rcache_set_usage_soon(u32 mask)
{
  rcache_regs_soon = mask;
}

static inline void rcache_set_usage_late(u32 mask)
{
  rcache_regs_late = mask;
}

static inline void rcache_set_usage_discard(u32 mask)
{
  rcache_regs_discard = mask;
}
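// Note: the now/soon/late/discard usage masks feed the priority ladder in
// rcache_allocate: regs needed now are evicted last, regs about to be
// overwritten (discard) are evicted first.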

static inline int rcache_is_cached(sh2_reg_e r)
{
  // is r in cache or needed soon?
  u32 rsc = rcache_regs_soon | rcache_regs_clean;
  return (guest_regs[r].vreg >= 0 || (rsc & (1 << r)));
}

static inline int rcache_is_hreg_used(int hr)
{
  int x = reg_map_host[hr];
  // is hr in use?
  return cache_regs[x].type != HR_FREE &&
        (cache_regs[x].type != HR_TEMP || cache_regs[x].locked);
}

static inline u32 rcache_used_hregs_mask(void)
{
  u32 mask = 0;
  int i;

  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    if ((cache_regs[i].htype & HRT_TEMP) && cache_regs[i].type != HR_FREE &&
        (cache_regs[i].type != HR_TEMP || cache_regs[i].locked))
      mask |= 1 << cache_regs[i].hreg;

  return mask;
}

static inline u32 rcache_dirty_mask(void)
{
  u32 mask = 0;
  int i;

  for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
    if (guest_regs[i].flags & GRF_DIRTY)
      mask |= 1 << i;
  mask |= gconst_dirty_mask();

  return mask;
}

static inline u32 rcache_cached_mask(void)
{
  u32 mask = 0;
  int i;

  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    if (cache_regs[i].type == HR_CACHED)
      mask |= cache_regs[i].gregs;

  return mask;
}

static void rcache_clean_tmp(void)
{
  int i;

  rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1;
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    if (cache_regs[i].type == HR_CACHED && (cache_regs[i].htype & HRT_TEMP)) {
      rcache_unlock(i);
      rcache_remap_vreg(i);
    }
  rcache_regs_clean = 0;
}

static void rcache_clean_masked(u32 mask)
{
  int i, r, hr;
  u32 m;

  rcache_regs_clean |= mask;
  mask = rcache_regs_clean;

  // clean constants where all aliases are covered by the mask, exempt statics
  // to avoid flushing them to context if sreg isn't available
  m = mask & ~(rcache_regs_static | rcache_regs_pinned);
  for (i = 0; i < ARRAY_SIZE(gconsts); i++)
    if ((gconsts[i].gregs & m) && !(gconsts[i].gregs & ~mask)) {
      FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r,
          if (guest_regs[r].flags & GRF_CDIRTY) {
            hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
            rcache_clean_vreg(reg_map_host[hr]);
            break;
          });
    }
  // clean vregs where all aliases are covered by the mask
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    if (cache_regs[i].type == HR_CACHED &&
        (cache_regs[i].gregs & mask) && !(cache_regs[i].gregs & ~mask))
      rcache_clean_vreg(i);
}

static void rcache_clean(void)
{
  int i;
  gconst_clean();

  rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1;
  for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--)
    if (cache_regs[i].type == HR_CACHED)
      rcache_clean_vreg(i);

  // relocate statics to their sregs (necessary before conditional jumps)
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    if ((guest_regs[i].flags & (GRF_STATIC|GRF_PINNED)) &&
        guest_regs[i].vreg != guest_regs[i].sreg) {
      rcache_lock_vreg(guest_regs[i].vreg);
      rcache_evict_vreg(guest_regs[i].sreg);
      rcache_unlock_vreg(guest_regs[i].vreg);
      if (guest_regs[i].vreg < 0)
        emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4);
      else {
        emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg,
                       cache_regs[guest_regs[i].vreg].hreg);
        rcache_copy_x16(cache_regs[guest_regs[i].sreg].hreg,
                       cache_regs[guest_regs[i].vreg].hreg);
        rcache_remove_vreg_alias(guest_regs[i].vreg, i);
      }
      cache_regs[guest_regs[i].sreg].gregs = 1 << i;
      cache_regs[guest_regs[i].sreg].type = HR_CACHED;
      cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED;
      guest_regs[i].flags |= GRF_DIRTY;
      guest_regs[i].vreg = guest_regs[i].sreg;
    }
  }
  rcache_regs_clean = 0;
}

static void rcache_invalidate_tmp(void)
{
  int i;

  for (i = 0; i < ARRAY_SIZE(cache_regs); i++) {
    if (cache_regs[i].htype & HRT_TEMP) {
      rcache_unlock(i);
      if (cache_regs[i].type == HR_CACHED)
        rcache_evict_vreg(i);
      else
        rcache_free_vreg(i);
    }
  }
}

static void rcache_invalidate(void)
{
  int i;
  gconst_invalidate();
  rcache_unlock_all();

  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    rcache_free_vreg(i);

  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    guest_regs[i].flags &= GRF_STATIC;
    if (!(guest_regs[i].flags & GRF_STATIC))
      guest_regs[i].vreg = -1;
    else {
      cache_regs[guest_regs[i].sreg].gregs = 1 << i;
      cache_regs[guest_regs[i].sreg].type = HR_CACHED;
      cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED;
      guest_regs[i].flags |= GRF_DIRTY;
      guest_regs[i].vreg = guest_regs[i].sreg;
    }
  }

  rcache_counter = 0;
  rcache_regs_now = rcache_regs_soon = rcache_regs_late = 0;
  rcache_regs_discard = rcache_regs_clean = 0;
}

static void rcache_flush(void)
{
  rcache_clean();
  rcache_invalidate();
}
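// Note: a full flush (clean + writeback, then invalidate) is apparently done
// at every block entry point, so apart from statically mapped regs no host
// register contents survive across translated block boundaries.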

static void rcache_create(void)
{
  int x = 0, i;

  // create cache_regs as host register representation
  // RET_REG/params should be first TEMPs to avoid allocation conflicts in calls
  cache_regs[x++] = (cache_reg_t) {.hreg = RET_REG, .htype = HRT_TEMP};
  for (i = 0; i < ARRAY_SIZE(hregs_param); i++)
    if (hregs_param[i] != RET_REG)
      cache_regs[x++] = (cache_reg_t){.hreg = hregs_param[i], .htype = HRT_TEMP};

  for (i = 0; i < ARRAY_SIZE(hregs_temp); i++)
    if (hregs_temp[i] != RET_REG)
      cache_regs[x++] = (cache_reg_t){.hreg = hregs_temp[i], .htype = HRT_TEMP};

  for (i = ARRAY_SIZE(hregs_saved)-1; i >= 0; i--)
    if (hregs_saved[i] != CONTEXT_REG)
      cache_regs[x++] = (cache_reg_t){.hreg = hregs_saved[i], .htype = HRT_REG};

  if (x != ARRAY_SIZE(cache_regs)) {
    printf("rcache_create failed (conflicting register count)\n");
    exit(1);
  }

  // mapping from host_register to cache regs index
  memset(reg_map_host, -1, sizeof(reg_map_host));
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++) {
    if (cache_regs[i].htype)
      reg_map_host[cache_regs[i].hreg] = i;
    if (cache_regs[i].htype == HRT_REG)
      rcache_vregs_reg |= (1 << i);
  }

  // create static host register mapping for SH2 regs
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    guest_regs[i] = (guest_reg_t){.sreg = -1};
  }
  for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) {
    for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--)
      if (cache_regs[x].hreg == regs_static[i+1]) break;
    if (x >= 0) {
      guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC, .sreg = x};
      rcache_regs_static |= (1 << regs_static[i]);
      rcache_vregs_reg &= ~(1 << x);
    }
  }

  printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n",
    CACHE_REGS+1L, count_bits(rcache_vregs_reg), count_bits(rcache_regs_static));
}

static void rcache_init(void)
{
  // create DRC data structures
  rcache_create();

  rcache_invalidate();
#if DRC_DEBUG & 64
  RCACHE_CHECK("after init");
#endif
}

// ---------------------------------------------------------------

// NB may return either REG or TEMP
static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs)
{
  uptr omask = emith_rw_offs_max(); // offset mask
  u32 mask = 0;
  u32 a;
  int poffs;
  int hr, hr2;
  uptr la;

  // is r constant and points to a memory region?
  if (! gconst_get(r, &a))
    return -1;
  poffs = dr_ctx_get_mem_ptr(sh2, a, &mask);
  if (poffs == -1)
    return -1;

  if (mask < 0x20000) {
    // data array, BIOS, DRAM; can't safely access directly since the host
    // address may change (BIOS/da code may run on either core, DRAM may be
    // switched)
    hr = rcache_get_tmp();
    a = (a + *offs) & mask;
    if (poffs == offsetof(SH2, p_da)) {
      // access sh2->data_array directly
      a += offsetof(SH2, data_array);
      emith_add_r_r_ptr_imm(hr, CONTEXT_REG, a & ~omask);
    } else {
      emith_ctx_read_ptr(hr, poffs);
      if (a & ~omask)
        emith_add_r_r_ptr_imm(hr, hr, a & ~omask);
    }
    *offs = a & omask;
    return hr;
  }

  // ROM, SDRAM. Host address should be mmapped to be equal to SH2 address.
  la = (uptr)*(void **)((char *)sh2 + poffs);

  // if r is in rcache or needed soon anyway, and offs is relative to region,
  // and address translation fits in add_ptr_imm (s32), then use rcached const
  if (la == (s32)la && !(*offs & ~mask) && rcache_is_cached(r)) {
    u32 odd = a & 1; // need to fix odd address for correct byte addressing
    la -= (s32)((a & ~mask) - *offs - odd); // diff between reg and memory
    hr = hr2 = rcache_get_reg(r, rmode, NULL);
    if ((s32)a < 0) emith_uext_ptr(hr2);
    if ((la & ~omask) - odd) {
      hr = rcache_get_tmp();
      emith_add_r_r_ptr_imm(hr, hr2, (la & ~omask) - odd);
      rcache_free(hr2);
    }
    *offs = (la & omask);
  } else {
    // known fixed host address
    la += (a + *offs) & mask;
    hr = rcache_get_tmp();
    emith_move_r_ptr_imm(hr, la & ~omask);
    *offs = la & omask;
  }
  return hr;
}

// read const data from const ROM address
static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, u32 offs, int size, u32 *val)
{
  u32 a, mask;

  *val = 0;
  if (gconst_get(r, &a)) {
    a += offs;
    // check if rom is memory mapped (not bank switched), and address is in rom
    if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) == sh2->p_rom) {
      switch (size & MF_SIZEMASK) {
      case 0: *val = (s8)p32x_sh2_read8(a, sh2s);   break; // 8
      case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16
      case 2: *val = p32x_sh2_read32(a, sh2s);      break; // 32
      }
      return 1;
    }
  }
  return 0;
}

static void emit_move_r_imm32(sh2_reg_e dst, u32 imm)
{
#if PROPAGATE_CONSTANTS
  gconst_new(dst, imm);
#else
  int hr = rcache_get_reg(dst, RC_GR_WRITE, NULL);
  emith_move_r_imm(hr, imm);
#endif
}

static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
{
  if (gconst_check(src) || rcache_is_cached(src))
    rcache_alias_vreg(dst, src);
  else {
    int hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL);
    emith_ctx_read(hr_d, src * 4);
  }
}

static void emit_add_r_imm(sh2_reg_e r, u32 imm)
{
  u32 val;
  int isgc = gconst_get(r, &val);
  int hr, hr2;

  if (!isgc || rcache_is_cached(r)) {
    // not constant, or r is already in cache
    hr = rcache_get_reg(r, RC_GR_RMW, &hr2);
    emith_add_r_r_imm(hr, hr2, imm);
    rcache_free(hr2);
    if (isgc)
      gconst_set(r, val + imm);
  } else
    gconst_new(r, val + imm);
}

static void emit_sub_r_imm(sh2_reg_e r, u32 imm)
{
  u32 val;
  int isgc = gconst_get(r, &val);
  int hr, hr2;

  if (!isgc || rcache_is_cached(r)) {
    // not constant, or r is already in cache
    hr = rcache_get_reg(r, RC_GR_RMW, &hr2);
    emith_sub_r_r_imm(hr, hr2, imm);
    rcache_free(hr2);
    if (isgc)
      gconst_set(r, val - imm);
  } else
    gconst_new(r, val - imm);
}

static void emit_sync_t_to_sr(void)
{
  // avoid reloading SR from context if there's nothing to do
  if (emith_get_t_cond() >= 0) {
    int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
    emith_sync_t(sr);
  }
}
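// Note: between insns T is often kept in the host condition flags rather
// than in SR; syncing materializes it into the SR's T bit before something
// (e.g. a helper call) can clobber the flags.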

// rd = @(arg0)
static int emit_memhandler_read(int size)
{
  int hr;

  emit_sync_t_to_sr();
  rcache_clean_tmp();
#ifndef DRC_SR_REG
  // must write back cycles for poll detection stuff
  if (guest_regs[SHR_SR].vreg != -1)
    rcache_unmap_vreg(guest_regs[SHR_SR].vreg);
#endif
  rcache_invalidate_tmp();

  if (size & MF_POLLING)
    switch (size & MF_SIZEMASK) {
    case 0: emith_call(sh2_drc_read8_poll);  break; // 8
    case 1: emith_call(sh2_drc_read16_poll); break; // 16
    case 2: emith_call(sh2_drc_read32_poll); break; // 32
    }
  else
    switch (size & MF_SIZEMASK) {
    case 0: emith_call(sh2_drc_read8);       break; // 8
    case 1: emith_call(sh2_drc_read16);      break; // 16
    case 2: emith_call(sh2_drc_read32);      break; // 32
    }

  hr = rcache_get_tmp_ret();
  rcache_set_x16(hr, (size & MF_SIZEMASK) < 2, 0);
  return hr;
}

// @(arg0) = arg1
static void emit_memhandler_write(int size)
{
  emit_sync_t_to_sr();
  rcache_clean_tmp();
#ifndef DRC_SR_REG
  if (guest_regs[SHR_SR].vreg != -1)
    rcache_unmap_vreg(guest_regs[SHR_SR].vreg);
#endif
  rcache_invalidate_tmp();

  switch (size & MF_SIZEMASK) {
  case 0: emith_call(sh2_drc_write8);  break; // 8
  case 1: emith_call(sh2_drc_write16); break; // 16
  case 2: emith_call(sh2_drc_write32); break; // 32
  }
}

// rd = @(Rs,#offs); rd < 0 -> return a temp
static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size)
{
  int hr, hr2;
  u32 val;

#if PROPAGATE_CONSTANTS
  if (emit_get_rom_data(sh2, rs, offs, size, &val)) {
    if (rd == SHR_TMP) {
      hr2 = rcache_get_tmp();
      emith_move_r_imm(hr2, val);
    } else {
      emit_move_r_imm32(rd, val);
      hr2 = rcache_get_reg(rd, RC_GR_RMW, NULL);
    }
    rcache_set_x16(hr2, val == (s16)val, val == (u16)val);
    if (size & MF_POSTINCR)
      emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK));
    return hr2;
  }

  val = size & MF_POSTINCR;
  hr = emit_get_rbase_and_offs(sh2, rs, val ? RC_GR_RMW : RC_GR_READ, &offs);
  if (hr != -1) {
    if (rd == SHR_TMP)
      hr2 = rcache_get_tmp();
    else
      hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL);
    switch (size & MF_SIZEMASK) {
    case 0: emith_read8s_r_r_offs(hr2, hr, offs ^ 1); break; // 8
    case 1: emith_read16s_r_r_offs(hr2, hr, offs);    break; // 16
    case 2: emith_read_r_r_offs(hr2, hr, offs); emith_ror(hr2, hr2, 16); break;
    }
    rcache_free(hr);
    if (size & MF_POSTINCR)
      emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK));
    return hr2;
  }
#endif

  if (gconst_get(rs, &val) && !rcache_is_cached(rs)) {
    hr = rcache_get_tmp_arg(0);
    emith_move_r_imm(hr, val + offs);
    if (size & MF_POSTINCR)
      gconst_new(rs, val + (1 << (size & MF_SIZEMASK)));
  } else if (size & MF_POSTINCR) {
    hr = rcache_get_tmp_arg(0);
    hr2 = rcache_get_reg(rs, RC_GR_RMW, NULL);
    emith_add_r_r_imm(hr, hr2, offs);
    emith_add_r_imm(hr2, 1 << (size & MF_SIZEMASK));
    if (gconst_get(rs, &val))
      gconst_set(rs, val + (1 << (size & MF_SIZEMASK)));
  } else {
    hr = rcache_get_reg_arg(0, rs, &hr2);
    if (offs || hr != hr2)
      emith_add_r_r_imm(hr, hr2, offs);
  }
  hr = emit_memhandler_read(size);

  if (rd == SHR_TMP)
    hr2 = hr;
  else
    hr2 = rcache_map_reg(rd, hr);

  if (hr != hr2) {
    emith_move_r_r(hr2, hr);
    rcache_free_tmp(hr);
  }
  return hr2;
}

// @(Rs,#offs) = rd; rd < 0 -> write arg1
static void emit_memhandler_write_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size)
{
  int hr, hr2;
  u32 val;

  if (rd == SHR_TMP) {
    host_arg2reg(hr2, 1); // already locked and prepared by caller
  } else if ((size & MF_PREDECR) && rd == rs) { // must avoid caching rd in arg1
    hr2 = rcache_get_reg_arg(1, rd, &hr);
    if (hr != hr2) {
      emith_move_r_r(hr2, hr);
      rcache_free(hr2);
    }
  } else
    hr2 = rcache_get_reg_arg(1, rd, NULL);
  if (rd != SHR_TMP)
    rcache_unlock(guest_regs[rd].vreg); // unlock in case rd is in arg0

  if (gconst_get(rs, &val) && !rcache_is_cached(rs)) {
    hr = rcache_get_tmp_arg(0);
    if (size & MF_PREDECR) {
      val -= 1 << (size & MF_SIZEMASK);
      gconst_new(rs, val);
    }
    emith_move_r_imm(hr, val + offs);
  } else if (offs || (size & MF_PREDECR)) {
    if (size & MF_PREDECR)
      emit_sub_r_imm(rs, 1 << (size & MF_SIZEMASK));
    rcache_unlock(guest_regs[rs].vreg); // unlock in case rs is in arg0
    hr = rcache_get_reg_arg(0, rs, &hr2);
    if (offs || hr != hr2)
      emith_add_r_r_imm(hr, hr2, offs);
  } else
    hr = rcache_get_reg_arg(0, rs, NULL);

  emit_memhandler_write(size);
}

// rd = @(Rx,Ry); rd < 0 -> return a temp
static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size)
{
  int hr, hr2;
  int tx, ty;
#if PROPAGATE_CONSTANTS
  u32 offs;

  // if offs is larger than 0x01000000, it's most probably the base address part
  if (gconst_get(ry, &offs) && offs < 0x01000000)
    return emit_memhandler_read_rr(sh2, rd, rx, offs, size);
  if (gconst_get(rx, &offs) && offs < 0x01000000)
    return emit_memhandler_read_rr(sh2, rd, ry, offs, size);
#endif
  hr = rcache_get_reg_arg(0, rx, &tx);
  ty = rcache_get_reg(ry, RC_GR_READ, NULL);
  emith_add_r_r_r(hr, tx, ty);
  hr = emit_memhandler_read(size);

  if (rd == SHR_TMP)
    hr2 = hr;
  else
    hr2 = rcache_map_reg(rd, hr);

  if (hr != hr2) {
    emith_move_r_r(hr2, hr);
    rcache_free_tmp(hr);
  }
  return hr2;
}

// @(Rx,Ry) = rd; rd < 0 -> write arg1
static void emit_indirect_indexed_write(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size)
{
  int hr, tx, ty;
#if PROPAGATE_CONSTANTS
  u32 offs;

  // if offs is larger than 0x01000000, it's most probably the base address part
  if (gconst_get(ry, &offs) && offs < 0x01000000)
    return emit_memhandler_write_rr(sh2, rd, rx, offs, size);
  if (gconst_get(rx, &offs) && offs < 0x01000000)
    return emit_memhandler_write_rr(sh2, rd, ry, offs, size);
#endif
  if (rd != SHR_TMP)
    rcache_get_reg_arg(1, rd, NULL);
  hr = rcache_get_reg_arg(0, rx, &tx);
  ty = rcache_get_reg(ry, RC_GR_READ, NULL);
  emith_add_r_r_r(hr, tx, ty);
  emit_memhandler_write(size);
}

// @Rn+,@Rm+
static void emit_indirect_read_double(SH2 *sh2, int *rnr, int *rmr, sh2_reg_e rn, sh2_reg_e rm, int size)
{
  int tmp;

  // unlock rn, rm here to avoid REG shortage in MAC operation
  tmp = emit_memhandler_read_rr(sh2, SHR_TMP, rn, 0, size | MF_POSTINCR);
  rcache_unlock(guest_regs[rn].vreg);
  tmp = rcache_save_tmp(tmp);
  *rmr = emit_memhandler_read_rr(sh2, SHR_TMP, rm, 0, size | MF_POSTINCR);
  rcache_unlock(guest_regs[rm].vreg);
  *rnr = rcache_restore_tmp(tmp);
}

static void emit_do_static_regs(int is_write, int tmpr)
{
  int i, r, count;

  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    if (guest_regs[i].flags & (GRF_STATIC|GRF_PINNED))
      r = cache_regs[guest_regs[i].vreg].hreg;
    else
      continue;

    for (count = 1; i < ARRAY_SIZE(guest_regs) - 1; i++, r++) {
      if ((guest_regs[i + 1].flags & (GRF_STATIC|GRF_PINNED)) &&
          cache_regs[guest_regs[i + 1].vreg].hreg == r + 1)
        count++;
      else
        break;
    }

    if (count > 1) {
      // i, r point to last item
      if (is_write)
        emith_ctx_write_multiple(r - count + 1, (i - count + 1) * 4, count, tmpr);
      else
        emith_ctx_read_multiple(r - count + 1, (i - count + 1) * 4, count, tmpr);
    } else {
      if (is_write)
        emith_ctx_write(r, i * 4);
      else
        emith_ctx_read(r, i * 4);
    }
  }
}

#if DIV_OPTIMIZER
// divide operation replacement functions, called by compiled code. Only the
// 32:16 cases and the 64:32 cases described in the SH2 prog man are replaced.

static uint32_t REGPARM(2) sh2_drc_divu32(uint32_t dv, uint32_t ds)
{
  if (ds && ds >= dv) {
    // good case: no divide by 0, and no result overflow
    uint32_t quot = dv / (ds>>16), rem = dv - (quot * (ds>>16));
    if (~quot&1) rem -= ds>>16;
    return (uint16_t)quot | ((2*rem + (quot>>31)) << 16);
  } else {
    // bad case: use the sh2 algo to get the right result
    int q = 0, t = 0, s = 16;
    while (s--) {
      uint32_t v = dv>>31;
      dv = (dv<<1) | t;
      t = v;
      v = dv;
      if (q) dv += ds, q =   dv < v;
      else   dv -= ds, q = !(dv < v);
      q ^= t, t = !q;
    }
    return (dv<<1) | t;
  }
}

static uint32_t REGPARM(3) sh2_drc_divu64(uint32_t dh, uint32_t *dl, uint32_t ds)
{
  if (ds > 1 && ds >= dh) {
    // good case: no divide by 0, and no result overflow
    uint64_t dv = *dl | ((uint64_t)dh << 32);
    uint32_t quot = dv / ds, rem = dv - (quot * ds);
    if (~quot&1) rem -= ds;
    *dl = quot;
    return rem;
  } else {
    // bad case: use the sh2 algo to get the right result
    uint64_t dv = *dl | ((uint64_t)dh << 32);
    int q = 0, t = 0, s = 32;
    while (s--) {
      uint64_t v = dv>>63;
      dv = (dv<<1) | t;
      t = v;
      v = dv;
      if (q) dv += ((uint64_t)ds << 32), q =   dv < v;
      else   dv -= ((uint64_t)ds << 32), q = !(dv < v);
      q ^= t, t = !q;
    }
    *dl = (dv<<1) | t;
    return (dv>>32);
  }
}

static uint32_t REGPARM(2) sh2_drc_divs32(int32_t dv, int32_t ds)
{
  uint32_t adv = abs(dv), ads = abs(ds)>>16;
  if (ads > 1 && ads > adv>>16 && (int32_t)ads > 0 && !(uint16_t)ds) {
    // good case: no divide by 0, and no result overflow
    uint32_t quot = adv / ads, rem = adv - (quot * ads);
    int m1 = (rem ? dv^ds : ds) < 0;
    if (rem && dv < 0) rem = (quot&1 ? -rem : +ads-rem);
    else               rem = (quot&1 ? +rem : -ads+rem);
    quot = ((dv^ds)<0 ? -quot : +quot) - m1;
    return (uint16_t)quot | ((2*rem + (quot>>31)) << 16);
  } else {
    // bad case: use the sh2 algo to get the right result
    int m = (uint32_t)ds>>31, q = (uint32_t)dv>>31, t = m^q, s = 16;
    while (s--) {
      uint32_t v = (uint32_t)dv>>31;
      dv = (dv<<1) | t;
      t = v;
      v = dv;
      if (m^q) dv += ds, q =   (uint32_t)dv < v;
      else     dv -= ds, q = !((uint32_t)dv < v);
      q ^= m^t, t = !(m^q);
    }
    return (dv<<1) | t;
  }
}

static uint32_t REGPARM(3) sh2_drc_divs64(int32_t dh, uint32_t *dl, int32_t ds)
{
  int64_t _dv = *dl | ((int64_t)dh << 32);
  uint64_t adv = (_dv < 0 ? -_dv : _dv); // llabs isn't in older toolchains
  uint32_t ads = abs(ds);
  if (ads > 1 && ads > adv>>32 && (int64_t)adv > 0) {
    // good case: no divide by 0, and no result overflow
    uint32_t quot = adv / ads, rem = adv - ((uint64_t)quot * ads);
    int m1 = (rem ? dh^ds : ds) < 0;
    if (rem && dh < 0) rem = (quot&1 ? -rem : +ads-rem);
    else               rem = (quot&1 ? +rem : -ads+rem);
    quot = ((dh^ds)<0 ? -quot : +quot) - m1;
    *dl = quot;
    return rem;
  } else {
    // bad case: use the sh2 algo to get the right result
    uint64_t dv = *dl | ((uint64_t)dh << 32);
    int m = (uint32_t)ds>>31, q = (uint64_t)dv>>63, t = m^q, s = 32;
    while (s--) {
      int64_t v = (uint64_t)dv>>63;
      dv = (dv<<1) | t;
      t = v;
      v = dv;
      if (m^q) dv += ((uint64_t)ds << 32), q =   dv < v;
      else     dv -= ((uint64_t)ds << 32), q = !(dv < v);
      q ^= m^t, t = !(m^q);
    }
    *dl = (dv<<1) | t;
    return (dv>>32);
  }
}
#endif
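// Note: the "bad case" paths above are a direct C transcription of the SH2
// DIV1 step sequence (1-bit non-restoring division), so they should
// reproduce the exact result a real DIV0+16/32x DIV1 sequence would leave,
// divide-by-zero and overflow cases included.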

// block local link stuff
struct linkage {
  u32 pc;
  void *ptr;
  struct block_link *bl;
  u32 mask;
};
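// Note on linkage.mask as used below: for blx targets, bit 0 selects the
// sh2_drc_exit stub over the dispatcher and bit 1 marks a block-local
// branch; for pinned loops the field instead holds the register pin mask.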

static inline int find_in_linkage(const struct linkage *array, int size, u32 pc)
{
  int i;
  for (i = 0; i < size; i++)
    if (pc == array[i].pc)
      return i;

  return -1;
}

static int find_in_sorted_linkage(const struct linkage *array, int size, u32 pc)
{
  // binary search in sorted array
  int left = 0, right = size-1;
  while (left <= right)
  {
    int middle = (left + right) / 2;
    if (array[middle].pc == pc)
      return middle;
    else if (array[middle].pc < pc)
      left = middle + 1;
    else
      right = middle - 1;
  }
  return -1;
}

static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tcache_id,
                const struct linkage *targets, int target_count,
                const struct linkage *links, int link_count)
{
  struct block_link *bl;
  int u, v, tmp;

  emith_flush();
  for (u = 0; u < link_count; u++) {
    emith_pool_check();
    // look up local branch targets
    if (links[u].mask & 0x2) {
      v = find_in_sorted_linkage(targets, target_count, links[u].pc);
      if (v < 0 || ! targets[v].ptr) {
        // forward branch not yet resolved, prepare external linking
        emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
        bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id);
        if (bl)
          bl->type = BL_LDJMP;
        tmp = rcache_get_tmp_arg(0);
        emith_move_r_imm(tmp, links[u].pc);
        rcache_free_tmp(tmp);
        emith_jump_patchable(sh2_drc_dispatcher);
      } else if (emith_jump_patch_inrange(links[u].ptr, targets[v].ptr)) {
        // in-range local branch
        emith_jump_patch(links[u].ptr, targets[v].ptr, NULL);
      } else {
        // far local branch
        emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
        emith_jump(targets[v].ptr);
      }
    } else {
      // external or exit, emit blx area entry
      void *target = (links[u].mask & 0x1 ? sh2_drc_exit : sh2_drc_dispatcher);
      if (links[u].bl)
        links[u].bl->blx = tcache_ptr;
      emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
      tmp = rcache_get_tmp_arg(0);
      emith_move_r_imm(tmp, links[u].pc & ~1);
      rcache_free_tmp(tmp);
      emith_jump(target);
    }
  }
}

#define DELAY_SAVE_T(sr) { \
  int t_ = rcache_get_tmp(); \
  emith_bic_r_imm(sr, T_save); \
  emith_and_r_r_imm(t_, sr, 1); \
  emith_or_r_r_lsl(sr, t_, T_SHIFT); \
  rcache_free_tmp(t_); \
}
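// Note: DELAY_SAVE_T copies the current T (bit 0 of SR) into the T_save bit,
// presumably so code in a delay slot can still access the pre-branch T value
// after the branch insn itself has changed T.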

#define FLUSH_CYCLES(sr) \
  if (cycles > 0) { \
    emith_sub_r_imm(sr, cycles << 12); \
    cycles = 0; \
  }
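// Note: the remaining cycle budget is kept in the upper bits of the cached
// SR (shifted left by 12); FLUSH_CYCLES subtracts the locally accumulated
// cycle count in one go, and block entries compare SR against 0 to detect
// an exhausted budget.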

static void *dr_get_pc_base(u32 pc, SH2 *sh2);

static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
{
  // branch targets in current block
  static struct linkage branch_targets[MAX_LOCAL_TARGETS];
  int branch_target_count = 0;
  // unresolved local or external targets with block link/exit area if needed
  static struct linkage blx_targets[MAX_LOCAL_BRANCHES];
  int blx_target_count = 0;

  static u8 op_flags[BLOCK_INSN_LIMIT];

  enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 };
  struct drcf {
    int delay_reg:8;
    u32 loop_type:8;
    u32 polling:8;
    u32 pinning:1;
    u32 test_irq:1;
    u32 pending_branch_direct:1;
    u32 pending_branch_indirect:1;
    u32 Tflag:2, Mflag:2;
  } drcf = { 0, };

#if LOOP_OPTIMIZER
  // loops with pinned registers for optimization
  // pinned regs are like statics and don't need saving/restoring inside a loop
  static struct linkage pinned_loops[MAX_LOCAL_TARGETS/16];
  int pinned_loop_count = 0;
#endif

  // PC of current, first, last SH2 insn
  u32 pc, base_pc, end_pc;
  u32 base_literals, end_literals;
  u8 *block_entry_ptr;
  struct block_desc *block;
  struct block_entry *entry;
  struct block_link *bl;
  u16 *dr_pc_base;
  struct op_data *opd;
  int blkid_main = 0;
  int skip_op = 0;
  int tmp, tmp2;
  int cycles;
  int i, v;
  u32 u, m1, m2, m3, m4;
  int op;
  u16 crc;

  base_pc = sh2->pc;

  // get base/validate PC
  dr_pc_base = dr_get_pc_base(base_pc, sh2);
  if (dr_pc_base == (void *)-1) {
    printf("invalid PC, aborting: %08lx\n", (long)base_pc);
    // FIXME: be less destructive
    exit(1);
  }

  // initial passes to disassemble and analyze the block
  crc = scan_block(base_pc, sh2->is_slave, op_flags, &end_pc, &base_literals, &end_literals);
  end_literals = dr_check_nolit(base_literals, end_literals, tcache_id);
  if (base_literals == end_literals) // map empty lit section to end of code
    base_literals = end_literals = end_pc;

  // if there is already a translated but inactive block, reuse it
  block = dr_find_inactive_block(tcache_id, crc, base_pc, end_pc - base_pc,
    base_literals, end_literals - base_literals);

  if (block) {
    dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm',
      base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr);
    dr_activate_block(block, tcache_id, sh2->is_slave);
    emith_update_cache();
    return block->entryp[0].tcache_ptr;
  }

  // collect branch_targets that don't land on delay slots
  m1 = m2 = m3 = m4 = v = op = 0;
  for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) {
    if (op_flags[i] & OF_DELAY_OP)
      op_flags[i] &= ~OF_BTARGET;
    if (op_flags[i] & OF_BTARGET) {
      if (branch_target_count < ARRAY_SIZE(branch_targets))
        branch_targets[branch_target_count++] = (struct linkage) { .pc = pc };
      else {
        printf("warning: linkage overflow\n");
        end_pc = pc;
        break;
      }
    }
    if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc)
      op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change
    // unify T and SR since rcache doesn't know about "virtual" guest regs
    if (ops[i].source & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR);
    if (ops[i].dest   & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR);
    if (ops[i].dest   & BITMASK1(SHR_T)) ops[i].dest   |= BITMASK1(SHR_SR);
#if LOOP_DETECTION
    // loop types detected:
    // 1. target: ... BRA target -> idle loop
    // 2. target: ... delay insn ... BF target -> delay loop
    // 3. target: ... poll insn ... BF/BT target -> poll loop
    // 4. target: ... poll insn ... BF/BT exit ... BRA target, exit: -> poll
    // conditions:
    // a. no further branch targets between target and back jump.
    // b. no unconditional branch insn inside the loop.
    // c. exactly one poll or delay insn is allowed inside a delay/poll loop
    // (scan_block marks loops only if they meet conditions a through c)
    // d. idle loops do not modify anything but PC,SR and contain no branches
    // e. delay/poll loops do not modify anything but the concerned reg,PC,SR
    // f. loading constants into registers inside the loop is allowed
    // g. a delay/poll loop must have a conditional branch somewhere
    // h. an idle loop must not have a conditional branch
    if (op_flags[i] & OF_BTARGET) {
      // possible loop entry point
      drcf.loop_type = op_flags[i] & OF_LOOP;
      drcf.pending_branch_direct = drcf.pending_branch_indirect = 0;
      op = OF_IDLE_LOOP; // loop type
      v = i;
      m1 = m2 = m3 = m4 = 0;
      if (!drcf.loop_type) // reset basic loop if it isn't recognized as loop
        op_flags[i] &= ~OF_BASIC_LOOP;
    }
    if (drcf.loop_type) {
      // calculate reg masks for loop pinning
      m4 |= ops[i].source & ~m3;
      m3 |= ops[i].dest;
      // detect loop type, and store poll/delay register
      if (op_flags[i] & OF_POLL_INSN) {
        op = OF_POLL_LOOP;
        m1 |= ops[i].dest; // loop poll/delay regs
      } else if (op_flags[i] & OF_DELAY_INSN) {
        op = OF_DELAY_LOOP;
        m1 |= ops[i].dest;
      } else if (ops[i].op != OP_LOAD_POOL && ops[i].op != OP_LOAD_CONST
              && (ops[i].op != OP_MOVE || op != OF_POLL_LOOP)) {
        // not (MOV @(PC) or MOV # or (MOV reg and poll)), condition f
        m2 |= ops[i].dest; // regs modified by other insns
      }
      // branch detector
      if (OP_ISBRAIMM(ops[i].op)) {
        if (ops[i].imm == base_pc + 2*v)
          drcf.pending_branch_direct = 1; // backward branch detected
        else
          op_flags[v] &= ~OF_BASIC_LOOP; // no basic loop
      }
      if (OP_ISBRACND(ops[i].op))
        drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch
      // poll/idle loops terminate with their backwards branch to the loop start
      if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) {
        m2 &= ~(m1 | BITMASK3(SHR_PC, SHR_SR, SHR_T)); // conditions d,e + g,h
        if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect)))
          op = 0; // conditions not met
        op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type
        drcf.loop_type = 0;
#if LOOP_OPTIMIZER
        if (op_flags[v] & OF_BASIC_LOOP) {
          m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM);
          if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) &&
              pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) {
            pinned_loops[pinned_loop_count++] =
                (struct linkage) { .pc = base_pc + 2*v, .mask = m3 };
          } else
            op_flags[v] &= ~OF_BASIC_LOOP;
        }
#endif
      }
    }
#endif
  }
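  // Note: scan_block above was the first pass (disassembly and analysis);
  // the loop just finished is a second pass refining branch targets and
  // classifying loops. Code is only emitted in the third pass below.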
3354
3355 tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2, branch_target_count);
3356 #if (DRC_DEBUG & 4)
3357 tcache_dsm_ptrs[tcache_id] = tcache_ptr;
3358 #endif
3359
3360 block = dr_add_block(branch_target_count, base_pc, end_pc - base_pc,
3361 base_literals, end_literals-base_literals, crc, sh2->is_slave, &blkid_main);
3362 if (block == NULL)
3363 return NULL;
3364
3365 block_entry_ptr = tcache_ptr;
3366 dbg(2, "== %csh2 block #%d,%d %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm',
3367 tcache_id, blkid_main, base_pc, end_pc, base_literals, end_literals, block_entry_ptr);
3368
3369
3370 // clear stale state after compile errors
3371 rcache_invalidate();
3372 emith_invalidate_t();
3373 drcf = (struct drcf) { 0 };
3374 #if LOOP_OPTIMIZER
3375 pinned_loops[pinned_loop_count].pc = -1;
3376 pinned_loop_count = 0;
3377 #endif
3378
3379 // -------------------------------------------------
3380 // 3rd pass: actual compilation
3381 pc = base_pc;
3382 cycles = 0;
3383 for (i = 0; pc < end_pc; i++)
3384 {
3385 u32 delay_dep_fw = 0, delay_dep_bk = 0;
3386 int tmp3, tmp4;
3387 int sr;
3388
3389 if (op_flags[i] & OF_BTARGET)
3390 {
3391 if (pc != base_pc)
3392 {
3393 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3394 FLUSH_CYCLES(sr);
3395 emith_sync_t(sr);
3396 drcf.Mflag = FLG_UNKNOWN;
3397 rcache_flush();
3398 emith_flush();
3399 }
3400
3401 // make block entry
3402 v = block->entry_count;
3403 entry = &block->entryp[v];
3404 if (v < branch_target_count)
3405 {
3406 entry = &block->entryp[v];
3407 entry->pc = pc;
3408 entry->tcache_ptr = tcache_ptr;
3409 entry->links = entry->o_links = NULL;
3410 #if (DRC_DEBUG & 2)
3411 entry->block = block;
3412 #endif
3413 block->entry_count++;
3414
3415 dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p",
3416 sh2->is_slave ? 's' : 'm', tcache_id, blkid_main,
3417 pc, tcache_ptr);
3418 }
3419 else {
3420 dbg(1, "too many entryp for block #%d,%d pc=%08x",
3421 tcache_id, blkid_main, pc);
3422 break;
3423 }
3424
3425 v = find_in_sorted_linkage(branch_targets, branch_target_count, pc);
3426 if (v >= 0)
3427 branch_targets[v].ptr = tcache_ptr;
3428 #if LOOP_DETECTION
3429 drcf.loop_type = op_flags[i] & OF_LOOP;
3430 drcf.delay_reg = -1;
3431 drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? MF_POLLING : 0);
3432 #endif
3433
3434 rcache_clean();
3435
3436 #if (DRC_DEBUG & 0x10)
3437 tmp = rcache_get_tmp_arg(0);
3438 emith_move_r_imm(tmp, pc);
3439 tmp = emit_memhandler_read(1);
3440 tmp2 = rcache_get_tmp();
3441 tmp3 = rcache_get_tmp();
3442 emith_move_r_imm(tmp2, (s16)FETCH_OP(pc));
3443 emith_move_r_imm(tmp3, 0);
3444 emith_cmp_r_r(tmp, tmp2);
3445 EMITH_SJMP_START(DCOND_EQ);
3446 emith_read_r_r_offs_c(DCOND_NE, tmp3, tmp3, 0); // crash
3447 EMITH_SJMP_END(DCOND_EQ);
3448 rcache_free_tmp(tmp);
3449 rcache_free_tmp(tmp2);
3450 rcache_free_tmp(tmp3);
3451 #endif
3452
3453 // check cycles
3454 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
3455
3456 #if LOOP_OPTIMIZER
3457 if (op_flags[i] & OF_BASIC_LOOP) {
3458 if (pinned_loops[pinned_loop_count].pc == pc) {
3459 // pin needed regs on loop entry
3460 FOR_ALL_BITS_SET_DO(pinned_loops[pinned_loop_count].mask, v, rcache_pin_reg(v));
3461 emith_flush();
3462 // store current PC as loop target
3463 pinned_loops[pinned_loop_count].ptr = tcache_ptr;
3464 drcf.pinning = 1;
3465 } else
3466 op_flags[i] &= ~OF_BASIC_LOOP;
3467 }
3468
3469 if (op_flags[i] & OF_BASIC_LOOP) {
3470 // if exiting a pinned loop pinned regs must be written back to ctx
3471 // since they are reloaded in the loop entry code
3472 emith_cmp_r_imm(sr, 0);
3473 EMITH_JMP_START(DCOND_GT);
3474 rcache_save_pinned();
3475
3476 if (blx_target_count < ARRAY_SIZE(blx_targets)) {
3477 // exit via stub in blx table (saves some 1-3 insns in the main flow)
3478 blx_targets[blx_target_count++] =
3479 (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 };
3480 emith_jump_patchable(tcache_ptr);
3481 } else {
3482 // blx table full, must inline exit code
3483 tmp = rcache_get_tmp_arg(0);
3484 emith_move_r_imm(tmp, pc);
3485 emith_jump(sh2_drc_exit);
3486 rcache_free_tmp(tmp);
3487 }
3488 EMITH_JMP_END(DCOND_GT);
3489 } else
3490 #endif
3491 {
3492 if (blx_target_count < ARRAY_SIZE(blx_targets)) {
          // exit via stub in blx table (saves 1-3 insns in the main flow)
3494 emith_cmp_r_imm(sr, 0);
3495 blx_targets[blx_target_count++] =
3496 (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 };
3497 emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
3498 } else {
3499 // blx table full, must inline exit code
3500 tmp = rcache_get_tmp_arg(0);
3501 emith_cmp_r_imm(sr, 0);
3502 EMITH_SJMP_START(DCOND_GT);
3503 emith_move_r_imm_c(DCOND_LE, tmp, pc);
3504 emith_jump_cond(DCOND_LE, sh2_drc_exit);
3505 EMITH_SJMP_END(DCOND_GT);
3506 rcache_free_tmp(tmp);
3507 }
3508 }
3509
3510 #if (DRC_DEBUG & 32)
3511 // block hit counter
3512 tmp = rcache_get_tmp_arg(0);
3513 tmp2 = rcache_get_tmp_arg(1);
3514 emith_move_r_ptr_imm(tmp, (uptr)entry);
3515 emith_read_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count));
3516 emith_add_r_imm(tmp2, 1);
3517 emith_write_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count));
3518 rcache_free_tmp(tmp);
3519 rcache_free_tmp(tmp2);
3520 #endif
3521
3522 #if (DRC_DEBUG & (8|256|512|1024))
3523 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3524 emith_sync_t(sr);
3525 rcache_clean();
3526 tmp = rcache_used_hregs_mask();
3527 emith_save_caller_regs(tmp);
3528 emit_do_static_regs(1, 0);
3529 rcache_get_reg_arg(2, SHR_SR, NULL);
3530 tmp2 = rcache_get_tmp_arg(0);
3531 tmp3 = rcache_get_tmp_arg(1);
3532 tmp4 = rcache_get_tmp();
3533 emith_move_r_ptr_imm(tmp2, tcache_ptr);
3534 emith_move_r_r_ptr(tmp3, CONTEXT_REG);
3535 emith_move_r_imm(tmp4, pc);
3536 emith_ctx_write(tmp4, SHR_PC * 4);
3537 rcache_invalidate_tmp();
3538 emith_abicall(sh2_drc_log_entry);
3539 emith_restore_caller_regs(tmp);
3540 #endif
3541
3542 do_host_disasm(tcache_id);
3543 rcache_unlock_all();
3544 }
3545
3546 #ifdef DRC_CMP
3547 if (!(op_flags[i] & OF_DELAY_OP)) {
3548 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3549 FLUSH_CYCLES(sr);
3550 emith_sync_t(sr);
3551 emit_move_r_imm32(SHR_PC, pc);
3552 rcache_clean();
3553
3554 tmp = rcache_used_hregs_mask();
3555 emith_save_caller_regs(tmp);
3556 emit_do_static_regs(1, 0);
3557 emith_pass_arg_r(0, CONTEXT_REG);
3558 emith_abicall(do_sh2_cmp);
3559 emith_restore_caller_regs(tmp);
3560 }
3561 #endif
3562
3563 // emit blx area if limits are approached
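    // (conditional host branches have limited displacement range, so pending
    // stubs must be emitted while the oldest one is still reachable)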
3564 if (blx_target_count && (blx_target_count > ARRAY_SIZE(blx_targets)-4 ||
3565 !emith_jump_patch_inrange(blx_targets[0].ptr, tcache_ptr+0x100))) {
3566 u8 *jp;
3567 rcache_invalidate_tmp();
3568 jp = tcache_ptr;
3569 emith_jump_patchable(tcache_ptr);
3570 emit_branch_linkage_code(sh2, block, tcache_id, branch_targets,
3571 branch_target_count, blx_targets, blx_target_count);
3572 blx_target_count = 0;
3573 do_host_disasm(tcache_id);
3574 emith_jump_patch(jp, tcache_ptr, NULL);
3575 }
3576
3577 emith_pool_check();
3578
3579 opd = &ops[i];
3580 op = FETCH_OP(pc);
3581 #if (DRC_DEBUG & 4)
3582 DasmSH2(sh2dasm_buff, pc, op);
3583 if (op_flags[i] & OF_BTARGET) {
3584 if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+';
3585 else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '=';
3586 else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~';
3587 else tmp3 = '*';
3588 } else if (drcf.loop_type) tmp3 = '.';
3589 else tmp3 = ' ';
3590 printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff);
3591 #endif
3592
3593 pc += 2;
3594 #if (DRC_DEBUG & 2)
3595 insns_compiled++;
3596 #endif
3597 if (skip_op > 0) {
3598 skip_op--;
3599 continue;
3600 }
3601
3602 if (op_flags[i] & OF_DELAY_OP)
3603 {
3604 // handle delay slot dependencies
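      // fw: regs the slot insn writes that the branch insn still reads
      //     (effectively T for conditional branches)
      // bk: regs the slot insn reads that the branch insn writes (PC, PR)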
3605 delay_dep_fw = opd->dest & ops[i-1].source;
3606 delay_dep_bk = opd->source & ops[i-1].dest;
3607 if (delay_dep_fw & BITMASK1(SHR_T)) {
3608 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3609 emith_sync_t(sr);
3610 DELAY_SAVE_T(sr);
3611 }
3612 if (delay_dep_bk & BITMASK1(SHR_PC)) {
3613 if (opd->op != OP_LOAD_POOL && opd->op != OP_MOVA) {
3614 // can only be those 2 really..
3615 elprintf_sh2(sh2, EL_ANOMALY,
3616 "drc: illegal slot insn %04x @ %08x?", op, pc - 2);
3617 }
3618 // store PC for MOVA/MOV @PC address calculation
3619 if (opd->imm != 0)
3620 ; // case OP_BRANCH - addr already resolved in scan_block
3621 else {
3622 switch (ops[i-1].op) {
3623 case OP_BRANCH:
3624 emit_move_r_imm32(SHR_PC, ops[i-1].imm);
3625 break;
3626 case OP_BRANCH_CT:
3627 case OP_BRANCH_CF:
3628 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
3629 tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL);
3630 emith_move_r_imm(tmp, pc);
3631 tmp2 = emith_tst_t(sr, (ops[i-1].op == OP_BRANCH_CT));
3632 tmp3 = emith_invert_cond(tmp2);
3633 EMITH_SJMP_START(tmp3);
3634 emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm);
3635 EMITH_SJMP_END(tmp3);
3636 break;
3637 case OP_BRANCH_N: // BT/BF known not to be taken
3638 // XXX could modify opd->imm instead?
3639 emit_move_r_imm32(SHR_PC, pc);
3640 break;
3641 // case OP_BRANCH_R OP_BRANCH_RF - PC already loaded
3642 }
3643 }
3644 }
3645 //if (delay_dep_fw & ~BITMASK1(SHR_T))
3646 // dbg(1, "unhandled delay_dep_fw: %x", delay_dep_fw & ~BITMASK1(SHR_T));
3647 if (delay_dep_bk & ~BITMASK2(SHR_PC, SHR_PR))
3648 dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk);
3649 }
3650
3651 // inform cache about future register usage
3652 u32 late = 0; // regs read by future ops
3653 u32 write = 0; // regs written to (to detect write before read)
3654 u32 soon = 0; // regs read soon
3655 for (v = 1; v <= 9; v++) {
3656 // no sense in looking any further than the next rcache flush
3657 tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) ||
3658 (OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP)));
3659 // XXX looking behind cond branch to avoid evicting regs used later?
3660 if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above)
3661 late |= opd[v].source & ~write;
3662 // ignore source regs after they have been written to
3663 write |= opd[v].dest;
3664 // regs needed in the next few instructions
3665 if (v <= 4)
3666 soon = late;
3667 } else
3668 break;
3669 }
3670 rcache_set_usage_now(opd[0].source); // current insn
3671 rcache_set_usage_soon(soon); // insns 1-4
3672 rcache_set_usage_late(late & ~soon); // insns 5-9
3673 rcache_set_usage_discard(write & ~(late|soon));
3674 if (v <= 9)
3675 // upcoming rcache_flush, start writing back unused dirty stuff
3676 rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest));
3677
3678 switch (opd->op)
3679 {
3680 case OP_BRANCH_N:
3681 // never taken, just use up cycles
3682 goto end_op;
3683 case OP_BRANCH:
3684 case OP_BRANCH_CT:
3685 case OP_BRANCH_CF:
3686 if (opd->dest & BITMASK1(SHR_PR))
3687 emit_move_r_imm32(SHR_PR, pc + 2);
3688 drcf.pending_branch_direct = 1;
3689 goto end_op;
3690
3691 case OP_BRANCH_R:
3692 if (opd->dest & BITMASK1(SHR_PR))
3693 emit_move_r_imm32(SHR_PR, pc + 2);
3694 emit_move_r_r(SHR_PC, opd->rm);
3695 drcf.pending_branch_indirect = 1;
3696 goto end_op;
3697
3698 case OP_BRANCH_RF:
3699 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
3700 tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL);
3701 emith_move_r_imm(tmp, pc + 2);
3702 if (opd->dest & BITMASK1(SHR_PR)) {
3703 tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE, NULL);
3704 emith_move_r_r(tmp3, tmp);
3705 }
3706 emith_add_r_r(tmp, tmp2);
3707 if (gconst_get(GET_Rn(), &u))
3708 gconst_set(SHR_PC, pc + 2 + u);
3709 drcf.pending_branch_indirect = 1;
3710 goto end_op;
3711
3712 case OP_SLEEP: // SLEEP 0000000000011011
3713 printf("TODO sleep\n");
3714 goto end_op;
3715
3716 case OP_RTE: // RTE 0000000000101011
3717 emith_invalidate_t();
3718 // pop PC
3719 tmp = emit_memhandler_read_rr(sh2, SHR_PC, SHR_SP, 0, 2 | MF_POSTINCR);
3720 rcache_free(tmp);
3721 // pop SR
3722 tmp = emit_memhandler_read_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_POSTINCR);
3723 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3724 emith_write_sr(sr, tmp);
3725 rcache_free_tmp(tmp);
3726 drcf.test_irq = 1;
3727 drcf.pending_branch_indirect = 1;
3728 goto end_op;
3729
3730 case OP_UNDEFINED:
3731 elprintf_sh2(sh2, EL_ANOMALY, "drc: unhandled op %04x @ %08x", op, pc-2);
3732 opd->imm = (op_flags[i] & OF_B_IN_DS) ? 6 : 4;
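      // exception vector: 4 = general illegal insn, 6 = illegal slot insn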
3733 // fallthrough
3734 case OP_TRAPA: // TRAPA #imm 11000011iiiiiiii
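      // exception sequence: push SR, then PC, then fetch the new PC from the
      // vector table at VBR + vector*4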
3735 // push SR
3736 tmp = rcache_get_reg_arg(1, SHR_SR, &tmp2);
3737 emith_sync_t(tmp2);
3738 emith_clear_msb(tmp, tmp2, 22);
3739 emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR);
3740 // push PC
3741 if (opd->op == OP_TRAPA) {
3742 tmp = rcache_get_tmp_arg(1);
3743 emith_move_r_imm(tmp, pc);
3744 } else if (drcf.pending_branch_indirect) {
3745 tmp = rcache_get_reg_arg(1, SHR_PC, NULL);
3746 } else {
3747 tmp = rcache_get_tmp_arg(1);
3748 emith_move_r_imm(tmp, pc - 2);
3749 }
3750 emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR);
3751 // obtain new PC
3752 emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2);
3753 // indirect jump -> back to dispatcher
3754 drcf.pending_branch_indirect = 1;
3755 goto end_op;
3756
3757 case OP_LOAD_POOL:
3758 #if PROPAGATE_CONSTANTS
3759 if ((opd->imm && opd->imm >= base_pc && opd->imm < end_literals) ||
3760 dr_is_rom(opd->imm))
3761 {
3762 if (opd->size == 2)
3763 u = FETCH32(opd->imm);
3764 else
3765 u = (s16)FETCH_OP(opd->imm);
3766 // tweak for Blackthorne: avoid stack overwriting
3767 if (GET_Rn() == SHR_SP && u == 0x0603f800) u = 0x0603f880;
3768 gconst_new(GET_Rn(), u);
3769 }
3770 else
3771 #endif
3772 {
3773 if (opd->imm != 0) {
3774 tmp = rcache_get_tmp_arg(0);
3775 emith_move_r_imm(tmp, opd->imm);
3776 } else {
3777 // have to calculate read addr from PC for delay slot
3778 tmp = rcache_get_reg_arg(0, SHR_PC, &tmp2);
3779 if (opd->size == 2) {
3780 emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4);
3781 emith_bic_r_imm(tmp, 3);
3782 }
3783 else
3784 emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 2);
3785 }
3786 tmp2 = emit_memhandler_read(opd->size);
3787 tmp3 = rcache_map_reg(GET_Rn(), tmp2);
3788 if (tmp3 != tmp2) {
3789 emith_move_r_r(tmp3, tmp2);
3790 rcache_free_tmp(tmp2);
3791 }
3792 }
3793 goto end_op;
3794
3795 case OP_MOVA: // MOVA @(disp,PC),R0 11000111dddddddd
3796 if (opd->imm != 0)
3797 emit_move_r_imm32(SHR_R0, opd->imm);
3798 else {
3799 // have to calculate addr from PC for delay slot
3800 tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ, NULL);
3801 tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE, NULL);
3802 emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4);
3803 emith_bic_r_imm(tmp, 3);
3804 }
3805 goto end_op;
3806 }
3807
3808 switch ((op >> 12) & 0x0f)
3809 {
3810 /////////////////////////////////////////////
3811 case 0x00:
3812 switch (op & 0x0f)
3813 {
3814 case 0x02:
3815 switch (GET_Fx())
3816 {
3817 case 0: // STC SR,Rn 0000nnnn00000010
3818 tmp2 = SHR_SR;
3819 break;
3820 case 1: // STC GBR,Rn 0000nnnn00010010
3821 tmp2 = SHR_GBR;
3822 break;
3823 case 2: // STC VBR,Rn 0000nnnn00100010
3824 tmp2 = SHR_VBR;
3825 break;
3826 default:
3827 goto default_;
3828 }
3829 if (tmp2 == SHR_SR) {
3830 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
3831 emith_sync_t(sr);
3832 tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL);
3833 emith_clear_msb(tmp, sr, 22); // reserved bits defined by ISA as 0
3834 } else
3835 emit_move_r_r(GET_Rn(), tmp2);
3836 goto end_op;
3837 case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100
3838 case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101
3839 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110
3840 emit_indirect_indexed_write(sh2, GET_Rm(), SHR_R0, GET_Rn(), op & 3);
3841 goto end_op;
3842 case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111
3843 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
3844 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
3845 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
3846 emith_mul(tmp3, tmp2, tmp);
3847 goto end_op;
3848 case 0x08:
3849 switch (GET_Fx())
3850 {
3851 case 0: // CLRT 0000000000001000
3852 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3853 #if T_OPTIMIZER
3854 if (~rcache_regs_discard & BITMASK1(SHR_T))
3855 #endif
3856 emith_set_t(sr, 0);
3857 break;
3858 case 1: // SETT 0000000000011000
3859 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3860 #if T_OPTIMIZER
3861 if (~rcache_regs_discard & BITMASK1(SHR_T))
3862 #endif
3863 emith_set_t(sr, 1);
3864 break;
3865 case 2: // CLRMAC 0000000000101000
3866 emit_move_r_imm32(SHR_MACL, 0);
3867 emit_move_r_imm32(SHR_MACH, 0);
3868 break;
3869 default:
3870 goto default_;
3871 }
3872 goto end_op;
3873 case 0x09:
3874 switch (GET_Fx())
3875 {
3876 case 0: // NOP 0000000000001001
3877 break;
3878 case 1: // DIV0U 0000000000011001
3879 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3880 emith_invalidate_t();
3881 emith_bic_r_imm(sr, M|Q|T);
3882 drcf.Mflag = FLG_0;
3883 #if DIV_OPTIMIZER
3884 if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) {
3885 // divide 32/16
3886 rcache_get_reg_arg(0, div(opd).rn, NULL);
3887 rcache_get_reg_arg(1, div(opd).rm, NULL);
3888 rcache_invalidate_tmp();
3889 emith_abicall(sh2_drc_divu32);
3890 tmp = rcache_get_tmp_ret();
3891 tmp2 = rcache_map_reg(div(opd).rn, tmp);
3892 if (tmp != tmp2)
3893 emith_move_r_r(tmp2, tmp);
3894
3895 tmp3 = rcache_get_tmp();
3896 emith_and_r_r_imm(tmp3, tmp2, 1); // Q = !Rn[0]
3897 emith_eor_r_r_imm(tmp3, tmp3, 1);
3898 emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);
3899 rcache_free_tmp(tmp3);
3900 emith_or_r_r_r_lsr(sr, sr, tmp2, 31); // T = Rn[31]
3901 skip_op = div(opd).div1 + div(opd).rotcl;
3902 }
3903 else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) {
3904 // divide 64/32
3905 tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL);
3906 emith_ctx_write(tmp4, offsetof(SH2, drc_tmp));
3907 tmp = rcache_get_tmp_arg(1);
3908 emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp));
3909 rcache_get_reg_arg(0, div(opd).rn, NULL);
3910 rcache_get_reg_arg(2, div(opd).rm, NULL);
3911 rcache_invalidate_tmp();
3912 emith_abicall(sh2_drc_divu64);
3913 tmp = rcache_get_tmp_ret();
3914 tmp2 = rcache_map_reg(div(opd).rn, tmp);
3915 tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL);
3916 if (tmp != tmp2)
3917 emith_move_r_r(tmp2, tmp);
3918 emith_ctx_read(tmp4, offsetof(SH2, drc_tmp));
3919
3920 tmp3 = rcache_get_tmp();
3921 emith_and_r_r_imm(tmp3, tmp4, 1); // Q = !Ro[0]
3922 emith_eor_r_r_imm(tmp3, tmp3, 1);
3923 emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);
3924 rcache_free_tmp(tmp3);
3925 emith_or_r_r_r_lsr(sr, sr, tmp4, 31); // T = Ro[31]
3926 skip_op = div(opd).div1 + div(opd).rotcl;
3927 }
3928 #endif
3929 break;
3930 case 2: // MOVT Rn 0000nnnn00101001
3931 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
3932 emith_sync_t(sr);
3933 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL);
3934 emith_clear_msb(tmp2, sr, 31);
3935 break;
3936 default:
3937 goto default_;
3938 }
3939 goto end_op;
3940 case 0x0a:
3941 switch (GET_Fx())
3942 {
3943 case 0: // STS MACH,Rn 0000nnnn00001010
3944 tmp2 = SHR_MACH;
3945 break;
3946 case 1: // STS MACL,Rn 0000nnnn00011010
3947 tmp2 = SHR_MACL;
3948 break;
3949 case 2: // STS PR,Rn 0000nnnn00101010
3950 tmp2 = SHR_PR;
3951 break;
3952 default:
3953 goto default_;
3954 }
3955 emit_move_r_r(GET_Rn(), tmp2);
3956 goto end_op;
3957 case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100
3958 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101
3959 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110
3960 emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), (op & 3) | drcf.polling);
3961 goto end_op;
3962 case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111
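        // MAC += Rm * Rn (64 bit); if the S flag in sr is set, the helper
        // saturates the result to 48 bits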
3963 emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 2);
3964 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
3965 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL);
3966 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL);
3967 emith_sh2_macl(tmp3, tmp4, tmp, tmp2, sr);
3968 rcache_free_tmp(tmp2);
3969 rcache_free_tmp(tmp);
3970 goto end_op;
3971 }
3972 goto default_;
3973
3974 /////////////////////////////////////////////
3975 case 0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd
3976 emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2);
3977 goto end_op;
3978
3979 case 0x02:
3980 switch (op & 0x0f)
3981 {
3982 case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000
3983 case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001
3984 case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010
3985 emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, op & 3);
3986 goto end_op;
3987 case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100
3988 case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101
3989 case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110
3990 emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, (op & 3) | MF_PREDECR);
3991 goto end_op;
3992 case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111
3993 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3994 emith_invalidate_t();
3995 emith_bic_r_imm(sr, M|Q|T);
3996 drcf.Mflag = FLG_UNKNOWN;
3997 #if DIV_OPTIMIZER
3998 if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) {
3999 // divide 32/16
4000 rcache_get_reg_arg(0, div(opd).rn, NULL);
4001 tmp2 = rcache_get_reg_arg(1, div(opd).rm, NULL);
4002 tmp3 = rcache_get_tmp();
4003 emith_lsr(tmp3, tmp2, 31);
4004 emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31]
4005 rcache_invalidate_tmp();
4006 emith_abicall(sh2_drc_divs32);
4007 tmp = rcache_get_tmp_ret();
4008 tmp2 = rcache_map_reg(div(opd).rn, tmp);
4009 if (tmp != tmp2)
4010 emith_move_r_r(tmp2, tmp);
4011 tmp3 = rcache_get_tmp();
4012
4013 emith_eor_r_r_r_lsr(tmp3, tmp2, sr, M_SHIFT);
4014 emith_and_r_r_imm(tmp3, tmp3, 1);
4015 emith_eor_r_r_imm(tmp3, tmp3, 1);
          emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);   // Q = !(Rn[0]^M)
4017 rcache_free_tmp(tmp3);
4018 emith_or_r_r_r_lsr(sr, sr, tmp2, 31); // T = Rn[31]
4019 skip_op = div(opd).div1 + div(opd).rotcl;
4020 }
4021 else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) {
4022 // divide 64/32
4023 tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL);
4024 emith_ctx_write(tmp4, offsetof(SH2, drc_tmp));
4025 rcache_get_reg_arg(0, div(opd).rn, NULL);
4026 tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL);
4027 tmp3 = rcache_get_tmp_arg(1);
4028 emith_lsr(tmp3, tmp2, 31);
4029 emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31]
4030 emith_add_r_r_ptr_imm(tmp3, CONTEXT_REG, offsetof(SH2, drc_tmp));
4031 rcache_invalidate_tmp();
4032 emith_abicall(sh2_drc_divs64);
4033 tmp = rcache_get_tmp_ret();
4034 tmp2 = rcache_map_reg(div(opd).rn, tmp);
4035 tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL);
4036 if (tmp != tmp2)
4037 emith_move_r_r(tmp2, tmp);
4038 emith_ctx_read(tmp4, offsetof(SH2, drc_tmp));
4039
4040 tmp3 = rcache_get_tmp();
4041 emith_eor_r_r_r_lsr(tmp3, tmp4, sr, M_SHIFT);
4042 emith_and_r_r_imm(tmp3, tmp3, 1);
4043 emith_eor_r_r_imm(tmp3, tmp3, 1);
          emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);   // Q = !(Ro[0]^M)
4045 rcache_free_tmp(tmp3);
4046 emith_or_r_r_r_lsr(sr, sr, tmp4, 31); // T = Ro[31]
4047 skip_op = div(opd).div1 + div(opd).rotcl;
4048 } else
4049 #endif
4050 {
4051 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4052 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4053 tmp = rcache_get_tmp();
4054 emith_lsr(tmp, tmp2, 31); // Q = Nn
4055 emith_or_r_r_lsl(sr, tmp, Q_SHIFT);
4056 emith_lsr(tmp, tmp3, 31); // M = Nm
4057 emith_or_r_r_lsl(sr, tmp, M_SHIFT);
4058 emith_eor_r_r_lsr(tmp, tmp2, 31);
4059 emith_or_r_r(sr, tmp); // T = Q^M
4060 rcache_free(tmp);
4061 }
4062 goto end_op;
4063 case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
4064 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4065 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4066 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4067 emith_clr_t_cond(sr);
4068 emith_tst_r_r(tmp2, tmp3);
4069 emith_set_t_cond(sr, DCOND_EQ);
4070 goto end_op;
4071 case 0x09: // AND Rm,Rn 0010nnnnmmmm1001
4072 if (GET_Rm() != GET_Rn()) {
4073 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4074 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4075 emith_and_r_r_r(tmp, tmp3, tmp2);
4076 }
4077 goto end_op;
4078 case 0x0a: // XOR Rm,Rn 0010nnnnmmmm1010
4079 #if PROPAGATE_CONSTANTS
4080 if (GET_Rn() == GET_Rm()) {
4081 gconst_new(GET_Rn(), 0);
4082 goto end_op;
4083 }
4084 #endif
4085 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4086 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4087 emith_eor_r_r_r(tmp, tmp3, tmp2);
4088 goto end_op;
4089 case 0x0b: // OR Rm,Rn 0010nnnnmmmm1011
4090 if (GET_Rm() != GET_Rn()) {
4091 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4092 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4093 emith_or_r_r_r(tmp, tmp3, tmp2);
4094 }
4095 goto end_op;
4096 case 0x0c: // CMP/STR Rm,Rn 0010nnnnmmmm1100
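        // T = 1 if any byte position in Rn and Rm holds the same value,
        // i.e. if Rn^Rm contains a zero byte; test the 4 bytes in turn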
4097 tmp = rcache_get_tmp();
4098 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4099 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4100 emith_eor_r_r_r(tmp, tmp2, tmp3);
4101 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4102 emith_clr_t_cond(sr);
4103 emith_tst_r_imm(tmp, 0x000000ff);
4104 EMITH_SJMP_START(DCOND_EQ);
4105 emith_tst_r_imm_c(DCOND_NE, tmp, 0x0000ff00);
4106 EMITH_SJMP_START(DCOND_EQ);
4107 emith_tst_r_imm_c(DCOND_NE, tmp, 0x00ff0000);
4108 EMITH_SJMP_START(DCOND_EQ);
4109 emith_tst_r_imm_c(DCOND_NE, tmp, 0xff000000);
4110 EMITH_SJMP_END(DCOND_EQ);
4111 EMITH_SJMP_END(DCOND_EQ);
4112 EMITH_SJMP_END(DCOND_EQ);
4113 emith_set_t_cond(sr, DCOND_EQ);
4114 rcache_free_tmp(tmp);
4115 goto end_op;
4116 case 0x0d: // XTRCT Rm,Rn 0010nnnnmmmm1101
4117 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4118 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4119 emith_lsr(tmp, tmp3, 16);
4120 emith_or_r_r_lsl(tmp, tmp2, 16);
4121 goto end_op;
4122 case 0x0e: // MULU.W Rm,Rn 0010nnnnmmmm1110
4123 case 0x0f: // MULS.W Rm,Rn 0010nnnnmmmm1111
4124 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4125 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4126 tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
4127 tmp4 = tmp3;
4128 if (op & 1) {
4129 if (! rcache_is_s16(tmp2)) {
4130 emith_sext(tmp, tmp2, 16);
4131 tmp2 = tmp;
4132 }
4133 if (! rcache_is_s16(tmp3)) {
4134 tmp4 = rcache_get_tmp();
4135 emith_sext(tmp4, tmp3, 16);
4136 }
4137 } else {
4138 if (! rcache_is_u16(tmp2)) {
4139 emith_clear_msb(tmp, tmp2, 16);
4140 tmp2 = tmp;
4141 }
4142 if (! rcache_is_u16(tmp3)) {
4143 tmp4 = rcache_get_tmp();
4144 emith_clear_msb(tmp4, tmp3, 16);
4145 }
4146 }
4147 emith_mul(tmp, tmp2, tmp4);
4148 if (tmp4 != tmp3)
4149 rcache_free_tmp(tmp4);
4150 goto end_op;
4151 }
4152 goto default_;
4153
4154 /////////////////////////////////////////////
4155 case 0x03:
4156 switch (op & 0x0f)
4157 {
4158 case 0x00: // CMP/EQ Rm,Rn 0011nnnnmmmm0000
4159 case 0x02: // CMP/HS Rm,Rn 0011nnnnmmmm0010
4160 case 0x03: // CMP/GE Rm,Rn 0011nnnnmmmm0011
4161 case 0x06: // CMP/HI Rm,Rn 0011nnnnmmmm0110
4162 case 0x07: // CMP/GT Rm,Rn 0011nnnnmmmm0111
4163 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4164 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4165 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4166 switch (op & 0x07)
4167 {
4168 case 0x00: // CMP/EQ
4169 tmp = DCOND_EQ;
4170 break;
4171 case 0x02: // CMP/HS
4172 tmp = DCOND_HS;
4173 break;
4174 case 0x03: // CMP/GE
4175 tmp = DCOND_GE;
4176 break;
4177 case 0x06: // CMP/HI
4178 tmp = DCOND_HI;
4179 break;
4180 case 0x07: // CMP/GT
4181 tmp = DCOND_GT;
4182 break;
4183 }
4184 emith_clr_t_cond(sr);
4185 emith_cmp_r_r(tmp2, tmp3);
4186 emith_set_t_cond(sr, tmp);
4187 goto end_op;
4188 case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100
4189 // Q1 = carry(Rn = (Rn << 1) | T)
4190 // if Q ^ M
4191 // Q2 = carry(Rn += Rm)
4192 // else
4193 // Q2 = carry(Rn -= Rm)
4194 // Q = M ^ Q1 ^ Q2
4195 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2)
4196 tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4197 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL);
4198 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4199 emith_sync_t(sr);
4200 tmp = rcache_get_tmp();
4201 if (drcf.Mflag != FLG_0) {
4202 emith_and_r_r_imm(tmp, sr, M);
4203 emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M
4204 }
4205 rcache_free_tmp(tmp);
4206 // shift Rn, add T, add or sub Rm, set T = !(Q1 ^ Q2)
4207 // in: (Q ^ M) passed in Q
4208 emith_sh2_div1_step(tmp2, tmp3, sr);
4209 tmp = rcache_get_tmp();
4210 emith_or_r_imm(sr, Q); // Q = !T
4211 emith_and_r_r_imm(tmp, sr, T);
4212 emith_eor_r_r_lsl(sr, tmp, Q_SHIFT);
4213 if (drcf.Mflag != FLG_0) { // Q = M ^ !T = M ^ Q1 ^ Q2
4214 emith_and_r_r_imm(tmp, sr, M);
4215 emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT);
4216 }
4217 rcache_free_tmp(tmp);
4218 goto end_op;
4219 case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
4220 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4221 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4222 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
4223 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL);
4224 emith_mul_u64(tmp3, tmp4, tmp, tmp2);
4225 goto end_op;
4226 case 0x08: // SUB Rm,Rn 0011nnnnmmmm1000
4227 #if PROPAGATE_CONSTANTS
4228 if (GET_Rn() == GET_Rm()) {
4229 gconst_new(GET_Rn(), 0);
4230 goto end_op;
4231 }
4232 #endif
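      // fallthrough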
4233 case 0x0c: // ADD Rm,Rn 0011nnnnmmmm1100
4234 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4235 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4236 if (op & 4) {
4237 emith_add_r_r_r(tmp, tmp3, tmp2);
4238 } else
4239 emith_sub_r_r_r(tmp, tmp3, tmp2);
4240 goto end_op;
4241 case 0x0a: // SUBC Rm,Rn 0011nnnnmmmm1010
4242 case 0x0e: // ADDC Rm,Rn 0011nnnnmmmm1110
4243 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4244 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4245 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4246 emith_sync_t(sr);
4247 #if T_OPTIMIZER
4248 if (rcache_regs_discard & BITMASK1(SHR_T)) {
4249 if (op & 4) {
4250 emith_t_to_carry(sr, 0);
4251 emith_adc_r_r_r(tmp, tmp3, tmp2);
4252 } else {
4253 emith_t_to_carry(sr, 1);
4254 emith_sbc_r_r_r(tmp, tmp3, tmp2);
4255 }
4256 } else
4257 #endif
4258 {
4259 EMITH_HINT_COND(DCOND_CS);
4260 if (op & 4) { // adc
4261 emith_tpop_carry(sr, 0);
4262 emith_adcf_r_r_r(tmp, tmp3, tmp2);
4263 emith_tpush_carry(sr, 0);
4264 } else {
4265 emith_tpop_carry(sr, 1);
4266 emith_sbcf_r_r_r(tmp, tmp3, tmp2);
4267 emith_tpush_carry(sr, 1);
4268 }
4269 }
4270 goto end_op;
4271 case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011
4272 case 0x0f: // ADDV Rm,Rn 0011nnnnmmmm1111
4273 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4274 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4275 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4276 #if T_OPTIMIZER
4277 if (rcache_regs_discard & BITMASK1(SHR_T)) {
4278 if (op & 4)
4279 emith_add_r_r_r(tmp,tmp3,tmp2);
4280 else
4281 emith_sub_r_r_r(tmp,tmp3,tmp2);
4282 } else
4283 #endif
4284 {
4285 emith_clr_t_cond(sr);
4286 EMITH_HINT_COND(DCOND_VS);
4287 if (op & 4)
4288 emith_addf_r_r_r(tmp, tmp3, tmp2);
4289 else
4290 emith_subf_r_r_r(tmp, tmp3, tmp2);
4291 emith_set_t_cond(sr, DCOND_VS);
4292 }
4293 goto end_op;
4294 case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101
4295 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4296 tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4297 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
4298 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL);
4299 emith_mul_s64(tmp3, tmp4, tmp, tmp2);
4300 goto end_op;
4301 }
4302 goto default_;
4303
4304 /////////////////////////////////////////////
4305 case 0x04:
4306 switch (op & 0x0f)
4307 {
4308 case 0x00:
4309 switch (GET_Fx())
4310 {
4311 case 0: // SHLL Rn 0100nnnn00000000
4312 case 2: // SHAL Rn 0100nnnn00100000
4313 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4314 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4315 #if T_OPTIMIZER
4316 if (rcache_regs_discard & BITMASK1(SHR_T))
4317 emith_lsl(tmp, tmp2, 1);
4318 else
4319 #endif
4320 {
4321 emith_invalidate_t();
4322 emith_lslf(tmp, tmp2, 1);
4323 emith_carry_to_t(sr, 0);
4324 }
4325 goto end_op;
4326 case 1: // DT Rn 0100nnnn00010000
4327 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4328 #if LOOP_DETECTION
4329 if (drcf.loop_type == OF_DELAY_LOOP) {
4330 if (drcf.delay_reg == -1)
4331 drcf.delay_reg = GET_Rn();
4332 else
4333 drcf.polling = drcf.loop_type = 0;
4334 }
4335 #endif
4336 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4337 emith_clr_t_cond(sr);
4338 EMITH_HINT_COND(DCOND_EQ);
4339 emith_subf_r_r_imm(tmp, tmp2, 1);
4340 emith_set_t_cond(sr, DCOND_EQ);
4341 goto end_op;
4342 }
4343 goto default_;
4344 case 0x01:
4345 switch (GET_Fx())
4346 {
4347 case 0: // SHLR Rn 0100nnnn00000001
4348 case 2: // SHAR Rn 0100nnnn00100001
4349 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4350 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4351 #if T_OPTIMIZER
4352 if (rcache_regs_discard & BITMASK1(SHR_T)) {
4353 if (op & 0x20)
4354 emith_asr(tmp,tmp2,1);
4355 else
4356 emith_lsr(tmp,tmp2,1);
4357 } else
4358 #endif
4359 {
4360 emith_invalidate_t();
4361 if (op & 0x20) {
4362 emith_asrf(tmp, tmp2, 1);
4363 } else
4364 emith_lsrf(tmp, tmp2, 1);
4365 emith_carry_to_t(sr, 0);
4366 }
4367 goto end_op;
4368 case 1: // CMP/PZ Rn 0100nnnn00010001
4369 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4370 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4371 emith_clr_t_cond(sr);
4372 emith_cmp_r_imm(tmp, 0);
4373 emith_set_t_cond(sr, DCOND_GE);
4374 goto end_op;
4375 }
4376 goto default_;
4377 case 0x02:
4378 case 0x03:
4379 switch (op & 0x3f)
4380 {
4381 case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010
4382 tmp = SHR_MACH;
4383 break;
4384 case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010
4385 tmp = SHR_MACL;
4386 break;
4387 case 0x22: // STS.L PR,@-Rn 0100nnnn00100010
4388 tmp = SHR_PR;
4389 break;
4390 case 0x03: // STC.L SR,@-Rn 0100nnnn00000011
4391 tmp = SHR_SR;
4392 break;
4393 case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011
4394 tmp = SHR_GBR;
4395 break;
4396 case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011
4397 tmp = SHR_VBR;
4398 break;
4399 default:
4400 goto default_;
4401 }
4402 if (tmp == SHR_SR) {
4403 tmp3 = rcache_get_reg_arg(1, tmp, &tmp4);
4404 emith_sync_t(tmp4);
4405 emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0
4406 } else
4407 tmp3 = rcache_get_reg_arg(1, tmp, NULL);
4408 emit_memhandler_write_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_PREDECR);
4409 goto end_op;
4410 case 0x04:
4411 case 0x05:
4412 switch (op & 0x3f)
4413 {
4414 case 0x04: // ROTL Rn 0100nnnn00000100
4415 case 0x05: // ROTR Rn 0100nnnn00000101
4416 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4417 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4418 #if T_OPTIMIZER
4419 if (rcache_regs_discard & BITMASK1(SHR_T)) {
4420 if (op & 1)
4421 emith_ror(tmp, tmp2, 1);
4422 else
4423 emith_rol(tmp, tmp2, 1);
4424 } else
4425 #endif
4426 {
4427 emith_invalidate_t();
4428 if (op & 1)
4429 emith_rorf(tmp, tmp2, 1);
4430 else
4431 emith_rolf(tmp, tmp2, 1);
4432 emith_carry_to_t(sr, 0);
4433 }
4434 goto end_op;
4435 case 0x24: // ROTCL Rn 0100nnnn00100100
4436 case 0x25: // ROTCR Rn 0100nnnn00100101
4437 tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL);
4438 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4439 emith_sync_t(sr);
4440 #if T_OPTIMIZER
4441 if (rcache_regs_discard & BITMASK1(SHR_T)) {
4442 emith_t_to_carry(sr, 0);
4443 if (op & 1)
4444 emith_rorc(tmp);
4445 else
4446 emith_rolc(tmp);
4447 } else
4448 #endif
4449 {
4450 emith_tpop_carry(sr, 0);
4451 if (op & 1)
4452 emith_rorcf(tmp);
4453 else
4454 emith_rolcf(tmp);
4455 emith_tpush_carry(sr, 0);
4456 }
4457 goto end_op;
4458 case 0x15: // CMP/PL Rn 0100nnnn00010101
4459 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4460 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4461 emith_clr_t_cond(sr);
4462 emith_cmp_r_imm(tmp, 0);
4463 emith_set_t_cond(sr, DCOND_GT);
4464 goto end_op;
4465 }
4466 goto default_;
4467 case 0x06:
4468 case 0x07:
4469 switch (op & 0x3f)
4470 {
4471 case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110
4472 tmp = SHR_MACH;
4473 break;
4474 case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110
4475 tmp = SHR_MACL;
4476 break;
4477 case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110
4478 tmp = SHR_PR;
4479 break;
4480 case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111
4481 tmp = SHR_SR;
4482 break;
4483 case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111
4484 tmp = SHR_GBR;
4485 break;
4486 case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111
4487 tmp = SHR_VBR;
4488 break;
4489 default:
4490 goto default_;
4491 }
4492 if (tmp == SHR_SR) {
4493 emith_invalidate_t();
4494 tmp2 = emit_memhandler_read_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_POSTINCR);
4495 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4496 emith_write_sr(sr, tmp2);
4497 rcache_free_tmp(tmp2);
4498 drcf.test_irq = 1;
4499 } else
4500 emit_memhandler_read_rr(sh2, tmp, GET_Rn(), 0, 2 | MF_POSTINCR);
4501 goto end_op;
4502 case 0x08:
4503 case 0x09:
4504 switch (GET_Fx())
4505 {
4506 case 0: // SHLL2 Rn 0100nnnn00001000
4507 // SHLR2 Rn 0100nnnn00001001
4508 tmp = 2;
4509 break;
4510 case 1: // SHLL8 Rn 0100nnnn00011000
4511 // SHLR8 Rn 0100nnnn00011001
4512 tmp = 8;
4513 break;
4514 case 2: // SHLL16 Rn 0100nnnn00101000
4515 // SHLR16 Rn 0100nnnn00101001
4516 tmp = 16;
4517 break;
4518 default:
4519 goto default_;
4520 }
4521 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4522 if (op & 1) {
4523 emith_lsr(tmp2, tmp3, tmp);
4524 } else
4525 emith_lsl(tmp2, tmp3, tmp);
4526 goto end_op;
4527 case 0x0a:
4528 switch (GET_Fx())
4529 {
4530 case 0: // LDS Rm,MACH 0100mmmm00001010
4531 tmp2 = SHR_MACH;
4532 break;
4533 case 1: // LDS Rm,MACL 0100mmmm00011010
4534 tmp2 = SHR_MACL;
4535 break;
4536 case 2: // LDS Rm,PR 0100mmmm00101010
4537 tmp2 = SHR_PR;
4538 break;
4539 default:
4540 goto default_;
4541 }
4542 emit_move_r_r(tmp2, GET_Rn());
4543 goto end_op;
4544 case 0x0b:
4545 switch (GET_Fx())
4546 {
4547 case 1: // TAS.B @Rn 0100nnnn00011011
4548 // XXX: is TAS working on 32X?
4549 rcache_get_reg_arg(0, GET_Rn(), NULL);
4550 tmp = emit_memhandler_read(0);
4551 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4552 emith_clr_t_cond(sr);
4553 emith_cmp_r_imm(tmp, 0);
4554 emith_set_t_cond(sr, DCOND_EQ);
4555 emith_or_r_imm(tmp, 0x80);
        tmp2 = rcache_get_tmp_arg(1); // assumed to differ from tmp
4557 emith_move_r_r(tmp2, tmp);
4558 rcache_free_tmp(tmp);
4559 rcache_get_reg_arg(0, GET_Rn(), NULL);
4560 emit_memhandler_write(0);
4561 break;
4562 default:
4563 goto default_;
4564 }
4565 goto end_op;
4566 case 0x0e:
4567 switch (GET_Fx())
4568 {
4569 case 0: // LDC Rm,SR 0100mmmm00001110
4570 tmp2 = SHR_SR;
4571 break;
4572 case 1: // LDC Rm,GBR 0100mmmm00011110
4573 tmp2 = SHR_GBR;
4574 break;
4575 case 2: // LDC Rm,VBR 0100mmmm00101110
4576 tmp2 = SHR_VBR;
4577 break;
4578 default:
4579 goto default_;
4580 }
4581 if (tmp2 == SHR_SR) {
4582 emith_invalidate_t();
4583 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4584 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4585 emith_write_sr(sr, tmp);
4586 drcf.test_irq = 1;
4587 } else
4588 emit_move_r_r(tmp2, GET_Rn());
4589 goto end_op;
4590 case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111
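      // MAC += Rm * Rn on 16 bit operands; saturation (S flag set) is
      // handled inside the helper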
4591 emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 1);
4592 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
4593 tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL);
4594 tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL);
4595 emith_sh2_macw(tmp3, tmp4, tmp, tmp2, sr);
4596 rcache_free_tmp(tmp2);
4597 rcache_free_tmp(tmp);
4598 goto end_op;
4599 }
4600 goto default_;
4601
4602 /////////////////////////////////////////////
4603 case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd
4604 emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2 | drcf.polling);
4605 goto end_op;
4606
4607 /////////////////////////////////////////////
4608 case 0x06:
4609 switch (op & 0x0f)
4610 {
4611 case 0x00: // MOV.B @Rm,Rn 0110nnnnmmmm0000
4612 case 0x01: // MOV.W @Rm,Rn 0110nnnnmmmm0001
4613 case 0x02: // MOV.L @Rm,Rn 0110nnnnmmmm0010
4614 case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100
4615 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101
4616 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110
4617 tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? MF_POSTINCR : drcf.polling;
4618 emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), 0, (op & 3) | tmp);
4619 goto end_op;
4620 case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011
4621 emit_move_r_r(GET_Rn(), GET_Rm());
4622 goto end_op;
4623 default: // 0x07 ... 0x0f
4624 tmp = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4625 tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL);
4626 switch (op & 0x0f)
4627 {
4628 case 0x07: // NOT Rm,Rn 0110nnnnmmmm0111
4629 emith_mvn_r_r(tmp2, tmp);
4630 break;
4631 case 0x08: // SWAP.B Rm,Rn 0110nnnnmmmm1000
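          // exchange the two low bytes: for Rm = AABBCCDD first build
          // DDCCAABB in a scratch reg, then rotate by 16 -> AABBDDCC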
4632 tmp3 = tmp2;
4633 if (tmp == tmp2)
4634 tmp3 = rcache_get_tmp();
4635 tmp4 = rcache_get_tmp();
4636 emith_lsr(tmp3, tmp, 16);
4637 emith_or_r_r_lsl(tmp3, tmp, 24);
4638 emith_and_r_r_imm(tmp4, tmp, 0xff00);
4639 emith_or_r_r_lsl(tmp3, tmp4, 8);
4640 emith_rol(tmp2, tmp3, 16);
4641 rcache_free_tmp(tmp4);
4642 if (tmp == tmp2)
4643 rcache_free_tmp(tmp3);
4644 break;
4645 case 0x09: // SWAP.W Rm,Rn 0110nnnnmmmm1001
4646 emith_rol(tmp2, tmp, 16);
4647 break;
4648 case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010
4649 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4650 emith_sync_t(sr);
4651 #if T_OPTIMIZER
4652 if (rcache_regs_discard & BITMASK1(SHR_T)) {
4653 emith_t_to_carry(sr, 1);
4654 emith_negc_r_r(tmp2, tmp);
4655 } else
4656 #endif
4657 {
4658 EMITH_HINT_COND(DCOND_CS);
4659 emith_tpop_carry(sr, 1);
4660 emith_negcf_r_r(tmp2, tmp);
4661 emith_tpush_carry(sr, 1);
4662 }
4663 break;
4664 case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011
4665 emith_neg_r_r(tmp2, tmp);
4666 break;
4667 case 0x0c: // EXTU.B Rm,Rn 0110nnnnmmmm1100
4668 emith_clear_msb(tmp2, tmp, 24);
4669 rcache_set_x16(tmp2, 1, 1);
4670 break;
4671 case 0x0d: // EXTU.W Rm,Rn 0110nnnnmmmm1101
4672 emith_clear_msb(tmp2, tmp, 16);
4673 rcache_set_x16(tmp2, 0, 1);
4674 break;
4675 case 0x0e: // EXTS.B Rm,Rn 0110nnnnmmmm1110
4676 emith_sext(tmp2, tmp, 8);
4677 rcache_set_x16(tmp2, 1, 0);
4678 break;
4679 case 0x0f: // EXTS.W Rm,Rn 0110nnnnmmmm1111
4680 emith_sext(tmp2, tmp, 16);
4681 rcache_set_x16(tmp2, 1, 0);
4682 break;
4683 }
4684 goto end_op;
4685 }
4686 goto default_;
4687
4688 /////////////////////////////////////////////
4689 case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii
4690 if (op & 0x80) // adding negative
4691 emit_sub_r_imm(GET_Rn(), (u8)-op);
4692 else
4693 emit_add_r_imm(GET_Rn(), (u8)op);
4694 goto end_op;
4695
4696 /////////////////////////////////////////////
4697 case 0x08:
4698 switch (op & 0x0f00)
4699 {
4700 case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd
4701 case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd
4702 tmp = (op & 0x100) >> 8;
4703 emit_memhandler_write_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp);
4704 goto end_op;
4705 case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd
4706 case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd
4707 tmp = (op & 0x100) >> 8;
4708 emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp | drcf.polling);
4709 goto end_op;
4710 case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii
4711 tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ, NULL);
4712 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4713 emith_clr_t_cond(sr);
4714 emith_cmp_r_imm(tmp2, (s8)(op & 0xff));
4715 emith_set_t_cond(sr, DCOND_EQ);
4716 goto end_op;
4717 }
4718 goto default_;
4719
4720 /////////////////////////////////////////////
4721 case 0x0c:
4722 switch (op & 0x0f00)
4723 {
4724 case 0x0000: // MOV.B R0,@(disp,GBR) 11000000dddddddd
4725 case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd
4726 case 0x0200: // MOV.L R0,@(disp,GBR) 11000010dddddddd
4727 tmp = (op & 0x300) >> 8;
4728 emit_memhandler_write_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp);
4729 goto end_op;
4730 case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd
4731 case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd
4732 case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd
4733 tmp = (op & 0x300) >> 8;
4734 emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp | drcf.polling);
4735 goto end_op;
4736 case 0x0800: // TST #imm,R0 11001000iiiiiiii
4737 tmp = rcache_get_reg(SHR_R0, RC_GR_READ, NULL);
4738 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4739 emith_clr_t_cond(sr);
4740 emith_tst_r_imm(tmp, op & 0xff);
4741 emith_set_t_cond(sr, DCOND_EQ);
4742 goto end_op;
4743 case 0x0900: // AND #imm,R0 11001001iiiiiiii
4744 tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2);
4745 emith_and_r_r_imm(tmp, tmp2, (op & 0xff));
4746 goto end_op;
4747 case 0x0a00: // XOR #imm,R0 11001010iiiiiiii
4748 if (op & 0xff) {
4749 tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2);
4750 emith_eor_r_r_imm(tmp, tmp2, (op & 0xff));
4751 }
4752 goto end_op;
4753 case 0x0b00: // OR #imm,R0 11001011iiiiiiii
4754 if (op & 0xff) {
4755 tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2);
4756 emith_or_r_r_imm(tmp, tmp2, (op & 0xff));
4757 }
4758 goto end_op;
4759 case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii
4760 tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0 | drcf.polling);
4761 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4762 emith_clr_t_cond(sr);
4763 emith_tst_r_imm(tmp, op & 0xff);
4764 emith_set_t_cond(sr, DCOND_EQ);
4765 rcache_free_tmp(tmp);
4766 goto end_op;
4767 case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii
4768 tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4769 tmp2 = rcache_get_tmp_arg(1);
4770 emith_and_r_r_imm(tmp2, tmp, (op & 0xff));
4771 goto end_rmw_op;
4772 case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii
4773 tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4774 tmp2 = rcache_get_tmp_arg(1);
4775 emith_eor_r_r_imm(tmp2, tmp, (op & 0xff));
4776 goto end_rmw_op;
4777 case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii
4778 tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4779 tmp2 = rcache_get_tmp_arg(1);
4780 emith_or_r_r_imm(tmp2, tmp, (op & 0xff));
4781 end_rmw_op:
4782 rcache_free_tmp(tmp);
4783 emit_indirect_indexed_write(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4784 goto end_op;
4785 }
4786 goto default_;
4787
4788 /////////////////////////////////////////////
4789 case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii
4790 emit_move_r_imm32(GET_Rn(), (s8)op);
4791 goto end_op;
4792
4793 default:
4794 default_:
4795 if (!(op_flags[i] & OF_B_IN_DS)) {
4796 elprintf_sh2(sh2, EL_ANOMALY,
4797 "drc: illegal op %04x @ %08x", op, pc - 2);
4798 exit(1);
4799 }
4800 }
4801
4802 end_op:
4803 rcache_unlock_all();
4804 rcache_set_usage_now(0);
4805 #if DRC_DEBUG & 64
4806 RCACHE_CHECK("after insn");
4807 #endif
4808
4809 cycles += opd->cycles;
4810
4811 if (op_flags[i+1] & OF_DELAY_OP) {
4812 do_host_disasm(tcache_id);
4813 continue;
4814 }
4815
4816 // test irq?
4817 if (drcf.test_irq && !drcf.pending_branch_direct) {
4818 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4819 FLUSH_CYCLES(sr);
4820 emith_sync_t(sr);
4821 if (!drcf.pending_branch_indirect)
4822 emit_move_r_imm32(SHR_PC, pc);
4823 rcache_flush();
4824 emith_call(sh2_drc_test_irq);
4825 drcf.test_irq = 0;
4826 }
4827
4828 // branch handling
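    // direct branches (compile-time constant target) may be resolved locally
    // or linked to another block; indirect branches go via the dispatcher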
4829 if (drcf.pending_branch_direct)
4830 {
4831 struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd;
4832 u32 target_pc = opd_b->imm;
4833 int cond = -1;
4834 int ctaken = 0;
4835 void *target = NULL;
4836
4837 if (OP_ISBRACND(opd_b->op))
4838 ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2;
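      // (a taken BT/BF costs 2 extra cycles, a taken BT/S-BF/S 1 extra;
      // opd->cycles only covers the not-taken case)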
4839 cycles += ctaken; // assume branch taken
4840
4841 #if LOOP_OPTIMIZER
4842 if ((drcf.loop_type == OF_IDLE_LOOP ||
4843 (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0)))
4844 {
4845 // idle or delay loop
4846 emit_sync_t_to_sr();
4847 emith_sh2_delay_loop(cycles, drcf.delay_reg);
4848 rcache_unlock_all(); // may lock delay_reg
4849 drcf.polling = drcf.loop_type = drcf.pinning = 0;
4850 }
4851 #endif
4852
4853 #if CALL_STACK
4854 void *rtsadd = NULL, *rtsret = NULL;
4855 if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
4856 // BSR - save rts data
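        // push the return pc and its host address onto the rts cache, so a
        // matching RTS can bypass the dispatcher; the s8 host address delta
        // is patched in at rtsadd once the post-branch address is known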
4857 tmp = rcache_get_tmp_arg(1);
4858 rtsadd = tcache_ptr;
4859 emith_move_r_imm_s8_patchable(tmp, 0);
4860 rcache_clean_tmp();
4861 rcache_invalidate_tmp();
4862 emith_call(sh2_drc_dispatcher_call);
4863 rtsret = tcache_ptr;
4864 }
4865 #endif
4866
4867 // XXX move below cond test if not changing host cond (MIPS delay slot)?
4868 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4869 FLUSH_CYCLES(sr);
4870 rcache_clean();
4871
4872 if (OP_ISBRACND(opd_b->op)) {
4873 // BT[S], BF[S] - emit condition test
4874 cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE;
4875 if (delay_dep_fw & BITMASK1(SHR_T)) {
4876 emith_sync_t(sr);
4877 emith_tst_r_imm(sr, T_save);
4878 } else {
4879 cond = emith_tst_t(sr, (opd_b->op == OP_BRANCH_CT));
4880 if (emith_get_t_cond() >= 0) {
4881 if (opd_b->op == OP_BRANCH_CT)
4882 emith_or_r_imm_c(cond, sr, T);
4883 else
4884 emith_bic_r_imm_c(cond, sr, T);
4885 }
4886 }
4887 } else
4888 emith_sync_t(sr);
4889 // no modification of host status/flags between here and branching!
4890
4891 v = find_in_sorted_linkage(branch_targets, branch_target_count, target_pc);
4892 if (v >= 0)
4893 {
4894 // local branch
4895 if (branch_targets[v].ptr) {
4896 // local backward jump, link here now since host PC is already known
4897 target = branch_targets[v].ptr;
4898 #if LOOP_OPTIMIZER
4899 if (pinned_loops[pinned_loop_count].pc == target_pc) {
4900 // backward jump at end of optimized loop
4901 rcache_unpin_all();
4902 target = pinned_loops[pinned_loop_count].ptr;
4903 pinned_loop_count ++;
4904 }
4905 #endif
4906 if (cond != -1) {
4907 if (emith_jump_patch_inrange(tcache_ptr, target)) {
4908 emith_jump_cond(cond, target);
4909 } else {
4910 // not reachable directly, must use far branch
4911 EMITH_JMP_START(emith_invert_cond(cond));
4912 emith_jump(target);
4913 EMITH_JMP_END(emith_invert_cond(cond));
4914 }
4915 } else {
4916 emith_jump(target);
4917 rcache_invalidate();
4918 }
4919 } else if (blx_target_count < MAX_LOCAL_BRANCHES) {
4920 // local forward jump
4921 target = tcache_ptr;
4922 blx_targets[blx_target_count++] =
4923 (struct linkage) { .pc = target_pc, .ptr = target, .mask = 0x2 };
4924 if (cond != -1)
4925 emith_jump_cond_patchable(cond, target);
4926 else {
4927 emith_jump_patchable(target);
4928 rcache_invalidate();
4929 }
4930 } else
4931 // no space for resolving forward branch, handle it as external
4932 dbg(1, "warning: too many unresolved branches");
4933 }
4934
4935 if (target == NULL)
4936 {
4937 // can't resolve branch locally, make a block exit
4938 bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
4939 if (cond != -1) {
4940 #if 1
4941 if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) {
4942 // conditional jumps get a blx stub for the far jump
4943 bl->type = BL_JCCBLX;
4944 target = tcache_ptr;
4945 blx_targets[blx_target_count++] =
4946 (struct linkage) { .pc = target_pc, .ptr = target, .bl = bl };
4947 emith_jump_cond_patchable(cond, target);
4948 } else {
4949 // not linkable, or blx table full; inline jump @dispatcher
4950 EMITH_JMP_START(emith_invert_cond(cond));
4951 if (bl) {
4952 bl->jump = tcache_ptr;
4953 emith_flush(); // flush to inhibit insn swapping
4954 bl->type = BL_LDJMP;
4955 }
4956 tmp = rcache_get_tmp_arg(0);
4957 emith_move_r_imm(tmp, target_pc);
4958 rcache_free_tmp(tmp);
4959 target = sh2_drc_dispatcher;
4960
4961 emith_jump_patchable(target);
4962 EMITH_JMP_END(emith_invert_cond(cond));
4963 }
4964 #else
4965 // jump @dispatcher - ARM 32bit version with conditional execution
4966 EMITH_SJMP_START(emith_invert_cond(cond));
4967 tmp = rcache_get_tmp_arg(0);
4968 emith_move_r_imm_c(cond, tmp, target_pc);
4969 rcache_free_tmp(tmp);
4970 target = sh2_drc_dispatcher;
4971
4972 if (bl) {
4973 bl->jump = tcache_ptr;
4974 bl->type = BL_JMP;
4975 }
4976 emith_jump_cond_patchable(cond, target);
4977 EMITH_SJMP_END(emith_invert_cond(cond));
4978 #endif
4979 } else {
4980 // unconditional, has the far jump inlined
4981 if (bl) {
4982 emith_flush(); // flush to inhibit insn swapping
4983 bl->type = BL_LDJMP;
4984 }
4985
4986 tmp = rcache_get_tmp_arg(0);
4987 emith_move_r_imm(tmp, target_pc);
4988 rcache_free_tmp(tmp);
4989 target = sh2_drc_dispatcher;
4990
4991 emith_jump_patchable(target);
4992 rcache_invalidate();
4993 }
4994 }
4995
4996 #if CALL_STACK
4997 if (rtsadd)
4998 emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
4999 #endif
5000
5001 // branch not taken, correct cycle count
5002 if (ctaken)
5003 cycles -= ctaken;
5004 // set T bit to reflect branch not taken for OP_BRANCH_CT/CF
5005 if (emith_get_t_cond() >= 0) // T is synced for all other cases
5006 emith_set_t(sr, opd_b->op == OP_BRANCH_CF);
5007
5008 drcf.pending_branch_direct = 0;
5009 if (target_pc >= base_pc && target_pc < pc)
5010 drcf.polling = drcf.loop_type = 0;
5011 }
5012 else if (drcf.pending_branch_indirect) {
5013 u32 target_pc;
5014
5015 tmp = rcache_get_reg_arg(0, SHR_PC, NULL);
5016
5017 #if CALL_STACK
5018 struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd;
5019 void *rtsadd = NULL, *rtsret = NULL;
5020
5021 if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
5022 // JSR, BSRF - save rts data
5023 tmp = rcache_get_tmp_arg(1);
5024 rtsadd = tcache_ptr;
5025 emith_move_r_imm_s8_patchable(tmp, 0);
5026 rcache_clean_tmp();
5027 rcache_invalidate_tmp();
5028 emith_call(sh2_drc_dispatcher_call);
5029 rtsret = tcache_ptr;
5030 }
5031 #endif
5032
5033 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
5034 FLUSH_CYCLES(sr);
5035 emith_sync_t(sr);
5036 rcache_clean();
5037
5038 #if CALL_STACK
5039 if (opd_b->rm == SHR_PR) {
        // RTS - return via the rts cache; falls back to the dispatcher on miss
5041 emith_jump(sh2_drc_dispatcher_return);
5042 } else
5043 #endif
5044 if (gconst_get(SHR_PC, &target_pc)) {
5045 // JMP, JSR, BRAF, BSRF const - treat like unconditional direct branch
5046 bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
5047 if (bl) // pc already loaded somewhere else, can patch jump only
5048 bl->type = BL_JMP;
5049 emith_jump_patchable(sh2_drc_dispatcher);
5050 } else {
5051 // JMP, JSR, BRAF, BSRF not const
5052 emith_jump(sh2_drc_dispatcher);
5053 }
5054 rcache_invalidate();
5055
5056 #if CALL_STACK
5057 if (rtsadd)
5058 emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
5059 #endif
5060
5061 drcf.pending_branch_indirect = 0;
5062 drcf.polling = drcf.loop_type = 0;
5063 }
5064 rcache_unlock_all();
5065
5066 do_host_disasm(tcache_id);
5067 }
5068
5069 // check the last op
5070 if (op_flags[i-1] & OF_DELAY_OP)
5071 opd = &ops[i-2];
5072 else
5073 opd = &ops[i-1];
5074
5075 if (! OP_ISBRAUC(opd->op))
5076 {
5077 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
5078 FLUSH_CYCLES(tmp);
5079 emith_sync_t(tmp);
5080
5081 rcache_clean();
5082 bl = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id);
5083 if (bl) {
5084 emith_flush(); // flush to inhibit insn swapping
5085 bl->type = BL_LDJMP;
5086 }
5087 tmp = rcache_get_tmp_arg(0);
5088 emith_move_r_imm(tmp, pc);
5089 emith_jump_patchable(sh2_drc_dispatcher);
5090 rcache_invalidate();
5091 } else
5092 rcache_flush();
5093
5094 // link unresolved branches, emitting blx area entries as needed
5095 emit_branch_linkage_code(sh2, block, tcache_id, branch_targets,
5096 branch_target_count, blx_targets, blx_target_count);
5097
5098 emith_flush();
5099 do_host_disasm(tcache_id);
5100
5101 emith_pool_commit(0);
5102
  // fill blx backup; do this last to back up the final patched code
5104 for (i = 0; i < block->entry_count; i++)
5105 for (bl = block->entryp[i].o_links; bl; bl = bl->o_next)
5106 memcpy(bl->jdisp, bl->blx ? bl->blx : bl->jump, emith_jump_at_size());
5107
5108 ring_alloc(&tcache_ring[tcache_id], tcache_ptr - block_entry_ptr);
5109 host_instructions_updated(block_entry_ptr, tcache_ptr, 1);
5110
5111 dr_activate_block(block, tcache_id, sh2->is_slave);
5112 emith_update_cache();
5113
5114 do_host_disasm(tcache_id);
5115
5116 dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f",
5117 tcache_id, blkid_main, tcache_ptr,
5118 tcache_ring[tcache_id].used, tcache_ring[tcache_id].size,
5119 insns_compiled, host_insn_count, (float)host_insn_count / insns_compiled);
5120 if ((sh2->pc & 0xc6000000) == 0x02000000) { // ROM
5121 dbg(2, " hash collisions %d/%d", hash_collisions, block_ring[tcache_id].used);
5122 Pico32x.emu_flags |= P32XF_DRC_ROM_C;
5123 }
5124 /*
5125 printf("~~~\n");
5126 tcache_dsm_ptrs[tcache_id] = block_entry_ptr;
5127 do_host_disasm(tcache_id);
5128 printf("~~~\n");
5129 */
5130
5131 #if (DRC_DEBUG)
5132 fflush(stdout);
5133 #endif
5134
5135 return block_entry_ptr;
5136 }
5137
static void sh2_generate_utils(void)
5139 {
5140 int arg0, arg1, arg2, arg3, sr, tmp, tmp2;
5141 #if DRC_DEBUG
5142 int hic = host_insn_count; // don't count utils for insn statistics
5143 #endif
5144
5145 host_arg2reg(arg0, 0);
5146 host_arg2reg(arg1, 1);
5147 host_arg2reg(arg2, 2);
5148 host_arg2reg(arg3, 3);
5149 emith_move_r_r(arg0, arg0); // nop
5150 emith_flush();
5151
5152 // sh2_drc_write8(u32 a, u32 d)
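  // writes go through the region's handler selected from the write table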
5153 sh2_drc_write8 = (void *)tcache_ptr;
5154 emith_ctx_read_ptr(arg2, offsetof(SH2, write8_tab));
5155 emith_sh2_wcall(arg0, arg1, arg2, arg3);
5156 emith_flush();
5157
5158 // sh2_drc_write16(u32 a, u32 d)
5159 sh2_drc_write16 = (void *)tcache_ptr;
5160 emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab));
5161 emith_sh2_wcall(arg0, arg1, arg2, arg3);
5162 emith_flush();
5163
5164 // sh2_drc_write32(u32 a, u32 d)
5165 sh2_drc_write32 = (void *)tcache_ptr;
5166 emith_ctx_read_ptr(arg2, offsetof(SH2, write32_tab));
5167 emith_sh2_wcall(arg0, arg1, arg2, arg3);
5168 emith_flush();
5169
5170 // d = sh2_drc_read8(u32 a)
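  // emith_sh2_rcall maps the address: carry set means a handler function
  // must be called, carry clear yields a direct pointer (the address is
  // XORed with 1 to adjust byte order within the halfword-organized memory)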
5171 sh2_drc_read8 = (void *)tcache_ptr;
5172 emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
5173 EMITH_HINT_COND(DCOND_CS);
5174 emith_sh2_rcall(arg0, arg1, arg2, arg3);
5175 EMITH_SJMP_START(DCOND_CS);
5176 emith_and_r_r_c(DCOND_CC, arg0, arg3);
5177 emith_eor_r_imm_ptr_c(DCOND_CC, arg0, 1);
5178 emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
5179 emith_ret_c(DCOND_CC);
5180 EMITH_SJMP_END(DCOND_CS);
5181 emith_move_r_r_ptr(arg1, CONTEXT_REG);
5182 emith_abijump_reg(arg2);
5183 emith_flush();
5184
5185 // d = sh2_drc_read16(u32 a)
5186 sh2_drc_read16 = (void *)tcache_ptr;
5187 emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
5188 EMITH_HINT_COND(DCOND_CS);
5189 emith_sh2_rcall(arg0, arg1, arg2, arg3);
5190 EMITH_SJMP_START(DCOND_CS);
5191 emith_and_r_r_c(DCOND_CC, arg0, arg3);
5192 emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
5193 emith_ret_c(DCOND_CC);
5194 EMITH_SJMP_END(DCOND_CS);
5195 emith_move_r_r_ptr(arg1, CONTEXT_REG);
5196 emith_abijump_reg(arg2);
5197 emith_flush();
5198
5199 // d = sh2_drc_read32(u32 a)
5200 sh2_drc_read32 = (void *)tcache_ptr;
5201 emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
5202 EMITH_HINT_COND(DCOND_CS);
5203 emith_sh2_rcall(arg0, arg1, arg2, arg3);
5204 EMITH_SJMP_START(DCOND_CS);
5205 emith_and_r_r_c(DCOND_CC, arg0, arg3);
5206 emith_read_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
5207 emith_ror_c(DCOND_CC, RET_REG, RET_REG, 16);
5208 emith_ret_c(DCOND_CC);
5209 EMITH_SJMP_END(DCOND_CS);
5210 emith_move_r_r_ptr(arg1, CONTEXT_REG);
5211 emith_abijump_reg(arg2);
5212 emith_flush();
5213
5214 // d = sh2_drc_read8_poll(u32 a)
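  // the _poll variants additionally pass the access to the poll detector,
  // which watches for busy-wait loops on shared memory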
5215 sh2_drc_read8_poll = (void *)tcache_ptr;
5216 emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
5217 EMITH_HINT_COND(DCOND_CS);
5218 emith_sh2_rcall(arg0, arg1, arg2, arg3);
5219 EMITH_SJMP_START(DCOND_CC);
5220 emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
5221 emith_abijump_reg_c(DCOND_CS, arg2);
5222 EMITH_SJMP_END(DCOND_CC);
5223 emith_and_r_r_r(arg1, arg0, arg3);
5224 emith_eor_r_imm_ptr(arg1, 1);
5225 emith_read8s_r_r_r(arg1, arg2, arg1);
5226 emith_push_ret(arg1);
5227 emith_move_r_r_ptr(arg2, CONTEXT_REG);
5228 emith_abicall(p32x_sh2_poll_memory8);
5229 emith_pop_and_ret(arg1);
5230 emith_flush();
5231
5232 // d = sh2_drc_read16_poll(u32 a)
5233 sh2_drc_read16_poll = (void *)tcache_ptr;
5234 emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
5235 EMITH_HINT_COND(DCOND_CS);
5236 emith_sh2_rcall(arg0, arg1, arg2, arg3);
5237 EMITH_SJMP_START(DCOND_CC);
5238 emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
5239 emith_abijump_reg_c(DCOND_CS, arg2);
5240 EMITH_SJMP_END(DCOND_CC);
5241 emith_and_r_r_r(arg1, arg0, arg3);
5242 emith_read16s_r_r_r(arg1, arg2, arg1);
5243 emith_push_ret(arg1);
5244 emith_move_r_r_ptr(arg2, CONTEXT_REG);
5245 emith_abicall(p32x_sh2_poll_memory16);
5246 emith_pop_and_ret(arg1);
5247 emith_flush();
5248
5249 // d = sh2_drc_read32_poll(u32 a)
5250 sh2_drc_read32_poll = (void *)tcache_ptr;
5251 emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
5252 EMITH_HINT_COND(DCOND_CS);
5253 emith_sh2_rcall(arg0, arg1, arg2, arg3);
5254 EMITH_SJMP_START(DCOND_CC);
5255 emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
5256 emith_abijump_reg_c(DCOND_CS, arg2);
5257 EMITH_SJMP_END(DCOND_CC);
5258 emith_and_r_r_r(arg1, arg0, arg3);
5259 emith_read_r_r_r(arg1, arg2, arg1);
5260 emith_ror(arg1, arg1, 16);
5261 emith_push_ret(arg1);
5262 emith_move_r_r_ptr(arg2, CONTEXT_REG);
5263 emith_abicall(p32x_sh2_poll_memory32);
5264 emith_pop_and_ret(arg1);
5265 emith_flush();
5266
5267 // sh2_drc_exit(u32 pc)
5268 sh2_drc_exit = (void *)tcache_ptr;
5269 emith_ctx_write(arg0, SHR_PC * 4);
5270 emit_do_static_regs(1, arg2);
5271 emith_sh2_drc_exit();
5272 emith_flush();
5273
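  // The dispatcher maps an SH2 PC to host code. As an illustrative C sketch
  // (entry layout simplified; the real lookup is dr_lookup_block/sh2_translate
  // below):
  //
  //   void sh2_drc_dispatcher(u32 pc)
  //   {
  //     sh2->pc = pc;
  //     bc_entry *e = &sh2->branch_cache[(pc >> 3) & (N-1)];
  //     if (e->pc == pc)
  //       goto *e->code;                       // branch cache hit
  //     void *code = dr_lookup_block(pc, sh2, &sh2->drc_tmp);
  //     if (code) { *e = (bc_entry){ pc, code }; goto *code; }
  //     code = sh2_translate(sh2, sh2->drc_tmp);
  //     if (code) goto *code;
  //     dr_failure();                          // can't translate
  //   }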
  // sh2_drc_dispatcher(u32 pc)
  sh2_drc_dispatcher = (void *)tcache_ptr;
  emith_ctx_write(arg0, SHR_PC * 4);
#if BRANCH_CACHE
  // check if PC is in branch target cache
  emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*8);
  emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 1 : 0);
  emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache));
  emith_cmp_r_r(arg2, arg0);
  EMITH_SJMP_START(DCOND_NE);
#if (DRC_DEBUG & 128)
  emith_move_r_ptr_imm(arg2, (uptr)&bchit);
  emith_read_r_r_offs_c(DCOND_EQ, arg3, arg2, 0);
  emith_add_r_imm_c(DCOND_EQ, arg3, 1);
  emith_write_r_r_offs_c(DCOND_EQ, arg3, arg2, 0);
#endif
  emith_read_r_r_offs_ptr_c(DCOND_EQ, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
  emith_jump_reg_c(DCOND_EQ, RET_REG);
  EMITH_SJMP_END(DCOND_NE);
#endif
  emith_move_r_r_ptr(arg1, CONTEXT_REG);
  emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp));
  emith_abicall(dr_lookup_block);
  // store PC and block entry ptr (in arg0) in branch target cache
  emith_tst_r_r_ptr(RET_REG, RET_REG);
  EMITH_SJMP_START(DCOND_EQ);
#if BRANCH_CACHE
#if (DRC_DEBUG & 128)
  emith_move_r_ptr_imm(arg2, (uptr)&bcmiss);
  emith_read_r_r_offs_c(DCOND_NE, arg3, arg2, 0);
  emith_add_r_imm_c(DCOND_NE, arg3, 1);
  emith_write_r_r_offs_c(DCOND_NE, arg3, arg2, 0);
#endif
  emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4);
  emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*8);
  emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 1 : 0);
  emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache));
  emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
#endif
  emith_jump_reg_c(DCOND_NE, RET_REG);
  EMITH_SJMP_END(DCOND_EQ);
  // lookup failed, call sh2_translate()
  emith_move_r_r_ptr(arg0, CONTEXT_REG);
  emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id
  emith_abicall(sh2_translate);
  emith_tst_r_r_ptr(RET_REG, RET_REG);
  EMITH_SJMP_START(DCOND_EQ);
  emith_jump_reg_c(DCOND_NE, RET_REG);
  EMITH_SJMP_END(DCOND_EQ);
  // XXX: can't translate, fail
  emith_abicall(dr_failure);
  emith_flush();

#if CALL_STACK
  // pc = sh2_drc_dispatcher_call(u32 pc)
  sh2_drc_dispatcher_call = (void *)tcache_ptr;
  emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
  emith_add_r_imm(arg2, (u32)(2*sizeof(void *)));
  emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
  emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
  emith_add_r_r_r_lsl_ptr(arg3, CONTEXT_REG, arg2, 0);
  rcache_get_reg_arg(2, SHR_PR, NULL);
  emith_add_r_ret(arg1);
  emith_write_r_r_offs_ptr(arg1, arg3, offsetof(SH2, rts_cache)+sizeof(void *));
  emith_write_r_r_offs(arg2, arg3, offsetof(SH2, rts_cache));
  rcache_flush();
  emith_ret();
  emith_flush();

  // sh2_drc_dispatcher_return(u32 pc)
  sh2_drc_dispatcher_return = (void *)tcache_ptr;
  emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
  emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0);
  emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache));
  emith_cmp_r_r(arg0, arg3);
#if (DRC_DEBUG & 128)
  EMITH_SJMP_START(DCOND_EQ);
  emith_move_r_ptr_imm(arg3, (uptr)&rcmiss);
  emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
  emith_add_r_imm_c(DCOND_NE, arg1, 1);
  emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
  emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
  EMITH_SJMP_END(DCOND_EQ);
#else
  emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
#endif
  emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *));
  emith_sub_r_imm(arg2, (u32)(2*sizeof(void *)));
  emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
  emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
#if (DRC_DEBUG & 128)
  emith_move_r_ptr_imm(arg3, (uptr)&rchit);
  emith_read_r_r_offs(arg1, arg3, 0);
  emith_add_r_imm(arg1, 1);
  emith_write_r_r_offs(arg1, arg3, 0);
#endif
  emith_jump_reg(arg0);
  emith_flush();
#endif
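
  // Together these two stubs form a small return-address stack: a JSR/BSR
  // pushes the SH2 return PC (PR) together with a host-code return address
  // via sh2_drc_dispatcher_call, and RTS pops it in sh2_drc_dispatcher_return.
  // Roughly, in C (illustrative only; idx is a byte offset into rts_cache):
  //
  //   push: idx = (idx + sz) & mask; rts_cache[idx] = { pr, host_ret };
  //   pop:  if (rts_cache[idx].pc == pc) { idx -= sz; goto *host_ret; }
  //         else sh2_drc_dispatcher(pc);  // miss: fall back to full lookup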

  // sh2_drc_test_irq(void)
  // assumes it's called from main function (may jump to dispatcher)
  sh2_drc_test_irq = (void *)tcache_ptr;
  emith_ctx_read(arg1, offsetof(SH2, pending_level));
  sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
  emith_lsr(arg0, sr, I_SHIFT);
  emith_and_r_imm(arg0, 0x0f);
  emith_cmp_r_r(arg1, arg0); // pending_level > ((sr >> 4) & 0x0f)?
  EMITH_SJMP_START(DCOND_GT);
  emith_ret_c(DCOND_LE); // nope, return
  EMITH_SJMP_END(DCOND_GT);
  // adjust SP
  tmp = rcache_get_reg(SHR_SP, RC_GR_RMW, NULL);
  emith_sub_r_imm(tmp, 4*2);
  rcache_clean();
  // push SR
  tmp = rcache_get_reg_arg(0, SHR_SP, &tmp2);
  emith_add_r_r_imm(tmp, tmp2, 4);
  tmp = rcache_get_reg_arg(1, SHR_SR, NULL);
  emith_clear_msb(tmp, tmp, 22);
  emith_move_r_r_ptr(arg2, CONTEXT_REG);
  rcache_invalidate_tmp();
  emith_abicall(p32x_sh2_write32); // XXX: use sh2_drc_write32?
  // push PC
  rcache_get_reg_arg(0, SHR_SP, NULL);
  rcache_get_reg_arg(1, SHR_PC, NULL);
  emith_move_r_r_ptr(arg2, CONTEXT_REG);
  rcache_invalidate_tmp();
  emith_abicall(p32x_sh2_write32);
  // update I, cycles, do callback
  emith_ctx_read(arg1, offsetof(SH2, pending_level));
  sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
  emith_bic_r_imm(sr, I);
  emith_or_r_r_lsl(sr, arg1, I_SHIFT);
  emith_sub_r_imm(sr, 13 << 12); // at least 13 cycles
  rcache_flush();
  emith_move_r_r_ptr(arg0, CONTEXT_REG);
  emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level);
  // obtain new PC
  tmp = rcache_get_reg_arg(1, SHR_VBR, &tmp2);
  emith_add_r_r_r_lsl(arg0, tmp2, RET_REG, 2);
  emith_call(sh2_drc_read32);
  if (arg0 != RET_REG)
    emith_move_r_r(arg0, RET_REG);
  emith_call_cleanup();
  rcache_invalidate();
  emith_jump(sh2_drc_dispatcher);
  emith_flush();
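  // For reference, the code above performs the standard SH2 interrupt entry:
  //   SP -= 8; write32(SP+4, SR); write32(SP, PC);   // push SR, then PC
  //   SR.I = pending_level; cycles -= 13;
  //   vector = sh2->irq_callback(sh2, level);
  //   PC = read32(VBR + vector * 4);                 // fetch new PC
  // and then jumps to the dispatcher with the new PC.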

  // sh2_drc_entry(SH2 *sh2)
  sh2_drc_entry = (void *)tcache_ptr;
  emith_sh2_drc_entry();
  emith_move_r_r_ptr(CONTEXT_REG, arg0); // move ctx, arg0
  emit_do_static_regs(0, arg2);
  emith_call(sh2_drc_test_irq);
  emith_ctx_read(arg0, SHR_PC * 4);
  emith_jump(sh2_drc_dispatcher);
  emith_flush();

#ifdef DRC_SR_REG
  // sh2_drc_save_sr(SH2 *sh2)
  sh2_drc_save_sr = (void *)tcache_ptr;
  tmp = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
  emith_write_r_r_offs(tmp, arg0, SHR_SR * 4);
  rcache_invalidate();
  emith_ret();
  emith_flush();

  // sh2_drc_restore_sr(SH2 *sh2)
  sh2_drc_restore_sr = (void *)tcache_ptr;
  tmp = rcache_get_reg(SHR_SR, RC_GR_WRITE, NULL);
  emith_read_r_r_offs(tmp, arg0, SHR_SR * 4);
  rcache_flush();
  emith_ret();
  emith_flush();
#endif

#ifdef PDB_NET
  // debug
#define MAKE_READ_WRAPPER(func) { \
  void *tmp = (void *)tcache_ptr; \
  emith_push_ret(); \
  emith_call(func); \
  emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0])); \
  emith_addf_r_r(arg2, arg0); \
  emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[0])); \
  emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \
  emith_adc_r_imm(arg2, 0x01000000); \
  emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \
  emith_pop_and_ret(); \
  emith_flush(); \
  func = tmp; \
}
#define MAKE_WRITE_WRAPPER(func) { \
  void *tmp = (void *)tcache_ptr; \
  emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0])); \
  emith_addf_r_r(arg2, arg1); \
  emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[0])); \
  emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \
  emith_adc_r_imm(arg2, 0x01000000); \
  emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \
  emith_move_r_r_ptr(arg2, CONTEXT_REG); \
  emith_jump(func); \
  emith_flush(); \
  func = tmp; \
}

  MAKE_READ_WRAPPER(sh2_drc_read8);
  MAKE_READ_WRAPPER(sh2_drc_read16);
  MAKE_READ_WRAPPER(sh2_drc_read32);
  MAKE_WRITE_WRAPPER(sh2_drc_write8);
  MAKE_WRITE_WRAPPER(sh2_drc_write16);
  MAKE_WRITE_WRAPPER(sh2_drc_write32);
  MAKE_READ_WRAPPER(sh2_drc_read8_poll);
  MAKE_READ_WRAPPER(sh2_drc_read16_poll);
  MAKE_READ_WRAPPER(sh2_drc_read32_poll);
#endif

  emith_pool_commit(0);
  rcache_invalidate();
#if (DRC_DEBUG & 4)
  host_dasm_new_symbol(sh2_drc_entry);
  host_dasm_new_symbol(sh2_drc_dispatcher);
#if CALL_STACK
  host_dasm_new_symbol(sh2_drc_dispatcher_call);
  host_dasm_new_symbol(sh2_drc_dispatcher_return);
#endif
  host_dasm_new_symbol(sh2_drc_exit);
  host_dasm_new_symbol(sh2_drc_test_irq);
  host_dasm_new_symbol(sh2_drc_write8);
  host_dasm_new_symbol(sh2_drc_write16);
  host_dasm_new_symbol(sh2_drc_write32);
  host_dasm_new_symbol(sh2_drc_read8);
  host_dasm_new_symbol(sh2_drc_read16);
  host_dasm_new_symbol(sh2_drc_read32);
  host_dasm_new_symbol(sh2_drc_read8_poll);
  host_dasm_new_symbol(sh2_drc_read16_poll);
  host_dasm_new_symbol(sh2_drc_read32_poll);
#ifdef DRC_SR_REG
  host_dasm_new_symbol(sh2_drc_save_sr);
  host_dasm_new_symbol(sh2_drc_restore_sr);
#endif
#endif

#if DRC_DEBUG
  host_insn_count = hic;
#endif
}

static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift)
{
  struct block_list **blist, *entry, *next;
  u32 mask = RAM_SIZE(tcache_id) - 1;
  u32 wtmask = ~0x20000000; // writethrough area mask
  u32 start_addr, end_addr;
  u32 start_lit, end_lit;
  struct block_desc *block;
#if (DRC_DEBUG & 2)
  int removed = 0;
#endif

  // ignore cache-through
  a &= wtmask;

  blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE];
  entry = *blist;
  // go through the block list for this range
  while (entry != NULL) {
    next = entry->next;
    block = entry->block;
    start_addr = block->addr & wtmask;
    end_addr = start_addr + block->size;
    start_lit = block->addr_lit & wtmask;
    end_lit = start_lit + block->size_lit;
    // disable/delete block if it covers the modified address
    if ((start_addr < a+len && a < end_addr) ||
        (start_lit < a+len && a < end_lit))
    {
      dbg(2, "smc remove @%08x", a);
      end_addr = (start_lit < a+len && block->size_lit ? a : 0);
      dr_rm_block_entry(block, tcache_id, end_addr, 0);
#if (DRC_DEBUG & 2)
      removed = 1;
#endif
    }
    entry = next;
  }
#if (DRC_DEBUG & 2)
  if (!removed)
    dbg(2, "rm_blocks called @%08x, no work?", a);
#endif
#if BRANCH_CACHE
  if (tcache_id)
    memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
  else {
    memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
    memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4);
  }
#endif
#if CALL_STACK
  if (tcache_id) {
    memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4);
    sh2s[tcache_id-1].rts_cache_idx = 0;
  } else {
    memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4);
    memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4);
    sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0;
  }
#endif
}

void sh2_drc_wcheck_ram(u32 a, unsigned len, SH2 *sh2)
{
  sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT);
}

void sh2_drc_wcheck_da(u32 a, unsigned len, SH2 *sh2)
{
  sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT);
}

int sh2_execute_drc(SH2 *sh2c, int cycles)
{
  int ret_cycles;

  // cycles are kept in SHR_SR unused bits (upper 20)
  // bit11 contains T saved for delay slot
  // others are usual SH2 flags
  sh2c->sr &= 0x3f3;
  sh2c->sr |= cycles << 12;
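  // e.g. entering with cycles=1000 stores the budget in the upper bits
  // (sr = flags | (1000 << 12)); on return, (int32_t)sr >> 12 recovers the
  // remaining cycles, normally <= 0 (negative if the last block overran).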

  sh2c->state |= SH2_IN_DRC;
  sh2_drc_entry(sh2c);
  sh2c->state &= ~SH2_IN_DRC;

  // TODO: irq cycles
  ret_cycles = (int32_t)sh2c->sr >> 12;
  if (ret_cycles > 0)
    dbg(1, "warning: drc returned with cycles: %d, pc %08x", ret_cycles, sh2c->pc);

  sh2c->sr &= 0x3f3;
  return ret_cycles;
}

static void block_stats(void)
{
#if (DRC_DEBUG & 2)
  int c, b, i;
  long total = 0;

  printf("block stats:\n");
  for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
    for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size)
      if (block_tables[b][i].addr != 0)
        total += block_tables[b][i].refcount;
  }
  printf("total: %ld\n", total);

  for (c = 0; c < 20; c++) {
    struct block_desc *blk, *maxb = NULL;
    int max = 0;
    for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
      for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size)
        if ((blk = &block_tables[b][i])->addr != 0 && blk->refcount > max) {
          max = blk->refcount;
          maxb = blk;
        }
    }
    if (maxb == NULL)
      break;
    printf("%08x %p %9d %2.3f%%\n", maxb->addr, maxb->tcache_ptr, maxb->refcount,
      (double)maxb->refcount / total * 100.0);
    maxb->refcount = 0;
  }

  for (b = 0; b < ARRAY_SIZE(block_tables); b++)
    for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size)
      block_tables[b][i].refcount = 0;
#endif
}

void entry_stats(void)
{
#if (DRC_DEBUG & 32)
  int c, b, i, j;
  long total = 0;

  printf("block entry stats:\n");
  for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
    for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size)
      for (j = 0; j < block_tables[b][i].entry_count; j++)
        total += block_tables[b][i].entryp[j].entry_count;
  }
  printf("total: %ld\n", total);

  for (c = 0; c < 20; c++) {
    struct block_desc *blk;
    struct block_entry *maxb = NULL;
    int max = 0;
    for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
      for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) {
        blk = &block_tables[b][i];
        for (j = 0; j < blk->entry_count; j++)
          if (blk->entryp[j].entry_count > max) {
            max = blk->entryp[j].entry_count;
            maxb = &blk->entryp[j];
          }
      }
    }
    if (maxb == NULL)
      break;
    printf("%08x %p %9d %2.3f%%\n", maxb->pc, maxb->tcache_ptr, maxb->entry_count,
      (double)100 * maxb->entry_count / total);
    maxb->entry_count = 0;
  }

  for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
    for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size)
      for (j = 0; j < block_tables[b][i].entry_count; j++)
        block_tables[b][i].entryp[j].entry_count = 0;
  }
#endif
}

static void backtrace(void)
{
#if (DRC_DEBUG & 1024)
  int i;
  printf("backtrace master:\n");
  for (i = 0; i < ARRAY_SIZE(csh2[0]); i++)
    SH2_DUMP(&csh2[0][i], "bt msh2");
  printf("backtrace slave:\n");
  for (i = 0; i < ARRAY_SIZE(csh2[1]); i++)
    SH2_DUMP(&csh2[1][i], "bt ssh2");
#endif
}

static void state_dump(void)
{
#if (DRC_DEBUG & 2048)
  int i;

  SH2_DUMP(&sh2s[0], "master");
  printf("VBR msh2: %x\n", sh2s[0].vbr);
  for (i = 0; i < 0x60; i++) {
    printf("%08x ", p32x_sh2_read32(sh2s[0].vbr + i*4, &sh2s[0]));
    if ((i+1) % 8 == 0) printf("\n");
  }
  printf("stack msh2: %x\n", sh2s[0].r[15]);
  for (i = -0x30; i < 0x30; i++) {
    printf("%08x ", p32x_sh2_read32(sh2s[0].r[15] + i*4, &sh2s[0]));
    if ((i+1) % 8 == 0) printf("\n");
  }
  SH2_DUMP(&sh2s[1], "slave");
  printf("VBR ssh2: %x\n", sh2s[1].vbr);
  for (i = 0; i < 0x60; i++) {
    printf("%08x ", p32x_sh2_read32(sh2s[1].vbr + i*4, &sh2s[1]));
    if ((i+1) % 8 == 0) printf("\n");
  }
  printf("stack ssh2: %x\n", sh2s[1].r[15]);
  for (i = -0x30; i < 0x30; i++) {
    printf("%08x ", p32x_sh2_read32(sh2s[1].r[15] + i*4, &sh2s[1]));
    if ((i+1) % 8 == 0) printf("\n");
  }
#endif
}

static void bcache_stats(void)
{
#if (DRC_DEBUG & 128)
  int i;
#if CALL_STACK
  for (i = 1; i < ARRAY_SIZE(sh2s->rts_cache); i++)
    if (sh2s[0].rts_cache[i].pc == -1 && sh2s[1].rts_cache[i].pc == -1) break;

  printf("return cache hits:%d misses:%d depth: %d index: %d/%d\n",
    rchit, rcmiss, i, sh2s[0].rts_cache_idx, sh2s[1].rts_cache_idx);
  for (i = 0; i < ARRAY_SIZE(sh2s[0].rts_cache); i++) {
    printf("%08x ", sh2s[0].rts_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n");
  }
  for (i = 0; i < ARRAY_SIZE(sh2s[1].rts_cache); i++) {
    printf("%08x ", sh2s[1].rts_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n");
  }
#endif
#if BRANCH_CACHE
  printf("branch cache hits:%d misses:%d\n", bchit, bcmiss);
  printf("branch cache master:\n");
  for (i = 0; i < ARRAY_SIZE(sh2s[0].branch_cache); i++) {
    printf("%08x ", sh2s[0].branch_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n");
  }
  printf("branch cache slave:\n");
  for (i = 0; i < ARRAY_SIZE(sh2s[1].branch_cache); i++) {
    printf("%08x ", sh2s[1].branch_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n");
  }
#endif
#endif
}

void sh2_drc_flush_all(void)
{
  backtrace();
  state_dump();
  block_stats();
  entry_stats();
  bcache_stats();
  dr_flush_tcache(0);
  dr_flush_tcache(1);
  dr_flush_tcache(2);
  Pico32x.emu_flags &= ~P32XF_DRC_ROM_C;
}

void sh2_drc_mem_setup(SH2 *sh2)
{
  // fill the DRC-only convenience pointers
  sh2->p_drcblk_da = Pico32xMem->drcblk_da[!!sh2->is_slave];
  sh2->p_drcblk_ram = Pico32xMem->drcblk_ram;
}

int sh2_drc_init(SH2 *sh2)
{
  int i;

  if (block_tables[0] == NULL)
  {
    for (i = 0; i < TCACHE_BUFFERS; i++) {
      block_tables[i] = calloc(BLOCK_MAX_COUNT(i), sizeof(*block_tables[0]));
      if (block_tables[i] == NULL)
        goto fail;
      entry_tables[i] = calloc(ENTRY_MAX_COUNT(i), sizeof(*entry_tables[0]));
      if (entry_tables[i] == NULL)
        goto fail;
      block_link_pool[i] = calloc(BLOCK_LINK_MAX_COUNT(i),
                            sizeof(*block_link_pool[0]));
      if (block_link_pool[i] == NULL)
        goto fail;

      inval_lookup[i] = calloc(RAM_SIZE(i) / INVAL_PAGE_SIZE,
                               sizeof(inval_lookup[0]));
      if (inval_lookup[i] == NULL)
        goto fail;

      hash_tables[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*hash_tables[0]));
      if (hash_tables[i] == NULL)
        goto fail;

      unresolved_links[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*unresolved_links[0]));
      if (unresolved_links[i] == NULL)
        goto fail;
      //atexit(sh2_drc_finish);

      RING_INIT(&block_ring[i], block_tables[i], BLOCK_MAX_COUNT(i));
      RING_INIT(&entry_ring[i], entry_tables[i], ENTRY_MAX_COUNT(i));
    }

    block_list_pool = calloc(BLOCK_LIST_MAX_COUNT, sizeof(*block_list_pool));
    if (block_list_pool == NULL)
      goto fail;
    block_list_pool_count = 0;
    blist_free = NULL;

    memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts));
    memset(blink_free, 0, sizeof(blink_free));

    drc_cmn_init();
    rcache_init();

    tcache_ptr = tcache;
    sh2_generate_utils();
    host_instructions_updated(tcache, tcache_ptr, 1);
    emith_update_cache();

    i = tcache_ptr - tcache;
    RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i);
    for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) {
      RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_ring[i-1].size,
          tcache_sizes[i]);
    }

#if (DRC_DEBUG & 4)
    for (i = 0; i < ARRAY_SIZE(block_tables); i++)
      tcache_dsm_ptrs[i] = tcache_ring[i].base;
    // disasm the utils
    tcache_dsm_ptrs[0] = tcache;
    do_host_disasm(0);
    fflush(stdout);
#endif
#if (DRC_DEBUG & 1)
    hash_collisions = 0;
#endif
  }
  memset(sh2->branch_cache, -1, sizeof(sh2->branch_cache));
  memset(sh2->rts_cache, -1, sizeof(sh2->rts_cache));
  sh2->rts_cache_idx = 0;

  return 0;

fail:
  sh2_drc_finish(sh2);
  return -1;
}

void sh2_drc_finish(SH2 *sh2)
{
  int i;

  if (block_tables[0] == NULL)
    return;

#if (DRC_DEBUG & (256|512))
  if (trace[0]) fclose(trace[0]);
  if (trace[1]) fclose(trace[1]);
  trace[0] = trace[1] = NULL;
#endif

#if (DRC_DEBUG & 4)
  for (i = 0; i < TCACHE_BUFFERS; i++) {
    printf("~~~ tcache %d\n", i);
#if 0
    if (tcache_ring[i].first < tcache_ring[i].next) {
      tcache_dsm_ptrs[i] = tcache_ring[i].first;
      tcache_ptr = tcache_ring[i].next;
      do_host_disasm(i);
    } else if (tcache_ring[i].used) {
      tcache_dsm_ptrs[i] = tcache_ring[i].first;
      tcache_ptr = tcache_ring[i].base + tcache_ring[i].size;
      do_host_disasm(i);
      tcache_dsm_ptrs[i] = tcache_ring[i].base;
      tcache_ptr = tcache_ring[i].next;
      do_host_disasm(i);
    }
#endif
    printf("max links: %d\n", block_link_pool_counts[i]);
  }
  printf("max block list: %d\n", block_list_pool_count);
#endif

  sh2_drc_flush_all();

  for (i = 0; i < TCACHE_BUFFERS; i++) {
    if (block_tables[i] != NULL)
      free(block_tables[i]);
    block_tables[i] = NULL;
    if (entry_tables[i] != NULL)
      free(entry_tables[i]);
    entry_tables[i] = NULL;
    if (block_link_pool[i] != NULL)
      free(block_link_pool[i]);
    block_link_pool[i] = NULL;
    blink_free[i] = NULL;

    if (inval_lookup[i] != NULL)
      free(inval_lookup[i]);
    inval_lookup[i] = NULL;

    if (hash_tables[i] != NULL) {
      free(hash_tables[i]);
      hash_tables[i] = NULL;
    }

    if (unresolved_links[i] != NULL) {
      free(unresolved_links[i]);
      unresolved_links[i] = NULL;
    }
  }

  if (block_list_pool != NULL)
    free(block_list_pool);
  block_list_pool = NULL;
  blist_free = NULL;

  drc_cmn_cleanup();
}

#endif /* DRC_SH2 */

static void *dr_get_pc_base(u32 pc, SH2 *sh2)
{
  void *ret;
  u32 mask = 0;

  ret = p32x_sh2_get_mem_ptr(pc, &mask, sh2);
  if (ret == (void *)-1)
    return ret;

  return (char *)ret - (pc & ~mask);
}
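
// The returned base is biased so the full PC can be used as an index:
// e.g. (illustrative values) for pc = 0x06000100 with mask = 0x3ffff the
// result is mem_ptr - (pc & ~mask), so that dr_pc_base[pc / 2] reads
// mem_ptr[(pc & mask) / 2].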

u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
  u32 *base_literals_out, u32 *end_literals_out)
{
  u16 *dr_pc_base;
  u32 pc, op, tmp;
  u32 end_pc, end_literals = 0;
  u32 lowest_literal = 0;
  u32 lowest_mova = 0;
  struct op_data *opd;
  int next_is_delay = 0;
  int end_block = 0;
  int is_divop;
  int i, i_end, i_div = -1;
  u32 crc = 0;
  // 2nd pass stuff
  int last_btarget; // loop detector
  enum { T_UNKNOWN, T_CLEAR, T_SET } t; // T propagation state

  memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT);
  op_flags[0] |= OF_BTARGET; // block start is always a target

  dr_pc_base = dr_get_pc_base(base_pc, &sh2s[!!is_slave]);

  // 1st pass: disassemble
  for (i = 0, pc = base_pc; ; i++, pc += 2) {
    // we need an ops[] entry after the last one initialized,
    // so do it before end_block checks
    opd = &ops[i];
    opd->op = OP_UNHANDLED;
    opd->rm = -1;
    opd->source = opd->dest = 0;
    opd->cycles = 1;
    opd->imm = 0;

    if (next_is_delay) {
      op_flags[i] |= OF_DELAY_OP;
      next_is_delay = 0;
    }
    else if (end_block || i >= BLOCK_INSN_LIMIT - 2)
      break;
    else if ((lowest_mova && lowest_mova <= pc) ||
             (lowest_literal && lowest_literal <= pc))
      break; // text area collides with data area

    is_divop = 0;
    op = FETCH_OP(pc);
    switch ((op & 0xf000) >> 12)
    {
    /////////////////////////////////////////////
    case 0x00:
      switch (op & 0x0f)
      {
      case 0x02:
        switch (GET_Fx())
        {
        case 0: // STC SR,Rn 0000nnnn00000010
          tmp = BITMASK2(SHR_SR, SHR_T);
          break;
        case 1: // STC GBR,Rn 0000nnnn00010010
          tmp = BITMASK1(SHR_GBR);
          break;
        case 2: // STC VBR,Rn 0000nnnn00100010
          tmp = BITMASK1(SHR_VBR);
          break;
        default:
          goto undefined;
        }
        opd->op = OP_MOVE;
        opd->source = tmp;
        opd->dest = BITMASK1(GET_Rn());
        break;
      case 0x03:
        CHECK_UNHANDLED_BITS(0xd0, undefined);
        // BRAF Rm 0000mmmm00100011
        // BSRF Rm 0000mmmm00000011
        opd->op = OP_BRANCH_RF;
        opd->rm = GET_Rn();
        opd->source = BITMASK2(SHR_PC, opd->rm);
        opd->dest = BITMASK1(SHR_PC);
        if (!(op & 0x20))
          opd->dest |= BITMASK1(SHR_PR);
        opd->cycles = 2;
        next_is_delay = 1;
        if (!(opd->dest & BITMASK1(SHR_PR)))
          end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
        else
          op_flags[i+1+next_is_delay] |= OF_BTARGET;
        break;
      case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100
      case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101
      case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110
        opd->source = BITMASK3(GET_Rm(), SHR_R0, GET_Rn());
        opd->dest = BITMASK1(SHR_MEM);
        break;
      case 0x07:
        // MUL.L Rm,Rn 0000nnnnmmmm0111
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(SHR_MACL);
        opd->cycles = 2;
        break;
      case 0x08:
        CHECK_UNHANDLED_BITS(0xf00, undefined);
        switch (GET_Fx())
        {
        case 0: // CLRT 0000000000001000
          opd->op = OP_SETCLRT;
          opd->dest = BITMASK1(SHR_T);
          opd->imm = 0;
          break;
        case 1: // SETT 0000000000011000
          opd->op = OP_SETCLRT;
          opd->dest = BITMASK1(SHR_T);
          opd->imm = 1;
          break;
        case 2: // CLRMAC 0000000000101000
          opd->dest = BITMASK2(SHR_MACL, SHR_MACH);
          break;
        default:
          goto undefined;
        }
        break;
      case 0x09:
        switch (GET_Fx())
        {
        case 0: // NOP 0000000000001001
          CHECK_UNHANDLED_BITS(0xf00, undefined);
          break;
        case 1: // DIV0U 0000000000011001
          CHECK_UNHANDLED_BITS(0xf00, undefined);
          opd->op = OP_DIV0;
          opd->source = BITMASK1(SHR_SR);
          opd->dest = BITMASK2(SHR_SR, SHR_T);
          div(opd) = (struct div){ .rn=SHR_MEM, .rm=SHR_MEM, .ro=SHR_MEM };
          i_div = i;
          is_divop = 1;
          break;
        case 2: // MOVT Rn 0000nnnn00101001
          opd->source = BITMASK1(SHR_T);
          opd->dest = BITMASK1(GET_Rn());
          break;
        default:
          goto undefined;
        }
        break;
      case 0x0a:
        switch (GET_Fx())
        {
        case 0: // STS MACH,Rn 0000nnnn00001010
          tmp = SHR_MACH;
          break;
        case 1: // STS MACL,Rn 0000nnnn00011010
          tmp = SHR_MACL;
          break;
        case 2: // STS PR,Rn 0000nnnn00101010
          tmp = SHR_PR;
          break;
        default:
          goto undefined;
        }
        opd->op = OP_MOVE;
        opd->source = BITMASK1(tmp);
        opd->dest = BITMASK1(GET_Rn());
        break;
      case 0x0b:
        CHECK_UNHANDLED_BITS(0xf00, undefined);
        switch (GET_Fx())
        {
        case 0: // RTS 0000000000001011
          opd->op = OP_BRANCH_R;
          opd->rm = SHR_PR;
          opd->source = BITMASK1(opd->rm);
          opd->dest = BITMASK1(SHR_PC);
          opd->cycles = 2;
          next_is_delay = 1;
          end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
          break;
        case 1: // SLEEP 0000000000011011
          opd->op = OP_SLEEP;
          end_block = 1;
          break;
        case 2: // RTE 0000000000101011
          opd->op = OP_RTE;
          opd->source = BITMASK1(SHR_SP);
          opd->dest = BITMASK4(SHR_SP, SHR_SR, SHR_T, SHR_PC);
          opd->cycles = 4;
          next_is_delay = 1;
          end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
          break;
        default:
          goto undefined;
        }
        break;
      case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100
      case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101
      case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110
        opd->source = BITMASK3(GET_Rm(), SHR_R0, SHR_MEM);
        opd->dest = BITMASK1(GET_Rn());
        op_flags[i] |= OF_POLL_INSN;
        break;
      case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111
        opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM);
        opd->dest = BITMASK4(GET_Rm(), GET_Rn(), SHR_MACL, SHR_MACH);
        opd->cycles = 3;
        break;
      default:
        goto undefined;
      }
      break;

    /////////////////////////////////////////////
    case 0x01:
      // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd
      opd->source = BITMASK2(GET_Rm(), GET_Rn());
      opd->dest = BITMASK1(SHR_MEM);
      opd->imm = (op & 0x0f) * 4;
      break;

    /////////////////////////////////////////////
    case 0x02:
      switch (op & 0x0f)
      {
      case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000
      case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001
      case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(SHR_MEM);
        break;
      case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100
      case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101
      case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK2(GET_Rn(), SHR_MEM);
        break;
      case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111
        opd->op = OP_DIV0;
        opd->source = BITMASK3(SHR_SR, GET_Rm(), GET_Rn());
        opd->dest = BITMASK2(SHR_SR, SHR_T);
        div(opd) = (struct div){ .rn=GET_Rn(), .rm=GET_Rm(), .ro=SHR_MEM };
        i_div = i;
        is_divop = 1;
        break;
      case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(SHR_T);
        break;
      case 0x09: // AND Rm,Rn 0010nnnnmmmm1001
      case 0x0a: // XOR Rm,Rn 0010nnnnmmmm1010
      case 0x0b: // OR Rm,Rn 0010nnnnmmmm1011
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(GET_Rn());
        break;
      case 0x0c: // CMP/STR Rm,Rn 0010nnnnmmmm1100
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(SHR_T);
        break;
      case 0x0d: // XTRCT Rm,Rn 0010nnnnmmmm1101
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(GET_Rn());
        break;
      case 0x0e: // MULU.W Rm,Rn 0010nnnnmmmm1110
      case 0x0f: // MULS.W Rm,Rn 0010nnnnmmmm1111
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(SHR_MACL);
        break;
      default:
        goto undefined;
      }
      break;

    /////////////////////////////////////////////
    case 0x03:
      switch (op & 0x0f)
      {
      case 0x00: // CMP/EQ Rm,Rn 0011nnnnmmmm0000
      case 0x02: // CMP/HS Rm,Rn 0011nnnnmmmm0010
      case 0x03: // CMP/GE Rm,Rn 0011nnnnmmmm0011
      case 0x06: // CMP/HI Rm,Rn 0011nnnnmmmm0110
      case 0x07: // CMP/GT Rm,Rn 0011nnnnmmmm0111
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(SHR_T);
        break;
      case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100
        opd->source = BITMASK4(GET_Rm(), GET_Rn(), SHR_SR, SHR_T);
        opd->dest = BITMASK3(GET_Rn(), SHR_SR, SHR_T);
        if (i_div >= 0) {
          // divide operation: all DIV1 operations must use the same reg pair
          if (div(&ops[i_div]).rn == SHR_MEM)
            div(&ops[i_div]).rn=GET_Rn(), div(&ops[i_div]).rm=GET_Rm();
          if (div(&ops[i_div]).rn == GET_Rn() && div(&ops[i_div]).rm == GET_Rm()) {
            div(&ops[i_div]).div1 += 1;
            div(&ops[i_div]).state = 0;
            is_divop = 1;
          } else {
            ops[i_div].imm = 0;
            i_div = -1;
          }
        }
        break;
      case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
      case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK2(SHR_MACL, SHR_MACH);
        opd->cycles = 2;
        break;
      case 0x08: // SUB Rm,Rn 0011nnnnmmmm1000
      case 0x0c: // ADD Rm,Rn 0011nnnnmmmm1100
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK1(GET_Rn());
        break;
      case 0x0a: // SUBC Rm,Rn 0011nnnnmmmm1010
      case 0x0e: // ADDC Rm,Rn 0011nnnnmmmm1110
        opd->source = BITMASK3(GET_Rm(), GET_Rn(), SHR_T);
        opd->dest = BITMASK2(GET_Rn(), SHR_T);
        break;
      case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011
      case 0x0f: // ADDV Rm,Rn 0011nnnnmmmm1111
        opd->source = BITMASK2(GET_Rm(), GET_Rn());
        opd->dest = BITMASK2(GET_Rn(), SHR_T);
        break;
      default:
        goto undefined;
      }
      break;

    /////////////////////////////////////////////
    case 0x04:
      switch (op & 0x0f)
      {
      case 0x00:
        switch (GET_Fx())
        {
        case 0: // SHLL Rn 0100nnnn00000000
        case 2: // SHAL Rn 0100nnnn00100000
          opd->source = BITMASK1(GET_Rn());
          opd->dest = BITMASK2(GET_Rn(), SHR_T);
          break;
        case 1: // DT Rn 0100nnnn00010000
          opd->source = BITMASK1(GET_Rn());
          opd->dest = BITMASK2(GET_Rn(), SHR_T);
          op_flags[i] |= OF_DELAY_INSN;
          break;
        default:
          goto undefined;
        }
        break;
      case 0x01:
        switch (GET_Fx())
        {
        case 0: // SHLR Rn 0100nnnn00000001
        case 2: // SHAR Rn 0100nnnn00100001
          opd->source = BITMASK1(GET_Rn());
          opd->dest = BITMASK2(GET_Rn(), SHR_T);
          break;
        case 1: // CMP/PZ Rn 0100nnnn00010001
          opd->source = BITMASK1(GET_Rn());
          opd->dest = BITMASK1(SHR_T);
          break;
        default:
          goto undefined;
        }
        break;
      case 0x02:
      case 0x03:
        switch (op & 0x3f)
        {
        case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010
          tmp = BITMASK1(SHR_MACH);
          break;
        case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010
          tmp = BITMASK1(SHR_MACL);
          break;
        case 0x22: // STS.L PR,@-Rn 0100nnnn00100010
          tmp = BITMASK1(SHR_PR);
          break;
        case 0x03: // STC.L SR,@-Rn 0100nnnn00000011
          tmp = BITMASK2(SHR_SR, SHR_T);
          opd->cycles = 2;
          break;
        case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011
          tmp = BITMASK1(SHR_GBR);
          opd->cycles = 2;
          break;
        case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011
          tmp = BITMASK1(SHR_VBR);
          opd->cycles = 2;
          break;
        default:
          goto undefined;
        }
        opd->source = BITMASK1(GET_Rn()) | tmp;
        opd->dest = BITMASK2(GET_Rn(), SHR_MEM);
        break;
      case 0x04:
      case 0x05:
        switch (op & 0x3f)
        {
        case 0x04: // ROTL Rn 0100nnnn00000100
        case 0x05: // ROTR Rn 0100nnnn00000101
          opd->source = BITMASK1(GET_Rn());
          opd->dest = BITMASK2(GET_Rn(), SHR_T);
          break;
        case 0x24: // ROTCL Rn 0100nnnn00100100
          if (i_div >= 0) {
            // divide operation: all ROTCL operations must use the same register
            if (div(&ops[i_div]).ro == SHR_MEM)
              div(&ops[i_div]).ro = GET_Rn();
            if (div(&ops[i_div]).ro == GET_Rn() && !div(&ops[i_div]).state) {
              div(&ops[i_div]).rotcl += 1;
              div(&ops[i_div]).state = 1;
              is_divop = 1;
            } else {
              ops[i_div].imm = 0;
              i_div = -1;
            }
          }
          // fallthrough
        case 0x25: // ROTCR Rn 0100nnnn00100101
          opd->source = BITMASK2(GET_Rn(), SHR_T);
          opd->dest = BITMASK2(GET_Rn(), SHR_T);
          break;
        case 0x15: // CMP/PL Rn 0100nnnn00010101
          opd->source = BITMASK1(GET_Rn());
          opd->dest = BITMASK1(SHR_T);
          break;
        default:
          goto undefined;
        }
        break;
      case 0x06:
      case 0x07:
        switch (op & 0x3f)
        {
        case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110
          tmp = BITMASK1(SHR_MACH);
          break;
        case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110
          tmp = BITMASK1(SHR_MACL);
          break;
        case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110
          tmp = BITMASK1(SHR_PR);
          break;
        case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111
          tmp = BITMASK2(SHR_SR, SHR_T);
          opd->op = OP_LDC;
          opd->cycles = 3;
          break;
        case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111
          tmp = BITMASK1(SHR_GBR);
          opd->op = OP_LDC;
          opd->cycles = 3;
          break;
        case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111
          tmp = BITMASK1(SHR_VBR);
          opd->op = OP_LDC;
          opd->cycles = 3;
          break;
        default:
          goto undefined;
        }
        opd->source = BITMASK2(GET_Rn(), SHR_MEM);
        opd->dest = BITMASK1(GET_Rn()) | tmp;
        break;
      case 0x08:
      case 0x09:
        switch (GET_Fx())
        {
        case 0:
          // SHLL2 Rn 0100nnnn00001000
          // SHLR2 Rn 0100nnnn00001001
          break;
        case 1:
          // SHLL8 Rn 0100nnnn00011000
          // SHLR8 Rn 0100nnnn00011001
          break;
        case 2:
          // SHLL16 Rn 0100nnnn00101000
          // SHLR16 Rn 0100nnnn00101001
          break;
        default:
          goto undefined;
        }
        opd->source = BITMASK1(GET_Rn());
        opd->dest = BITMASK1(GET_Rn());
        break;
      case 0x0a:
        switch (GET_Fx())
        {
        case 0: // LDS Rm,MACH 0100mmmm00001010
          tmp = SHR_MACH;
          break;
        case 1: // LDS Rm,MACL 0100mmmm00011010
          tmp = SHR_MACL;
          break;
        case 2: // LDS Rm,PR 0100mmmm00101010
          tmp = SHR_PR;
          break;
        default:
          goto undefined;
        }
        opd->op = OP_MOVE;
        opd->source = BITMASK1(GET_Rn());
        opd->dest = BITMASK1(tmp);
        break;
      case 0x0b:
        switch (GET_Fx())
        {
        case 0: // JSR @Rm 0100mmmm00001011
          opd->dest = BITMASK1(SHR_PR);
          // fallthrough
        case 2: // JMP @Rm 0100mmmm00101011
          opd->op = OP_BRANCH_R;
          opd->rm = GET_Rn();
          opd->source = BITMASK1(opd->rm);
          opd->dest |= BITMASK1(SHR_PC);
          opd->cycles = 2;
          next_is_delay = 1;
          if (!(opd->dest & BITMASK1(SHR_PR)))
            end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
          else
            op_flags[i+1+next_is_delay] |= OF_BTARGET;
          break;
        case 1: // TAS.B @Rn 0100nnnn00011011
          opd->source = BITMASK2(GET_Rn(), SHR_MEM);
          opd->dest = BITMASK2(SHR_T, SHR_MEM);
          opd->cycles = 4;
          break;
        default:
          goto undefined;
        }
        break;
      case 0x0e:
        switch (GET_Fx())
        {
        case 0: // LDC Rm,SR 0100mmmm00001110
          tmp = BITMASK2(SHR_SR, SHR_T);
          break;
        case 1: // LDC Rm,GBR 0100mmmm00011110
          tmp = BITMASK1(SHR_GBR);
          break;
        case 2: // LDC Rm,VBR 0100mmmm00101110
          tmp = BITMASK1(SHR_VBR);
          break;
        default:
          goto undefined;
        }
        opd->op = OP_LDC;
        opd->source = BITMASK1(GET_Rn());
        opd->dest = tmp;
        break;
      case 0x0f:
        // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111
        opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM);
        opd->dest = BITMASK4(GET_Rm(), GET_Rn(), SHR_MACL, SHR_MACH);
        opd->cycles = 3;
        break;
      default:
        goto undefined;
      }
      break;

    /////////////////////////////////////////////
    case 0x05:
      // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd
      opd->source = BITMASK2(GET_Rm(), SHR_MEM);
      opd->dest = BITMASK1(GET_Rn());
      opd->imm = (op & 0x0f) * 4;
      op_flags[i] |= OF_POLL_INSN;
      break;

    /////////////////////////////////////////////
    case 0x06:
      switch (op & 0x0f)
      {
      case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100
      case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101
      case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110
        opd->dest = BITMASK2(GET_Rm(), GET_Rn());
        opd->source = BITMASK2(GET_Rm(), SHR_MEM);
        break;
      case 0x00: // MOV.B @Rm,Rn 0110nnnnmmmm0000
      case 0x01: // MOV.W @Rm,Rn 0110nnnnmmmm0001
      case 0x02: // MOV.L @Rm,Rn 0110nnnnmmmm0010
        opd->dest = BITMASK1(GET_Rn());
        opd->source = BITMASK2(GET_Rm(), SHR_MEM);
        op_flags[i] |= OF_POLL_INSN;
        break;
      case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010
        opd->source = BITMASK2(GET_Rm(), SHR_T);
        opd->dest = BITMASK2(GET_Rn(), SHR_T);
        break;
      case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011
        opd->op = OP_MOVE;
        goto arith_rmrn;
      case 0x07: // NOT Rm,Rn 0110nnnnmmmm0111
      case 0x08: // SWAP.B Rm,Rn 0110nnnnmmmm1000
      case 0x09: // SWAP.W Rm,Rn 0110nnnnmmmm1001
      case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011
      case 0x0c: // EXTU.B Rm,Rn 0110nnnnmmmm1100
      case 0x0d: // EXTU.W Rm,Rn 0110nnnnmmmm1101
      case 0x0e: // EXTS.B Rm,Rn 0110nnnnmmmm1110
      case 0x0f: // EXTS.W Rm,Rn 0110nnnnmmmm1111
      arith_rmrn:
        opd->source = BITMASK1(GET_Rm());
        opd->dest = BITMASK1(GET_Rn());
        break;
      }
      break;

    /////////////////////////////////////////////
    case 0x07:
      // ADD #imm,Rn 0111nnnniiiiiiii
      opd->source = opd->dest = BITMASK1(GET_Rn());
      opd->imm = (s8)op;
      break;

    /////////////////////////////////////////////
    case 0x08:
      switch (op & 0x0f00)
      {
      case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd
        opd->source = BITMASK2(GET_Rm(), SHR_R0);
        opd->dest = BITMASK1(SHR_MEM);
        opd->imm = (op & 0x0f);
        break;
      case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd
        opd->source = BITMASK2(GET_Rm(), SHR_R0);
        opd->dest = BITMASK1(SHR_MEM);
        opd->imm = (op & 0x0f) * 2;
        break;
      case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd
        opd->source = BITMASK2(GET_Rm(), SHR_MEM);
        opd->dest = BITMASK1(SHR_R0);
        opd->imm = (op & 0x0f);
        op_flags[i] |= OF_POLL_INSN;
        break;
      case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd
        opd->source = BITMASK2(GET_Rm(), SHR_MEM);
        opd->dest = BITMASK1(SHR_R0);
        opd->imm = (op & 0x0f) * 2;
        op_flags[i] |= OF_POLL_INSN;
        break;
      case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii
        opd->source = BITMASK1(SHR_R0);
        opd->dest = BITMASK1(SHR_T);
        opd->imm = (s8)op;
        break;
      case 0x0d00: // BT/S label 10001101dddddddd
      case 0x0f00: // BF/S label 10001111dddddddd
        next_is_delay = 1;
        // fallthrough
      case 0x0900: // BT label 10001001dddddddd
      case 0x0b00: // BF label 10001011dddddddd
        opd->op = (op & 0x0200) ? OP_BRANCH_CF : OP_BRANCH_CT;
        opd->source = BITMASK2(SHR_PC, SHR_T);
        opd->dest = BITMASK1(SHR_PC);
        opd->imm = ((signed int)(op << 24) >> 23);
        opd->imm += pc + 4;
        if (base_pc <= opd->imm && opd->imm < base_pc + BLOCK_INSN_LIMIT * 2)
          op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET;
        break;
      default:
        goto undefined;
      }
      break;

    /////////////////////////////////////////////
    case 0x09:
      // MOV.W @(disp,PC),Rn 1001nnnndddddddd
      opd->op = OP_LOAD_POOL;
      tmp = pc + 2;
      if (op_flags[i] & OF_DELAY_OP) {
        if (ops[i-1].op == OP_BRANCH)
          tmp = ops[i-1].imm;
        else if (ops[i-1].op != OP_BRANCH_N)
          tmp = 0;
      }
      opd->source = BITMASK2(SHR_PC, SHR_MEM);
      opd->dest = BITMASK1(GET_Rn());
      if (tmp) {
        opd->imm = tmp + 2 + (op & 0xff) * 2;
        if (lowest_literal == 0 || opd->imm < lowest_literal)
          lowest_literal = opd->imm;
      }
      opd->size = 1;
      break;

    /////////////////////////////////////////////
    case 0x0b:
      // BSR label 1011dddddddddddd
      opd->dest = BITMASK1(SHR_PR);
      // fallthrough
    case 0x0a:
      // BRA label 1010dddddddddddd
      opd->op = OP_BRANCH;
      opd->source = BITMASK1(SHR_PC);
      opd->dest |= BITMASK1(SHR_PC);
      opd->imm = ((signed int)(op << 20) >> 19);
      opd->imm += pc + 4;
      opd->cycles = 2;
      next_is_delay = 1;
      if (!(opd->dest & BITMASK1(SHR_PR))) {
        if (base_pc <= opd->imm && opd->imm < base_pc + BLOCK_INSN_LIMIT * 2) {
          op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET;
          if (opd->imm <= pc)
            end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
        } else
          end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
      } else
        op_flags[i+1+next_is_delay] |= OF_BTARGET;
      break;

    /////////////////////////////////////////////
    case 0x0c:
      switch (op & 0x0f00)
      {
      case 0x0000: // MOV.B R0,@(disp,GBR) 11000000dddddddd
      case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd
      case 0x0200: // MOV.L R0,@(disp,GBR) 11000010dddddddd
        opd->source = BITMASK2(SHR_GBR, SHR_R0);
        opd->dest = BITMASK1(SHR_MEM);
        opd->size = (op & 0x300) >> 8;
        opd->imm = (op & 0xff) << opd->size;
        break;
      case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd
      case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd
      case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd
        opd->source = BITMASK2(SHR_GBR, SHR_MEM);
        opd->dest = BITMASK1(SHR_R0);
        opd->size = (op & 0x300) >> 8;
        opd->imm = (op & 0xff) << opd->size;
        op_flags[i] |= OF_POLL_INSN;
        break;
      case 0x0300: // TRAPA #imm 11000011iiiiiiii
        opd->op = OP_TRAPA;
        opd->source = BITMASK4(SHR_SP, SHR_PC, SHR_SR, SHR_T);
        opd->dest = BITMASK2(SHR_SP, SHR_PC);
        opd->imm = (op & 0xff);
        opd->cycles = 8;
        op_flags[i+1] |= OF_BTARGET;
        break;
      case 0x0700: // MOVA @(disp,PC),R0 11000111dddddddd
        opd->op = OP_MOVA;
        tmp = pc + 2;
        if (op_flags[i] & OF_DELAY_OP) {
          if (ops[i-1].op == OP_BRANCH)
            tmp = ops[i-1].imm;
          else if (ops[i-1].op != OP_BRANCH_N)
            tmp = 0;
        }
        opd->dest = BITMASK1(SHR_R0);
        if (tmp) {
          opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3;
          if (opd->imm >= base_pc) {
            if (lowest_mova == 0 || opd->imm < lowest_mova)
              lowest_mova = opd->imm;
          }
        }
        break;
      case 0x0800: // TST #imm,R0 11001000iiiiiiii
        opd->source = BITMASK1(SHR_R0);
        opd->dest = BITMASK1(SHR_T);
        opd->imm = op & 0xff;
        break;
      case 0x0900: // AND #imm,R0 11001001iiiiiiii
        opd->source = opd->dest = BITMASK1(SHR_R0);
        opd->imm = op & 0xff;
        break;
      case 0x0a00: // XOR #imm,R0 11001010iiiiiiii
        opd->source = opd->dest = BITMASK1(SHR_R0);
        opd->imm = op & 0xff;
        break;
      case 0x0b00: // OR #imm,R0 11001011iiiiiiii
        opd->source = opd->dest = BITMASK1(SHR_R0);
        opd->imm = op & 0xff;
        break;
      case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii
        opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM);
        opd->dest = BITMASK1(SHR_T);
        opd->imm = op & 0xff;
        op_flags[i] |= OF_POLL_INSN;
        opd->cycles = 3;
        break;
      case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii
      case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii
      case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii
        opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM);
        opd->dest = BITMASK1(SHR_MEM);
        opd->imm = op & 0xff;
        opd->cycles = 3;
        break;
      default:
        goto undefined;
      }
      break;

    /////////////////////////////////////////////
    case 0x0d:
      // MOV.L @(disp,PC),Rn 1101nnnndddddddd
      opd->op = OP_LOAD_POOL;
      tmp = pc + 2;
      if (op_flags[i] & OF_DELAY_OP) {
        if (ops[i-1].op == OP_BRANCH)
          tmp = ops[i-1].imm;
        else if (ops[i-1].op != OP_BRANCH_N)
          tmp = 0;
      }
      opd->source = BITMASK2(SHR_PC, SHR_MEM);
      opd->dest = BITMASK1(GET_Rn());
      if (tmp) {
        opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3;
        if (lowest_literal == 0 || opd->imm < lowest_literal)
          lowest_literal = opd->imm;
      }
      opd->size = 2;
      break;

    /////////////////////////////////////////////
    case 0x0e:
      // MOV #imm,Rn 1110nnnniiiiiiii
      opd->op = OP_LOAD_CONST;
      opd->dest = BITMASK1(GET_Rn());
      opd->imm = (s8)op;
      break;

    default:
    undefined:
      opd->op = OP_UNDEFINED;
      // an unhandled instruction is probably not code if it's not the 1st insn
      if (!(op_flags[i] & OF_DELAY_OP) && pc != base_pc)
        goto end;
      break;
    }

    if (op_flags[i] & OF_DELAY_OP) {
      switch (opd->op) {
      case OP_BRANCH:
      case OP_BRANCH_N:
      case OP_BRANCH_CT:
      case OP_BRANCH_CF:
      case OP_BRANCH_R:
      case OP_BRANCH_RF:
        elprintf(EL_ANOMALY, "%csh2 drc: branch in DS @ %08x",
          is_slave ? 's' : 'm', pc);
        opd->op = OP_UNDEFINED;
        op_flags[i] |= OF_B_IN_DS;
        next_is_delay = 0;
        break;
      }
    } else if (!is_divop && i_div >= 0)
      i_div = -1; // divide parser stop
  }
end:
  i_end = i;
  end_pc = pc;

  // 2nd pass: some analysis
  lowest_literal = end_literals = lowest_mova = 0;
  t = T_UNKNOWN; // T flag state
  last_btarget = 0;
  op = 0; // delay/poll insns counter
  is_divop = 0; // divide op insns counter
  i_div = -1; // index of current divide op
  for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) {
    opd = &ops[i];
    crc += FETCH_OP(pc);

    // propagate T (TODO: DIV0U)
    if (op_flags[i] & OF_BTARGET)
      t = T_UNKNOWN;

    if ((opd->op == OP_BRANCH_CT && t == T_SET) ||
        (opd->op == OP_BRANCH_CF && t == T_CLEAR)) {
      opd->op = OP_BRANCH;
      opd->cycles = (op_flags[i + 1] & OF_DELAY_OP) ? 2 : 3;
    } else if ((opd->op == OP_BRANCH_CT && t == T_CLEAR) ||
               (opd->op == OP_BRANCH_CF && t == T_SET))
      opd->op = OP_BRANCH_N;
    else if (OP_ISBRACND(opd->op))
      t = (opd->op == OP_BRANCH_CF ? T_SET : T_CLEAR);
    else if (opd->op == OP_SETCLRT)
      t = (opd->imm ? T_SET : T_CLEAR);
    else if (opd->dest & BITMASK1(SHR_T))
      t = T_UNKNOWN;
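    // e.g. a "SETT; BT label" pair is turned into an unconditional OP_BRANCH
    // here, while "SETT; BF label" becomes OP_BRANCH_N (never taken)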

    // "overscan" detection: unreachable code after unconditional branch
    // this can happen if the insn after a forward branch isn't a local target
    if (OP_ISBRAUC(opd->op)) {
      if (op_flags[i + 1] & OF_DELAY_OP) {
        if (i_end > i + 2 && !(op_flags[i + 2] & OF_BTARGET))
          i_end = i + 2;
      } else {
        if (i_end > i + 1 && !(op_flags[i + 1] & OF_BTARGET))
          i_end = i + 1;
      }
    }

    // divide operation verification:
    // 1. there must not be a branch target inside
    // 2. nothing else may be in a delay slot (could only be the DIV0)
    // 3. DIV0/n*(ROTCL+DIV1)/ROTCL:
    //      div.div1 > 0 && div.rotcl == div.div1+1 && div.rn != div.ro
    // 4. DIV0/n*DIV1/ROTCL:
    //      div.div1 > 0 && div.rotcl == 1 && div.ro == div.rn
    if (i_div >= 0) {
      if (op_flags[i] & OF_BTARGET) { // condition 1
        ops[i_div].imm = 0;
        i_div = -1;
      } else if (--is_divop == 0)
        i_div = -1;
    } else if (opd->op == OP_DIV0) {
      struct div *div = &div(opd);
      is_divop = div->div1 + div->rotcl;
      if (op_flags[i] & OF_DELAY_OP) // condition 2
        opd->imm = 0;
      else if (! div->div1 || ! ((div->ro == div->rn && div->rotcl == 1) ||
               (div->ro != div->rn && div->rotcl == div->div1+1)))
        opd->imm = 0; // condition 3+4
      else if (is_divop)
        i_div = i;
    }
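    // e.g. the typical unsigned 32/16 divide emitted by SH2 compilers,
    // DIV0U followed by 16 DIV1/ROTCL pairs (or 16 DIV1s plus one final
    // ROTCL), passes these checks and stays flagged for the divide optimizer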

    // literal pool size detection
    if (opd->op == OP_MOVA && opd->imm >= base_pc)
      if (lowest_mova == 0 || opd->imm < lowest_mova)
        lowest_mova = opd->imm;
    if (opd->op == OP_LOAD_POOL) {
      if (opd->imm >= base_pc && opd->imm < end_pc + MAX_LITERAL_OFFSET) {
        if (end_literals < opd->imm + opd->size * 2)
          end_literals = opd->imm + opd->size * 2;
        if (lowest_literal == 0 || lowest_literal > opd->imm)
          lowest_literal = opd->imm;
        if (opd->size == 2) {
          // tweak for NFL: treat a 32bit literal as an address and check if it
          // points to the literal space. In that case handle it like MOVA.
          tmp = FETCH32(opd->imm) & ~0x20000000; // MUST ignore wt bit here
          if (tmp >= end_pc && tmp < end_pc + MAX_LITERAL_OFFSET)
            if (lowest_mova == 0 || tmp < lowest_mova)
              lowest_mova = tmp;
        }
      }
    }
#if LOOP_DETECTION
    // inner loop detection
    // 1. a loop always starts with a branch target (for the backwards jump)
    // 2. it doesn't contain more than one polling and/or delaying insn
    // 3. it doesn't contain unconditional jumps
    // 4. no overlapping of loops
    if (op_flags[i] & OF_BTARGET) {
      last_btarget = i; // possible loop starting point
      op = 0;
    }
    // XXX let's hope nobody is putting a delay or poll insn in a delay slot :-/
    if (OP_ISBRAIMM(opd->op)) {
      // BSR, BRA, BT, BF with immediate target
      int i_tmp = (opd->imm - base_pc) / 2; // branch target, index in ops
      if (i_tmp == last_btarget) // candidate for basic loop optimizer
        op_flags[i_tmp] |= OF_BASIC_LOOP;
      if (i_tmp == last_btarget && op <= 1) {
        op_flags[i_tmp] |= OF_LOOP; // conditions met -> mark loop
        last_btarget = i+1; // condition 4
      } else if (opd->op == OP_BRANCH)
        last_btarget = i+1; // condition 3
    }
    else if (OP_ISBRAIND(opd->op))
      // BRAF, BSRF, JMP, JSR, register indirect. treat it as off-limits jump
      last_btarget = i+1; // condition 3
    else if (op_flags[i] & (OF_POLL_INSN|OF_DELAY_INSN))
      op++; // condition 2
#endif
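    // a typical loop this catches is a busy-wait such as
    //   target: MOV.L @Rm,Rn   <- poll insn
    //           TST   Rn,Rn
    //           BT    target
    // which the loop optimizer can then subject to poll/idle handling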
  }
  end_pc = pc;

  // end_literals is used to decide whether to inline a literal or not
  // XXX: need better detection of whether the literal area is also written to
  if (lowest_literal >= base_pc) {
    if (lowest_literal < end_pc) {
      dbg(1, "warning: lowest_literal=%08x < end_pc=%08x", lowest_literal, end_pc);
      // TODO: does this always mean end_pc covers data?
    }
  }
  if (lowest_mova >= base_pc) {
    if (lowest_mova < end_literals) {
      dbg(1, "warning: mova=%08x < end_literals=%08x", lowest_mova, end_literals);
      end_literals = lowest_mova;
    }
    if (lowest_mova < end_pc) {
      dbg(1, "warning: mova=%08x < end_pc=%08x", lowest_mova, end_pc);
      end_literals = end_pc;
    }
  }
  if (lowest_literal >= end_literals)
    lowest_literal = end_literals;

  if (lowest_literal && end_literals)
    for (pc = lowest_literal; pc < end_literals; pc += 2)
      crc += FETCH_OP(pc);

  *end_pc_out = end_pc;
  if (base_literals_out != NULL)
    *base_literals_out = (lowest_literal ? lowest_literal : end_pc);
  if (end_literals_out != NULL)
    *end_literals_out = (end_literals ? end_literals : end_pc);

  // crc overflow handling, twice to collect all overflows
  crc = (crc & 0xffff) + (crc >> 16);
  crc = (crc & 0xffff) + (crc >> 16);
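  // e.g. crc = 0x000251ab folds to 0x51ab + 0x2 = 0x51ad, a 16bit checksum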
  return crc;
}

// vim:shiftwidth=2:ts=2:expandtab
