1 /*
2  * SH2 recompiler
3  * (C) notaz, 2009,2010,2013
4  * (C) kub, 2018,2019,2020
5  *
6  * This work is licensed under the terms of MAME license.
7  * See COPYING file in the top-level directory.
8  *
9  * notes:
10  * - tcache, block descriptor, block entry buffer overflows result in oldest
11  *   blocks being deleted until enough space is available
12  * - link and list element buffer overflows result in failure and exit
13  * - jumps between blocks are tracked for SMC handling (in block_entry->links),
14  *   except jumps from global to CPU-local tcaches
15  *
16  * implemented:
17  * - static register allocation
18  * - remaining register caching and tracking in temporaries
19  * - block-local branch linking
20  * - block linking
21  * - some constant propagation
22  * - call stack caching for host block entry address
23  * - delay, poll, and idle loop detection and handling
24  * - some T/M flag optimizations where the value is known or isn't used
25  *
26  * TODO:
27  * - better constant propagation
28  * - bug fixing
29  */
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <assert.h>
34 
35 #include "../../pico/pico_int.h"
36 #include "../../pico/arm_features.h"
37 #include "sh2.h"
38 #include "compiler.h"
39 #include "../drc/cmn.h"
40 #include "../debug.h"
41 
42 // features
43 #define PROPAGATE_CONSTANTS     1
44 #define LINK_BRANCHES           1
45 #define BRANCH_CACHE            1
46 #define CALL_STACK              1
47 #define ALIAS_REGISTERS         1
48 #define REMAP_REGISTER          1
49 #define LOOP_DETECTION          1
50 #define LOOP_OPTIMIZER          1
51 #define T_OPTIMIZER             1
52 #define DIV_OPTIMIZER           0
53 
54 #define MAX_LITERAL_OFFSET      0x200	// max. MOVA, MOV @(PC) offset
55 #define MAX_LOCAL_TARGETS       (BLOCK_INSN_LIMIT / 4)
56 #define MAX_LOCAL_BRANCHES      (BLOCK_INSN_LIMIT / 2)
57 
58 // debug stuff
59 // 01 - warnings/errors
60 // 02 - block info/smc
61 // 04 - asm
62 // 08 - runtime block entry log
63 // 10 - smc self-check
64 // 20 - runtime block entry counter
65 // 40 - rcache checking
66 // 80 - branch cache statistics
67 // 100 - write trace
68 // 200 - compare trace
69 // 400 - block entry backtrace on exit
70 // 800 - state dump on exit
71 // {
72 #ifndef DRC_DEBUG
73 #define DRC_DEBUG 0//x847
74 #endif
75 
76 #if DRC_DEBUG
77 #define dbg(l,...) { \
78   if ((l) & DRC_DEBUG) \
79     elprintf(EL_STATUS, ##__VA_ARGS__); \
80 }
81 #include "mame/sh2dasm.h"
82 #include <platform/libpicofe/linux/host_dasm.h>
83 static int insns_compiled, hash_collisions, host_insn_count;
84 #define COUNT_OP \
85 	host_insn_count++
86 #else // !DRC_DEBUG
87 #define COUNT_OP
88 #define dbg(...)
89 #endif
90 
91 
92 ///
93 #define FETCH_OP(pc) \
94   dr_pc_base[(pc) / 2]
95 
96 #define FETCH32(a) \
97   ((dr_pc_base[(a) / 2] << 16) | dr_pc_base[(a) / 2 + 1])
98 
99 #define CHECK_UNHANDLED_BITS(mask, label) { \
100   if ((op & (mask)) != 0) \
101     goto label; \
102 }
103 
104 #define GET_Fx() \
105   ((op >> 4) & 0x0f)
106 
107 #define GET_Rm GET_Fx
108 
109 #define GET_Rn() \
110   ((op >> 8) & 0x0f)
111 
112 #define T	0x00000001
113 #define S	0x00000002
114 #define I	0x000000f0
115 #define Q	0x00000100
116 #define M	0x00000200
117 #define T_save	0x00000800
118 
119 #define I_SHIFT 4
120 #define Q_SHIFT 8
121 #define M_SHIFT 9
122 #define T_SHIFT 11
123 
// decoded-insn info for the block currently being processed, one entry
// per insn (filled during analysis, consumed by the code emitter)
static struct op_data {
  u8 op;       // insn classification (see enum op_types below)
  u8 cycles;
  u8 size;     // 0, 1, 2 - byte, word, long
  s8 rm;       // branch or load/store data reg
  u32 source;  // bitmask of src regs
  u32 dest;    // bitmask of dest regs
  u32 imm;     // immediate/io address/branch target
               // (for literal - address, not value)
} ops[BLOCK_INSN_LIMIT];
134 
// classification of decoded insns, stored in op_data.op
enum op_types {
  OP_UNHANDLED = 0,
  OP_BRANCH,
  OP_BRANCH_N,  // conditional known not to be taken
  OP_BRANCH_CT, // conditional, branch if T set
  OP_BRANCH_CF, // conditional, branch if T clear
  OP_BRANCH_R,  // indirect
  OP_BRANCH_RF, // indirect far (PC + Rm)
  OP_SETCLRT,   // T flag set/clear
  OP_MOVE,      // register move
  OP_LOAD_CONST,// load const to register
  OP_LOAD_POOL, // literal pool load, imm is address
  OP_MOVA,      // MOVA instruction
  OP_SLEEP,     // SLEEP instruction
  OP_RTE,       // RTE instruction
  OP_TRAPA,     // TRAPA instruction
  OP_LDC,       // LDC instruction
  OP_DIV0,      // DIV0[US] instruction
  OP_UNDEFINED,
};

// division-sequence detector state, bit-packed into the 32 bit op_data.imm
struct div {
  u32 state:1;          // 0: expect DIV1/ROTCL, 1: expect DIV1
  u32 rn:5, rm:5, ro:5; // rn and rm for DIV1, ro for ROTCL
  u32 div1:8, rotcl:8;  // DIV1 count, ROTCL count
};
union _div { u32 imm; struct div div; };  // XXX tut-tut type punning...
// access an op_data.imm as struct div (see aliasing caveat above)
#define div(opd)	((union _div *)&((opd)->imm))->div

// op classification helpers; each tests an op_types value for membership
// in a set expressed as a bitmask over op types
// XXX consider trap insns: OP_TRAPA, OP_UNDEFINED?
#define OP_ISBRANCH(op) ((BITRANGE(OP_BRANCH, OP_BRANCH_RF)| BITMASK1(OP_RTE)) \
                                & BITMASK1(op))
// unconditional branches
#define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \
                                & BITMASK1(op))
// conditional branches
#define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) \
                                & BITMASK1(op))
// branches with immediate (pc-relative) target
#define OP_ISBRAIMM(op) (BITMASK3(OP_BRANCH, OP_BRANCH_CT, OP_BRANCH_CF) \
                                & BITMASK1(op))
// branches with register-indirect target
#define OP_ISBRAIND(op) (BITMASK3(OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \
                                & BITMASK1(op))
175 
176 #ifdef DRC_SH2
177 
178 #if (DRC_DEBUG & 4)
179 static u8 *tcache_dsm_ptrs[3];
180 static char sh2dasm_buff[64];
181 #define do_host_disasm(tcid) \
182   host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \
183   tcache_dsm_ptrs[tcid] = emith_insn_ptr()
184 #else
185 #define do_host_disasm(x)
186 #endif
187 
// dump the full register and drc-internal state of an SH2 to stdout,
// tagged with 'reason' and m/s for master/slave (debug aid)
#define SH2_DUMP(sh2, reason) { \
	char ms = (sh2)->is_slave ? 's' : 'm'; \
	printf("%csh2 %s %08x\n", ms, reason, (sh2)->pc); \
	printf("%csh2 r0-7  %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
		(sh2)->r[0], (sh2)->r[1], (sh2)->r[2], (sh2)->r[3], \
		(sh2)->r[4], (sh2)->r[5], (sh2)->r[6], (sh2)->r[7]); \
	printf("%csh2 r8-15 %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
		(sh2)->r[8], (sh2)->r[9], (sh2)->r[10], (sh2)->r[11], \
		(sh2)->r[12], (sh2)->r[13], (sh2)->r[14], (sh2)->r[15]); \
	printf("%csh2 pc-ml %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
		(sh2)->pc, (sh2)->ppc, (sh2)->pr, (sh2)->sr&0xfff, \
		(sh2)->gbr, (sh2)->vbr, (sh2)->mach, (sh2)->macl); \
	printf("%csh2 tmp-p  %08x %08x %08x %08x %08x %08x %08x %08x\n", ms, \
		(sh2)->drc_tmp, (sh2)->irq_cycles, \
		(sh2)->pdb_io_csum[0], (sh2)->pdb_io_csum[1], (sh2)->state, \
		(sh2)->poll_addr, (sh2)->poll_cycles, (sh2)->poll_cnt); \
}
205 
206 #if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB)
207 #if (DRC_DEBUG & (256|512|1024))
208 static SH2 csh2[2][8];
209 static FILE *trace[2];
210 #endif
// debug/trace hook invoked on each runtime block entry (DRC_DEBUG flags
// 8/0x100/0x200/0x400, see list in the header); always returns 'block'
// unchanged so the caller can continue into the translated code
static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
{
  if (block != NULL) {
    // note: upper sr bits carry the drc cycle counter (hence >> 12)
    dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm',
      sh2->pc, block, (signed int)sr >> 12);
#if defined PDB
    pdb_step(sh2, sh2->pc);
#elif (DRC_DEBUG & 256)
  // write trace: append the sh2 state to a per-cpu trace file
  {
    int idx = sh2->is_slave;
    if (!trace[0]) {
      trace[0] = fopen("pico.trace0", "wb");
      trace[1] = fopen("pico.trace1", "wb");
    }
    // only record when pc changed since the previously recorded entry
    if (csh2[idx][0].pc != sh2->pc) {
      fwrite(sh2, offsetof(SH2, read8_map), 1, trace[idx]);
      fwrite(&sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx]);
      memcpy(&csh2[idx][0], sh2, offsetof(SH2, poll_cnt)+4);
      csh2[idx][0].is_slave = idx;
    }
  }
#elif (DRC_DEBUG & 512)
  // compare trace: check current state against a previously written trace,
  // dump both and exit on the first difference
  {
    static SH2 fsh2;
    int idx = sh2->is_slave;
    if (!trace[0]) {
      trace[0] = fopen("pico.trace0", "rb");
      trace[1] = fopen("pico.trace1", "rb");
    }
    if (csh2[idx][0].pc != sh2->pc) {
      if (!fread(&fsh2, offsetof(SH2, read8_map), 1, trace[idx]) ||
          !fread(&fsh2.pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx])) {
        printf("trace eof at %08lx\n",ftell(trace[idx]));
        exit(1);
      }
      // keep only the architectural sr bits from the file; the upper bits
      // (cycle count) are taken from the live cpu to not break the compare
      fsh2.sr = (fsh2.sr & 0xfff) | (sh2->sr & ~0xfff);
      fsh2.is_slave = idx;
      if (memcmp(&fsh2, sh2, offsetof(SH2, read8_map)) ||
          0)//memcmp(&fsh2.pdb_io_csum, &sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum)))
      {
        printf("difference at %08lx!\n",ftell(trace[idx]));
        SH2_DUMP(&fsh2, "file");
        SH2_DUMP(sh2, "current");
        SH2_DUMP(&csh2[idx][0], "previous");
	char *ps = (char *)sh2, *pf = (char *)&fsh2;
	for (idx = 0; idx < offsetof(SH2, read8_map); idx += sizeof(u32))
		if (*(u32 *)(ps+idx) != *(u32 *)(pf+idx))
			printf("diff reg %ld\n",idx/sizeof(u32));
        exit(1);
      }
      csh2[idx][0] = fsh2;
    }
  }
#elif (DRC_DEBUG & 1024)
  // block entry backtrace: keep a short per-cpu history of entry states
  // (shift register of the last ARRAY_SIZE(csh2[x]) entries)
  {
    int x = sh2->is_slave, i;
    for (i = 0; i < ARRAY_SIZE(csh2[x])-1; i++)
      memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, poll_cnt)+4);
    memcpy(&csh2[x][ARRAY_SIZE(csh2[x])-1], sh2, offsetof(SH2, poll_cnt)+4);
    csh2[x][0].is_slave = x;
  }
#endif
  }
  return block;
}
276 #endif
277 
278 
279 // we have 3 translation cache buffers, split from one drc/cmn buffer.
280 // BIOS shares tcache with data array because it's only used for init
281 // and can be discarded early
282 #define TCACHE_BUFFERS 3
283 
284 
// simple fixed-size ring buffer of equally sized items, used for the
// tcache, block descriptor and block entry allocators (see ring_* below)
struct ring_buffer {
  u8 *base;                  // ring buffer memory
  unsigned item_sz;          // size of one buffer item
  unsigned size;             // number of items in ring
  int first, next;           // read and write pointers
  int used;                  // number of used items in ring
};

// kinds of patchable inter-block jumps
enum { BL_JMP=1, BL_LDJMP, BL_JCCBLX };
// one outgoing jump from a translated block to an SH2 pc; kept either on
// the target entry's incoming list or in the unresolved-links hash
struct block_link {
  short tcache_id;
  short type;                // BL_JMP et al
  u32 target_pc;
  void *jump;                // insn address
  void *blx;                 // block link/exit  area if any
  u8 jdisp[12];              // jump backup buffer
  struct block_link *next;   // either in block_entry->links or unresolved
  struct block_link *o_next; //     ...in block_entry->o_links
  struct block_link *prev;
  struct block_link *o_prev;
  struct block_entry *target;// target block this is linked in (be->links)
};

// one entry point into a translated block (a block may have several)
struct block_entry {
  u32 pc;
  u8 *tcache_ptr;            // translated block for above PC
  struct block_entry *next;  // chain in hash_table with same pc hash
  struct block_entry *prev;
  struct block_link *links;  // incoming links to this entry
  struct block_link *o_links;// outgoing links from this entry
#if (DRC_DEBUG & 2)
  struct block_desc *block;
#endif
#if (DRC_DEBUG & 32)
  int entry_count;
#endif
};

// descriptor for one translated block
struct block_desc {
  u32 addr;                  // block start SH2 PC address
  u32 addr_lit;              // block start SH2 literal pool addr
  int size;                  // ..of recompiled insns
  int size_lit;              // ..of (insns+)literal pool
  u8 *tcache_ptr;            // start address of block in cache
  u16 crc;                   // crc of insns and literals
  u16 active;                // actively used or deactivated?
  struct block_list *list;
#if (DRC_DEBUG & 2)
  int refcount;
#endif
  int entry_count;
  struct block_entry *entryp;
};

// membership of a block in one block list (e.g. inval_lookup pages,
// inactive_blocks); all elements of one block are chained via l_next
struct block_list {
  struct block_desc *block;  // block reference
  struct block_list *next;   // pointers for doubly linked list
  struct block_list *prev;
  struct block_list **head;  // list head (for removing from list)
  struct block_list *l_next;
};
346 
347 static u8 *tcache_ptr;       // ptr for code emitters
348 
349 // XXX: need to tune sizes
350 
351 static struct ring_buffer tcache_ring[TCACHE_BUFFERS];
352 static const int tcache_sizes[TCACHE_BUFFERS] = {
353   DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM
354   DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2
355   DRC_TCACHE_SIZE / 32, // ... slave
356 };
357 
358 #define BLOCK_MAX_COUNT(tcid)		((tcid) ? 256 : 32*256)
359 static struct ring_buffer block_ring[TCACHE_BUFFERS];
360 static struct block_desc *block_tables[TCACHE_BUFFERS];
361 
362 #define ENTRY_MAX_COUNT(tcid)		((tcid) ? 8*512 : 256*512)
363 static struct ring_buffer entry_ring[TCACHE_BUFFERS];
364 static struct block_entry *entry_tables[TCACHE_BUFFERS];
365 
366 // we have block_link_pool to avoid using mallocs
367 #define BLOCK_LINK_MAX_COUNT(tcid)	((tcid) ? 512 : 32*512)
368 static struct block_link *block_link_pool[TCACHE_BUFFERS];
369 static int block_link_pool_counts[TCACHE_BUFFERS];
370 static struct block_link **unresolved_links[TCACHE_BUFFERS];
371 static struct block_link *blink_free[TCACHE_BUFFERS];
372 
373 // used for invalidation
374 #define RAM_SIZE(tcid) 			((tcid) ? 0x1000 : 0x40000)
375 #define INVAL_PAGE_SIZE 0x100
376 
377 static struct block_list *inactive_blocks[TCACHE_BUFFERS];
378 
379 // array of pointers to block_lists for RAM and 2 data arrays
380 // each array has len: sizeof(mem) / INVAL_PAGE_SIZE
381 static struct block_list **inval_lookup[TCACHE_BUFFERS];
382 
383 #define HASH_TABLE_SIZE(tcid)		((tcid) ? 512 : 32*512)
384 static struct block_entry **hash_tables[TCACHE_BUFFERS];
385 
386 #define HASH_FUNC(hash_tab, addr, mask) \
387   (hash_tab)[((addr) >> 1) & (mask)]
388 
389 #define BLOCK_LIST_MAX_COUNT		(64*1024)
390 static struct block_list *block_list_pool;
391 static int block_list_pool_count;
392 static struct block_list *blist_free;
393 
394 #if (DRC_DEBUG & 128)
395 #if BRANCH_CACHE
396 int bchit, bcmiss;
397 #endif
398 #if CALL_STACK
399 int rchit, rcmiss;
400 #endif
401 #endif
402 
403 // host register tracking
enum cache_reg_htype {
  HRT_TEMP   = 1, // is for temps and args
  HRT_REG    = 2, // is for sh2 regs
};

enum cache_reg_flags {
  HRF_DIRTY  = 1 << 0, // has "dirty" value to be written to ctx
  HRF_PINNED = 1 << 1, // has a pinned mapping
  HRF_S16    = 1 << 2, // has a sign extended 16 bit value
  HRF_U16    = 1 << 3, // has a zero extended 16 bit value
};

enum cache_reg_type {
  HR_FREE,
  HR_CACHED, // vreg has sh2_reg_e
  HR_TEMP,   // reg used for temp storage
};

// state of one host register usable by the register cache
typedef struct {
  u8 hreg:6;    // "host" reg
  u8 htype:2;   // TEMP or REG? (enum cache_reg_htype)
  u8 flags:4;   // DIRTY, PINNED? (enum cache_reg_flags)
  u8 type:2;    // CACHED or TEMP? (enum cache_reg_type)
  u8 locked:2;  // LOCKED reference counter
  u16 stamp;    // kind of a timestamp
  u32 gregs;    // "guest" reg mask
} cache_reg_t;

// guest register tracking
enum guest_reg_flags {
  GRF_DIRTY  = 1 << 0, // reg has "dirty" value to be written to ctx
  GRF_CONST  = 1 << 1, // reg has a constant
  GRF_CDIRTY = 1 << 2, // constant not yet written to ctx
  GRF_STATIC = 1 << 3, // reg has static mapping to vreg
  GRF_PINNED = 1 << 4, // reg has pinned mapping to vreg
};

// state of one guest (SH2) register in the register cache
typedef struct {
  u8 flags;     // guest flags: is constant, is dirty?
  s8 sreg;      // cache reg for static mapping
  s8 vreg;      // cache_reg this is currently mapped to, -1 if not mapped
  s8 cnst;      // const index if this is constant
} guest_reg_t;
447 
448 
449 // possibly needed in code emitter
450 static int rcache_get_tmp(void);
451 static void rcache_free_tmp(int hr);
452 
453 // Note: Register assignment goes by ABI convention. Caller save registers are
454 // TEMPORARY, callee save registers are PRESERVED. Unusable regs are omitted.
455 // there must be at least the free (not context or statically mapped) amount of
456 // PRESERVED/TEMPORARY registers used by handlers in worst case (currently 4).
457 // there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4.
458 // SR must and R0 should by all means be statically mapped.
459 // XXX the static definition of SR MUST match that in compiler.h
460 
461 #if defined(__arm__) || defined(_M_ARM)
462 #include "../drc/emit_arm.c"
463 #elif defined(__aarch64__) || defined(_M_ARM64)
464 #include "../drc/emit_arm64.c"
465 #elif defined(__mips__)
466 #include "../drc/emit_mips.c"
467 #elif defined(__riscv__) || defined(__riscv)
468 #include "../drc/emit_riscv.c"
469 #elif defined(__powerpc__)
470 #include "../drc/emit_ppc.c"
471 #elif defined(__i386__) || defined(_M_X86)
472 #include "../drc/emit_x86.c"
473 #elif defined(__x86_64__) || defined(_M_X64)
474 #include "../drc/emit_x86.c"
475 #else
476 #error unsupported arch
477 #endif
478 
479 static const signed char hregs_param[] = PARAM_REGS;
480 static const signed char hregs_temp [] = TEMPORARY_REGS;
481 static const signed char hregs_saved[] = PRESERVED_REGS;
482 static const signed char regs_static[] = STATIC_SH2_REGS;
483 
484 #define CACHE_REGS \
485     (ARRAY_SIZE(hregs_param)+ARRAY_SIZE(hregs_temp)+ARRAY_SIZE(hregs_saved)-1)
486 static cache_reg_t cache_regs[CACHE_REGS];
487 
488 static signed char reg_map_host[HOST_REGS];
489 
490 static guest_reg_t guest_regs[SH2_REGS];
491 
492 static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2);
493 static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc);
494 #if CALL_STACK
495 static u32  REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc);
496 static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc);
497 #endif
498 static void REGPARM(1) (*sh2_drc_exit)(u32 pc);
499 static void            (*sh2_drc_test_irq)(void);
500 
501 static u32  REGPARM(1) (*sh2_drc_read8)(u32 a);
502 static u32  REGPARM(1) (*sh2_drc_read16)(u32 a);
503 static u32  REGPARM(1) (*sh2_drc_read32)(u32 a);
504 static u32  REGPARM(1) (*sh2_drc_read8_poll)(u32 a);
505 static u32  REGPARM(1) (*sh2_drc_read16_poll)(u32 a);
506 static u32  REGPARM(1) (*sh2_drc_read32_poll)(u32 a);
507 static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d);
508 static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d);
509 static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d);
510 
511 #ifdef DRC_SR_REG
512 void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2);
513 void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2);
514 #endif
515 
516 // flags for memory access
517 #define MF_SIZEMASK 0x03        // size of access
518 #define MF_POSTINCR 0x10        // post increment (for read_rr)
519 #define MF_PREDECR  MF_POSTINCR // pre decrement (for write_rr)
520 #define MF_POLLING  0x20	// include polling check in read
521 
522 // address space stuff
// does address 'a' lie in the (effectively read-only part of the) ROM area?
// the top of the window is excluded as a tweak for WWF Raw, which writes
// data to some high ROM addresses
static int dr_is_rom(u32 a)
{
  int in_rom_window = (a & 0xc6000000) == 0x02000000;
  int below_rw_area = (a & 0x3f0000) < 0x3e0000;

  return in_rom_window && below_rw_area;
}
528 
// map address 'a' to the offsetof() of the matching base-pointer field in
// the SH2 context (p_bios/p_da/p_sdram/p_rom). *mask receives the region
// mask from p32x_sh2_get_mem_ptr. returns -1 if the region is unsuitable.
static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask)
{
  void *region = p32x_sh2_get_mem_ptr(a, mask, sh2);

  if (region == NULL)          // not mapped memory at all
    return -1;
  if (region == sh2->p_bios)   // BIOS
    return offsetof(SH2, p_bios);
  if (region == sh2->p_da)     // data array
    return offsetof(SH2, p_da);
  if (region == sh2->p_sdram)  // SDRAM
    return offsetof(SH2, p_sdram);
  if (region == sh2->p_rom)    // ROM
    return offsetof(SH2, p_rom);

  return -1;                   // mapped, but not via a context base pointer
}
550 
// select the translation cache for a pc: BIOS (pc in 0..0xfff) and the
// data array (0xc0000000 region) go to the per-cpu caches 1/2, everything
// else to the shared cache 0
static int dr_get_tcache_id(u32 pc, int is_slave)
{
  int is_da   = (pc & 0xe0000000) == 0xc0000000;
  int is_bios = (pc & ~0xfff) == 0;

  if (is_da || is_bios)
    return 1 + is_slave;
  return 0;
}
561 
// look up the block entry for 'pc' in the appropriate hash table.
// *tcache_id always receives the tcache index; returns NULL if not found.
static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id)
{
  struct block_entry *be;
  u32 hmask;

  *tcache_id = dr_get_tcache_id(pc, is_slave);
  hmask = HASH_TABLE_SIZE(*tcache_id) - 1;

  for (be = HASH_FUNC(hash_tables[*tcache_id], pc, hmask); be; be = be->next)
    if (be->pc == pc)
      break;

  return be;
}
576 
577 // ---------------------------------------------------------------
578 
579 // ring buffer management
580 #define RING_INIT(r,m,n)    *(r) = (struct ring_buffer) { .base = (u8 *)m, \
581                                         .item_sz = sizeof(*(m)), .size = n };
582 
// allocate space for 'count' consecutive items in the ring buffer and
// return its address. if there isn't enough contiguous room before the end
// of the buffer memory, the tail is skipped (and accounted as used) and
// the allocation is taken from the start instead.
// NOTE(review): no overflow check here - presumably the caller must make
// sure enough items are free beforehand; confirm at call sites.
static void *ring_alloc(struct ring_buffer *rb, int count)
{
  // allocate space in ring buffer
  void *p;

  p = rb->base + rb->next * rb->item_sz;
  if (rb->next+count > rb->size) {
    rb->used += rb->size - rb->next;  // skipped tail items count as used
    p = rb->base; // wrap if overflow at end
    rb->next = count;
  } else {
    rb->next += count;
    if (rb->next == rb->size) rb->next = 0;
  }

  rb->used += count;
  return p;
}
601 
ring_wrap(struct ring_buffer * rb)602 static void ring_wrap(struct ring_buffer *rb)
603 {
604   // insufficient space at end of buffer memory, wrap around
605   rb->used += rb->size - rb->next;
606   rb->next = 0;
607 }
608 
ring_free(struct ring_buffer * rb,int count)609 static void ring_free(struct ring_buffer *rb, int count)
610 {
611   // free oldest space in ring buffer
612   rb->first += count;
613   if (rb->first >= rb->size) rb->first -= rb->size;
614 
615   rb->used -= count;
616 }
617 
ring_free_p(struct ring_buffer * rb,void * p)618 static void ring_free_p(struct ring_buffer *rb, void *p)
619 {
620   // free ring buffer space upto given pointer
621   rb->first = ((u8 *)p - rb->base) / rb->item_sz;
622 
623   rb->used = rb->next - rb->first;
624   if (rb->used < 0) rb->used += rb->size;
625 }
626 
ring_reset(struct ring_buffer * rb)627 static void *ring_reset(struct ring_buffer *rb)
628 {
629   // reset to initial state
630   rb->first = rb->next = rb->used = 0;
631   return rb->base + rb->next * rb->item_sz;
632 }
633 
ring_first(struct ring_buffer * rb)634 static void *ring_first(struct ring_buffer *rb)
635 {
636   return rb->base + rb->first * rb->item_sz;
637 }
638 
ring_next(struct ring_buffer * rb)639 static void *ring_next(struct ring_buffer *rb)
640 {
641   return rb->base + rb->next * rb->item_sz;
642 }
643 
644 
645 // block management
add_to_block_list(struct block_list ** blist,struct block_desc * block)646 static void add_to_block_list(struct block_list **blist, struct block_desc *block)
647 {
648   struct block_list *added;
649 
650   if (blist_free) {
651     added = blist_free;
652     blist_free = added->next;
653   } else if (block_list_pool_count >= BLOCK_LIST_MAX_COUNT) {
654     printf( "block list overflow\n");
655     exit(1);
656   } else {
657     added = block_list_pool + block_list_pool_count;
658     block_list_pool_count ++;
659   }
660 
661   added->block = block;
662   added->l_next = block->list;
663   block->list = added;
664   added->head = blist;
665 
666   added->prev = NULL;
667   if (*blist)
668     (*blist)->prev = added;
669   added->next = *blist;
670   *blist = added;
671 }
672 
// remove 'block' from every block_list it is on, walking the l_next chain
// built by add_to_block_list, and return the list elements to the free list
static void rm_from_block_lists(struct block_desc *block)
{
  struct block_list *entry;

  entry = block->list;
  while (entry != NULL) {
    // unlink from the doubly linked list this element is on
    if (entry->prev != NULL)
      entry->prev->next = entry->next;
    else
      *(entry->head) = entry->next;
    if (entry->next != NULL)
      entry->next->prev = entry->prev;

    // recycle the element; note ->next is reused as the free list link,
    // which is fine since we continue via the untouched l_next chain
    entry->next = blist_free;
    blist_free = entry;

    entry = entry->l_next;
  }
  block->list = NULL;
}
693 
discard_block_list(struct block_list ** blist)694 static void discard_block_list(struct block_list **blist)
695 {
696   struct block_list *next, *current = *blist;
697   while (current != NULL) {
698     next = current->next;
699     current->next = blist_free;
700     blist_free = current;
701     current = next;
702   }
703   *blist = NULL;
704 }
705 
// insert a block entry at the head of the pc hash chain of its tcache
static void add_to_hashlist(struct block_entry *be, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);

  be->prev = NULL;
  if (*head)
    (*head)->prev = be;
  be->next = *head;
  *head = be;

#if (DRC_DEBUG & 2)
  // a non-empty chain after head insertion means a hash collision
  if (be->next != NULL) {
    printf(" %08x@%p: entry hash collision with %08x@%p\n",
      be->pc, be->tcache_ptr, be->next->pc, be->next->tcache_ptr);
    hash_collisions++;
  }
#endif
}
725 
// remove a block entry from the pc hash chain of its tcache
static void rm_from_hashlist(struct block_entry *be, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);

#if DRC_DEBUG & 1
  // sanity check: walking prev links from be must arrive at the list head
  struct block_entry *current = be;
  while (current->prev != NULL)
    current = current->prev;
  if (current != *head)
    dbg(1, "rm_from_hashlist @%p: be %p %08x missing?", head, be, be->pc);
#endif

  if (be->prev != NULL)
    be->prev->next = be->next;
  else
    *head = be->next;
  if (be->next != NULL)
    be->next->prev = be->prev;
}
746 
747 
// insert a block link into the unresolved-links hash, keyed by target pc;
// it stays there until a block for that pc is compiled and resolves it
static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask);

#if DRC_DEBUG & 1
  // sanity check: the link must not already be on the chain
  struct block_link *current = *head;
  while (current != NULL && current != bl)
    current = current->next;
  if (current == bl)
    dbg(1, "add_to_hashlist_unresolved @%p: bl %p %p %08x already in?", head, bl, bl->target, bl->target_pc);
#endif

  bl->target = NULL; // marker for not resolved
  bl->prev = NULL;
  if (*head)
    (*head)->prev = bl;
  bl->next = *head;
  *head = bl;
}
768 
// remove a block link from the unresolved-links hash chain for its target pc
static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id)
{
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask);

#if DRC_DEBUG & 1
  // sanity check: walking prev links from bl must arrive at the list head
  struct block_link *current = bl;
  while (current->prev != NULL)
    current = current->prev;
  if (current != *head)
    dbg(1, "rm_from_hashlist_unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc);
#endif

  if (bl->prev != NULL)
    bl->prev->next = bl->next;
  else
    *head = bl->next;
  if (bl->next != NULL)
    bl->next->prev = bl->prev;
}
789 
790 #if LINK_BRANCHES
// resolve a block link: if emit_jump, patch the generated code at bl->jump
// so it branches directly to be->tcache_ptr (through the blx trampoline
// when a conditional branch is out of range), then move bl onto the target
// entry's incoming link list so it can be unlinked on invalidation
static void dr_block_link(struct block_entry *be, struct block_link *bl, int emit_jump)
{
  dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? "":"early ",
    bl->jump, bl->target_pc, be->tcache_ptr);

  if (emit_jump) {
    u8 *jump = bl->jump;
    int jsz = emith_jump_patch_size();
    if (bl->type == BL_JMP) { // patch: jump @entry
      // inlined: @jump far jump to target
      emith_jump_patch(jump, be->tcache_ptr, &jump);
    } else if (bl->type == BL_LDJMP) { // write: jump @entry
      // inlined: @jump far jump to target
      emith_jump_at(jump, be->tcache_ptr);
      jsz = emith_jump_at_size();
    } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry
      if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) {
        // inlined: @jump near jumpcc to target
        emith_jump_patch(jump, be->tcache_ptr, &jump);
      } else { // dispatcher cond immediate
        // via blx: @jump near jumpcc to blx; @blx far jump
        emith_jump_patch(jump, bl->blx, &jump);
        emith_jump_at(bl->blx, be->tcache_ptr);
        host_instructions_updated(bl->blx, (char *)bl->blx + emith_jump_at_size(),
            ((uintptr_t)bl->blx & 0x1f) + emith_jump_at_size()-1 > 0x1f);
      }
    } else {
      printf("unknown BL type %d\n", bl->type);
      exit(1);
    }
    // make the patched code visible to the cpu (i/d cache sync)
    host_instructions_updated(jump, jump + jsz, ((uintptr_t)jump & 0x1f) + jsz-1 > 0x1f);
  }

  // move bl to block_entry
  bl->target = be;
  bl->prev = NULL;
  if (be->links)
    be->links->prev = bl;
  bl->next = be->links;
  be->links = bl;
}
832 
// detach a resolved block link from its target: if emit_jump, redirect the
// jump site back to the dispatcher (restoring code saved in bl->jdisp where
// it was overwritten), and remove bl from the target's incoming link list.
// no-op if the link was never resolved (bl->target == NULL).
static void dr_block_unlink(struct block_link *bl, int emit_jump)
{
  dbg(2,"- unlink from %p to pc %08x", bl->jump, bl->target_pc);

  if (bl->target) {
    if (emit_jump) {
      u8 *jump = bl->jump;
      int jsz = emith_jump_patch_size();
      if (bl->type == BL_JMP) { // jump_patch @dispatcher
        // inlined: @jump far jump to dispatcher
        emith_jump_patch(jump, sh2_drc_dispatcher, &jump);
      } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher
        // inlined: @jump load target_pc, far jump to dispatcher
        memcpy(jump, bl->jdisp, emith_jump_at_size());
        jsz = emith_jump_at_size();
      } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump
        // via blx: @jump near jumpcc to blx; @blx load target_pc, far jump
        emith_jump_patch(bl->jump, bl->blx, &jump);
        memcpy(bl->blx, bl->jdisp, emith_jump_at_size());
        host_instructions_updated(bl->blx, (char *)bl->blx + emith_jump_at_size(), 1);
      } else {
        printf("unknown BL type %d\n", bl->type);
        exit(1);
      }
      // update cpu caches since the previous jump target doesn't exist anymore
      host_instructions_updated(jump, jump + jsz, 1);
    }

    // remove from the target entry's incoming link list
    if (bl->prev)
      bl->prev->next = bl->next;
    else
      bl->target->links = bl->next;
    if (bl->next)
      bl->next->prev = bl->prev;
    bl->target = NULL;
  }
}
870 #endif
871 
// prepare an outgoing branch from block 'owner' at the current tcache_ptr
// to SH2 address 'pc'. the link starts out unresolved and is later patched
// by dr_block_link once the target block exists. returns NULL if linking
// isn't possible (branch into a different local tcache, link pool overflow,
// or LINK_BRANCHES disabled) - the caller must then emit a dispatcher jump.
static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id)
{
#if LINK_BRANCHES
  struct block_link *bl = block_link_pool[tcache_id];
  int cnt = block_link_pool_counts[tcache_id];
  int target_tcache_id;

  // get the target block entry
  target_tcache_id = dr_get_tcache_id(pc, is_slave);
  if (target_tcache_id && target_tcache_id != tcache_id)
    return NULL;  // no links from global to cpu-local tcaches (see header)

  // get a block link
  if (blink_free[tcache_id] != NULL) {
    // reuse a previously freed link
    bl = blink_free[tcache_id];
    blink_free[tcache_id] = bl->next;
  } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) {
    dbg(1, "bl overflow for tcache %d", tcache_id);
    return NULL;
  } else {
    bl += cnt;
    block_link_pool_counts[tcache_id] = cnt+1;
  }

  // prepare link and add to outgoing list of owner
  bl->tcache_id = tcache_id;
  bl->target_pc = pc;
  bl->jump = tcache_ptr;
  bl->blx = NULL;
  bl->o_next = owner->o_links;
  owner->o_links = bl;

  add_to_hashlist_unresolved(bl, tcache_id);
  return bl;
#else
  return NULL;
#endif
}
910 
// Add (mark > 0) or remove (mark < 0) a block's entries in the drc RAM
// marking maps used for self-modifying-code detection. Also records the
// literal-disable map when @nolit is set, and maintains the per-page
// invalidation lookup lists (added on mark, removed on unmark).
static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit)
{
  u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL;
  u32 addr, end, mask = 0, shift = 0, idx;

  // mark memory blocks as containing compiled code
  // (only SDRAM and the per-CPU data arrays have marking maps)
  if ((block->addr & 0xc7fc0000) == 0x06000000
      || (block->addr & 0xfffff000) == 0xc0000000)
  {
    if (tcache_id != 0) {
      // data array
      drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1];
      lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1];
      shift = SH2_DRCBLK_DA_SHIFT;
    }
    else {
      // SDRAM
      drc_ram_blk = Pico32xMem->drcblk_ram;
      lit_ram_blk = Pico32xMem->drclit_ram;
      shift = SH2_DRCBLK_RAM_SHIFT;
    }
    mask = RAM_SIZE(tcache_id) - 1;

    // mark recompiled insns; counters, not flags, since blocks may overlap
    addr = block->addr & ~((1 << shift) - 1);
    end = block->addr + block->size;
    for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
      drc_ram_blk[idx++] += mark;

    // mark literal pool (continue from insn end unless the pool starts later)
    if (addr < (block->addr_lit & ~((1 << shift) - 1)))
      addr = block->addr_lit & ~((1 << shift) - 1);
    end = block->addr_lit + block->size_lit;
    for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
      drc_ram_blk[idx++] += mark;

    // mark for literals disabled (literal area from @nolit on was overwritten)
    if (nolit) {
      addr = nolit & ~((1 << shift) - 1);
      end = block->addr_lit + block->size_lit;
      for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
        lit_ram_blk[idx++] = 1;
    }

    if (mark < 0)
      rm_from_block_lists(block);
    else {
      // add to invalidation lookup lists, one entry per page touched
      addr = block->addr & ~(INVAL_PAGE_SIZE - 1);
      end = block->addr + block->size;
      for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE)
        add_to_block_list(&inval_lookup[tcache_id][idx++], block);

      // same for the literal pool pages not already covered above
      if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1)))
        addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1);
      end = block->addr_lit + block->size_lit;
      for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE)
        add_to_block_list(&inval_lookup[tcache_id][idx++], block);
    }
  }
}
972 
// Scan the literal-disable map over [start,end) and return the address of
// the first granule marked as overwritten, clamped into [start,end].
// Returns @end if literals are fully usable or the range has no map.
static u32 dr_check_nolit(u32 start, u32 end, int tcache_id)
{
  u8 *lit_ram_blk = NULL;
  u32 mask = 0, shift = 0, addr, idx;

  // only SDRAM and the per-CPU data arrays carry literal tracking maps
  if ((start & 0xc7fc0000) == 0x06000000
      || (start & 0xfffff000) == 0xc0000000)
  {
    if (tcache_id != 0) {
      // data array
      lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1];
      shift = SH2_DRCBLK_DA_SHIFT;
    }
    else {
      // SDRAM
      lit_ram_blk = Pico32xMem->drclit_ram;
      shift = SH2_DRCBLK_RAM_SHIFT;
    }
    mask = RAM_SIZE(tcache_id) - 1;

    // walk granules until a disabled one is found (or the range ends)
    addr = start & ~((1 << shift) - 1);
    for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
      if (lit_ram_blk[idx++])
        break;

    // addr may sit below start (alignment) or above end (loop ran out)
    return (addr < start ? start : addr > end ? end : addr);
  }

  return end;
}
1003 
// Deactivate a block, and free it if @free is set (or forced by @nolit,
// since overwritten literals make the compiled code invalid).
// Incoming links are turned back into unresolved ones; on free, outgoing
// links are revoked and returned to the free list.
static void dr_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free)
{
  struct block_link *bl;
  u32 i;

  free = free || nolit; // block is invalid if literals are overwritten
  dbg(2,"  %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl",
    bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit,
    tcache_id, bd - block_tables[tcache_id]);
  if (bd->addr == 0 || bd->entry_count == 0) {
    // already freed/never initialized - nothing to do
    dbg(1, "  killing dead block!? %08x", bd->addr);
    return;
  }

#if LINK_BRANCHES
  // remove from hash table, make incoming links unresolved
  if (bd->active) {
    for (i = 0; i < bd->entry_count; i++) {
      rm_from_hashlist(&bd->entryp[i], tcache_id);

      // detach each incoming link and requeue it for later re-resolution
      while ((bl = bd->entryp[i].links) != NULL) {
        dr_block_unlink(bl, 1);
        add_to_hashlist_unresolved(bl, tcache_id);
      }
    }

    // unmark the memory ranges and move the block to the inactive list
    dr_mark_memory(-1, bd, tcache_id, nolit);
    add_to_block_list(&inactive_blocks[tcache_id], bd);
  }
  bd->active = 0;
#endif

  if (free) {
#if LINK_BRANCHES
    // revoke outgoing links (all entries share the entryp[0] o_links list)
    for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) {
      if (bl->target)
        dr_block_unlink(bl, 0);
      else
        rm_from_hashlist_unresolved(bl, tcache_id);
      bl->jump = NULL;
      bl->next = blink_free[bl->tcache_id];
      blink_free[bl->tcache_id] = bl;
    }
    bd->entryp[0].o_links = NULL;
#endif
    // invalidate block
    rm_from_block_lists(bd);
    bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0;
    bd->entry_count = 0;
    bd->entryp = NULL;
  }
  // flush host icache since generated code was modified/retired
  emith_update_cache();
}
1058 
// Search the inactive list of @tcache_id for a block whose address ranges
// and checksum match exactly; detach it from all lists and return it for
// reactivation, or NULL if no such block exists.
static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc,
  u32 addr, int size, u32 addr_lit, int size_lit)
{
  struct block_list *bl;

  for (bl = inactive_blocks[tcache_id]; bl != NULL; bl = bl->next) {
    struct block_desc *bd = bl->block;

    if (bd->crc != crc || bd->addr != addr || bd->size != size)
      continue;
    if (bd->addr_lit != addr_lit || bd->size_lit != size_lit)
      continue;

    rm_from_block_lists(bd);
    return bd;
  }
  return NULL;
}
1076 
// Allocate and initialize a new block descriptor (plus @entries entry
// slots) for guest code at @addr. Returns NULL on ring overflow; the
// caller is expected to free old blocks and retry. *blk_id receives the
// descriptor's index within its ring.
static struct block_desc *dr_add_block(int entries, u32 addr, int size,
  u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id)
{
  struct block_entry *be;
  struct block_desc *bd;
  int tcache_id;

  // do a lookup to get tcache_id and override check
  be = dr_get_entry(addr, is_slave, &tcache_id);
  if (be != NULL)
    dbg(1, "block override for %08x", addr);

  if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size ||
      entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) {
    dbg(1, "bd overflow for tcache %d", tcache_id);
    return NULL;
  }

  *blk_id = block_ring[tcache_id].next;
  bd = ring_alloc(&block_ring[tcache_id], 1);
  bd->entryp = ring_alloc(&entry_ring[tcache_id], entries);

  bd->addr = addr;
  bd->size = size;
  bd->addr_lit = addr_lit;
  bd->size_lit = size_lit;
  bd->tcache_ptr = tcache_ptr;
  bd->crc = crc;
  bd->active = 0;
  bd->list = NULL;
  // entry_count stays 0 until entries are actually filled in by the caller
  bd->entry_count = 0;
#if (DRC_DEBUG & 2)
  bd->refcount = 0;
#endif

  return bd;
}
1114 
// Resolve all pending branch links whose target pc matches the freshly
// added entry @be, so previously compiled blocks jump straight to it.
static void dr_link_blocks(struct block_entry *be, int tcache_id)
{
#if LINK_BRANCHES
  u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
  struct block_link *bl, *bl_next;

  bl = HASH_FUNC(unresolved_links[tcache_id], be->pc, tcmask);
  for (; bl != NULL; bl = bl_next) {
    bl_next = bl->next;
    if (bl->target_pc != be->pc)
      continue;
    // links marked with a CPU-local tcache id only bind within that tcache
    if (bl->tcache_id && bl->tcache_id != tcache_id)
      continue;
    rm_from_hashlist_unresolved(bl, bl->tcache_id);
    dr_block_link(be, bl, 1);
  }
#endif
}
1133 
// Try to resolve all still-unresolved outgoing branches of entry @be
// against blocks that already exist.
static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave)
{
#if LINK_BRANCHES
  struct block_entry *target;
  struct block_link *bl;
  int target_tcache_id;

  for (bl = be->o_links; bl != NULL; bl = bl->o_next) {
    if (bl->target != NULL)
      continue;
    target = dr_get_entry(bl->target_pc, is_slave, &target_tcache_id);
    if (target == NULL)
      continue;
    // only link within the same tcache or to the shared one
    if (target_tcache_id && target_tcache_id != tcache_id)
      continue;
    // remove bl from unresolved_links (must've been since target was NULL)
    rm_from_hashlist_unresolved(bl, bl->tcache_id);
    dr_block_link(target, bl, 1);
  }
#endif
}
1152 
dr_activate_block(struct block_desc * bd,int tcache_id,int is_slave)1153 static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave)
1154 {
1155   int i;
1156 
1157   // connect branches
1158   for (i = 0; i < bd->entry_count; i++) {
1159     struct block_entry *entry = &bd->entryp[i];
1160     add_to_hashlist(entry, tcache_id);
1161     // incoming branches
1162     dr_link_blocks(entry, tcache_id);
1163     if (!tcache_id)
1164       dr_link_blocks(entry, is_slave?2:1);
1165     // outgoing branches
1166     dr_link_outgoing(entry, tcache_id, is_slave);
1167   }
1168 
1169   // mark memory for overwrite detection
1170   dr_mark_memory(1, bd, tcache_id, 0);
1171   bd->active = 1;
1172 }
1173 
dr_lookup_block(u32 pc,SH2 * sh2,int * tcache_id)1174 static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id)
1175 {
1176   struct block_entry *be = NULL;
1177   void *block = NULL;
1178 
1179   be = dr_get_entry(pc, sh2->is_slave, tcache_id);
1180   if (be != NULL)
1181     block = be->tcache_ptr;
1182 
1183 #if (DRC_DEBUG & 2)
1184   if (be != NULL)
1185     be->block->refcount++;
1186 #endif
1187   return block;
1188 }
1189 
// Retire the oldest block of @tcache_id to make room in its rings.
// Ring space up to the next surviving block is released; if no block
// remains, all rings are reset since the freed code size is unknown.
static void dr_free_oldest_block(int tcache_id)
{
  struct block_desc *bf;

  bf = ring_first(&block_ring[tcache_id]);
  if (bf->addr && bf->entry_count)
    dr_rm_block_entry(bf, tcache_id, 0, 1);
  ring_free(&block_ring[tcache_id], 1);

  if (block_ring[tcache_id].used) {
    // free entry/tcache space up to the start of the now-oldest block
    bf = ring_first(&block_ring[tcache_id]);
    ring_free_p(&entry_ring[tcache_id], bf->entryp);
    ring_free_p(&tcache_ring[tcache_id], bf->tcache_ptr);
  } else {
    // reset since size of code block isn't known if no successor block exists
    ring_reset(&block_ring[tcache_id]);
    ring_reset(&entry_ring[tcache_id]);
    ring_reset(&tcache_ring[tcache_id]);
  }
}
1210 
// Ensure ring buffer @rb has @count contiguous free slots, deleting old
// blocks of @tcache_id until that holds. Wraps the ring if the space at
// the end is insufficient.
static inline void dr_reserve_cache(int tcache_id, struct ring_buffer *rb, int count)
{
  // while not enough space available
  if (rb->next + count >= rb->size){
    // not enough space in rest of buffer -> wrap around
    // first free all entries still located between next and the end
    while (rb->first >= rb->next && rb->used)
      dr_free_oldest_block(tcache_id);
    if (rb->first == 0 && rb->used)
      dr_free_oldest_block(tcache_id);
    ring_wrap(rb);
  }
  // after wrapping, free blocks until the gap before rb->first is large enough
  while (rb->first >= rb->next && rb->next + count > rb->first && rb->used)
    dr_free_oldest_block(tcache_id);
}
1225 
// Reserve space for one block descriptor, @entry_count entries, and a
// worst-case estimate of generated code for @insn_count insns. If any
// old blocks had to be deleted, the branch/return caches referencing
// their addresses are flushed. Returns the code emit position.
static u8 *dr_prepare_cache(int tcache_id, int insn_count, int entry_count)
{
  // remember the oldest block to detect deletions below
  int bf = block_ring[tcache_id].first;

  // reserve one block desc
  if (block_ring[tcache_id].used >= block_ring[tcache_id].size)
    dr_free_oldest_block(tcache_id);
  // reserve block entries
  dr_reserve_cache(tcache_id, &entry_ring[tcache_id], entry_count);
  // reserve cache space (128 bytes per insn is the worst-case estimate)
  dr_reserve_cache(tcache_id, &tcache_ring[tcache_id], insn_count*128);

  if (bf != block_ring[tcache_id].first) {
    // deleted some block(s), clear branch cache and return stack
#if BRANCH_CACHE
    if (tcache_id)
      memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
    else {
      // tcache 0 is shared, so both CPUs may cache its addresses
      memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
      memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4);
    }
#endif
#if CALL_STACK
    if (tcache_id) {
      memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4);
      sh2s[tcache_id-1].rts_cache_idx = 0;
    } else {
      memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4);
      memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4);
      sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0;
    }
#endif
  }

  return ring_next(&tcache_ring[tcache_id]);
}
1262 
// Throw away all compiled code and metadata of tcache @tcid: rings,
// link pools, hash tables, RAM marking maps, branch/return caches and
// invalidation lists are all reset.
static void dr_flush_tcache(int tcid)
{
  int i;
#if (DRC_DEBUG & 1)
  elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d bes %d/%d)", tcid,
    tcache_ring[tcid].used, tcache_ring[tcid].size, block_ring[tcid].used,
    block_ring[tcid].size, entry_ring[tcid].used, entry_ring[tcid].size);
#endif

  ring_reset(&tcache_ring[tcid]);
  ring_reset(&block_ring[tcid]);
  ring_reset(&entry_ring[tcid]);

  block_link_pool_counts[tcid] = 0;
  blink_free[tcid] = NULL;
  memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid));
  memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid));

  if (tcid == 0) { // ROM, RAM
    memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram));
    memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram));
    memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache));
    memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache));
    memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache));
    memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache));
    sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0;
  } else {
    // NOTE(review): the SDRAM marking maps are cleared here as well even
    // though this branch flushes a per-CPU data-array tcache — confirm
    // this doesn't drop overwrite-detection state of live tcache-0 blocks
    memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram));
    memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram));
    memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1]));
    memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1]));
    memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache));
    memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache));
    sh2s[tcid - 1].rts_cache_idx = 0;
  }
#if (DRC_DEBUG & 4)
  tcache_dsm_ptrs[tcid] = tcache_ring[tcid].base;
#endif

  for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++)
    discard_block_list(&inval_lookup[tcid][i]);
  discard_block_list(&inactive_blocks[tcid]);
}
1306 
// Fatal error handler for unrecoverable recompilation failures.
// Never returns (terminates the process), but is declared to return a
// pointer so it can be used in pointer-returning call sites.
static void *dr_failure(void)
{
  printf("recompilation failed\n");
  exit(1);
  // unreachable; silences missing-return warnings on compilers that do
  // not propagate exit()'s noreturn attribute through this function
  return NULL;
}
1312 
1313 // ---------------------------------------------------------------
1314 
1315 // NB rcache allocation dependencies:
1316 // - get_reg_arg/get_tmp_arg first (might evict other regs just allocated)
1317 // - get_reg(..., NULL) before get_reg(..., &hr) if it might get the same reg
1318 // - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg)
1319 
1320 // register cache / constant propagation stuff
typedef enum {
  RC_GR_READ,   // value is only read
  RC_GR_WRITE,  // value is completely overwritten
  RC_GR_RMW,    // value is read and then modified
} rc_gr_mode;

typedef struct {
  u32 gregs;    // bitmask of guest regs currently holding this constant
  u32 val;      // the constant value itself
} gconst_t;

// constant slots; one per guest reg suffices since each guest reg can be
// attached to at most one constant at a time
gconst_t gconsts[ARRAY_SIZE(guest_regs)];

// forward declarations for the register cache core
static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr);
static inline int rcache_is_cached(sh2_reg_e r);
static void rcache_add_vreg_alias(int x, sh2_reg_e r);
static void rcache_remove_vreg_alias(int x, sh2_reg_e r);
static void rcache_evict_vreg(int x);
static void rcache_remap_vreg(int x);
1340 
rcache_set_x16(int hr,int s16_,int u16_)1341 static void rcache_set_x16(int hr, int s16_, int u16_)
1342 {
1343   int x = reg_map_host[hr];
1344   if (x >= 0) {
1345     cache_regs[x].flags &= ~(HRF_S16|HRF_U16);
1346     if (s16_) cache_regs[x].flags |= HRF_S16;
1347     if (u16_) cache_regs[x].flags |= HRF_U16;
1348   }
1349 }
1350 
rcache_copy_x16(int hr,int hr2)1351 static void rcache_copy_x16(int hr, int hr2)
1352 {
1353   int x = reg_map_host[hr], y = reg_map_host[hr2];
1354   if (x >= 0 && y >= 0) {
1355     cache_regs[x].flags = (cache_regs[x].flags & ~(HRF_S16|HRF_U16)) |
1356                           (cache_regs[y].flags &  (HRF_S16|HRF_U16));
1357   }
1358 }
1359 
rcache_is_s16(int hr)1360 static int rcache_is_s16(int hr)
1361 {
1362   int x = reg_map_host[hr];
1363   return (x >= 0 ? cache_regs[x].flags & HRF_S16 : 0);
1364 }
1365 
rcache_is_u16(int hr)1366 static int rcache_is_u16(int hr)
1367 {
1368   int x = reg_map_host[hr];
1369   return (x >= 0 ? cache_regs[x].flags & HRF_U16 : 0);
1370 }
1371 
// debug helper: print the complete register cache state (all non-free
// cache_regs, all mapped/flagged guest_regs, all active constant slots)
#define RCACHE_DUMP(msg) { \
  cache_reg_t *cp; \
  guest_reg_t *gp; \
  int i; \
  printf("cache dump %s:\n",msg); \
  printf(" cache_regs:\n"); \
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \
    cp = &cache_regs[i]; \
    if (cp->type != HR_FREE || cp->gregs || cp->locked || cp->flags) \
      printf("  %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->locked, cp->gregs); \
  } \
  printf(" guest_regs:\n"); \
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \
    gp = &guest_regs[i]; \
    if (gp->vreg != -1 || gp->sreg >= 0 || gp->flags) \
      printf("  %d: v=%d f=%x s=%d c=%d\n", i, gp->vreg, gp->flags, gp->sreg, gp->cnst); \
  } \
  printf(" gconsts:\n"); \
  for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \
    if (gconsts[i].gregs) \
      printf("  %d: m=%x v=%x\n", i, gconsts[i].gregs, gconsts[i].val); \
  } \
}
1395 
// debug helper: verify the cross-links between cache_regs, guest_regs and
// gconsts are mutually consistent (and pinning is correct); dumps the
// whole cache state via RCACHE_DUMP if any mismatch is detected
#define RCACHE_CHECK(msg) { \
  cache_reg_t *cp; \
  guest_reg_t *gp; \
  int i, x, m = 0, d = 0; \
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \
    cp = &cache_regs[i]; \
    if (cp->flags & HRF_PINNED) m |= (1 << i); \
    if (cp->type == HR_FREE || cp->type == HR_TEMP) continue; \
    /* check connectivity greg->vreg */ \
    FOR_ALL_BITS_SET_DO(cp->gregs, x, \
      if (guest_regs[x].vreg != i) \
        { d = 1; printf("cache check v=%d r=%d not connected?\n",i,x); } \
    ) \
  } \
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \
    gp = &guest_regs[i]; \
    if (gp->vreg != -1 && !(cache_regs[gp->vreg].gregs & (1 << i))) \
      { d = 1; printf("cache check r=%d v=%d not connected?\n", i, gp->vreg); }\
    if (gp->vreg != -1 && cache_regs[gp->vreg].type != HR_CACHED) \
      { d = 1; printf("cache check r=%d v=%d wrong type?\n", i, gp->vreg); }\
    if ((gp->flags & GRF_CONST) && !(gconsts[gp->cnst].gregs & (1 << i))) \
      { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\
    if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\
      { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \
    if (gp->flags & (GRF_STATIC|GRF_PINNED)) { \
      if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\
        { d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \
      else m &= ~(1 << gp->sreg); \
    } \
  } \
  for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \
    FOR_ALL_BITS_SET_DO(gconsts[i].gregs, x, \
      if (guest_regs[x].cnst != i || !(guest_regs[x].flags & GRF_CONST)) \
        { d = 1; printf("cache check c=%d v=%d not connected?\n",i,x); } \
    ) \
  } \
  if (m) \
    { d = 1; printf("cache check m=%x pinning wrong?\n",m); } \
  if (d) RCACHE_DUMP(msg) \
/*  else { \
    printf("locked regs %s:\n",msg); \
    for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \
      cp = &cache_regs[i]; \
      if (cp->locked) \
        printf("  %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->locked, cp->gregs); \
    } \
  } */ \
}
1444 
1445 #if PROPAGATE_CONSTANTS
// Allocate a constant slot for guest reg @r: drop r from every slot it
// may still be attached to, then claim the lowest-indexed free slot.
static inline int gconst_alloc(sh2_reg_e r)
{
  int i, n = -1;

  for (i = ARRAY_SIZE(gconsts) - 1; i >= 0; i--) {
    gconsts[i].gregs &= ~(1 << r);
    if (gconsts[i].gregs == 0)
      n = i;
  }
  if (n < 0) {
    printf("all gconst buffers in use, aborting\n");
    exit(1); // cannot happen - more constants than guest regs?
  }
  gconsts[n].gregs = (1 << r);
  return n;
}
1463 
// Attach constant @val to guest reg @r (without marking it dirty).
static void gconst_set(sh2_reg_e r, u32 val)
{
  int slot = gconst_alloc(r);

  gconsts[slot].val = val;
  guest_regs[r].cnst = slot;
  guest_regs[r].flags |= GRF_CONST;
}
1472 
// Assign a brand-new constant to @r. Marked CDIRTY since neither a host
// reg nor the context holds the value yet; any cached copy is discarded.
static void gconst_new(sh2_reg_e r, u32 val)
{
  gconst_set(r, val);
  guest_regs[r].flags |= GRF_CDIRTY;

  // throw away old r that we might have cached
  if (guest_regs[r].vreg >= 0)
    rcache_remove_vreg_alias(guest_regs[r].vreg, r);
}
1482 #endif
1483 
// Fetch @r's constant value into *val if one is known.
// Returns 1 on success, else 0 with *val set to 0.
static int gconst_get(sh2_reg_e r, u32 *val)
{
  if (!(guest_regs[r].flags & GRF_CONST)) {
    *val = 0;
    return 0;
  }
  *val = gconsts[guest_regs[r].cnst].val;
  return 1;
}
1493 
// Nonzero if guest reg @r currently carries a known constant value
// (whether already written back or still dirty).
static int gconst_check(sh2_reg_e r)
{
  return (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) ? 1 : 0;
}
1500 
1501 // update hr if dirty, else do nothing
// update hr if dirty, else do nothing
// If @r's constant was never materialized (CDIRTY), emit a move of the
// value into @vreg's host reg and attach all guest regs sharing that
// constant to @vreg. Returns 1 if code was emitted, 0 otherwise.
static int gconst_try_read(int vreg, sh2_reg_e r)
{
  int i, x;
  u32 v;

  if (guest_regs[r].flags & GRF_CDIRTY) {
    x = guest_regs[r].cnst;
    v = gconsts[x].val;
    emith_move_r_imm(cache_regs[vreg].hreg, v);
    // a known immediate also gives exact 16-bit extension knowledge
    rcache_set_x16(cache_regs[vreg].hreg, v == (s16)v, v == (u16)v);
    FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i,
      {
        // move every sharer of this constant onto vreg; now DIRTY (host
        // reg holds the value) instead of CDIRTY (nothing holds it)
        if (guest_regs[i].vreg >= 0 && guest_regs[i].vreg != vreg)
          rcache_remove_vreg_alias(guest_regs[i].vreg, i);
        if (guest_regs[i].vreg < 0)
          rcache_add_vreg_alias(vreg, i);
        guest_regs[i].flags &= ~GRF_CDIRTY;
        guest_regs[i].flags |= GRF_DIRTY;
      });
    cache_regs[vreg].type = HR_CACHED;
    cache_regs[vreg].flags |= HRF_DIRTY;
    return 1;
  }
  return 0;
}
1527 
gconst_dirty_mask(void)1528 static u32 gconst_dirty_mask(void)
1529 {
1530   u32 mask = 0;
1531   int i;
1532 
1533   for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
1534     if (guest_regs[i].flags & GRF_CDIRTY)
1535       mask |= (1 << i);
1536   return mask;
1537 }
1538 
// Detach guest reg @r from constant tracking entirely.
static void gconst_kill(sh2_reg_e r)
{
  guest_reg_t *gp = &guest_regs[r];

  if (gp->flags & (GRF_CONST|GRF_CDIRTY))
    gconsts[gp->cnst].gregs &= ~(1 << r);
  gp->flags &= ~(GRF_CONST|GRF_CDIRTY);
}
1545 
// Make @rd share @rs's constant (if any); rd's old constant is dropped.
static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs)
{
  gconst_kill(rd);
  if (!(guest_regs[rs].flags & GRF_CONST))
    return;

  guest_regs[rd].cnst = guest_regs[rs].cnst;
  guest_regs[rd].flags |= GRF_CONST;
  // value isn't materialized anywhere for rd unless it's cached
  if (guest_regs[rd].vreg < 0)
    guest_regs[rd].flags |= GRF_CDIRTY;
  gconsts[guest_regs[rd].cnst].gregs |= (1 << rd);
}
1557 
gconst_clean(void)1558 static void gconst_clean(void)
1559 {
1560   int i;
1561 
1562   for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
1563     if (guest_regs[i].flags & GRF_CDIRTY) {
1564       // using RC_GR_READ here: it will call gconst_try_read,
1565       // cache the reg and mark it dirty.
1566       rcache_get_reg_(i, RC_GR_READ, 0, NULL);
1567     }
1568 }
1569 
gconst_invalidate(void)1570 static void gconst_invalidate(void)
1571 {
1572   int i;
1573 
1574   for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
1575     if (guest_regs[i].flags & (GRF_CONST|GRF_CDIRTY))
1576       gconsts[guest_regs[i].cnst].gregs &= ~(1 << i);
1577     guest_regs[i].flags &= ~(GRF_CONST|GRF_CDIRTY);
1578   }
1579 }
1580 
1581 
// age stamp source for LRU-style eviction decisions
static u16 rcache_counter;
// SH2 register usage bitmasks
static u32 rcache_vregs_reg;     // regs of type HRT_REG (for pinning)
static u32 rcache_regs_static;   // statically allocated regs
static u32 rcache_regs_pinned;   // pinned regs
static u32 rcache_regs_now;      // regs used in current insn
static u32 rcache_regs_soon;     // regs used in the next few insns
static u32 rcache_regs_late;     // regs used in later insns
static u32 rcache_regs_discard;  // regs overwritten without being used
static u32 rcache_regs_clean;    // regs needing cleaning
1592 
rcache_lock_vreg(int x)1593 static void rcache_lock_vreg(int x)
1594 {
1595   if (x >= 0) {
1596     cache_regs[x].locked ++;
1597 #if DRC_DEBUG & 64
1598     if (cache_regs[x].type == HR_FREE) {
1599       printf("locking free vreg %x, aborting\n", x);
1600       exit(1);
1601     }
1602     if (!cache_regs[x].locked) {
1603       printf("locking overflow vreg %x, aborting\n", x);
1604       exit(1);
1605     }
1606 #endif
1607   }
1608 }
1609 
rcache_unlock_vreg(int x)1610 static void rcache_unlock_vreg(int x)
1611 {
1612   if (x >= 0) {
1613 #if DRC_DEBUG & 64
1614     if (cache_regs[x].type == HR_FREE) {
1615       printf("unlocking free vreg %x, aborting\n", x);
1616       exit(1);
1617     }
1618 #endif
1619     if (cache_regs[x].locked)
1620       cache_regs[x].locked --;
1621   }
1622 }
1623 
rcache_free_vreg(int x)1624 static void rcache_free_vreg(int x)
1625 {
1626   cache_regs[x].type = cache_regs[x].locked ? HR_TEMP : HR_FREE;
1627   cache_regs[x].flags &= HRF_PINNED;
1628   cache_regs[x].gregs = 0;
1629 }
1630 
// Unmap all guest regs from vreg @x, writing back any dirty values to the
// SH2 context (unless they are about to be overwritten anyway), then free
// the vreg.
static void rcache_unmap_vreg(int x)
{
  int i;

  FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i,
      if (guest_regs[i].flags & GRF_DIRTY) {
        // if a dirty reg is unmapped save its value to context
        if ((~rcache_regs_discard | rcache_regs_now) & (1 << i))
          emith_ctx_write(cache_regs[x].hreg, i * 4);
        guest_regs[i].flags &= ~GRF_DIRTY;
      }
      guest_regs[i].vreg = -1);
  rcache_free_vreg(x);
}
1645 
// Migrate vreg @x into vreg @d: copy cache state and the host reg value,
// repoint all guest regs mapped to @x, then free @x. @d keeps only its
// own pinning flag; the lock count is not carried over.
static void rcache_move_vreg(int d, int x)
{
  int i;

  cache_regs[d].type = HR_CACHED;
  cache_regs[d].gregs = cache_regs[x].gregs;
  cache_regs[d].flags &= HRF_PINNED;
  cache_regs[d].flags |= cache_regs[x].flags & ~HRF_PINNED;
  cache_regs[d].locked = 0;
  cache_regs[d].stamp = cache_regs[x].stamp;
  // emit the actual host register move
  emith_move_r_r(cache_regs[d].hreg, cache_regs[x].hreg);
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
    if (guest_regs[i].vreg == x)
      guest_regs[i].vreg = d;
  rcache_free_vreg(x);
}
1662 
// Write back the dirty guest regs held in vreg @x. Statically allocated
// regs are moved back into their assigned sreg when possible (instead of
// being flushed to the context); if the sreg can't be used right now the
// vreg stays dirty so the writeback happens later in unmap.
static void rcache_clean_vreg(int x)
{
  u32 rns = rcache_regs_now | rcache_regs_soon;
  int r;

  if (cache_regs[x].flags & HRF_DIRTY) { // writeback
    cache_regs[x].flags &= ~HRF_DIRTY;
    // lock x so evicting the sreg below can't recursively evict x itself
    rcache_lock_vreg(x);
    FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, r,
        if (guest_regs[r].flags & GRF_DIRTY) {
          if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) {
            if (guest_regs[r].vreg != guest_regs[r].sreg &&
                !cache_regs[guest_regs[r].sreg].locked &&
                ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) &&
                !(rns & cache_regs[guest_regs[r].sreg].gregs)) {
              // statically mapped reg not in its sreg. move back to sreg
              rcache_evict_vreg(guest_regs[r].sreg);
              emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg,
                             cache_regs[guest_regs[r].vreg].hreg);
              rcache_copy_x16(cache_regs[guest_regs[r].sreg].hreg,
                             cache_regs[guest_regs[r].vreg].hreg);
              rcache_remove_vreg_alias(x, r);
              rcache_add_vreg_alias(guest_regs[r].sreg, r);
              cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY;
            } else
              // cannot remap. keep dirty for writeback in unmap
              cache_regs[x].flags |= HRF_DIRTY;
          } else {
            // plain reg: flush to context unless it's discarded anyway
            if ((~rcache_regs_discard | rcache_regs_now) & (1 << r))
              emith_ctx_write(cache_regs[x].hreg, r * 4);
            guest_regs[r].flags &= ~GRF_DIRTY;
          }
          rcache_regs_clean &= ~(1 << r);
        })
    rcache_unlock_vreg(x);
  }

#if DRC_DEBUG & 64
  RCACHE_CHECK("after clean");
#endif
}
1704 
// Map guest reg @r onto vreg @x (joining any gregs already mapped there).
static void rcache_add_vreg_alias(int x, sh2_reg_e r)
{
  guest_regs[r].vreg = x;
  cache_regs[x].gregs |= (1 << r);
  cache_regs[x].type = HR_CACHED;
}
1711 
// Unmap guest reg @r from vreg @x; the vreg is released once no guest reg
// is mapped to it anymore (kept as TEMP while lock holders remain).
static void rcache_remove_vreg_alias(int x, sh2_reg_e r)
{
  guest_regs[r].vreg = -1;
  cache_regs[x].gregs &= ~(1 << r);
  if (cache_regs[x].gregs == 0) {
    // no reg mapped -> free vreg
    if (cache_regs[x].locked)
      cache_regs[x].type = HR_TEMP;
    else
      rcache_free_vreg(x);
  }
}
1724 
// Evict vreg @x from the cache: remap first (rcache_remap_vreg -
// presumably tries relocating the contents elsewhere; declared above),
// then unmap with writeback of anything still dirty.
static void rcache_evict_vreg(int x)
{
  rcache_remap_vreg(x);
  rcache_unmap_vreg(x);
}
1730 
// Evict all guest regs aliased onto vreg @x except @r, which is detached
// before the eviction and re-attached afterwards.
static void rcache_evict_vreg_aliases(int x, sh2_reg_e r)
{
  rcache_remove_vreg_alias(x, r);
  rcache_evict_vreg(x);
  rcache_add_vreg_alias(x, r);
}
1737 
// Pick a host register to allocate: @what selects the class (>0 REG,
// 0 non-TEMP, <0 TEMP). Candidates are ranked by how soon their current
// contents are needed again (clean regs rank slightly better), with LRU
// stamps breaking ties. Returns -1 if nothing reaches @minprio; evicts
// or frees the chosen vreg before returning its index.
static int rcache_allocate(int what, int minprio)
{
  // evict reg with oldest stamp (only for HRT_REG, no temps)
  int i, i_prio, oldest = -1, prio = 0;
  u16 min_stamp = (u16)-1;

  for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) {
    // consider only non-static, unpinned, unlocked REG or TEMP
    if ((cache_regs[i].flags & HRF_PINNED) || cache_regs[i].locked)
      continue;
    if ((what > 0 && !(cache_regs[i].htype & HRT_REG)) ||   // get a REG
        (what == 0 && (cache_regs[i].htype & HRT_TEMP)) ||  // get a non-TEMP
        (what < 0 && !(cache_regs[i].htype & HRT_TEMP)))    // get a TEMP
      continue;
    if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) {
      // REG is free
      prio = 10;
      oldest = i;
      break;
    }
    if (cache_regs[i].type == HR_CACHED) {
      if (rcache_regs_now & cache_regs[i].gregs)
        // REGs needed for the current insn
        i_prio = 0;
      else if (rcache_regs_soon & cache_regs[i].gregs)
        // REGs needed in the next insns
        i_prio = 2;
      else if (rcache_regs_late & cache_regs[i].gregs)
        // REGs needed in some future insn
        i_prio = 4;
      else if (~rcache_regs_discard & cache_regs[i].gregs)
        // REGs not needed in the foreseeable future
        i_prio = 6;
      else
        // REGs soon overwritten anyway
        i_prio = 8;
      // clean regs are cheaper to evict (no writeback needed)
      if (!(cache_regs[i].flags & HRF_DIRTY)) i_prio ++;

      if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) {
        min_stamp = cache_regs[i].stamp;
        oldest = i;
        prio = i_prio;
      }
    }
  }


  if (prio < minprio || oldest == -1)
    return -1;

  if (cache_regs[oldest].type == HR_CACHED)
    rcache_evict_vreg(oldest);
  else
    rcache_free_vreg(oldest);

  return oldest;
}
1795 
// allocate a vreg, preferring a REG; fall back to a TEMP if no REG can be
// had. needed != 0 allows evicting REGs whose content is still useful.
static int rcache_allocate_vreg(int needed)
{
  int minprio = needed ? 0 : 4;
  int v = rcache_allocate(1, minprio);

  return v >= 0 ? v : rcache_allocate(-1, 0);
}
1805 
// allocate a non-TEMP vreg; only accepts one whose content isn't needed soon
static int rcache_allocate_nontemp(void)
{
  return rcache_allocate(0, 4);
}
1811 
// allocate a TEMP vreg, falling back to any evictable host reg if no TEMP
// is available
static int rcache_allocate_temp(void)
{
  int v;

  if ((v = rcache_allocate(-1, 0)) >= 0)
    return v;
  return rcache_allocate(0, 0);
}
1819 
1820 // maps a host register to a REG
// maps a host register to a REG: r becomes the sole, dirty occupant of hr.
// any constant for r is discarded; returns the host register number.
// with REMAP_REGISTER disabled this degrades to a plain write allocation.
static int rcache_map_reg(sh2_reg_e r, int hr)
{
#if REMAP_REGISTER
  int i;

  gconst_kill(r);

  // lookup the TEMP hr maps to
  i = reg_map_host[hr];
  if (i < 0) {
    // must not happen
    printf("invalid host register %d\n", hr);
    exit(1);
  }

  // remove old mappings of r and i if one exists
  if (guest_regs[r].vreg >= 0)
    rcache_remove_vreg_alias(guest_regs[r].vreg, r);
  if (cache_regs[i].type == HR_CACHED)
    rcache_evict_vreg(i);
  // set new mapping; r is assumed modified in hr, hence marked dirty
  cache_regs[i].type = HR_CACHED;
  cache_regs[i].gregs = 1 << r;
  cache_regs[i].locked = 0;
  cache_regs[i].stamp = ++rcache_counter;
  cache_regs[i].flags |= HRF_DIRTY;
  rcache_lock_vreg(i);
  guest_regs[r].flags |= GRF_DIRTY;
  guest_regs[r].vreg = i;
#if DRC_DEBUG & 64
  RCACHE_CHECK("after map");
#endif
  return cache_regs[i].hreg;
#else
  return rcache_get_reg(r, RC_GR_WRITE, NULL);
#endif
}
1858 
1859 // remap vreg from a TEMP to a REG if it will be used (upcoming TEMP invalidation)
// remap vreg from a TEMP to a REG if it will be used (upcoming TEMP invalidation)
// if no REG is available the vreg is cleaned (written back) instead, so the
// content survives invalidation in the context.
static void rcache_remap_vreg(int x)
{
#if REMAP_REGISTER
  u32 rsl_d = rcache_regs_soon | rcache_regs_late;
  int d;

  // x must be a cached vreg
  if (cache_regs[x].type != HR_CACHED || cache_regs[x].locked)
    return;
  // don't do it if x isn't used
  if (!(rsl_d & cache_regs[x].gregs)) {
    // clean here to avoid data loss on invalidation
    rcache_clean_vreg(x);
    return;
  }

  // prefer moving a STATIC/PINNED alias to its designated sreg if free
  FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, d,
    if ((guest_regs[d].flags & (GRF_STATIC|GRF_PINNED)) &&
        !cache_regs[guest_regs[d].sreg].locked &&
        !((rsl_d|rcache_regs_now) & cache_regs[guest_regs[d].sreg].gregs)) {
      // STATIC not in its sreg and sreg is available
      rcache_evict_vreg(guest_regs[d].sreg);
      rcache_move_vreg(guest_regs[d].sreg, x);
      return;
    }
  )

  // allocate a non-TEMP vreg
  rcache_lock_vreg(x); // lock to avoid evicting x
  d = rcache_allocate_nontemp();
  rcache_unlock_vreg(x);
  if (d < 0) {
    // no REG available; fall back to writing x back to the context
    rcache_clean_vreg(x);
    return;
  }

  // move vreg to new location
  rcache_move_vreg(d, x);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after remap");
#endif
#else
  rcache_clean_vreg(x);
#endif
}
1905 
// make rd an alias of rs (guest-level register copy without emitting a move).
// with ALIAS_REGISTERS disabled an actual host move is emitted instead.
static void rcache_alias_vreg(sh2_reg_e rd, sh2_reg_e rs)
{
#if ALIAS_REGISTERS
  int x;

  // if s isn't constant, it must be in cache for aliasing
  if (!gconst_check(rs))
    rcache_get_reg_(rs, RC_GR_READ, 0, NULL);

  // if d and s are not already aliased
  x = guest_regs[rs].vreg;
  if (guest_regs[rd].vreg != x) {
    // remove possible old mapping of dst
    if (guest_regs[rd].vreg >= 0)
      rcache_remove_vreg_alias(guest_regs[rd].vreg, rd);
    // make dst an alias of src
    if (x >= 0)
      rcache_add_vreg_alias(x, rd);
    // if d is now in cache, it must be dirty
    // (the context copy of rd no longer matches the cached value)
    if (guest_regs[rd].vreg >= 0) {
      x = guest_regs[rd].vreg;
      cache_regs[x].flags |= HRF_DIRTY;
      guest_regs[rd].flags |= GRF_DIRTY;
    }
  }

  gconst_copy(rd, rs);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after alias");
#endif
#else
  int hr_s = rcache_get_reg(rs, RC_GR_READ, NULL);
  int hr_d = rcache_get_reg(rd, RC_GR_WRITE, NULL);

  emith_move_r_r(hr_d, hr_s);
  gconst_copy(rd, rs);
#endif
}
1944 
1945 // note: must not be called when doing conditional code
// core allocator: get guest reg r into a host register, for reading and/or
// writing as selected by mode. if hr != NULL, *hr receives a (locked) host
// reg holding the old value of r for RMW-style use, and the caller must
// rcache_free it. may emit loads/moves; aborts the process if no host reg
// can be found.
// note: must not be called when doing conditional code
static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr)
{
  int src, dst, ali;
  cache_reg_t *tr;
  u32 rsp_d = (rcache_regs_soon | rcache_regs_static | rcache_regs_pinned) &
               ~rcache_regs_discard;

  dst = src = guest_regs[r].vreg;

  rcache_lock_vreg(src); // lock to avoid evicting src
  // good opportunity to relocate a remapped STATIC?
  if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
      src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) &&
      !cache_regs[guest_regs[r].sreg].locked &&
      !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[r].sreg].gregs)) {
    dst = guest_regs[r].sreg;
    rcache_evict_vreg(dst);
  } else if (dst < 0) {
    // allocate a cache register
    if ((dst = rcache_allocate_vreg(rsp_d & (1 << r))) < 0) {
      printf("no registers to evict, aborting\n");
      exit(1);
    }
  }
  tr = &cache_regs[dst];
  tr->stamp = rcache_counter;
  // remove r from src
  if (src >= 0 && src != dst)
    rcache_remove_vreg_alias(src, r);
  rcache_unlock_vreg(src);

  // if r has a constant it may have aliases
  if (mode != RC_GR_WRITE && gconst_try_read(dst, r))
    src = dst;

  // if r will be modified, check for aliases being needed rsn
  ali = tr->gregs & ~(1 << r);
  if (mode != RC_GR_READ && src == dst && ali) {
    int x = -1;
    if ((rsp_d|rcache_regs_now) & ali) {
      if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
          guest_regs[r].sreg == dst && !tr->locked) {
        // split aliases if r is STATIC in sreg and dst isn't already locked
        int t;
        FOR_ALL_BITS_SET_DO(ali, t,
          if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) &&
              !(ali & ~(1 << t)) &&
              !cache_regs[guest_regs[t].sreg].locked &&
              !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[t].sreg].gregs)) {
            // alias is a single STATIC and its sreg is available
            x = guest_regs[t].sreg;
            rcache_evict_vreg(x);
          } else {
            rcache_lock_vreg(dst); // lock to avoid evicting dst
            x = rcache_allocate_vreg(rsp_d & ali);
            rcache_unlock_vreg(dst);
          }
          break;
        )
        if (x >= 0) {
          // move the aliases out of dst; r keeps its sreg
          rcache_remove_vreg_alias(src, r);
          src = dst;
          rcache_move_vreg(x, dst);
        }
      } else {
        // split r: give r its own new vreg, aliases stay where they are
        rcache_lock_vreg(src); // lock to avoid evicting src
        x = rcache_allocate_vreg(rsp_d & (1 << r));
        rcache_unlock_vreg(src);
        if (x >= 0) {
          rcache_remove_vreg_alias(src, r);
          dst = x;
          tr = &cache_regs[dst];
          tr->stamp = rcache_counter;
        }
      }
    }
    if (x < 0)
      // aliases not needed or no vreg available, remove them
      rcache_evict_vreg_aliases(dst, r);
  }

  // assign r to dst
  rcache_add_vreg_alias(dst, r);

  // handle dst register transfer
  if (src < 0 && mode != RC_GR_WRITE)
    emith_ctx_read(tr->hreg, r * 4);
  if (hr) {
    // caller wants the old value separately; hand it out locked
    *hr = (src >= 0 ? cache_regs[src].hreg : tr->hreg);
    rcache_lock_vreg(src >= 0 ? src : dst);
  } else if (src >= 0 && mode != RC_GR_WRITE && cache_regs[src].hreg != tr->hreg)
    emith_move_r_r(tr->hreg, cache_regs[src].hreg);

  // housekeeping
  if (do_locking)
    rcache_lock_vreg(dst);
  if (mode != RC_GR_READ) {
    tr->flags |= HRF_DIRTY;
    guest_regs[r].flags |= GRF_DIRTY;
    gconst_kill(r);
    rcache_set_x16(tr->hreg, 0, 0);
  } else if (src >= 0 && cache_regs[src].hreg != tr->hreg)
    rcache_copy_x16(tr->hreg, cache_regs[src].hreg);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after getreg");
#endif
  return tr->hreg;
}
2055 
// get guest reg r in a host reg, locked for the duration of the current insn
static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode, int *hr)
{
  return rcache_get_reg_(r, mode, /*do_locking*/ 1, hr);
}
2060 
// pin guest reg r into a fixed host REG so it stays allocated across insns
// (used e.g. for loop optimization). no-op if r is STATIC or already pinned,
// or if only a TEMP could be obtained.
static void rcache_pin_reg(sh2_reg_e r)
{
  int hr, x;

  // don't pin if static or already pinned
  if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED))
    return;

  rcache_regs_soon |= (1 << r); // kludge to prevent allocation of a temp
  hr = rcache_get_reg_(r, RC_GR_RMW, 0, NULL);
  x = reg_map_host[hr];

  // can only pin non-TEMPs
  if (!(cache_regs[x].htype & HRT_TEMP)) {
    guest_regs[r].flags |= GRF_PINNED;
    cache_regs[x].flags |= HRF_PINNED;
    guest_regs[r].sreg = x;
    rcache_regs_pinned |= (1 << r);
  }
#if DRC_DEBUG & 64
  RCACHE_CHECK("after pin");
#endif
}
2084 
rcache_get_tmp(void)2085 static int rcache_get_tmp(void)
2086 {
2087   int i;
2088 
2089   i = rcache_allocate_temp();
2090   if (i < 0) {
2091     printf("cannot allocate temp\n");
2092     exit(1);
2093   }
2094 
2095   cache_regs[i].type = HR_TEMP;
2096   rcache_lock_vreg(i);
2097 
2098   return cache_regs[i].hreg;
2099 }
2100 
rcache_get_vreg_hr(int hr)2101 static int rcache_get_vreg_hr(int hr)
2102 {
2103   int i;
2104 
2105   i = reg_map_host[hr];
2106   if (i < 0 || cache_regs[i].locked) {
2107     printf("host register %d is locked\n", hr);
2108     exit(1);
2109   }
2110 
2111   if (cache_regs[i].type == HR_CACHED)
2112     rcache_evict_vreg(i);
2113   else if (cache_regs[i].type == HR_TEMP && cache_regs[i].locked) {
2114     printf("host reg %d already used, aborting\n", hr);
2115     exit(1);
2116   }
2117 
2118   return i;
2119 }
2120 
// claim the vreg for the host register used as function call argument #arg
static int rcache_get_vreg_arg(int arg)
{
  int reg = 0;

  host_arg2reg(reg, arg);
  return rcache_get_vreg_hr(reg);
}
2128 
2129 // get a reg to be used as function arg
rcache_get_tmp_arg(int arg)2130 static int rcache_get_tmp_arg(int arg)
2131 {
2132   int x = rcache_get_vreg_arg(arg);
2133   cache_regs[x].type = HR_TEMP;
2134   rcache_lock_vreg(x);
2135 
2136   return cache_regs[x].hreg;
2137 }
2138 
2139 // ... as return value after a call
rcache_get_tmp_ret(void)2140 static int rcache_get_tmp_ret(void)
2141 {
2142   int x = rcache_get_vreg_hr(RET_REG);
2143   cache_regs[x].type = HR_TEMP;
2144   rcache_lock_vreg(x);
2145 
2146   return cache_regs[x].hreg;
2147 }
2148 
2149 // same but caches a reg if access is readonly (announced by hr being NULL)
// load guest reg r into the host register used for function call arg #arg.
// same as rcache_get_tmp_arg but caches r in the arg reg if access is
// readonly (announced by hr being NULL); otherwise *hr gets a locked host
// reg holding r's value (may differ from the returned arg reg).
static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr)
{
  int i, srcr, dstr, dstid, keep;
  u32 val;
  host_arg2reg(dstr, arg);

  i = guest_regs[r].vreg;
  if (i >= 0 && cache_regs[i].type == HR_CACHED && cache_regs[i].hreg == dstr)
    // r is already in arg, avoid evicting
    dstid = i;
  else
    dstid = rcache_get_vreg_arg(arg);
  dstr = cache_regs[dstid].hreg;

  if (rcache_is_cached(r)) {
    // r is needed later on anyway
    srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
    keep = 1;
  } else if ((guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) {
    // r has an uncommitted const - load into arg, but keep constant uncommitted
    srcr = dstr;
    emith_move_r_imm(srcr, val);
    keep = 0;
  } else {
    // must read from ctx
    srcr = dstr;
    emith_ctx_read(srcr, r * 4);
    keep = 1;
  }

  if (cache_regs[dstid].type == HR_CACHED)
    rcache_evict_vreg(dstid);

  cache_regs[dstid].type = HR_TEMP;
  if (hr == NULL) {
    if (dstr != srcr)
      // arg is a copy of cached r
      emith_move_r_r(dstr, srcr);
    else if (keep && guest_regs[r].vreg < 0)
      // keep arg as vreg for r
      rcache_add_vreg_alias(dstid, r);
  } else {
    *hr = srcr;
    if (dstr != srcr) // must lock srcr if not copied here
      rcache_lock_vreg(reg_map_host[srcr]);
  }

  cache_regs[dstid].stamp = ++rcache_counter;
  rcache_lock_vreg(dstid);
#if DRC_DEBUG & 64
  RCACHE_CHECK("after getarg");
#endif
  return dstr;
}
2204 
rcache_free_tmp(int hr)2205 static void rcache_free_tmp(int hr)
2206 {
2207   int i = reg_map_host[hr];
2208 
2209   if (i < 0 || cache_regs[i].type != HR_TEMP) {
2210     printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, cache_regs[i].type);
2211     exit(1);
2212   }
2213 
2214   rcache_unlock_vreg(i);
2215 }
2216 
2217 // saves temporary result either in REG or in drctmp
// saves temporary result either in REG or in drctmp.
// returns the cache_regs index holding the value, or -1 if it went to the
// drc_tmp context slot; pass this to rcache_restore_tmp later.
static int rcache_save_tmp(int hr)
{
  int i;

  // find REG, either free or unlocked temp or oldest non-hinted cached
  i = rcache_allocate_nontemp();
  if (i < 0) {
    // if none is available, store in drctmp
    emith_ctx_write(hr, offsetof(SH2, drc_tmp));
    rcache_free_tmp(hr);
    return -1;
  }

  cache_regs[i].type = HR_CACHED;
  cache_regs[i].gregs = 0; // not storing any guest register
  cache_regs[i].flags &= HRF_PINNED; // clear all flags except PINNED
  cache_regs[i].locked = 0;
  cache_regs[i].stamp = ++rcache_counter;
  rcache_lock_vreg(i);
  emith_move_r_r(cache_regs[i].hreg, hr);
  rcache_free_tmp(hr);
  return i;
}
2241 
// fetch back a value saved with rcache_save_tmp. x is the value returned by
// the save (cache_regs index, or -1 for the drc_tmp context slot).
// returns a TEMP host reg holding the value.
static int rcache_restore_tmp(int x)
{
  int hr;

  // find REG with tmp store: cached but with no gregs
  if (x >= 0) {
    if (cache_regs[x].type != HR_CACHED || cache_regs[x].gregs) {
      printf("invalid tmp storage %d\n", x);
      exit(1);
    }
    // found, transform to a TEMP
    cache_regs[x].type = HR_TEMP;
    return cache_regs[x].hreg;
  }

  // if not available, create a TEMP store and fetch from drctmp
  hr = rcache_get_tmp();
  emith_ctx_read(hr, offsetof(SH2, drc_tmp));

  return hr;
}
2263 
rcache_free(int hr)2264 static void rcache_free(int hr)
2265 {
2266   int x = reg_map_host[hr];
2267   rcache_unlock_vreg(x);
2268 }
2269 
rcache_unlock(int x)2270 static void rcache_unlock(int x)
2271 {
2272   if (x >= 0)
2273     cache_regs[x].locked = 0;
2274 }
2275 
rcache_unlock_all(void)2276 static void rcache_unlock_all(void)
2277 {
2278   int i;
2279   for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
2280     cache_regs[i].locked = 0;
2281 }
2282 
rcache_unpin_all(void)2283 static void rcache_unpin_all(void)
2284 {
2285   int i;
2286 
2287   for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
2288     if (guest_regs[i].flags & GRF_PINNED) {
2289       guest_regs[i].flags &= ~GRF_PINNED;
2290       cache_regs[guest_regs[i].sreg].flags &= ~HRF_PINNED;
2291       guest_regs[i].sreg = -1;
2292       rcache_regs_pinned &= ~(1 << i);
2293     }
2294   }
2295 #if DRC_DEBUG & 64
2296   RCACHE_CHECK("after unpin");
2297 #endif
2298 }
2299 
rcache_save_pinned(void)2300 static void rcache_save_pinned(void)
2301 {
2302   int i;
2303 
2304   // save pinned regs to context
2305   for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
2306     if ((guest_regs[i].flags & GRF_PINNED) && guest_regs[i].vreg >= 0)
2307       emith_ctx_write(cache_regs[guest_regs[i].vreg].hreg, i * 4);
2308 }
2309 
// declare guest regs (bitmask) needed by the insn currently being translated
static inline void rcache_set_usage_now(u32 mask)
{
  rcache_regs_now = mask;
}
2314 
// declare guest regs (bitmask) needed in the next few insns
static inline void rcache_set_usage_soon(u32 mask)
{
  rcache_regs_soon = mask;
}
2319 
// declare guest regs (bitmask) needed in some later insn of the block
static inline void rcache_set_usage_late(u32 mask)
{
  rcache_regs_late = mask;
}
2324 
// declare guest regs (bitmask) that will be overwritten before next use,
// so their current value need not be preserved
static inline void rcache_set_usage_discard(u32 mask)
{
  rcache_regs_discard = mask;
}
2329 
// is r currently in the cache, or marked as needed/cleaned soon?
static inline int rcache_is_cached(sh2_reg_e r)
{
  if (guest_regs[r].vreg >= 0)
    return 1;
  return !!((rcache_regs_soon | rcache_regs_clean) & (1 << r));
}
2336 
rcache_is_hreg_used(int hr)2337 static inline int rcache_is_hreg_used(int hr)
2338 {
2339   int x = reg_map_host[hr];
2340   // is hr in use?
2341   return cache_regs[x].type != HR_FREE &&
2342         (cache_regs[x].type != HR_TEMP || cache_regs[x].locked);
2343 }
2344 
rcache_used_hregs_mask(void)2345 static inline u32 rcache_used_hregs_mask(void)
2346 {
2347   u32 mask = 0;
2348   int i;
2349 
2350   for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
2351     if ((cache_regs[i].htype & HRT_TEMP) && cache_regs[i].type != HR_FREE &&
2352         (cache_regs[i].type != HR_TEMP || cache_regs[i].locked))
2353       mask |= 1 << cache_regs[i].hreg;
2354 
2355   return mask;
2356 }
2357 
rcache_dirty_mask(void)2358 static inline u32 rcache_dirty_mask(void)
2359 {
2360   u32 mask = 0;
2361   int i;
2362 
2363   for (i = 0; i < ARRAY_SIZE(guest_regs); i++)
2364     if (guest_regs[i].flags & GRF_DIRTY)
2365       mask |= 1 << i;
2366   mask |= gconst_dirty_mask();
2367 
2368   return mask;
2369 }
2370 
rcache_cached_mask(void)2371 static inline u32 rcache_cached_mask(void)
2372 {
2373   u32 mask = 0;
2374   int i;
2375 
2376   for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
2377     if (cache_regs[i].type == HR_CACHED)
2378       mask |= cache_regs[i].gregs;
2379 
2380   return mask;
2381 }
2382 
// move/writeback all guest regs currently sitting in TEMP-class host regs,
// since TEMPs are about to be clobbered (e.g. by a function call)
static void rcache_clean_tmp(void)
{
  int i;

  // mark everything cleanable so remap can fall back to a context writeback
  rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1;
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    if (cache_regs[i].type == HR_CACHED && (cache_regs[i].htype & HRT_TEMP)) {
      rcache_unlock(i);
      rcache_remap_vreg(i);
    }
  rcache_regs_clean = 0;
}
2395 
// write back dirty state for the guest regs in mask (plus anything already
// scheduled in rcache_regs_clean), without dropping cache mappings
static void rcache_clean_masked(u32 mask)
{
  int i, r, hr;
  u32 m;

  rcache_regs_clean |= mask;
  mask = rcache_regs_clean;

  // clean constants where all aliases are covered by the mask, exempt statics
  // to avoid flushing them to context if sreg isn't available
  m = mask & ~(rcache_regs_static | rcache_regs_pinned);
  for (i = 0; i < ARRAY_SIZE(gconsts); i++)
    if ((gconsts[i].gregs & m) && !(gconsts[i].gregs & ~mask)) {
      // materialize the constant in a host reg once, then clean that vreg;
      // one alias suffices since all aliases share the same value
      FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r,
          if (guest_regs[r].flags & GRF_CDIRTY) {
            hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL);
            rcache_clean_vreg(reg_map_host[hr]);
            break;
          });
    }
  // clean vregs where all aliases are covered by the mask
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    if (cache_regs[i].type == HR_CACHED &&
        (cache_regs[i].gregs & mask) && !(cache_regs[i].gregs & ~mask))
      rcache_clean_vreg(i);
}
2422 
// write back all dirty cache state to the context, and move STATIC/PINNED
// guest regs back into their designated host regs. cache mappings survive.
static void rcache_clean(void)
{
  int i;
  gconst_clean();

  rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1;
  for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--)
    if (cache_regs[i].type == HR_CACHED)
      rcache_clean_vreg(i);

  // relocate statics to their sregs (necessary before conditional jumps)
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    if ((guest_regs[i].flags & (GRF_STATIC|GRF_PINNED)) &&
          guest_regs[i].vreg != guest_regs[i].sreg) {
      // lock the current location (if any) while making room in sreg
      rcache_lock_vreg(guest_regs[i].vreg);
      rcache_evict_vreg(guest_regs[i].sreg);
      rcache_unlock_vreg(guest_regs[i].vreg);
      if (guest_regs[i].vreg < 0)
        emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4);
      else {
        emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg,
                        cache_regs[guest_regs[i].vreg].hreg);
        rcache_copy_x16(cache_regs[guest_regs[i].sreg].hreg,
                        cache_regs[guest_regs[i].vreg].hreg);
        rcache_remove_vreg_alias(guest_regs[i].vreg, i);
      }
      cache_regs[guest_regs[i].sreg].gregs = 1 << i;
      cache_regs[guest_regs[i].sreg].type = HR_CACHED;
      cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED;
      guest_regs[i].flags |= GRF_DIRTY;
      guest_regs[i].vreg = guest_regs[i].sreg;
    }
  }
  rcache_regs_clean = 0;
}
2458 
// drop all TEMP-class host regs from the cache (their content is about to
// be clobbered, e.g. by a call); cached guest values are evicted first
static void rcache_invalidate_tmp(void)
{
  int i;

  for (i = 0; i < ARRAY_SIZE(cache_regs); i++) {
    if (cache_regs[i].htype & HRT_TEMP) {
      rcache_unlock(i);
      if (cache_regs[i].type == HR_CACHED)
        rcache_evict_vreg(i);
      else
        rcache_free_vreg(i);
    }
  }
}
2473 
// reset the register cache completely: drop all mappings, constants and
// usage hints. STATIC guest regs are re-established in their sregs (dirty,
// since the context copy may be stale).
static void rcache_invalidate(void)
{
  int i;
  gconst_invalidate();
  rcache_unlock_all();

  for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
    rcache_free_vreg(i);

  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    guest_regs[i].flags &= GRF_STATIC; // keep only the STATIC property
    if (!(guest_regs[i].flags & GRF_STATIC))
      guest_regs[i].vreg = -1;
    else {
      cache_regs[guest_regs[i].sreg].gregs = 1 << i;
      cache_regs[guest_regs[i].sreg].type = HR_CACHED;
      cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED;
      guest_regs[i].flags |= GRF_DIRTY;
      guest_regs[i].vreg = guest_regs[i].sreg;
    }
  }

  rcache_counter = 0;
  rcache_regs_now = rcache_regs_soon = rcache_regs_late = 0;
  rcache_regs_discard = rcache_regs_clean = 0;
}
2500 
// write back all dirty state, then drop every cache mapping.
// NOTE(review): order matters - clean must run before invalidate, else
// dirty values would be lost; do not reorder.
static void rcache_flush(void)
{
  rcache_clean();
  rcache_invalidate();
}
2506 
// one-time construction of the register cache tables: build cache_regs from
// the host register lists, the host-reg -> cache index map, and the STATIC
// guest reg assignments. aborts if the host reg lists don't add up.
static void rcache_create(void)
{
  int x = 0, i;

  // create cache_regs as host register representation
  // RET_REG/params should be first TEMPs to avoid allocation conflicts in calls
  cache_regs[x++] = (cache_reg_t) {.hreg = RET_REG, .htype = HRT_TEMP};
  for (i = 0; i < ARRAY_SIZE(hregs_param); i++)
    if (hregs_param[i] != RET_REG)
      cache_regs[x++] = (cache_reg_t){.hreg = hregs_param[i],.htype = HRT_TEMP};

  for (i = 0; i < ARRAY_SIZE(hregs_temp); i++)
    if (hregs_temp[i] != RET_REG)
      cache_regs[x++] = (cache_reg_t){.hreg = hregs_temp[i], .htype = HRT_TEMP};

  // callee-saved regs become REGs; CONTEXT_REG is reserved and excluded
  for (i = ARRAY_SIZE(hregs_saved)-1; i >= 0; i--)
    if (hregs_saved[i] != CONTEXT_REG)
      cache_regs[x++] = (cache_reg_t){.hreg = hregs_saved[i], .htype = HRT_REG};

  if (x != ARRAY_SIZE(cache_regs)) {
    printf("rcache_create failed (conflicting register count)\n");
    exit(1);
  }

  // mapping from host_register to cache regs index
  memset(reg_map_host, -1, sizeof(reg_map_host));
  for (i = 0; i < ARRAY_SIZE(cache_regs); i++) {
    if (cache_regs[i].htype)
      reg_map_host[cache_regs[i].hreg] = i;
    if (cache_regs[i].htype == HRT_REG)
      rcache_vregs_reg |= (1 << i);
  }

  // create static host register mapping for SH2 regs
  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    guest_regs[i] = (guest_reg_t){.sreg = -1};
  }
  // regs_static is pairs of (guest reg, host reg)
  for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) {
    for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--)
      if (cache_regs[x].hreg == regs_static[i+1])	break;
    if (x >= 0) {
      guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x};
      rcache_regs_static |= (1 << regs_static[i]);
      rcache_vregs_reg &= ~(1 << x); // statics aren't generally allocatable
    }
  }

  printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n",
    CACHE_REGS+1L, count_bits(rcache_vregs_reg),count_bits(rcache_regs_static));
}
2557 
// initialize the register cache: build the tables, then reset to empty state
static void rcache_init(void)
{
  // create DRC data structures
  rcache_create();

  rcache_invalidate();
#if DRC_DEBUG & 64
  RCACHE_CHECK("after init");
#endif
}
2568 
2569 // ---------------------------------------------------------------
2570 
2571 // NB may return either REG or TEMP
// if guest reg r holds a known-constant address into a directly accessible
// memory region, return a host reg holding the base for a load/store, and
// adjust *offs to the remaining offset (fits the host's offset field).
// returns -1 if r isn't constant or doesn't point at known memory.
// NB may return either REG or TEMP
static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs)
{
  uptr omask = emith_rw_offs_max(); // offset mask
  u32 mask = 0;
  u32 a;
  int poffs;
  int hr, hr2;
  uptr la;

  // is r constant and points to a memory region?
  if (! gconst_get(r, &a))
    return -1;
  poffs = dr_ctx_get_mem_ptr(sh2, a, &mask);
  if (poffs == -1)
    return -1;

  if (mask < 0x20000) {
    // data array, BIOS, DRAM, can't safely access directly since host addr may
    // change (BIOS,da code may run on either core, DRAM may be switched)
    hr = rcache_get_tmp();
    a = (a + *offs) & mask;
    if (poffs == offsetof(SH2, p_da)) {
      // access sh2->data_array directly
      a += offsetof(SH2, data_array);
      emith_add_r_r_ptr_imm(hr, CONTEXT_REG, a & ~omask);
    } else {
      emith_ctx_read_ptr(hr, poffs);
      if (a & ~omask)
        emith_add_r_r_ptr_imm(hr, hr, a & ~omask);
    }
    *offs = a & omask;
    return hr;
  }

  // ROM, SDRAM. Host address should be mmapped to be equal to SH2 address.
  la = (uptr)*(void **)((char *)sh2 + poffs);

  // if r is in rcache or needed soon anyway, and offs is relative to region,
  // and address translation fits in add_ptr_imm (s32), then use rcached const
  if (la == (s32)la && !(*offs & ~mask) && rcache_is_cached(r)) {
    u32 odd = a & 1; // need to fix odd address for correct byte addressing
    la -= (s32)((a & ~mask) - *offs - odd); // diff between reg and memory
    hr = hr2 = rcache_get_reg(r, rmode, NULL);
    if ((s32)a < 0) emith_uext_ptr(hr2);
    if ((la & ~omask) - odd) {
      hr = rcache_get_tmp();
      emith_add_r_r_ptr_imm(hr, hr2, (la & ~omask) - odd);
      rcache_free(hr2);
    }
    *offs = (la & omask);
  } else {
    // known fixed host address
    la += (a + *offs) & mask;
    hr = rcache_get_tmp();
    emith_move_r_ptr_imm(hr, la & ~omask);
    *offs = la & omask;
  }
  return hr;
}
2631 
2632 // read const data from const ROM address
// read const data from const ROM address.
// if r+offs is a constant address into memory-mapped ROM, fetch the value
// at translation time into *val (sign-extended for 8/16 bit) and return 1;
// otherwise return 0 and leave *val as 0.
static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, u32 offs, int size, u32 *val)
{
  u32 a, mask;

  *val = 0;
  if (gconst_get(r, &a)) {
    a += offs;
    // check if rom is memory mapped (not bank switched), and address is in rom
    if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) == sh2->p_rom) {
      switch (size & MF_SIZEMASK) {
      case 0:   *val = (s8)p32x_sh2_read8(a, sh2s);   break;  // 8
      case 1:   *val = (s16)p32x_sh2_read16(a, sh2s); break;  // 16
      case 2:   *val = p32x_sh2_read32(a, sh2s);      break;  // 32
      }
      return 1;
    }
  }
  return 0;
}
2652 
// dst = imm. with constant propagation the value is only recorded as a
// pending constant and no code is emitted until it's actually needed.
static void emit_move_r_imm32(sh2_reg_e dst, u32 imm)
{
#if PROPAGATE_CONSTANTS
  gconst_new(dst, imm);
#else
  int hr = rcache_get_reg(dst, RC_GR_WRITE, NULL);
  emith_move_r_imm(hr, imm);
#endif
}
2662 
// dst = src; aliases the regs if src is constant or cached, otherwise
// loads dst straight from the context
static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src)
{
  if (!gconst_check(src) && !rcache_is_cached(src)) {
    int dhr = rcache_get_reg(dst, RC_GR_WRITE, NULL);
    emith_ctx_read(dhr, src * 4);
  } else
    rcache_alias_vreg(dst, src);
}
2672 
// r += imm, with constant propagation where possible
static void emit_add_r_imm(sh2_reg_e r, u32 imm)
{
  u32 val;
  int isconst = gconst_get(r, &val); // capture before RMW kills the const
  int hr, hr2;

  if (isconst && !rcache_is_cached(r)) {
    // r is a pure constant - just track the new value, emit nothing
    gconst_new(r, val + imm);
    return;
  }

  // not constant, or r is already in cache: emit the add
  hr = rcache_get_reg(r, RC_GR_RMW, &hr2);
  emith_add_r_r_imm(hr, hr2, imm);
  rcache_free(hr2);
  if (isconst)
    gconst_set(r, val + imm);
}
2689 
// r -= imm, with constant propagation where possible
static void emit_sub_r_imm(sh2_reg_e r, u32 imm)
{
  u32 val;
  int isconst = gconst_get(r, &val); // capture before RMW kills the const
  int hr, hr2;

  if (isconst && !rcache_is_cached(r)) {
    // r is a pure constant - just track the new value, emit nothing
    gconst_new(r, val - imm);
    return;
  }

  // not constant, or r is already in cache: emit the subtract
  hr = rcache_get_reg(r, RC_GR_RMW, &hr2);
  emith_sub_r_r_imm(hr, hr2, imm);
  rcache_free(hr2);
  if (isconst)
    gconst_set(r, val - imm);
}
2706 
// materialize a host-flag-held T bit back into the SR register
static void emit_sync_t_to_sr(void)
{
  // avoid reloading SR from context if there's nothing to do
  if (emith_get_t_cond() >= 0) {
    int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
    emith_sync_t(sr);
  }
}
2715 
2716 // rd = @(arg0)
// rd = @(arg0): emit a call to the memory read handler for the given size
// (address already in arg0). returns a TEMP host reg with the result.
// size also carries MF_POLLING to select the poll-detecting handlers.
static int emit_memhandler_read(int size)
{
  int hr;

  emit_sync_t_to_sr();
  rcache_clean_tmp();
#ifndef DRC_SR_REG
  // must writeback cycles for poll detection stuff
  if (guest_regs[SHR_SR].vreg != -1)
    rcache_unmap_vreg(guest_regs[SHR_SR].vreg);
#endif
  rcache_invalidate_tmp();

  if (size & MF_POLLING)
    switch (size & MF_SIZEMASK) {
    case 0:   emith_call(sh2_drc_read8_poll);   break; // 8
    case 1:   emith_call(sh2_drc_read16_poll);  break; // 16
    case 2:   emith_call(sh2_drc_read32_poll);  break; // 32
    }
  else
    switch (size & MF_SIZEMASK) {
    case 0:   emith_call(sh2_drc_read8);        break; // 8
    case 1:   emith_call(sh2_drc_read16);       break; // 16
    case 2:   emith_call(sh2_drc_read32);       break; // 32
    }

  hr = rcache_get_tmp_ret();
  // 8/16 bit reads come back sign-extended from the handlers
  rcache_set_x16(hr, (size & MF_SIZEMASK) < 2, 0);
  return hr;
}
2747 
// @(arg0) = arg1: emit a call to a memory write handler; address in host
// arg0, value in host arg1, both prepared by the caller.
// (size & MF_SIZEMASK) selects 8/16/32 bit access.
static void emit_memhandler_write(int size)
{
  // SR must be consistent in memory since handlers may inspect/modify context
  emit_sync_t_to_sr();
  rcache_clean_tmp();
#ifndef DRC_SR_REG
  // must writeback cycles for poll detection stuff (see emit_memhandler_read)
  if (guest_regs[SHR_SR].vreg != -1)
    rcache_unmap_vreg(guest_regs[SHR_SR].vreg);
#endif
  // the call clobbers all temps
  rcache_invalidate_tmp();

  switch (size & MF_SIZEMASK) {
  case 0:   emith_call(sh2_drc_write8);     break;  // 8
  case 1:   emith_call(sh2_drc_write16);    break;  // 16
  case 2:   emith_call(sh2_drc_write32);    break;  // 32
  }
}
2765 
// rd = @(Rs,#offs); rd < 0 -> return a temp
// Emits a read of (Rs+offs) into rd, choosing the cheapest available path:
// 1. constant ROM data: fold the loaded value into a constant (no code),
// 2. direct host access through a known memory base register,
// 3. generic call through a memory handler.
// MF_POSTINCR additionally advances Rs by the access size (for @Rs+ forms).
// Returns the host register holding the result.
static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size)
{
  int hr, hr2;
  u32 val;

#if PROPAGATE_CONSTANTS
  // path 1: address is constant and points into ROM -> value is known now
  if (emit_get_rom_data(sh2, rs, offs, size, &val)) {
    if (rd == SHR_TMP) {
      hr2 = rcache_get_tmp();
      emith_move_r_imm(hr2, val);
    } else {
      emit_move_r_imm32(rd, val);
      hr2 = rcache_get_reg(rd, RC_GR_RMW, NULL);
    }
    // record known extension state of the value for later narrowing elision
    rcache_set_x16(hr2, val == (s16)val, val == (u16)val);
    if (size & MF_POSTINCR)
      emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK));
    return hr2;
  }

  // path 2: Rs maps to directly accessible memory -> inline load
  val = size & MF_POSTINCR;
  hr = emit_get_rbase_and_offs(sh2, rs, val ? RC_GR_RMW : RC_GR_READ, &offs);
  if (hr != -1) {
    if (rd == SHR_TMP)
      hr2 = rcache_get_tmp();
    else
      hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL);
    // NOTE(review): the offs^1 byte fixup and the 16-bit rotate after 32-bit
    // reads suggest guest memory is stored as byteswapped 16-bit words — confirm
    switch (size & MF_SIZEMASK) {
    case 0: emith_read8s_r_r_offs(hr2, hr, offs ^ 1);  break; // 8
    case 1: emith_read16s_r_r_offs(hr2, hr, offs);     break; // 16
    case 2: emith_read_r_r_offs(hr2, hr, offs); emith_ror(hr2, hr2, 16); break;
    }
    rcache_free(hr);
    if (size & MF_POSTINCR)
      emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK));
    return hr2;
  }
#endif

  // path 3: generic handler call; compute the address into arg0
  if (gconst_get(rs, &val) && !rcache_is_cached(rs)) {
    // address is a constant not bound to a host reg
    hr = rcache_get_tmp_arg(0);
    emith_move_r_imm(hr, val + offs);
    if (size & MF_POSTINCR)
      gconst_new(rs, val + (1 << (size & MF_SIZEMASK)));
  } else if (size & MF_POSTINCR) {
    // need Rs itself updated, so build the address in a separate temp
    hr = rcache_get_tmp_arg(0);
    hr2 = rcache_get_reg(rs, RC_GR_RMW, NULL);
    emith_add_r_r_imm(hr, hr2, offs);
    emith_add_r_imm(hr2, 1 << (size & MF_SIZEMASK));
    if (gconst_get(rs, &val))
      gconst_set(rs, val + (1 << (size & MF_SIZEMASK)));
  } else {
    hr = rcache_get_reg_arg(0, rs, &hr2);
    if (offs || hr != hr2)
      emith_add_r_r_imm(hr, hr2, offs);
  }
  hr = emit_memhandler_read(size);

  if (rd == SHR_TMP)
    hr2 = hr;
  else
    // try to bind the result temp directly to rd to avoid a move
    hr2 = rcache_map_reg(rd, hr);

  if (hr != hr2) {
    emith_move_r_r(hr2, hr);
    rcache_free_tmp(hr);
  }
  return hr2;
}
2836 
// @(Rs,#offs) = rd; rd < 0 -> write arg1
// Emits a write of rd to (Rs+offs) through a memory handler; value goes to
// host arg1, address to host arg0. MF_PREDECR first decrements Rs by the
// access size (for @-Rs forms).
static void emit_memhandler_write_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size)
{
  int hr, hr2;
  u32 val;

  // stage the value in arg1 first
  if (rd == SHR_TMP) {
    host_arg2reg(hr2, 1); // already locked and prepared by caller
  } else if ((size & MF_PREDECR) && rd == rs) { // must avoid caching rd in arg1
    // copy rd out so the decrement of rs below doesn't change the stored value;
    // NOTE(review): rcache_free(hr2) right after filling it presumably just
    // detaches arg1 from the cache while keeping the copied value — confirm
    hr2 = rcache_get_reg_arg(1, rd, &hr);
    if (hr != hr2) {
      emith_move_r_r(hr2, hr);
      rcache_free(hr2);
    }
  } else
    hr2 = rcache_get_reg_arg(1, rd, NULL);
  if (rd != SHR_TMP)
    rcache_unlock(guest_regs[rd].vreg); // unlock in case rd is in arg0

  // then the address in arg0
  if (gconst_get(rs, &val) && !rcache_is_cached(rs)) {
    // constant address not bound to a host reg
    hr = rcache_get_tmp_arg(0);
    if (size & MF_PREDECR) {
      val -= 1 << (size & MF_SIZEMASK);
      gconst_new(rs, val);
    }
    emith_move_r_imm(hr, val + offs);
  } else if (offs || (size & MF_PREDECR)) {
    if (size & MF_PREDECR)
      emit_sub_r_imm(rs, 1 << (size & MF_SIZEMASK));
    rcache_unlock(guest_regs[rs].vreg); // unlock in case rs is in arg0
    hr = rcache_get_reg_arg(0, rs, &hr2);
    if (offs || hr != hr2)
      emith_add_r_r_imm(hr, hr2, offs);
  } else
    hr = rcache_get_reg_arg(0, rs, NULL);

  emit_memhandler_write(size);
}
2875 
// rd = @(Rx,Ry); rd < 0 -> return a temp
// Read from the sum of two guest registers. If either operand is a small
// constant it is treated as the offset and the reg+offs path is used, which
// can benefit from constant/base propagation there.
// Returns the host register holding the result.
static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size)
{
  int hr, hr2;
  int tx, ty;
#if PROPAGATE_CONSTANTS
  u32 offs;

  // if offs is larger than 0x01000000, it's most probably the base address part
  if (gconst_get(ry, &offs) && offs < 0x01000000)
    return emit_memhandler_read_rr(sh2, rd, rx, offs, size);
  if (gconst_get(rx, &offs) && offs < 0x01000000)
    return emit_memhandler_read_rr(sh2, rd, ry, offs, size);
#endif
  // generic case: compute Rx+Ry into arg0 and call the handler
  hr = rcache_get_reg_arg(0, rx, &tx);
  ty = rcache_get_reg(ry, RC_GR_READ, NULL);
  emith_add_r_r_r(hr, tx, ty);
  hr = emit_memhandler_read(size);

  if (rd == SHR_TMP)
    hr2 = hr;
  else
    // try to bind the result temp directly to rd to avoid a move
    hr2 = rcache_map_reg(rd, hr);

  if (hr != hr2) {
    emith_move_r_r(hr2, hr);
    rcache_free_tmp(hr);
  }
  return hr2;
}
2906 
// @(Rx,Ry) = rd; rd < 0 -> write arg1
// Write rd to the sum of two guest registers; same constant-offset folding
// as emit_indirect_indexed_read.
static void emit_indirect_indexed_write(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size)
{
  int hr, tx, ty;
#if PROPAGATE_CONSTANTS
  u32 offs;

  // if offs is larger than 0x01000000, it's most probably the base address part
  if (gconst_get(ry, &offs) && offs < 0x01000000)
    return emit_memhandler_write_rr(sh2, rd, rx, offs, size);
  if (gconst_get(rx, &offs) && offs < 0x01000000)
    return emit_memhandler_write_rr(sh2, rd, ry, offs, size);
#endif
  // generic case: value in arg1, address Rx+Ry in arg0
  if (rd != SHR_TMP)
    rcache_get_reg_arg(1, rd, NULL);
  hr = rcache_get_reg_arg(0, rx, &tx);
  ty = rcache_get_reg(ry, RC_GR_READ, NULL);
  emith_add_r_r_r(hr, tx, ty);
  emit_memhandler_write(size);
}
2927 
// @Rn+,@Rm+
// Issue the two post-increment reads of a MAC.W/MAC.L insn; host regs
// holding the values are returned in *rnr/*rmr.
static void emit_indirect_read_double(SH2 *sh2, int *rnr, int *rmr, sh2_reg_e rn, sh2_reg_e rm, int size)
{
  int tmp;

  // unlock rn, rm here to avoid REG shortage in MAC operation
  tmp = emit_memhandler_read_rr(sh2, SHR_TMP, rn, 0, size | MF_POSTINCR);
  rcache_unlock(guest_regs[rn].vreg);
  // the 2nd handler call would clobber the 1st result; park it in a save slot
  tmp = rcache_save_tmp(tmp);
  *rmr = emit_memhandler_read_rr(sh2, SHR_TMP, rm, 0, size | MF_POSTINCR);
  rcache_unlock(guest_regs[rm].vreg);
  *rnr = rcache_restore_tmp(tmp);
}
2941 
// Load (is_write=0) or store (is_write=1) all statically allocated/pinned
// guest registers from/to the SH2 context. Runs of guest regs mapped to
// consecutive host regs are coalesced into one multi-reg memory op, using
// tmpr as scratch where the host backend needs one.
static void emit_do_static_regs(int is_write, int tmpr)
{
  int i, r, count;

  for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
    if (guest_regs[i].flags & (GRF_STATIC|GRF_PINNED))
      r = cache_regs[guest_regs[i].vreg].hreg;
    else
      continue;

    // extend the run while the next guest reg sits in the next host reg
    for (count = 1; i < ARRAY_SIZE(guest_regs) - 1; i++, r++) {
      if ((guest_regs[i + 1].flags & (GRF_STATIC|GRF_PINNED)) &&
          cache_regs[guest_regs[i + 1].vreg].hreg == r + 1)
        count++;
      else
        break;
    }

    if (count > 1) {
      // i, r point to last item
      if (is_write)
        emith_ctx_write_multiple(r - count + 1, (i - count + 1) * 4, count, tmpr);
      else
        emith_ctx_read_multiple(r - count + 1, (i - count + 1) * 4, count, tmpr);
    } else {
      if (is_write)
        emith_ctx_write(r, i * 4);
      else
        emith_ctx_read(r, i * 4);
    }
  }
}
2974 
2975 #if DIV_OPTIMIZER
2976 // divide operation replacement functions, called by compiled code. Only the
2977 // 32:16 cases and the 64:32 cases described in the SH2 prog man are replaced.
2978 
// 32:16 unsigned division helper for the DIV0U/DIV1 sequence replacement.
// dv = dividend, ds = divisor in the upper 16 bits. Result layout mirrors
// the register state the DIV1 sequence would have produced.
static uint32_t REGPARM(2) sh2_drc_divu32(uint32_t dv, uint32_t ds)
{
  if (ds && ds >= dv) {
    // good case: no divide by 0, and no result overflow
    uint32_t d16 = ds >> 16;
    uint32_t q = dv / d16;
    uint32_t r = dv - q * d16;
    if (!(q & 1))
      r -= d16;
    return (uint16_t)q | ((2 * r + (q >> 31)) << 16);
  }

  // bad case: emulate the bit-serial sh2 DIV1 algorithm to get the exact
  // (overflow/div-by-0) register state
  {
    uint32_t acc = dv, prev;
    int q = 0, t = 0, step;
    for (step = 0; step < 16; step++) {
      prev = acc >> 31;
      acc = (acc << 1) | t;
      t = prev;
      prev = acc;
      if (q) {
        acc += ds;
        q = acc < prev;
      } else {
        acc -= ds;
        q = !(acc < prev);
      }
      q ^= t;
      t = !q;
    }
    return (acc << 1) | t;
  }
}
3001 
// 64:32 unsigned division helper for the DIV0U/DIV1 sequence replacement.
// Dividend is dh:*dl, divisor ds; quotient goes back to *dl, remainder-state
// word is returned, mirroring the DIV1 sequence register state.
static uint32_t REGPARM(3) sh2_drc_divu64(uint32_t dh, uint32_t *dl, uint32_t ds)
{
  uint64_t num = *dl | ((uint64_t)dh << 32);

  if (ds > 1 && ds >= dh) {
    // good case: no divide by 0, and no result overflow
    uint32_t q = num / ds;
    uint32_t r = num - (q * ds);
    if (!(q & 1))
      r -= ds;
    *dl = q;
    return r;
  }

  // bad case: emulate the bit-serial sh2 DIV1 algorithm to get the exact
  // (overflow/div-by-0) register state
  {
    uint64_t acc = num, prev;
    int q = 0, t = 0, step;
    for (step = 0; step < 32; step++) {
      prev = acc >> 63;
      acc = (acc << 1) | t;
      t = prev;
      prev = acc;
      if (q) {
        acc += (uint64_t)ds << 32;
        q = acc < prev;
      } else {
        acc -= (uint64_t)ds << 32;
        q = !(acc < prev);
      }
      q ^= t;
      t = !q;
    }
    *dl = (acc << 1) | t;
    return acc >> 32;
  }
}
3028 
// 32:16 signed division helper for the DIV0S/DIV1 sequence replacement.
// dv = dividend, ds = divisor in the upper 16 bits; result layout mirrors
// the register state the DIV1 sequence would have produced.
// Fixed: magnitudes are now computed in unsigned arithmetic (abs(INT32_MIN)
// is UB), and the fallback loop runs on unsigned values (left-shifting or
// overflowing a negative signed int is UB).
static uint32_t REGPARM(2) sh2_drc_divs32(int32_t dv, int32_t ds)
{
  uint32_t adv = (dv < 0 ? -(uint32_t)dv : (uint32_t)dv);
  uint32_t ads = (ds < 0 ? -(uint32_t)ds : (uint32_t)ds) >> 16;
  if (ads > 1 && ads > adv>>16 && (int32_t)ads > 0 && !(uint16_t)ds) {
    // good case: no divide by 0, and no result overflow
    uint32_t quot = adv / ads, rem = adv - (quot * ads);
    int m1 = (rem ? dv^ds : ds) < 0;
    if (rem && dv < 0)  rem = (quot&1 ? -rem : +ads-rem);
    else                rem = (quot&1 ? +rem : -ads+rem);
    quot = ((dv^ds)<0 ? -quot : +quot) - m1;
    return (uint16_t)quot | ((2*rem + (quot>>31)) << 16);
  } else {
    // bad case: use the sh2 algo to get the right result; all arithmetic
    // in uint32_t so wraparound/shift behavior is well defined
    uint32_t udv = (uint32_t)dv, uds = (uint32_t)ds;
    int m = (uint32_t)ds>>31, q = (uint32_t)dv>>31, t = m^q, s = 16;
    while (s--) {
      uint32_t v = udv>>31;
      udv = (udv<<1) | t;
      t = v;
      v = udv;
      if (m^q)  udv += uds, q =   udv < v;
      else      udv -= uds, q = !(udv < v);
      q ^= m^t, t = !(m^q);
    }
    return (udv<<1) | t;
  }
}
3055 
// 64:32 signed division helper for the DIV0S/DIV1 sequence replacement.
// Dividend is dh:*dl, divisor ds; quotient goes back to *dl, remainder-state
// word is returned, mirroring the DIV1 sequence register state.
// Fixed: the dividend is assembled and negated in uint64_t ((int64_t)dh << 32
// for negative dh and -_dv at INT64_MIN are UB), and abs() on the divisor is
// replaced by unsigned negation (abs(INT32_MIN) is UB).
static uint32_t REGPARM(3) sh2_drc_divs64(int32_t dh, uint32_t *dl, int32_t ds)
{
  uint64_t udv = *dl | ((uint64_t)dh << 32); // modular conv sign-extends dh
  uint64_t adv = ((int64_t)udv < 0 ? -udv : udv);
  uint32_t ads = (ds < 0 ? -(uint32_t)ds : (uint32_t)ds);
  if (ads > 1 && ads > adv>>32 && (int64_t)adv > 0) {
    // good case: no divide by 0, and no result overflow
    uint32_t quot = adv / ads, rem = adv - ((uint64_t)quot * ads);
    int m1 = (rem ? dh^ds : ds) < 0;
    if (rem && dh < 0) rem = (quot&1 ? -rem : +ads-rem);
    else               rem = (quot&1 ? +rem : -ads+rem);
    quot = ((dh^ds)<0 ? -quot : +quot) - m1;
    *dl = quot;
    return rem;
  } else {
    // bad case: use the sh2 algo to get the right result
    uint64_t dv = udv;
    int m = (uint32_t)ds>>31, q = (uint64_t)dv>>63, t = m^q, s = 32;
    while (s--) {
      uint64_t v = dv>>63;
      dv = (dv<<1) | t;
      t = v;
      v = dv;
      if (m^q)  dv += ((uint64_t)ds << 32), q =   dv < v;
      else      dv -= ((uint64_t)ds << 32), q = !(dv < v);
      q ^= m^t, t = !(m^q);
    }
    *dl = (dv<<1) | t;
    return (dv>>32);
  }
}
3087 #endif
3088 
3089 // block local link stuff
struct linkage {
  u32 pc;                // guest PC of the branch target / link site
  void *ptr;             // host code address (resolved target or patch site)
  struct block_link *bl; // external block link to fix up, if any
  u32 mask;              // blx targets: bit0=exit, bit1=local branch;
                         // pinned loops: mask of guest regs to pin
};
3096 
// Linear search for pc in an (unsorted) linkage array.
// Returns the index, or -1 if not found.
// Fixed: index is int to match the int size parameter and the int return
// type (the size_t loop variable caused a signed/unsigned comparison and a
// narrowing conversion on return).
static inline int find_in_linkage(const struct linkage *array, int size, u32 pc)
{
  int i;

  for (i = 0; i < size; i++)
    if (pc == array[i].pc)
      return i;

  return -1;
}
3106 
// Binary search for pc in a linkage array sorted by ascending pc.
// Returns the index, or -1 if not found.
static int find_in_sorted_linkage(const struct linkage *array, int size, u32 pc)
{
  // binary search in sorted array
  int left = 0, right = size - 1;

  while (left <= right)
  {
    // overflow-safe midpoint (left+right could overflow for huge arrays)
    int middle = left + (right - left) / 2;
    if (array[middle].pc == pc)
      return middle;
    else if (array[middle].pc < pc)
      left = middle + 1;
    else
      right = middle - 1;
  }
  return -1;
}
3123 
// Emit the branch linkage area for a block: for every recorded branch in
// links[], either patch it to its local target in targets[], or emit a stub
// (dispatcher call or exit) and patch the branch to jump there.
// links[].mask: bit1 set = local branch, else external/exit (bit0 = exit).
static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tcache_id,
                                const struct linkage *targets, int target_count,
                                const struct linkage *links, int link_count)
{
  struct block_link *bl;
  int u, v, tmp;

  emith_flush();
  for (u = 0; u < link_count; u++) {
    emith_pool_check();
    // look up local branch targets
    if (links[u].mask & 0x2) {
      v = find_in_sorted_linkage(targets, target_count, links[u].pc);
      if (v < 0 || ! targets[v].ptr) {
        // forward branch not yet resolved, prepare external linking
        emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
        bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id);
        if (bl)
          bl->type = BL_LDJMP;
        // dispatcher takes the guest PC in arg0
        tmp = rcache_get_tmp_arg(0);
        emith_move_r_imm(tmp, links[u].pc);
        rcache_free_tmp(tmp);
        emith_jump_patchable(sh2_drc_dispatcher);
      } else if (emith_jump_patch_inrange(links[u].ptr, targets[v].ptr)) {
        // inrange local branch
        emith_jump_patch(links[u].ptr, targets[v].ptr, NULL);
      } else {
        // far local branch: route through a trampoline jump emitted here
        emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
        emith_jump(targets[v].ptr);
      }
    } else {
      // external or exit, emit blx area entry
      void *target = (links[u].mask & 0x1 ? sh2_drc_exit : sh2_drc_dispatcher);
      if (links[u].bl)
        links[u].bl->blx = tcache_ptr;
      emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
      tmp = rcache_get_tmp_arg(0);
      emith_move_r_imm(tmp, links[u].pc & ~1);
      rcache_free_tmp(tmp);
      emith_jump(target);
    }
  }
}
3168 
// Copy the current T bit (SR bit 0) into SR's T_save bit so a branch whose
// delay slot clobbers T can still test the pre-slot value.
// NOTE(review): assumes T_SHIFT is the T_save bit position — confirm.
#define DELAY_SAVE_T(sr) { \
  int t_ = rcache_get_tmp(); \
  emith_bic_r_imm(sr, T_save); \
  emith_and_r_r_imm(t_, sr, 1); \
  emith_or_r_r_lsl(sr, t_, T_SHIFT); \
  rcache_free_tmp(t_); \
}

// Deduct the locally accumulated cycle count (the enclosing function's
// `cycles` variable) from the cycle counter kept in SR's upper bits
// (one cycle = 1<<12), then reset the accumulator.
#define FLUSH_CYCLES(sr) \
  if (cycles > 0) { \
    emith_sub_r_imm(sr, cycles << 12); \
    cycles = 0; \
  }

// forward declaration; defined later in this file
static void *dr_get_pc_base(u32 pc, SH2 *sh2);
3184 
sh2_translate(SH2 * sh2,int tcache_id)3185 static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
3186 {
3187   // branch targets in current block
3188   static struct linkage branch_targets[MAX_LOCAL_TARGETS];
3189   int branch_target_count = 0;
3190   // unresolved local or external targets with block link/exit area if needed
3191   static struct linkage blx_targets[MAX_LOCAL_BRANCHES];
3192   int blx_target_count = 0;
3193 
3194   static u8 op_flags[BLOCK_INSN_LIMIT];
3195 
3196   enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 };
3197   struct drcf {
3198     int delay_reg:8;
3199     u32 loop_type:8;
3200     u32 polling:8;
3201     u32 pinning:1;
3202     u32 test_irq:1;
3203     u32 pending_branch_direct:1;
3204     u32 pending_branch_indirect:1;
3205     u32 Tflag:2, Mflag:2;
3206   } drcf = { 0, };
3207 
3208 #if LOOP_OPTIMIZER
3209   // loops with pinned registers for optimzation
3210   // pinned regs are like statics and don't need saving/restoring inside a loop
3211   static struct linkage pinned_loops[MAX_LOCAL_TARGETS/16];
3212   int pinned_loop_count = 0;
3213 #endif
3214 
3215   // PC of current, first, last SH2 insn
3216   u32 pc, base_pc, end_pc;
3217   u32 base_literals, end_literals;
3218   u8 *block_entry_ptr;
3219   struct block_desc *block;
3220   struct block_entry *entry;
3221   struct block_link *bl;
3222   u16 *dr_pc_base;
3223   struct op_data *opd;
3224   int blkid_main = 0;
3225   int skip_op = 0;
3226   int tmp, tmp2;
3227   int cycles;
3228   int i, v;
3229   u32 u, m1, m2, m3, m4;
3230   int op;
3231   u16 crc;
3232 
3233   base_pc = sh2->pc;
3234 
3235   // get base/validate PC
3236   dr_pc_base = dr_get_pc_base(base_pc, sh2);
3237   if (dr_pc_base == (void *)-1) {
3238     printf("invalid PC, aborting: %08lx\n", (long)base_pc);
3239     // FIXME: be less destructive
3240     exit(1);
3241   }
3242 
3243   // initial passes to disassemble and analyze the block
3244   crc = scan_block(base_pc, sh2->is_slave, op_flags, &end_pc, &base_literals, &end_literals);
3245   end_literals = dr_check_nolit(base_literals, end_literals, tcache_id);
3246   if (base_literals == end_literals) // map empty lit section to end of code
3247     base_literals = end_literals = end_pc;
3248 
3249   // if there is already a translated but inactive block, reuse it
3250   block = dr_find_inactive_block(tcache_id, crc, base_pc, end_pc - base_pc,
3251     base_literals, end_literals - base_literals);
3252 
3253   if (block) {
3254     dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm',
3255       base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr);
3256     dr_activate_block(block, tcache_id, sh2->is_slave);
3257     emith_update_cache();
3258     return block->entryp[0].tcache_ptr;
3259   }
3260 
3261   // collect branch_targets that don't land on delay slots
3262   m1 = m2 = m3 = m4 = v = op = 0;
3263   for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) {
3264     if (op_flags[i] & OF_DELAY_OP)
3265       op_flags[i] &= ~OF_BTARGET;
3266     if (op_flags[i] & OF_BTARGET) {
3267       if (branch_target_count < ARRAY_SIZE(branch_targets))
3268         branch_targets[branch_target_count++] = (struct linkage) { .pc = pc };
3269       else {
3270         printf("warning: linkage overflow\n");
3271         end_pc = pc;
3272         break;
3273       }
3274     }
3275     if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc)
3276       op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change
3277     // unify T and SR since rcache doesn't know about "virtual" guest regs
3278     if (ops[i].source & BITMASK1(SHR_T))  ops[i].source |= BITMASK1(SHR_SR);
3279     if (ops[i].dest   & BITMASK1(SHR_T))  ops[i].source |= BITMASK1(SHR_SR);
3280     if (ops[i].dest   & BITMASK1(SHR_T))  ops[i].dest   |= BITMASK1(SHR_SR);
3281 #if LOOP_DETECTION
3282     // loop types detected:
3283     // 1. target: ... BRA target -> idle loop
3284     // 2. target: ... delay insn ... BF target -> delay loop
3285     // 3. target: ... poll  insn ... BF/BT target -> poll loop
3286     // 4. target: ... poll  insn ... BF/BT exit ... BRA target, exit: -> poll
3287     // conditions:
3288     // a. no further branch targets between target and back jump.
3289     // b. no unconditional branch insn inside the loop.
3290     // c. exactly one poll or delay insn is allowed inside a delay/poll loop
3291     // (scan_block marks loops only if they meet conditions a through c)
3292     // d. idle loops do not modify anything but PC,SR and contain no branches
3293     // e. delay/poll loops do not modify anything but the concerned reg,PC,SR
3294     // f. loading constants into registers inside the loop is allowed
3295     // g. a delay/poll loop must have a conditional branch somewhere
3296     // h. an idle loop must not have a conditional branch
3297     if (op_flags[i] & OF_BTARGET) {
3298       // possible loop entry point
3299       drcf.loop_type = op_flags[i] & OF_LOOP;
3300       drcf.pending_branch_direct = drcf.pending_branch_indirect = 0;
3301       op = OF_IDLE_LOOP; // loop type
3302       v = i;
3303       m1 = m2 = m3 = m4 = 0;
3304       if (!drcf.loop_type)   // reset basic loop it it isn't recognized as loop
3305         op_flags[i] &= ~OF_BASIC_LOOP;
3306     }
3307     if (drcf.loop_type) {
3308       // calculate reg masks for loop pinning
3309       m4 |= ops[i].source & ~m3;
3310       m3 |= ops[i].dest;
3311       // detect loop type, and store poll/delay register
3312       if (op_flags[i] & OF_POLL_INSN) {
3313         op = OF_POLL_LOOP;
3314         m1 |= ops[i].dest;   // loop poll/delay regs
3315       } else if (op_flags[i] & OF_DELAY_INSN) {
3316         op = OF_DELAY_LOOP;
3317         m1 |= ops[i].dest;
3318       } else if (ops[i].op != OP_LOAD_POOL && ops[i].op != OP_LOAD_CONST
3319               && (ops[i].op != OP_MOVE || op != OF_POLL_LOOP)) {
3320         // not (MOV @(PC) or MOV # or (MOV reg and poll)),   condition f
3321         m2 |= ops[i].dest;   // regs modified by other insns
3322       }
3323       // branch detector
3324       if (OP_ISBRAIMM(ops[i].op)) {
3325         if (ops[i].imm == base_pc + 2*v)
3326           drcf.pending_branch_direct = 1;       // backward branch detected
3327         else
3328           op_flags[v] &= ~OF_BASIC_LOOP;        // no basic loop
3329       }
3330       if (OP_ISBRACND(ops[i].op))
3331         drcf.pending_branch_indirect = 1;       // conditions g,h - cond.branch
3332       // poll/idle loops terminate with their backwards branch to the loop start
3333       if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) {
3334         m2 &= ~(m1 | BITMASK3(SHR_PC, SHR_SR, SHR_T)); // conditions d,e + g,h
3335         if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect)))
3336           op = 0;                               // conditions not met
3337         op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type
3338         drcf.loop_type = 0;
3339 #if LOOP_OPTIMIZER
3340         if (op_flags[v] & OF_BASIC_LOOP) {
3341           m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM);
3342           if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) &&
3343               pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) {
3344             pinned_loops[pinned_loop_count++] =
3345                 (struct linkage) { .pc = base_pc + 2*v, .mask = m3 };
3346           } else
3347             op_flags[v] &= ~OF_BASIC_LOOP;
3348         }
3349 #endif
3350       }
3351     }
3352 #endif
3353   }
3354 
3355   tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2, branch_target_count);
3356 #if (DRC_DEBUG & 4)
3357   tcache_dsm_ptrs[tcache_id] = tcache_ptr;
3358 #endif
3359 
3360   block = dr_add_block(branch_target_count, base_pc, end_pc - base_pc,
3361     base_literals, end_literals-base_literals, crc, sh2->is_slave, &blkid_main);
3362   if (block == NULL)
3363     return NULL;
3364 
3365   block_entry_ptr = tcache_ptr;
3366   dbg(2, "== %csh2 block #%d,%d %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm',
3367     tcache_id, blkid_main, base_pc, end_pc, base_literals, end_literals, block_entry_ptr);
3368 
3369 
3370   // clear stale state after compile errors
3371   rcache_invalidate();
3372   emith_invalidate_t();
3373   drcf = (struct drcf) { 0 };
3374 #if LOOP_OPTIMIZER
3375   pinned_loops[pinned_loop_count].pc = -1;
3376   pinned_loop_count = 0;
3377 #endif
3378 
3379   // -------------------------------------------------
3380   // 3rd pass: actual compilation
3381   pc = base_pc;
3382   cycles = 0;
3383   for (i = 0; pc < end_pc; i++)
3384   {
3385     u32 delay_dep_fw = 0, delay_dep_bk = 0;
3386     int tmp3, tmp4;
3387     int sr;
3388 
3389     if (op_flags[i] & OF_BTARGET)
3390     {
3391       if (pc != base_pc)
3392       {
3393         sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3394         FLUSH_CYCLES(sr);
3395         emith_sync_t(sr);
3396         drcf.Mflag = FLG_UNKNOWN;
3397         rcache_flush();
3398         emith_flush();
3399       }
3400 
3401       // make block entry
3402       v = block->entry_count;
3403       entry = &block->entryp[v];
3404       if (v < branch_target_count)
3405       {
3406         entry = &block->entryp[v];
3407         entry->pc = pc;
3408         entry->tcache_ptr = tcache_ptr;
3409         entry->links = entry->o_links = NULL;
3410 #if (DRC_DEBUG & 2)
3411         entry->block = block;
3412 #endif
3413         block->entry_count++;
3414 
3415         dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p",
3416           sh2->is_slave ? 's' : 'm', tcache_id, blkid_main,
3417           pc, tcache_ptr);
3418       }
3419       else {
3420         dbg(1, "too many entryp for block #%d,%d pc=%08x",
3421           tcache_id, blkid_main, pc);
3422         break;
3423       }
3424 
3425       v = find_in_sorted_linkage(branch_targets, branch_target_count, pc);
3426       if (v >= 0)
3427         branch_targets[v].ptr = tcache_ptr;
3428 #if LOOP_DETECTION
3429       drcf.loop_type = op_flags[i] & OF_LOOP;
3430       drcf.delay_reg = -1;
3431       drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? MF_POLLING : 0);
3432 #endif
3433 
3434       rcache_clean();
3435 
3436 #if (DRC_DEBUG & 0x10)
3437       tmp = rcache_get_tmp_arg(0);
3438       emith_move_r_imm(tmp, pc);
3439       tmp = emit_memhandler_read(1);
3440       tmp2 = rcache_get_tmp();
3441       tmp3 = rcache_get_tmp();
3442       emith_move_r_imm(tmp2, (s16)FETCH_OP(pc));
3443       emith_move_r_imm(tmp3, 0);
3444       emith_cmp_r_r(tmp, tmp2);
3445       EMITH_SJMP_START(DCOND_EQ);
3446       emith_read_r_r_offs_c(DCOND_NE, tmp3, tmp3, 0); // crash
3447       EMITH_SJMP_END(DCOND_EQ);
3448       rcache_free_tmp(tmp);
3449       rcache_free_tmp(tmp2);
3450       rcache_free_tmp(tmp3);
3451 #endif
3452 
3453       // check cycles
3454       sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
3455 
3456 #if LOOP_OPTIMIZER
3457       if (op_flags[i] & OF_BASIC_LOOP) {
3458         if (pinned_loops[pinned_loop_count].pc == pc) {
3459           // pin needed regs on loop entry
3460           FOR_ALL_BITS_SET_DO(pinned_loops[pinned_loop_count].mask, v, rcache_pin_reg(v));
3461           emith_flush();
3462           // store current PC as loop target
3463           pinned_loops[pinned_loop_count].ptr = tcache_ptr;
3464           drcf.pinning = 1;
3465         } else
3466           op_flags[i] &= ~OF_BASIC_LOOP;
3467       }
3468 
3469       if (op_flags[i] & OF_BASIC_LOOP) {
3470         // if exiting a pinned loop pinned regs must be written back to ctx
3471         // since they are reloaded in the loop entry code
3472         emith_cmp_r_imm(sr, 0);
3473         EMITH_JMP_START(DCOND_GT);
3474         rcache_save_pinned();
3475 
3476         if (blx_target_count < ARRAY_SIZE(blx_targets)) {
3477           // exit via stub in blx table (saves some 1-3 insns in the main flow)
3478           blx_targets[blx_target_count++] =
3479               (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 };
3480           emith_jump_patchable(tcache_ptr);
3481         } else {
3482           // blx table full, must inline exit code
3483           tmp = rcache_get_tmp_arg(0);
3484           emith_move_r_imm(tmp, pc);
3485           emith_jump(sh2_drc_exit);
3486           rcache_free_tmp(tmp);
3487         }
3488         EMITH_JMP_END(DCOND_GT);
3489       } else
3490 #endif
3491       {
3492         if (blx_target_count < ARRAY_SIZE(blx_targets)) {
3493           // exit via stub in blx table (saves some 1-3 insns in the main flow)
3494           emith_cmp_r_imm(sr, 0);
3495           blx_targets[blx_target_count++] =
3496               (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 };
3497           emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
3498         } else {
3499           // blx table full, must inline exit code
3500           tmp = rcache_get_tmp_arg(0);
3501           emith_cmp_r_imm(sr, 0);
3502           EMITH_SJMP_START(DCOND_GT);
3503           emith_move_r_imm_c(DCOND_LE, tmp, pc);
3504           emith_jump_cond(DCOND_LE, sh2_drc_exit);
3505           EMITH_SJMP_END(DCOND_GT);
3506           rcache_free_tmp(tmp);
3507         }
3508       }
3509 
3510 #if (DRC_DEBUG & 32)
3511       // block hit counter
3512       tmp  = rcache_get_tmp_arg(0);
3513       tmp2 = rcache_get_tmp_arg(1);
3514       emith_move_r_ptr_imm(tmp, (uptr)entry);
3515       emith_read_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count));
3516       emith_add_r_imm(tmp2, 1);
3517       emith_write_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count));
3518       rcache_free_tmp(tmp);
3519       rcache_free_tmp(tmp2);
3520 #endif
3521 
3522 #if (DRC_DEBUG & (8|256|512|1024))
3523       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3524       emith_sync_t(sr);
3525       rcache_clean();
3526       tmp = rcache_used_hregs_mask();
3527       emith_save_caller_regs(tmp);
3528       emit_do_static_regs(1, 0);
3529       rcache_get_reg_arg(2, SHR_SR, NULL);
3530       tmp2 = rcache_get_tmp_arg(0);
3531       tmp3 = rcache_get_tmp_arg(1);
3532       tmp4 = rcache_get_tmp();
3533       emith_move_r_ptr_imm(tmp2, tcache_ptr);
3534       emith_move_r_r_ptr(tmp3, CONTEXT_REG);
3535       emith_move_r_imm(tmp4, pc);
3536       emith_ctx_write(tmp4, SHR_PC * 4);
3537       rcache_invalidate_tmp();
3538       emith_abicall(sh2_drc_log_entry);
3539       emith_restore_caller_regs(tmp);
3540 #endif
3541 
3542       do_host_disasm(tcache_id);
3543       rcache_unlock_all();
3544     }
3545 
3546 #ifdef DRC_CMP
3547     if (!(op_flags[i] & OF_DELAY_OP)) {
3548       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
3549       FLUSH_CYCLES(sr);
3550       emith_sync_t(sr);
3551       emit_move_r_imm32(SHR_PC, pc);
3552       rcache_clean();
3553 
3554       tmp = rcache_used_hregs_mask();
3555       emith_save_caller_regs(tmp);
3556       emit_do_static_regs(1, 0);
3557       emith_pass_arg_r(0, CONTEXT_REG);
3558       emith_abicall(do_sh2_cmp);
3559       emith_restore_caller_regs(tmp);
3560     }
3561 #endif
3562 
    // emit blx area if limits are approached
    // (the blx area holds the branch-linkage stubs collected in blx_targets;
    // it must be flushed early if the table is nearly full, or if the oldest
    // pending patch site is about to fall out of host branch range - the
    // +0x100 leaves margin for code emitted before the next check)
    if (blx_target_count && (blx_target_count > ARRAY_SIZE(blx_targets)-4 ||
        !emith_jump_patch_inrange(blx_targets[0].ptr, tcache_ptr+0x100))) {
      u8 *jp;
      rcache_invalidate_tmp();
      jp = tcache_ptr;
      // jump over the stub area so straight-line execution isn't disturbed
      emith_jump_patchable(tcache_ptr);
      emit_branch_linkage_code(sh2, block, tcache_id, branch_targets,
                          branch_target_count, blx_targets, blx_target_count);
      blx_target_count = 0;
      do_host_disasm(tcache_id);
      emith_jump_patch(jp, tcache_ptr, NULL);
    }

    // give the backend a chance to emit a literal pool if one is getting due
    emith_pool_check();
3578 
    // fetch the opcode and its scan_block decode info for this insn
    opd = &ops[i];
    op = FETCH_OP(pc);
#if (DRC_DEBUG & 4)
    // debug trace: print the disassembly, prefixed with a marker for block
    // targets and detected loop kinds (+ delay, = poll, ~ idle, * plain)
    DasmSH2(sh2dasm_buff, pc, op);
    if (op_flags[i] & OF_BTARGET) {
      if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP)     tmp3 = '+';
      else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '=';
      else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~';
      else                                              tmp3 = '*';
    } else if (drcf.loop_type)                          tmp3 = '.';
    else                                                tmp3 = ' ';
    printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff);
#endif

    // pc is advanced immediately; in the handlers below it refers to the
    // address of the *next* instruction
    pc += 2;
#if (DRC_DEBUG & 2)
    insns_compiled++;
#endif
    // skip insns already consumed by an earlier emitter (e.g. the inlined
    // division sequences set skip_op to swallow the DIV1/ROTCL run)
    if (skip_op > 0) {
      skip_op--;
      continue;
    }
3601 
    if (op_flags[i] & OF_DELAY_OP)
    {
      // handle delay slot dependencies
      // fw: regs the slot insn writes which the branch insn (at i-1) reads
      // bk: regs the slot insn reads which the branch insn writes
      delay_dep_fw = opd->dest & ops[i-1].source;
      delay_dep_bk = opd->source & ops[i-1].dest;
      if (delay_dep_fw & BITMASK1(SHR_T)) {
        // slot clobbers a T the branch depends on: save the pre-slot T
        sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
        emith_sync_t(sr);
        DELAY_SAVE_T(sr);
      }
      if (delay_dep_bk & BITMASK1(SHR_PC)) {
        // slot insn reads PC, which the branch modifies; only MOVA and
        // PC-relative loads legitimately do this
        if (opd->op != OP_LOAD_POOL && opd->op != OP_MOVA) {
          // can only be those 2 really..
          elprintf_sh2(sh2, EL_ANOMALY,
            "drc: illegal slot insn %04x @ %08x?", op, pc - 2);
        }
        // store PC for MOVA/MOV @PC address calculation
        if (opd->imm != 0)
          ; // case OP_BRANCH - addr already resolved in scan_block
        else {
          switch (ops[i-1].op) {
          case OP_BRANCH:
            emit_move_r_imm32(SHR_PC, ops[i-1].imm);
            break;
          case OP_BRANCH_CT:
          case OP_BRANCH_CF:
            // conditional branch: select between fallthrough pc and the
            // branch target depending on T, using a conditional move
            sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
            tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL);
            emith_move_r_imm(tmp, pc);
            tmp2 = emith_tst_t(sr, (ops[i-1].op == OP_BRANCH_CT));
            tmp3 = emith_invert_cond(tmp2);
            EMITH_SJMP_START(tmp3);
            emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm);
            EMITH_SJMP_END(tmp3);
            break;
          case OP_BRANCH_N: // BT/BF known not to be taken
            // XXX could modify opd->imm instead?
            emit_move_r_imm32(SHR_PC, pc);
            break;
          // case OP_BRANCH_R OP_BRANCH_RF - PC already loaded
          }
        }
      }
      //if (delay_dep_fw & ~BITMASK1(SHR_T))
      //  dbg(1, "unhandled delay_dep_fw: %x", delay_dep_fw & ~BITMASK1(SHR_T));
      if (delay_dep_bk & ~BITMASK2(SHR_PC, SHR_PR))
        dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk);
    }
3650 
    // inform cache about future register usage
    // (look ahead up to 9 insns; the window is cut short at block targets,
    // delay slots and conditional branches, i.e. the next likely rcache
    // flush point)
    u32 late = 0;             // regs read by future ops
    u32 write = 0;            // regs written to (to detect write before read)
    u32 soon = 0;             // regs read soon
    for (v = 1; v <= 9; v++) {
      // no sense in looking any further than the next rcache flush
      tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) ||
                (OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP)));
      // XXX looking behind cond branch to avoid evicting regs used later?
      if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above)
        late |= opd[v].source & ~write;
        // ignore source regs after they have been written to
        write |= opd[v].dest;
        // regs needed in the next few instructions
        if (v <= 4)
          soon = late;
      } else
        break;
    }
    rcache_set_usage_now(opd[0].source);   // current insn
    rcache_set_usage_soon(soon);           // insns 1-4
    rcache_set_usage_late(late & ~soon);   // insns 5-9
    rcache_set_usage_discard(write & ~(late|soon));
    if (v <= 9)
      // upcoming rcache_flush, start writing back unused dirty stuff
      rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest));
3677 
    // dispatch on the pseudo-op classified by scan_block; ops not handled
    // here fall through to the raw opcode decoder switch below. The actual
    // control transfer for branches is emitted later, triggered by the
    // drcf.pending_branch_* flags set here.
    switch (opd->op)
    {
    case OP_BRANCH_N:
      // never taken, just use up cycles
      goto end_op;
    case OP_BRANCH:
    case OP_BRANCH_CT:
    case OP_BRANCH_CF:
      // direct (target known at compile time); PR is set for call-type ops
      if (opd->dest & BITMASK1(SHR_PR))
        emit_move_r_imm32(SHR_PR, pc + 2);
      drcf.pending_branch_direct = 1;
      goto end_op;

    case OP_BRANCH_R:
      // register-indirect branch (JMP/JSR @Rm): PC = Rm
      if (opd->dest & BITMASK1(SHR_PR))
        emit_move_r_imm32(SHR_PR, pc + 2);
      emit_move_r_r(SHR_PC, opd->rm);
      drcf.pending_branch_indirect = 1;
      goto end_op;

    case OP_BRANCH_RF:
      // PC-relative register branch (BRAF/BSRF): PC = pc + 2 + Rn
      tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
      tmp  = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL);
      emith_move_r_imm(tmp, pc + 2);
      if (opd->dest & BITMASK1(SHR_PR)) {
        tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE, NULL);
        emith_move_r_r(tmp3, tmp);
      }
      emith_add_r_r(tmp, tmp2);
      // if Rn is a known constant, keep PC known for constant propagation
      if (gconst_get(GET_Rn(), &u))
        gconst_set(SHR_PC, pc + 2 + u);
      drcf.pending_branch_indirect = 1;
      goto end_op;

    case OP_SLEEP: // SLEEP      0000000000011011
      printf("TODO sleep\n");
      goto end_op;

    case OP_RTE: // RTE        0000000000101011
      emith_invalidate_t();
      // pop PC
      tmp = emit_memhandler_read_rr(sh2, SHR_PC, SHR_SP, 0, 2 | MF_POSTINCR);
      rcache_free(tmp);
      // pop SR
      tmp = emit_memhandler_read_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_POSTINCR);
      sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
      emith_write_sr(sr, tmp);
      rcache_free_tmp(tmp);
      // new SR may unmask a pending interrupt, so recheck IRQs
      drcf.test_irq = 1;
      drcf.pending_branch_indirect = 1;
      goto end_op;

    case OP_UNDEFINED:
      elprintf_sh2(sh2, EL_ANOMALY, "drc: unhandled op %04x @ %08x", op, pc-2);
      // select the illegal-instruction exception vector (slot-illegal if
      // the op sits in a delay slot), then emit it like a TRAPA
      opd->imm = (op_flags[i] & OF_B_IN_DS) ? 6 : 4;
      // fallthrough
    case OP_TRAPA: // TRAPA #imm      11000011iiiiiiii
      // push SR
      tmp  = rcache_get_reg_arg(1, SHR_SR, &tmp2);
      emith_sync_t(tmp2);
      emith_clear_msb(tmp, tmp2, 22);
      emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR);
      // push PC
      if (opd->op == OP_TRAPA) {
        tmp = rcache_get_tmp_arg(1);
        emith_move_r_imm(tmp, pc);
      } else if (drcf.pending_branch_indirect) {
        tmp = rcache_get_reg_arg(1, SHR_PC, NULL);
      } else {
        tmp = rcache_get_tmp_arg(1);
        emith_move_r_imm(tmp, pc - 2);
      }
      emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR);
      // obtain new PC
      emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2);
      // indirect jump -> back to dispatcher
      drcf.pending_branch_indirect = 1;
      goto end_op;

    case OP_LOAD_POOL:
#if PROPAGATE_CONSTANTS
      // literal pool load with a resolvable address (in-block literals or
      // ROM): fold the loaded value into the constant tracker instead of
      // emitting a memory access
      if ((opd->imm && opd->imm >= base_pc && opd->imm < end_literals) ||
          dr_is_rom(opd->imm))
      {
        if (opd->size == 2)
          u = FETCH32(opd->imm);
        else
          u = (s16)FETCH_OP(opd->imm);
        // tweak for Blackthorne: avoid stack overwriting
        if (GET_Rn() == SHR_SP && u == 0x0603f800) u = 0x0603f880;
        gconst_new(GET_Rn(), u);
      }
      else
#endif
      {
        if (opd->imm != 0) {
          tmp = rcache_get_tmp_arg(0);
          emith_move_r_imm(tmp, opd->imm);
        } else {
          // have to calculate read addr from PC for delay slot
          tmp = rcache_get_reg_arg(0, SHR_PC, &tmp2);
          if (opd->size == 2) {
            // MOV.L: address is longword-aligned
            emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4);
            emith_bic_r_imm(tmp, 3);
          }
          else
            emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 2);
        }
        tmp2 = emit_memhandler_read(opd->size);
        tmp3 = rcache_map_reg(GET_Rn(), tmp2);
        if (tmp3 != tmp2) {
          emith_move_r_r(tmp3, tmp2);
          rcache_free_tmp(tmp2);
        }
      }
      goto end_op;

    case OP_MOVA: // MOVA @(disp,PC),R0    11000111dddddddd
      if (opd->imm != 0)
        emit_move_r_imm32(SHR_R0, opd->imm);
      else {
        // have to calculate addr from PC for delay slot
        tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ, NULL);
        tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE, NULL);
        emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4);
        emith_bic_r_imm(tmp, 3);
      }
      goto end_op;
    }
3807 
    // raw opcode decoder for everything not classified as a pseudo-op above;
    // outer switch on the top 4 opcode bits, inner switches per SH-2 format
    switch ((op >> 12) & 0x0f)
    {
    /////////////////////////////////////////////
    case 0x00:
      switch (op & 0x0f)
      {
      case 0x02:
        switch (GET_Fx())
        {
        case 0: // STC SR,Rn  0000nnnn00000010
          tmp2 = SHR_SR;
          break;
        case 1: // STC GBR,Rn 0000nnnn00010010
          tmp2 = SHR_GBR;
          break;
        case 2: // STC VBR,Rn 0000nnnn00100010
          tmp2 = SHR_VBR;
          break;
        default:
          goto default_;
        }
        if (tmp2 == SHR_SR) {
          // SR needs T materialized and the internal extra bits masked off
          sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
          emith_sync_t(sr);
          tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL);
          emith_clear_msb(tmp, sr, 22); // reserved bits defined by ISA as 0
        } else
          emit_move_r_r(GET_Rn(), tmp2);
        goto end_op;
      case 0x04: // MOV.B Rm,@(R0,Rn)   0000nnnnmmmm0100
      case 0x05: // MOV.W Rm,@(R0,Rn)   0000nnnnmmmm0101
      case 0x06: // MOV.L Rm,@(R0,Rn)   0000nnnnmmmm0110
        emit_indirect_indexed_write(sh2, GET_Rm(), SHR_R0, GET_Rn(), op & 3);
        goto end_op;
      case 0x07: // MUL.L     Rm,Rn      0000nnnnmmmm0111
        tmp  = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
        tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
        tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
        emith_mul(tmp3, tmp2, tmp);
        goto end_op;
      case 0x08:
        switch (GET_Fx())
        {
        case 0: // CLRT               0000000000001000
          sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
#if T_OPTIMIZER
          // skip the store entirely if T is known to be overwritten soon
          if (~rcache_regs_discard & BITMASK1(SHR_T))
#endif
            emith_set_t(sr, 0);
          break;
        case 1: // SETT               0000000000011000
          sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
#if T_OPTIMIZER
          if (~rcache_regs_discard & BITMASK1(SHR_T))
#endif
            emith_set_t(sr, 1);
          break;
        case 2: // CLRMAC             0000000000101000
          emit_move_r_imm32(SHR_MACL, 0);
          emit_move_r_imm32(SHR_MACH, 0);
          break;
        default:
          goto default_;
        }
        goto end_op;
      case 0x09:
        switch (GET_Fx())
        {
        case 0: // NOP        0000000000001001
          break;
        case 1: // DIV0U      0000000000011001
          sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
          emith_invalidate_t();
          emith_bic_r_imm(sr, M|Q|T);
          drcf.Mflag = FLG_0;
#if DIV_OPTIMIZER
          // scan_block recognized a full unsigned division sequence
          // (DIV0U + n*(DIV1 [ROTCL])): replace it with a helper call and
          // reconstruct Q/T from the result, skipping the original insns
          if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) {
            // divide 32/16
            rcache_get_reg_arg(0, div(opd).rn, NULL);
            rcache_get_reg_arg(1, div(opd).rm, NULL);
            rcache_invalidate_tmp();
            emith_abicall(sh2_drc_divu32);
            tmp = rcache_get_tmp_ret();
            tmp2 = rcache_map_reg(div(opd).rn, tmp);
            if (tmp != tmp2)
              emith_move_r_r(tmp2, tmp);

            tmp3  = rcache_get_tmp();
            emith_and_r_r_imm(tmp3, tmp2, 1);     // Q = !Rn[0]
            emith_eor_r_r_imm(tmp3, tmp3, 1);
            emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);
            rcache_free_tmp(tmp3);
            emith_or_r_r_r_lsr(sr, sr, tmp2, 31); // T = Rn[31]
            skip_op = div(opd).div1 + div(opd).rotcl;
          }
          else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) {
            // divide 64/32
            tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL);
            emith_ctx_write(tmp4, offsetof(SH2, drc_tmp));
            tmp = rcache_get_tmp_arg(1);
            emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp));
            rcache_get_reg_arg(0, div(opd).rn, NULL);
            rcache_get_reg_arg(2, div(opd).rm, NULL);
            rcache_invalidate_tmp();
            emith_abicall(sh2_drc_divu64);
            tmp = rcache_get_tmp_ret();
            tmp2 = rcache_map_reg(div(opd).rn, tmp);
            tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL);
            if (tmp != tmp2)
              emith_move_r_r(tmp2, tmp);
            emith_ctx_read(tmp4, offsetof(SH2, drc_tmp));

            tmp3  = rcache_get_tmp();
            emith_and_r_r_imm(tmp3, tmp4, 1);     // Q = !Ro[0]
            emith_eor_r_r_imm(tmp3, tmp3, 1);
            emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);
            rcache_free_tmp(tmp3);
            emith_or_r_r_r_lsr(sr, sr, tmp4, 31); // T = Ro[31]
            skip_op = div(opd).div1 + div(opd).rotcl;
          }
#endif
          break;
        case 2: // MOVT Rn    0000nnnn00101001
          sr   = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
          emith_sync_t(sr);
          tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL);
          emith_clear_msb(tmp2, sr, 31);
          break;
        default:
          goto default_;
        }
        goto end_op;
      case 0x0a:
        switch (GET_Fx())
        {
        case 0: // STS      MACH,Rn   0000nnnn00001010
          tmp2 = SHR_MACH;
          break;
        case 1: // STS      MACL,Rn   0000nnnn00011010
          tmp2 = SHR_MACL;
          break;
        case 2: // STS      PR,Rn     0000nnnn00101010
          tmp2 = SHR_PR;
          break;
        default:
          goto default_;
        }
        emit_move_r_r(GET_Rn(), tmp2);
        goto end_op;
      case 0x0c: // MOV.B    @(R0,Rm),Rn      0000nnnnmmmm1100
      case 0x0d: // MOV.W    @(R0,Rm),Rn      0000nnnnmmmm1101
      case 0x0e: // MOV.L    @(R0,Rm),Rn      0000nnnnmmmm1110
        emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), (op & 3) | drcf.polling);
        goto end_op;
      case 0x0f: // MAC.L   @Rm+,@Rn+  0000nnnnmmmm1111
        emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 2);
        sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
        tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL);
        tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL);
        emith_sh2_macl(tmp3, tmp4, tmp, tmp2, sr);
        rcache_free_tmp(tmp2);
        rcache_free_tmp(tmp);
        goto end_op;
      }
      goto default_;

    /////////////////////////////////////////////
    case 0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd
      emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2);
      goto end_op;
3978 
    case 0x02:
      switch (op & 0x0f)
      {
      case 0x00: // MOV.B Rm,@Rn        0010nnnnmmmm0000
      case 0x01: // MOV.W Rm,@Rn        0010nnnnmmmm0001
      case 0x02: // MOV.L Rm,@Rn        0010nnnnmmmm0010
        emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, op & 3);
        goto end_op;
      case 0x04: // MOV.B Rm,@-Rn       0010nnnnmmmm0100
      case 0x05: // MOV.W Rm,@-Rn       0010nnnnmmmm0101
      case 0x06: // MOV.L Rm,@-Rn       0010nnnnmmmm0110
        emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, (op & 3) | MF_PREDECR);
        goto end_op;
      case 0x07: // DIV0S Rm,Rn         0010nnnnmmmm0111
        sr   = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
        emith_invalidate_t();
        emith_bic_r_imm(sr, M|Q|T);
        drcf.Mflag = FLG_UNKNOWN;
#if DIV_OPTIMIZER
        // scan_block recognized a full signed division sequence
        // (DIV0S + n*(DIV1 [ROTCL])): call a helper instead and rebuild
        // M/Q/T from the operands and result, skipping the original insns
        if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) {
          // divide 32/16
          rcache_get_reg_arg(0, div(opd).rn, NULL);
          tmp2 = rcache_get_reg_arg(1, div(opd).rm, NULL);
          tmp3 = rcache_get_tmp();
          emith_lsr(tmp3, tmp2, 31);
          emith_or_r_r_lsl(sr, tmp3, M_SHIFT);        // M = Rm[31]
          rcache_invalidate_tmp();
          emith_abicall(sh2_drc_divs32);
          tmp = rcache_get_tmp_ret();
          tmp2 = rcache_map_reg(div(opd).rn, tmp);
          if (tmp != tmp2)
            emith_move_r_r(tmp2, tmp);
          tmp3  = rcache_get_tmp();

          emith_eor_r_r_r_lsr(tmp3, tmp2, sr, M_SHIFT);
          emith_and_r_r_imm(tmp3, tmp3, 1);
          emith_eor_r_r_imm(tmp3, tmp3, 1);
          emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);        // Q = !Rn[0]^M
          rcache_free_tmp(tmp3);
          emith_or_r_r_r_lsr(sr, sr, tmp2, 31);       // T = Rn[31]
          skip_op = div(opd).div1 + div(opd).rotcl;
        }
        else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) {
          // divide 64/32
          tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL);
          emith_ctx_write(tmp4, offsetof(SH2, drc_tmp));
          rcache_get_reg_arg(0, div(opd).rn, NULL);
          tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL);
          tmp3 = rcache_get_tmp_arg(1);
          emith_lsr(tmp3, tmp2, 31);
          emith_or_r_r_lsl(sr, tmp3, M_SHIFT);         // M = Rm[31]
          emith_add_r_r_ptr_imm(tmp3, CONTEXT_REG, offsetof(SH2, drc_tmp));
          rcache_invalidate_tmp();
          emith_abicall(sh2_drc_divs64);
          tmp = rcache_get_tmp_ret();
          tmp2 = rcache_map_reg(div(opd).rn, tmp);
          tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL);
          if (tmp != tmp2)
            emith_move_r_r(tmp2, tmp);
          emith_ctx_read(tmp4, offsetof(SH2, drc_tmp));

          tmp3  = rcache_get_tmp();
          emith_eor_r_r_r_lsr(tmp3, tmp4, sr, M_SHIFT);
          emith_and_r_r_imm(tmp3, tmp3, 1);
          emith_eor_r_r_imm(tmp3, tmp3, 1);
          emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);        // Q = !Ro[0]^M
          rcache_free_tmp(tmp3);
          emith_or_r_r_r_lsr(sr, sr, tmp4, 31);       // T = Ro[31]
          skip_op = div(opd).div1 + div(opd).rotcl;
        } else
#endif
        {
          // generic DIV0S: seed Q/M from the operand sign bits, T = Q^M
          tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
          tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
          tmp  = rcache_get_tmp();
          emith_lsr(tmp, tmp2, 31);       // Q = Nn
          emith_or_r_r_lsl(sr, tmp, Q_SHIFT);
          emith_lsr(tmp, tmp3, 31);       // M = Nm
          emith_or_r_r_lsl(sr, tmp, M_SHIFT);
          emith_eor_r_r_lsr(tmp, tmp2, 31);
          emith_or_r_r(sr, tmp);          // T = Q^M
          rcache_free(tmp);
        }
        goto end_op;
      case 0x08: // TST Rm,Rn           0010nnnnmmmm1000
        sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
        tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
        tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
        emith_clr_t_cond(sr);
        emith_tst_r_r(tmp2, tmp3);
        emith_set_t_cond(sr, DCOND_EQ);
        goto end_op;
      case 0x09: // AND Rm,Rn           0010nnnnmmmm1001
        // Rn &= Rn is a no-op, so nothing is emitted in that case
        if (GET_Rm() != GET_Rn()) {
          tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
          tmp  = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
          emith_and_r_r_r(tmp, tmp3, tmp2);
        }
        goto end_op;
      case 0x0a: // XOR Rm,Rn           0010nnnnmmmm1010
#if PROPAGATE_CONSTANTS
        // Rn ^= Rn always yields 0; track it as a constant
        if (GET_Rn() == GET_Rm()) {
          gconst_new(GET_Rn(), 0);
          goto end_op;
        }
#endif
        tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
        tmp  = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
        emith_eor_r_r_r(tmp, tmp3, tmp2);
        goto end_op;
      case 0x0b: // OR  Rm,Rn           0010nnnnmmmm1011
        if (GET_Rm() != GET_Rn()) {
          tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
          tmp  = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
          emith_or_r_r_r(tmp, tmp3, tmp2);
        }
        goto end_op;
      case 0x0c: // CMP/STR Rm,Rn       0010nnnnmmmm1100
        // T = 1 if any byte of Rn equals the corresponding byte of Rm;
        // implemented as byte-wise zero test of Rn^Rm
        tmp  = rcache_get_tmp();
        tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
        tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
        emith_eor_r_r_r(tmp, tmp2, tmp3);
        sr   = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
        emith_clr_t_cond(sr);
        emith_tst_r_imm(tmp, 0x000000ff);
        EMITH_SJMP_START(DCOND_EQ);
        emith_tst_r_imm_c(DCOND_NE, tmp, 0x0000ff00);
        EMITH_SJMP_START(DCOND_EQ);
        emith_tst_r_imm_c(DCOND_NE, tmp, 0x00ff0000);
        EMITH_SJMP_START(DCOND_EQ);
        emith_tst_r_imm_c(DCOND_NE, tmp, 0xff000000);
        EMITH_SJMP_END(DCOND_EQ);
        EMITH_SJMP_END(DCOND_EQ);
        EMITH_SJMP_END(DCOND_EQ);
        emith_set_t_cond(sr, DCOND_EQ);
        rcache_free_tmp(tmp);
        goto end_op;
      case 0x0d: // XTRCT  Rm,Rn        0010nnnnmmmm1101
        // Rn = (Rn >> 16) | (Rm << 16)
        tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
        tmp  = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
        emith_lsr(tmp, tmp3, 16);
        emith_or_r_r_lsl(tmp, tmp2, 16);
        goto end_op;
      case 0x0e: // MULU.W Rm,Rn        0010nnnnmmmm1110
      case 0x0f: // MULS.W Rm,Rn        0010nnnnmmmm1111
        // 16x16->32 multiply; extend operands only if the cache can't
        // prove they are already in u16/s16 form
        tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
        tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
        tmp  = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
        tmp4 = tmp3;
        if (op & 1) {
          if (! rcache_is_s16(tmp2)) {
            emith_sext(tmp, tmp2, 16);
            tmp2 = tmp;
          }
          if (! rcache_is_s16(tmp3)) {
            tmp4 = rcache_get_tmp();
            emith_sext(tmp4, tmp3, 16);
          }
        } else {
          if (! rcache_is_u16(tmp2)) {
            emith_clear_msb(tmp, tmp2, 16);
            tmp2 = tmp;
          }
          if (! rcache_is_u16(tmp3)) {
            tmp4 = rcache_get_tmp();
            emith_clear_msb(tmp4, tmp3, 16);
          }
        }
        emith_mul(tmp, tmp2, tmp4);
        if (tmp4 != tmp3)
          rcache_free_tmp(tmp4);
        goto end_op;
      }
      goto default_;
4153 
4154     /////////////////////////////////////////////
4155     case 0x03:
4156       switch (op & 0x0f)
4157       {
4158       case 0x00: // CMP/EQ Rm,Rn        0011nnnnmmmm0000
4159       case 0x02: // CMP/HS Rm,Rn        0011nnnnmmmm0010
4160       case 0x03: // CMP/GE Rm,Rn        0011nnnnmmmm0011
4161       case 0x06: // CMP/HI Rm,Rn        0011nnnnmmmm0110
4162       case 0x07: // CMP/GT Rm,Rn        0011nnnnmmmm0111
4163         sr   = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4164         tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4165         tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4166         switch (op & 0x07)
4167         {
4168         case 0x00: // CMP/EQ
4169           tmp = DCOND_EQ;
4170           break;
4171         case 0x02: // CMP/HS
4172           tmp = DCOND_HS;
4173           break;
4174         case 0x03: // CMP/GE
4175           tmp = DCOND_GE;
4176           break;
4177         case 0x06: // CMP/HI
4178           tmp = DCOND_HI;
4179           break;
4180         case 0x07: // CMP/GT
4181           tmp = DCOND_GT;
4182           break;
4183         }
4184         emith_clr_t_cond(sr);
4185         emith_cmp_r_r(tmp2, tmp3);
4186         emith_set_t_cond(sr, tmp);
4187         goto end_op;
4188       case 0x04: // DIV1    Rm,Rn       0011nnnnmmmm0100
4189         // Q1 = carry(Rn = (Rn << 1) | T)
4190         // if Q ^ M
4191         //   Q2 = carry(Rn += Rm)
4192         // else
4193         //   Q2 = carry(Rn -= Rm)
4194         // Q = M ^ Q1 ^ Q2
4195         // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2)
4196         tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4197         tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL);
4198         sr   = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4199         emith_sync_t(sr);
4200         tmp = rcache_get_tmp();
4201         if (drcf.Mflag != FLG_0) {
4202           emith_and_r_r_imm(tmp, sr, M);
4203           emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M
4204         }
4205         rcache_free_tmp(tmp);
4206         // shift Rn, add T, add or sub Rm, set T = !(Q1 ^ Q2)
4207         // in: (Q ^ M) passed in Q
4208         emith_sh2_div1_step(tmp2, tmp3, sr);
4209         tmp = rcache_get_tmp();
4210         emith_or_r_imm(sr, Q);              // Q = !T
4211         emith_and_r_r_imm(tmp, sr, T);
4212         emith_eor_r_r_lsl(sr, tmp, Q_SHIFT);
4213         if (drcf.Mflag != FLG_0) {          // Q = M ^ !T = M ^ Q1 ^ Q2
4214           emith_and_r_r_imm(tmp, sr, M);
4215           emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT);
4216         }
4217         rcache_free_tmp(tmp);
4218         goto end_op;
4219       case 0x05: // DMULU.L Rm,Rn       0011nnnnmmmm0101
4220         tmp  = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4221         tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4222         tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
4223         tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL);
4224         emith_mul_u64(tmp3, tmp4, tmp, tmp2);
4225         goto end_op;
4226       case 0x08: // SUB     Rm,Rn       0011nnnnmmmm1000
4227 #if PROPAGATE_CONSTANTS
4228         if (GET_Rn() == GET_Rm()) {
4229           gconst_new(GET_Rn(), 0);
4230           goto end_op;
4231         }
4232 #endif
4233       case 0x0c: // ADD     Rm,Rn       0011nnnnmmmm1100
4234         tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4235         tmp  = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4236         if (op & 4) {
4237           emith_add_r_r_r(tmp, tmp3, tmp2);
4238         } else
4239           emith_sub_r_r_r(tmp, tmp3, tmp2);
4240         goto end_op;
4241       case 0x0a: // SUBC    Rm,Rn       0011nnnnmmmm1010
4242       case 0x0e: // ADDC    Rm,Rn       0011nnnnmmmm1110
4243         tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4244         tmp  = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4245         sr   = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4246         emith_sync_t(sr);
4247 #if T_OPTIMIZER
4248         if (rcache_regs_discard & BITMASK1(SHR_T)) {
4249           if (op & 4) {
4250             emith_t_to_carry(sr, 0);
4251             emith_adc_r_r_r(tmp, tmp3, tmp2);
4252           } else {
4253             emith_t_to_carry(sr, 1);
4254             emith_sbc_r_r_r(tmp, tmp3, tmp2);
4255           }
4256         } else
4257 #endif
4258         {
4259           EMITH_HINT_COND(DCOND_CS);
4260           if (op & 4) { // adc
4261             emith_tpop_carry(sr, 0);
4262             emith_adcf_r_r_r(tmp, tmp3, tmp2);
4263             emith_tpush_carry(sr, 0);
4264           } else {
4265             emith_tpop_carry(sr, 1);
4266             emith_sbcf_r_r_r(tmp, tmp3, tmp2);
4267             emith_tpush_carry(sr, 1);
4268           }
4269         }
4270         goto end_op;
4271       case 0x0b: // SUBV    Rm,Rn       0011nnnnmmmm1011
4272       case 0x0f: // ADDV    Rm,Rn       0011nnnnmmmm1111
4273         tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4274         tmp  = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4275         sr   = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4276 #if T_OPTIMIZER
4277         if (rcache_regs_discard & BITMASK1(SHR_T)) {
4278           if (op & 4)
4279             emith_add_r_r_r(tmp,tmp3,tmp2);
4280           else
4281             emith_sub_r_r_r(tmp,tmp3,tmp2);
4282         } else
4283 #endif
4284         {
4285           emith_clr_t_cond(sr);
4286           EMITH_HINT_COND(DCOND_VS);
4287           if (op & 4)
4288             emith_addf_r_r_r(tmp, tmp3, tmp2);
4289           else
4290             emith_subf_r_r_r(tmp, tmp3, tmp2);
4291           emith_set_t_cond(sr, DCOND_VS);
4292         }
4293         goto end_op;
4294       case 0x0d: // DMULS.L Rm,Rn       0011nnnnmmmm1101
4295         tmp  = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4296         tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4297         tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
4298         tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL);
4299         emith_mul_s64(tmp3, tmp4, tmp, tmp2);
4300         goto end_op;
4301       }
4302       goto default_;
4303 
4304     /////////////////////////////////////////////
4305     case 0x04:
4306       switch (op & 0x0f)
4307       {
4308       case 0x00:
4309         switch (GET_Fx())
4310         {
4311         case 0: // SHLL Rn    0100nnnn00000000
4312         case 2: // SHAL Rn    0100nnnn00100000
4313           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4314           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4315 #if T_OPTIMIZER
4316           if (rcache_regs_discard & BITMASK1(SHR_T))
4317             emith_lsl(tmp, tmp2, 1);
4318           else
4319 #endif
4320           {
4321             emith_invalidate_t();
4322             emith_lslf(tmp, tmp2, 1);
4323             emith_carry_to_t(sr, 0);
4324           }
4325           goto end_op;
4326         case 1: // DT Rn      0100nnnn00010000
4327           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4328 #if LOOP_DETECTION
4329           if (drcf.loop_type == OF_DELAY_LOOP) {
4330             if (drcf.delay_reg == -1)
4331               drcf.delay_reg = GET_Rn();
4332             else
4333               drcf.polling = drcf.loop_type = 0;
4334           }
4335 #endif
4336           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4337           emith_clr_t_cond(sr);
4338           EMITH_HINT_COND(DCOND_EQ);
4339           emith_subf_r_r_imm(tmp, tmp2, 1);
4340           emith_set_t_cond(sr, DCOND_EQ);
4341           goto end_op;
4342         }
4343         goto default_;
4344       case 0x01:
4345         switch (GET_Fx())
4346         {
4347         case 0: // SHLR Rn    0100nnnn00000001
4348         case 2: // SHAR Rn    0100nnnn00100001
4349           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4350           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4351 #if T_OPTIMIZER
4352           if (rcache_regs_discard & BITMASK1(SHR_T)) {
4353             if (op & 0x20)
4354               emith_asr(tmp,tmp2,1);
4355             else
4356               emith_lsr(tmp,tmp2,1);
4357           } else
4358 #endif
4359           {
4360             emith_invalidate_t();
4361             if (op & 0x20) {
4362               emith_asrf(tmp, tmp2, 1);
4363             } else
4364               emith_lsrf(tmp, tmp2, 1);
4365             emith_carry_to_t(sr, 0);
4366           }
4367           goto end_op;
4368         case 1: // CMP/PZ Rn  0100nnnn00010001
4369           tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4370           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4371           emith_clr_t_cond(sr);
4372           emith_cmp_r_imm(tmp, 0);
4373           emith_set_t_cond(sr, DCOND_GE);
4374           goto end_op;
4375         }
4376         goto default_;
4377       case 0x02:
4378       case 0x03:
4379         switch (op & 0x3f)
4380         {
4381         case 0x02: // STS.L    MACH,@-Rn 0100nnnn00000010
4382           tmp = SHR_MACH;
4383           break;
4384         case 0x12: // STS.L    MACL,@-Rn 0100nnnn00010010
4385           tmp = SHR_MACL;
4386           break;
4387         case 0x22: // STS.L    PR,@-Rn   0100nnnn00100010
4388           tmp = SHR_PR;
4389           break;
4390         case 0x03: // STC.L    SR,@-Rn   0100nnnn00000011
4391           tmp = SHR_SR;
4392           break;
4393         case 0x13: // STC.L    GBR,@-Rn  0100nnnn00010011
4394           tmp = SHR_GBR;
4395           break;
4396         case 0x23: // STC.L    VBR,@-Rn  0100nnnn00100011
4397           tmp = SHR_VBR;
4398           break;
4399         default:
4400           goto default_;
4401         }
4402         if (tmp == SHR_SR) {
4403           tmp3 = rcache_get_reg_arg(1, tmp, &tmp4);
4404           emith_sync_t(tmp4);
4405           emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0
4406         } else
4407           tmp3 = rcache_get_reg_arg(1, tmp, NULL);
4408         emit_memhandler_write_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_PREDECR);
4409         goto end_op;
4410       case 0x04:
4411       case 0x05:
4412         switch (op & 0x3f)
4413         {
4414         case 0x04: // ROTL   Rn          0100nnnn00000100
4415         case 0x05: // ROTR   Rn          0100nnnn00000101
4416           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
4417           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4418 #if T_OPTIMIZER
4419           if (rcache_regs_discard & BITMASK1(SHR_T)) {
4420             if (op & 1)
4421               emith_ror(tmp, tmp2, 1);
4422             else
4423               emith_rol(tmp, tmp2, 1);
4424           } else
4425 #endif
4426           {
4427             emith_invalidate_t();
4428             if (op & 1)
4429               emith_rorf(tmp, tmp2, 1);
4430             else
4431               emith_rolf(tmp, tmp2, 1);
4432             emith_carry_to_t(sr, 0);
4433           }
4434           goto end_op;
4435         case 0x24: // ROTCL  Rn          0100nnnn00100100
4436         case 0x25: // ROTCR  Rn          0100nnnn00100101
4437           tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL);
4438           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4439           emith_sync_t(sr);
4440 #if T_OPTIMIZER
4441           if (rcache_regs_discard & BITMASK1(SHR_T)) {
4442             emith_t_to_carry(sr, 0);
4443             if (op & 1)
4444               emith_rorc(tmp);
4445             else
4446               emith_rolc(tmp);
4447           } else
4448 #endif
4449           {
4450             emith_tpop_carry(sr, 0);
4451             if (op & 1)
4452               emith_rorcf(tmp);
4453             else
4454               emith_rolcf(tmp);
4455             emith_tpush_carry(sr, 0);
4456           }
4457           goto end_op;
4458         case 0x15: // CMP/PL Rn          0100nnnn00010101
4459           tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4460           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4461           emith_clr_t_cond(sr);
4462           emith_cmp_r_imm(tmp, 0);
4463           emith_set_t_cond(sr, DCOND_GT);
4464           goto end_op;
4465         }
4466         goto default_;
4467       case 0x06:
4468       case 0x07:
4469         switch (op & 0x3f)
4470         {
4471         case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110
4472           tmp = SHR_MACH;
4473           break;
4474         case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110
4475           tmp = SHR_MACL;
4476           break;
4477         case 0x26: // LDS.L @Rm+,PR   0100mmmm00100110
4478           tmp = SHR_PR;
4479           break;
4480         case 0x07: // LDC.L @Rm+,SR   0100mmmm00000111
4481           tmp = SHR_SR;
4482           break;
4483         case 0x17: // LDC.L @Rm+,GBR  0100mmmm00010111
4484           tmp = SHR_GBR;
4485           break;
4486         case 0x27: // LDC.L @Rm+,VBR  0100mmmm00100111
4487           tmp = SHR_VBR;
4488           break;
4489         default:
4490           goto default_;
4491         }
4492         if (tmp == SHR_SR) {
4493           emith_invalidate_t();
4494           tmp2 = emit_memhandler_read_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_POSTINCR);
4495           sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4496           emith_write_sr(sr, tmp2);
4497           rcache_free_tmp(tmp2);
4498           drcf.test_irq = 1;
4499         } else
4500           emit_memhandler_read_rr(sh2, tmp, GET_Rn(), 0, 2 | MF_POSTINCR);
4501         goto end_op;
4502       case 0x08:
4503       case 0x09:
4504         switch (GET_Fx())
4505         {
4506         case 0: // SHLL2 Rn        0100nnnn00001000
4507                 // SHLR2 Rn        0100nnnn00001001
4508           tmp = 2;
4509           break;
4510         case 1: // SHLL8 Rn        0100nnnn00011000
4511                 // SHLR8 Rn        0100nnnn00011001
4512           tmp = 8;
4513           break;
4514         case 2: // SHLL16 Rn       0100nnnn00101000
4515                 // SHLR16 Rn       0100nnnn00101001
4516           tmp = 16;
4517           break;
4518         default:
4519           goto default_;
4520         }
4521         tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
4522         if (op & 1) {
4523           emith_lsr(tmp2, tmp3, tmp);
4524         } else
4525           emith_lsl(tmp2, tmp3, tmp);
4526         goto end_op;
4527       case 0x0a:
4528         switch (GET_Fx())
4529         {
4530         case 0: // LDS      Rm,MACH   0100mmmm00001010
4531           tmp2 = SHR_MACH;
4532           break;
4533         case 1: // LDS      Rm,MACL   0100mmmm00011010
4534           tmp2 = SHR_MACL;
4535           break;
4536         case 2: // LDS      Rm,PR     0100mmmm00101010
4537           tmp2 = SHR_PR;
4538           break;
4539         default:
4540           goto default_;
4541         }
4542         emit_move_r_r(tmp2, GET_Rn());
4543         goto end_op;
4544       case 0x0b:
4545         switch (GET_Fx())
4546         {
4547         case 1: // TAS.B @Rn  0100nnnn00011011
4548           // XXX: is TAS working on 32X?
4549           rcache_get_reg_arg(0, GET_Rn(), NULL);
4550           tmp = emit_memhandler_read(0);
4551           sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4552           emith_clr_t_cond(sr);
4553           emith_cmp_r_imm(tmp, 0);
4554           emith_set_t_cond(sr, DCOND_EQ);
4555           emith_or_r_imm(tmp, 0x80);
4556           tmp2 = rcache_get_tmp_arg(1); // assuming it differs to tmp
4557           emith_move_r_r(tmp2, tmp);
4558           rcache_free_tmp(tmp);
4559           rcache_get_reg_arg(0, GET_Rn(), NULL);
4560           emit_memhandler_write(0);
4561           break;
4562         default:
4563           goto default_;
4564         }
4565         goto end_op;
4566       case 0x0e:
4567         switch (GET_Fx())
4568         {
4569         case 0: // LDC Rm,SR   0100mmmm00001110
4570           tmp2 = SHR_SR;
4571           break;
4572         case 1: // LDC Rm,GBR  0100mmmm00011110
4573           tmp2 = SHR_GBR;
4574           break;
4575         case 2: // LDC Rm,VBR  0100mmmm00101110
4576           tmp2 = SHR_VBR;
4577           break;
4578         default:
4579           goto default_;
4580         }
4581         if (tmp2 == SHR_SR) {
4582           emith_invalidate_t();
4583           sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4584           tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
4585           emith_write_sr(sr, tmp);
4586           drcf.test_irq = 1;
4587         } else
4588           emit_move_r_r(tmp2, GET_Rn());
4589         goto end_op;
4590       case 0x0f: // MAC.W @Rm+,@Rn+  0100nnnnmmmm1111
4591         emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 1);
4592         sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
4593         tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL);
4594         tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL);
4595         emith_sh2_macw(tmp3, tmp4, tmp, tmp2, sr);
4596         rcache_free_tmp(tmp2);
4597         rcache_free_tmp(tmp);
4598         goto end_op;
4599       }
4600       goto default_;
4601 
4602     /////////////////////////////////////////////
4603     case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd
4604       emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2 | drcf.polling);
4605       goto end_op;
4606 
4607     /////////////////////////////////////////////
4608     case 0x06:
4609       switch (op & 0x0f)
4610       {
4611       case 0x00: // MOV.B @Rm,Rn        0110nnnnmmmm0000
4612       case 0x01: // MOV.W @Rm,Rn        0110nnnnmmmm0001
4613       case 0x02: // MOV.L @Rm,Rn        0110nnnnmmmm0010
4614       case 0x04: // MOV.B @Rm+,Rn       0110nnnnmmmm0100
4615       case 0x05: // MOV.W @Rm+,Rn       0110nnnnmmmm0101
4616       case 0x06: // MOV.L @Rm+,Rn       0110nnnnmmmm0110
4617         tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? MF_POSTINCR : drcf.polling;
4618         emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), 0, (op & 3) | tmp);
4619         goto end_op;
4620       case 0x03: // MOV    Rm,Rn        0110nnnnmmmm0011
4621         emit_move_r_r(GET_Rn(), GET_Rm());
4622         goto end_op;
4623       default: // 0x07 ... 0x0f
4624         tmp  = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
4625         tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL);
4626         switch (op & 0x0f)
4627         {
4628         case 0x07: // NOT    Rm,Rn        0110nnnnmmmm0111
4629           emith_mvn_r_r(tmp2, tmp);
4630           break;
4631         case 0x08: // SWAP.B Rm,Rn        0110nnnnmmmm1000
4632           tmp3 = tmp2;
4633           if (tmp == tmp2)
4634             tmp3 = rcache_get_tmp();
4635           tmp4 = rcache_get_tmp();
4636           emith_lsr(tmp3, tmp, 16);
4637           emith_or_r_r_lsl(tmp3, tmp, 24);
4638           emith_and_r_r_imm(tmp4, tmp, 0xff00);
4639           emith_or_r_r_lsl(tmp3, tmp4, 8);
4640           emith_rol(tmp2, tmp3, 16);
4641           rcache_free_tmp(tmp4);
4642           if (tmp == tmp2)
4643             rcache_free_tmp(tmp3);
4644           break;
4645         case 0x09: // SWAP.W Rm,Rn        0110nnnnmmmm1001
4646           emith_rol(tmp2, tmp, 16);
4647           break;
4648         case 0x0a: // NEGC   Rm,Rn        0110nnnnmmmm1010
4649           sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4650           emith_sync_t(sr);
4651 #if T_OPTIMIZER
4652           if (rcache_regs_discard & BITMASK1(SHR_T)) {
4653             emith_t_to_carry(sr, 1);
4654             emith_negc_r_r(tmp2, tmp);
4655           } else
4656 #endif
4657           {
4658             EMITH_HINT_COND(DCOND_CS);
4659             emith_tpop_carry(sr, 1);
4660             emith_negcf_r_r(tmp2, tmp);
4661             emith_tpush_carry(sr, 1);
4662           }
4663           break;
4664         case 0x0b: // NEG    Rm,Rn        0110nnnnmmmm1011
4665           emith_neg_r_r(tmp2, tmp);
4666           break;
4667         case 0x0c: // EXTU.B Rm,Rn        0110nnnnmmmm1100
4668           emith_clear_msb(tmp2, tmp, 24);
4669           rcache_set_x16(tmp2, 1, 1);
4670           break;
4671         case 0x0d: // EXTU.W Rm,Rn        0110nnnnmmmm1101
4672           emith_clear_msb(tmp2, tmp, 16);
4673           rcache_set_x16(tmp2, 0, 1);
4674           break;
4675         case 0x0e: // EXTS.B Rm,Rn        0110nnnnmmmm1110
4676           emith_sext(tmp2, tmp, 8);
4677           rcache_set_x16(tmp2, 1, 0);
4678           break;
4679         case 0x0f: // EXTS.W Rm,Rn        0110nnnnmmmm1111
4680           emith_sext(tmp2, tmp, 16);
4681           rcache_set_x16(tmp2, 1, 0);
4682           break;
4683         }
4684         goto end_op;
4685       }
4686       goto default_;
4687 
4688     /////////////////////////////////////////////
4689     case 0x07: // ADD #imm,Rn  0111nnnniiiiiiii
4690       if (op & 0x80) // adding negative
4691         emit_sub_r_imm(GET_Rn(), (u8)-op);
4692       else
4693         emit_add_r_imm(GET_Rn(), (u8)op);
4694       goto end_op;
4695 
4696     /////////////////////////////////////////////
4697     case 0x08:
4698       switch (op & 0x0f00)
4699       {
4700       case 0x0000: // MOV.B R0,@(disp,Rn)  10000000nnnndddd
4701       case 0x0100: // MOV.W R0,@(disp,Rn)  10000001nnnndddd
4702         tmp = (op & 0x100) >> 8;
4703         emit_memhandler_write_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp);
4704         goto end_op;
4705       case 0x0400: // MOV.B @(disp,Rm),R0  10000100mmmmdddd
4706       case 0x0500: // MOV.W @(disp,Rm),R0  10000101mmmmdddd
4707         tmp = (op & 0x100) >> 8;
4708         emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp | drcf.polling);
4709         goto end_op;
4710       case 0x0800: // CMP/EQ #imm,R0       10001000iiiiiiii
4711         tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ, NULL);
4712         sr   = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4713         emith_clr_t_cond(sr);
4714         emith_cmp_r_imm(tmp2, (s8)(op & 0xff));
4715         emith_set_t_cond(sr, DCOND_EQ);
4716         goto end_op;
4717       }
4718       goto default_;
4719 
4720     /////////////////////////////////////////////
4721     case 0x0c:
4722       switch (op & 0x0f00)
4723       {
4724       case 0x0000: // MOV.B R0,@(disp,GBR)   11000000dddddddd
4725       case 0x0100: // MOV.W R0,@(disp,GBR)   11000001dddddddd
4726       case 0x0200: // MOV.L R0,@(disp,GBR)   11000010dddddddd
4727         tmp = (op & 0x300) >> 8;
4728         emit_memhandler_write_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp);
4729         goto end_op;
4730       case 0x0400: // MOV.B @(disp,GBR),R0   11000100dddddddd
4731       case 0x0500: // MOV.W @(disp,GBR),R0   11000101dddddddd
4732       case 0x0600: // MOV.L @(disp,GBR),R0   11000110dddddddd
4733         tmp = (op & 0x300) >> 8;
4734         emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp | drcf.polling);
4735         goto end_op;
4736       case 0x0800: // TST #imm,R0           11001000iiiiiiii
4737         tmp = rcache_get_reg(SHR_R0, RC_GR_READ, NULL);
4738         sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4739         emith_clr_t_cond(sr);
4740         emith_tst_r_imm(tmp, op & 0xff);
4741         emith_set_t_cond(sr, DCOND_EQ);
4742         goto end_op;
4743       case 0x0900: // AND #imm,R0           11001001iiiiiiii
4744         tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2);
4745         emith_and_r_r_imm(tmp, tmp2, (op & 0xff));
4746         goto end_op;
4747       case 0x0a00: // XOR #imm,R0           11001010iiiiiiii
4748         if (op & 0xff) {
4749           tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2);
4750           emith_eor_r_r_imm(tmp, tmp2, (op & 0xff));
4751         }
4752         goto end_op;
4753       case 0x0b00: // OR  #imm,R0           11001011iiiiiiii
4754         if (op & 0xff) {
4755           tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2);
4756           emith_or_r_r_imm(tmp, tmp2, (op & 0xff));
4757         }
4758         goto end_op;
4759       case 0x0c00: // TST.B #imm,@(R0,GBR)  11001100iiiiiiii
4760         tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0 | drcf.polling);
4761         sr  = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4762         emith_clr_t_cond(sr);
4763         emith_tst_r_imm(tmp, op & 0xff);
4764         emith_set_t_cond(sr, DCOND_EQ);
4765         rcache_free_tmp(tmp);
4766         goto end_op;
4767       case 0x0d00: // AND.B #imm,@(R0,GBR)  11001101iiiiiiii
4768         tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4769         tmp2 = rcache_get_tmp_arg(1);
4770         emith_and_r_r_imm(tmp2, tmp, (op & 0xff));
4771         goto end_rmw_op;
4772       case 0x0e00: // XOR.B #imm,@(R0,GBR)  11001110iiiiiiii
4773         tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4774         tmp2 = rcache_get_tmp_arg(1);
4775         emith_eor_r_r_imm(tmp2, tmp, (op & 0xff));
4776         goto end_rmw_op;
4777       case 0x0f00: // OR.B  #imm,@(R0,GBR)  11001111iiiiiiii
4778         tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4779         tmp2 = rcache_get_tmp_arg(1);
4780         emith_or_r_r_imm(tmp2, tmp, (op & 0xff));
4781       end_rmw_op:
4782         rcache_free_tmp(tmp);
4783         emit_indirect_indexed_write(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0);
4784         goto end_op;
4785       }
4786       goto default_;
4787 
4788     /////////////////////////////////////////////
4789     case 0x0e: // MOV #imm,Rn   1110nnnniiiiiiii
4790       emit_move_r_imm32(GET_Rn(), (s8)op);
4791       goto end_op;
4792 
4793     default:
4794     default_:
4795       if (!(op_flags[i] & OF_B_IN_DS)) {
4796         elprintf_sh2(sh2, EL_ANOMALY,
4797           "drc: illegal op %04x @ %08x", op, pc - 2);
4798         exit(1);
4799       }
4800     }
4801 
4802 end_op:
4803     rcache_unlock_all();
4804     rcache_set_usage_now(0);
4805 #if DRC_DEBUG & 64
4806     RCACHE_CHECK("after insn");
4807 #endif
4808 
4809     cycles += opd->cycles;
4810 
4811     if (op_flags[i+1] & OF_DELAY_OP) {
4812       do_host_disasm(tcache_id);
4813       continue;
4814     }
4815 
4816     // test irq?
4817     if (drcf.test_irq && !drcf.pending_branch_direct) {
4818       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4819       FLUSH_CYCLES(sr);
4820       emith_sync_t(sr);
4821       if (!drcf.pending_branch_indirect)
4822         emit_move_r_imm32(SHR_PC, pc);
4823       rcache_flush();
4824       emith_call(sh2_drc_test_irq);
4825       drcf.test_irq = 0;
4826     }
4827 
4828     // branch handling
4829     if (drcf.pending_branch_direct)
4830     {
4831       struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd;
4832       u32 target_pc = opd_b->imm;
4833       int cond = -1;
4834       int ctaken = 0;
4835       void *target = NULL;
4836 
4837       if (OP_ISBRACND(opd_b->op))
4838         ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2;
4839       cycles += ctaken; // assume branch taken
4840 
4841 #if LOOP_OPTIMIZER
4842       if ((drcf.loop_type == OF_IDLE_LOOP ||
4843           (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0)))
4844       {
4845         // idle or delay loop
4846         emit_sync_t_to_sr();
4847         emith_sh2_delay_loop(cycles, drcf.delay_reg);
4848         rcache_unlock_all(); // may lock delay_reg
4849         drcf.polling = drcf.loop_type = drcf.pinning = 0;
4850       }
4851 #endif
4852 
4853 #if CALL_STACK
4854       void *rtsadd = NULL, *rtsret = NULL;
4855       if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
4856         // BSR - save rts data
4857         tmp = rcache_get_tmp_arg(1);
4858         rtsadd = tcache_ptr;
4859         emith_move_r_imm_s8_patchable(tmp, 0);
4860         rcache_clean_tmp();
4861         rcache_invalidate_tmp();
4862         emith_call(sh2_drc_dispatcher_call);
4863         rtsret = tcache_ptr;
4864       }
4865 #endif
4866 
4867       // XXX move below cond test if not changing host cond (MIPS delay slot)?
4868       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
4869       FLUSH_CYCLES(sr);
4870       rcache_clean();
4871 
4872       if (OP_ISBRACND(opd_b->op)) {
4873         // BT[S], BF[S] - emit condition test
4874         cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE;
4875         if (delay_dep_fw & BITMASK1(SHR_T)) {
4876           emith_sync_t(sr);
4877           emith_tst_r_imm(sr, T_save);
4878         } else {
4879           cond = emith_tst_t(sr, (opd_b->op == OP_BRANCH_CT));
4880           if (emith_get_t_cond() >= 0) {
4881             if (opd_b->op == OP_BRANCH_CT)
4882               emith_or_r_imm_c(cond, sr, T);
4883             else
4884               emith_bic_r_imm_c(cond, sr, T);
4885           }
4886         }
4887       } else
4888         emith_sync_t(sr);
4889       // no modification of host status/flags between here and branching!
4890 
4891       v = find_in_sorted_linkage(branch_targets, branch_target_count, target_pc);
4892       if (v >= 0)
4893       {
4894         // local branch
4895         if (branch_targets[v].ptr) {
4896           // local backward jump, link here now since host PC is already known
4897           target = branch_targets[v].ptr;
4898 #if LOOP_OPTIMIZER
4899           if (pinned_loops[pinned_loop_count].pc == target_pc) {
4900             // backward jump at end of optimized loop
4901             rcache_unpin_all();
4902             target = pinned_loops[pinned_loop_count].ptr;
4903             pinned_loop_count ++;
4904           }
4905 #endif
4906           if (cond != -1) {
4907             if (emith_jump_patch_inrange(tcache_ptr, target)) {
4908               emith_jump_cond(cond, target);
4909             } else {
4910               // not reachable directly, must use far branch
4911               EMITH_JMP_START(emith_invert_cond(cond));
4912               emith_jump(target);
4913               EMITH_JMP_END(emith_invert_cond(cond));
4914             }
4915           } else {
4916             emith_jump(target);
4917             rcache_invalidate();
4918           }
4919         } else if (blx_target_count < MAX_LOCAL_BRANCHES) {
4920           // local forward jump
4921           target = tcache_ptr;
4922           blx_targets[blx_target_count++] =
4923               (struct linkage) { .pc = target_pc, .ptr = target, .mask = 0x2 };
4924           if (cond != -1)
4925             emith_jump_cond_patchable(cond, target);
4926           else {
4927             emith_jump_patchable(target);
4928             rcache_invalidate();
4929           }
4930         } else
4931           // no space for resolving forward branch, handle it as external
4932           dbg(1, "warning: too many unresolved branches");
4933       }
4934 
4935       if (target == NULL)
4936       {
4937         // can't resolve branch locally, make a block exit
4938         bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
4939         if (cond != -1) {
4940 #if 1
4941           if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) {
4942             // conditional jumps get a blx stub for the far jump
4943             bl->type = BL_JCCBLX;
4944             target = tcache_ptr;
4945             blx_targets[blx_target_count++] =
4946                 (struct linkage) { .pc = target_pc, .ptr = target, .bl = bl };
4947             emith_jump_cond_patchable(cond, target);
4948           } else {
4949             // not linkable, or blx table full; inline jump @dispatcher
4950             EMITH_JMP_START(emith_invert_cond(cond));
4951             if (bl) {
4952               bl->jump = tcache_ptr;
4953               emith_flush(); // flush to inhibit insn swapping
4954               bl->type = BL_LDJMP;
4955             }
4956             tmp = rcache_get_tmp_arg(0);
4957             emith_move_r_imm(tmp, target_pc);
4958             rcache_free_tmp(tmp);
4959             target = sh2_drc_dispatcher;
4960 
4961             emith_jump_patchable(target);
4962             EMITH_JMP_END(emith_invert_cond(cond));
4963           }
4964 #else
4965           // jump @dispatcher - ARM 32bit version with conditional execution
4966           EMITH_SJMP_START(emith_invert_cond(cond));
4967           tmp = rcache_get_tmp_arg(0);
4968           emith_move_r_imm_c(cond, tmp, target_pc);
4969           rcache_free_tmp(tmp);
4970           target = sh2_drc_dispatcher;
4971 
4972           if (bl) {
4973             bl->jump = tcache_ptr;
4974             bl->type = BL_JMP;
4975           }
4976           emith_jump_cond_patchable(cond, target);
4977           EMITH_SJMP_END(emith_invert_cond(cond));
4978 #endif
4979         } else {
4980           // unconditional, has the far jump inlined
4981           if (bl) {
4982             emith_flush(); // flush to inhibit insn swapping
4983             bl->type = BL_LDJMP;
4984           }
4985 
4986           tmp = rcache_get_tmp_arg(0);
4987           emith_move_r_imm(tmp, target_pc);
4988           rcache_free_tmp(tmp);
4989           target = sh2_drc_dispatcher;
4990 
4991           emith_jump_patchable(target);
4992           rcache_invalidate();
4993         }
4994       }
4995 
4996 #if CALL_STACK
4997       if (rtsadd)
4998         emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
4999 #endif
5000 
5001       // branch not taken, correct cycle count
5002       if (ctaken)
5003         cycles -= ctaken;
5004       // set T bit to reflect branch not taken for OP_BRANCH_CT/CF
5005       if (emith_get_t_cond() >= 0) // T is synced for all other cases
5006         emith_set_t(sr, opd_b->op == OP_BRANCH_CF);
5007 
5008       drcf.pending_branch_direct = 0;
5009       if (target_pc >= base_pc && target_pc < pc)
5010         drcf.polling = drcf.loop_type = 0;
5011     }
5012     else if (drcf.pending_branch_indirect) {
5013       u32 target_pc;
5014 
5015       tmp = rcache_get_reg_arg(0, SHR_PC, NULL);
5016 
5017 #if CALL_STACK
5018       struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd;
5019       void *rtsadd = NULL, *rtsret = NULL;
5020 
5021       if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) {
5022         // JSR, BSRF - save rts data
5023         tmp = rcache_get_tmp_arg(1);
5024         rtsadd = tcache_ptr;
5025         emith_move_r_imm_s8_patchable(tmp, 0);
5026         rcache_clean_tmp();
5027         rcache_invalidate_tmp();
5028         emith_call(sh2_drc_dispatcher_call);
5029         rtsret = tcache_ptr;
5030       }
5031 #endif
5032 
5033       sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
5034       FLUSH_CYCLES(sr);
5035       emith_sync_t(sr);
5036       rcache_clean();
5037 
5038 #if CALL_STACK
5039       if (opd_b->rm == SHR_PR) {
5040         // RTS - restore rts data, else jump to dispatcher
5041         emith_jump(sh2_drc_dispatcher_return);
5042       } else
5043 #endif
5044       if (gconst_get(SHR_PC, &target_pc)) {
5045         // JMP, JSR, BRAF, BSRF const - treat like unconditional direct branch
5046         bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
5047         if (bl) // pc already loaded somewhere else, can patch jump only
5048           bl->type = BL_JMP;
5049         emith_jump_patchable(sh2_drc_dispatcher);
5050       } else {
5051         // JMP, JSR, BRAF, BSRF not const
5052         emith_jump(sh2_drc_dispatcher);
5053       }
5054       rcache_invalidate();
5055 
5056 #if CALL_STACK
5057       if (rtsadd)
5058         emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
5059 #endif
5060 
5061       drcf.pending_branch_indirect = 0;
5062       drcf.polling = drcf.loop_type = 0;
5063     }
5064     rcache_unlock_all();
5065 
5066     do_host_disasm(tcache_id);
5067   }
5068 
5069   // check the last op
5070   if (op_flags[i-1] & OF_DELAY_OP)
5071     opd = &ops[i-2];
5072   else
5073     opd = &ops[i-1];
5074 
5075   if (! OP_ISBRAUC(opd->op))
5076   {
5077     tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
5078     FLUSH_CYCLES(tmp);
5079     emith_sync_t(tmp);
5080 
5081     rcache_clean();
5082     bl = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id);
5083     if (bl) {
5084       emith_flush(); // flush to inhibit insn swapping
5085       bl->type = BL_LDJMP;
5086     }
5087     tmp = rcache_get_tmp_arg(0);
5088     emith_move_r_imm(tmp, pc);
5089     emith_jump_patchable(sh2_drc_dispatcher);
5090     rcache_invalidate();
5091   } else
5092     rcache_flush();
5093 
5094   // link unresolved branches, emitting blx area entries as needed
5095   emit_branch_linkage_code(sh2, block, tcache_id, branch_targets,
5096                       branch_target_count, blx_targets, blx_target_count);
5097 
5098   emith_flush();
5099   do_host_disasm(tcache_id);
5100 
5101   emith_pool_commit(0);
5102 
5103   // fill blx backup; do this last to backup final patched code
5104   for (i = 0; i < block->entry_count; i++)
5105     for (bl = block->entryp[i].o_links; bl; bl = bl->o_next)
5106       memcpy(bl->jdisp, bl->blx ? bl->blx : bl->jump, emith_jump_at_size());
5107 
5108   ring_alloc(&tcache_ring[tcache_id], tcache_ptr - block_entry_ptr);
5109   host_instructions_updated(block_entry_ptr, tcache_ptr, 1);
5110 
5111   dr_activate_block(block, tcache_id, sh2->is_slave);
5112   emith_update_cache();
5113 
5114   do_host_disasm(tcache_id);
5115 
5116   dbg(2, " block #%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f",
5117     tcache_id, blkid_main, tcache_ptr,
5118     tcache_ring[tcache_id].used, tcache_ring[tcache_id].size,
5119     insns_compiled, host_insn_count, (float)host_insn_count / insns_compiled);
5120   if ((sh2->pc & 0xc6000000) == 0x02000000) { // ROM
5121     dbg(2, "  hash collisions %d/%d", hash_collisions, block_ring[tcache_id].used);
5122     Pico32x.emu_flags |= P32XF_DRC_ROM_C;
5123   }
5124 /*
5125  printf("~~~\n");
5126  tcache_dsm_ptrs[tcache_id] = block_entry_ptr;
5127  do_host_disasm(tcache_id);
5128  printf("~~~\n");
5129 */
5130 
5131 #if (DRC_DEBUG)
5132   fflush(stdout);
5133 #endif
5134 
5135   return block_entry_ptr;
5136 }
5137 
5138 static void sh2_generate_utils(void)
5139 {
5140   int arg0, arg1, arg2, arg3, sr, tmp, tmp2;
5141 #if DRC_DEBUG
5142   int hic = host_insn_count; // don't count utils for insn statistics
5143 #endif
5144 
5145   host_arg2reg(arg0, 0);
5146   host_arg2reg(arg1, 1);
5147   host_arg2reg(arg2, 2);
5148   host_arg2reg(arg3, 3);
5149   emith_move_r_r(arg0, arg0); // nop
5150   emith_flush();
5151 
5152   // sh2_drc_write8(u32 a, u32 d)
5153   sh2_drc_write8 = (void *)tcache_ptr;
5154   emith_ctx_read_ptr(arg2, offsetof(SH2, write8_tab));
5155   emith_sh2_wcall(arg0, arg1, arg2, arg3);
5156   emith_flush();
5157 
5158   // sh2_drc_write16(u32 a, u32 d)
5159   sh2_drc_write16 = (void *)tcache_ptr;
5160   emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab));
5161   emith_sh2_wcall(arg0, arg1, arg2, arg3);
5162   emith_flush();
5163 
5164   // sh2_drc_write32(u32 a, u32 d)
5165   sh2_drc_write32 = (void *)tcache_ptr;
5166   emith_ctx_read_ptr(arg2, offsetof(SH2, write32_tab));
5167   emith_sh2_wcall(arg0, arg1, arg2, arg3);
5168   emith_flush();
5169 
5170   // d = sh2_drc_read8(u32 a)
5171   sh2_drc_read8 = (void *)tcache_ptr;
5172   emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
5173   EMITH_HINT_COND(DCOND_CS);
5174   emith_sh2_rcall(arg0, arg1, arg2, arg3);
5175   EMITH_SJMP_START(DCOND_CS);
5176   emith_and_r_r_c(DCOND_CC, arg0, arg3);
5177   emith_eor_r_imm_ptr_c(DCOND_CC, arg0, 1);
5178   emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
5179   emith_ret_c(DCOND_CC);
5180   EMITH_SJMP_END(DCOND_CS);
5181   emith_move_r_r_ptr(arg1, CONTEXT_REG);
5182   emith_abijump_reg(arg2);
5183   emith_flush();
5184 
5185   // d = sh2_drc_read16(u32 a)
5186   sh2_drc_read16 = (void *)tcache_ptr;
5187   emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
5188   EMITH_HINT_COND(DCOND_CS);
5189   emith_sh2_rcall(arg0, arg1, arg2, arg3);
5190   EMITH_SJMP_START(DCOND_CS);
5191   emith_and_r_r_c(DCOND_CC, arg0, arg3);
5192   emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
5193   emith_ret_c(DCOND_CC);
5194   EMITH_SJMP_END(DCOND_CS);
5195   emith_move_r_r_ptr(arg1, CONTEXT_REG);
5196   emith_abijump_reg(arg2);
5197   emith_flush();
5198 
5199   // d = sh2_drc_read32(u32 a)
5200   sh2_drc_read32 = (void *)tcache_ptr;
5201   emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
5202   EMITH_HINT_COND(DCOND_CS);
5203   emith_sh2_rcall(arg0, arg1, arg2, arg3);
5204   EMITH_SJMP_START(DCOND_CS);
5205   emith_and_r_r_c(DCOND_CC, arg0, arg3);
5206   emith_read_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
5207   emith_ror_c(DCOND_CC, RET_REG, RET_REG, 16);
5208   emith_ret_c(DCOND_CC);
5209   EMITH_SJMP_END(DCOND_CS);
5210   emith_move_r_r_ptr(arg1, CONTEXT_REG);
5211   emith_abijump_reg(arg2);
5212   emith_flush();
5213 
5214   // d = sh2_drc_read8_poll(u32 a)
5215   sh2_drc_read8_poll = (void *)tcache_ptr;
5216   emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
5217   EMITH_HINT_COND(DCOND_CS);
5218   emith_sh2_rcall(arg0, arg1, arg2, arg3);
5219   EMITH_SJMP_START(DCOND_CC);
5220   emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
5221   emith_abijump_reg_c(DCOND_CS, arg2);
5222   EMITH_SJMP_END(DCOND_CC);
5223   emith_and_r_r_r(arg1, arg0, arg3);
5224   emith_eor_r_imm_ptr(arg1, 1);
5225   emith_read8s_r_r_r(arg1, arg2, arg1);
5226   emith_push_ret(arg1);
5227   emith_move_r_r_ptr(arg2, CONTEXT_REG);
5228   emith_abicall(p32x_sh2_poll_memory8);
5229   emith_pop_and_ret(arg1);
5230   emith_flush();
5231 
5232   // d = sh2_drc_read16_poll(u32 a)
5233   sh2_drc_read16_poll = (void *)tcache_ptr;
5234   emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
5235   EMITH_HINT_COND(DCOND_CS);
5236   emith_sh2_rcall(arg0, arg1, arg2, arg3);
5237   EMITH_SJMP_START(DCOND_CC);
5238   emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
5239   emith_abijump_reg_c(DCOND_CS, arg2);
5240   EMITH_SJMP_END(DCOND_CC);
5241   emith_and_r_r_r(arg1, arg0, arg3);
5242   emith_read16s_r_r_r(arg1, arg2, arg1);
5243   emith_push_ret(arg1);
5244   emith_move_r_r_ptr(arg2, CONTEXT_REG);
5245   emith_abicall(p32x_sh2_poll_memory16);
5246   emith_pop_and_ret(arg1);
5247   emith_flush();
5248 
5249   // d = sh2_drc_read32_poll(u32 a)
5250   sh2_drc_read32_poll = (void *)tcache_ptr;
5251   emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
5252   EMITH_HINT_COND(DCOND_CS);
5253   emith_sh2_rcall(arg0, arg1, arg2, arg3);
5254   EMITH_SJMP_START(DCOND_CC);
5255   emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
5256   emith_abijump_reg_c(DCOND_CS, arg2);
5257   EMITH_SJMP_END(DCOND_CC);
5258   emith_and_r_r_r(arg1, arg0, arg3);
5259   emith_read_r_r_r(arg1, arg2, arg1);
5260   emith_ror(arg1, arg1, 16);
5261   emith_push_ret(arg1);
5262   emith_move_r_r_ptr(arg2, CONTEXT_REG);
5263   emith_abicall(p32x_sh2_poll_memory32);
5264   emith_pop_and_ret(arg1);
5265   emith_flush();
5266 
5267   // sh2_drc_exit(u32 pc)
5268   sh2_drc_exit = (void *)tcache_ptr;
5269   emith_ctx_write(arg0, SHR_PC * 4);
5270   emit_do_static_regs(1, arg2);
5271   emith_sh2_drc_exit();
5272   emith_flush();
5273 
5274   // sh2_drc_dispatcher(u32 pc)
5275   sh2_drc_dispatcher = (void *)tcache_ptr;
5276   emith_ctx_write(arg0, SHR_PC * 4);
5277 #if BRANCH_CACHE
5278   // check if PC is in branch target cache
5279   emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*8);
5280   emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 1 : 0);
5281   emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache));
5282   emith_cmp_r_r(arg2, arg0);
5283   EMITH_SJMP_START(DCOND_NE);
5284 #if (DRC_DEBUG & 128)
5285   emith_move_r_ptr_imm(arg2, (uptr)&bchit);
5286   emith_read_r_r_offs_c(DCOND_EQ, arg3, arg2, 0);
5287   emith_add_r_imm_c(DCOND_EQ, arg3, 1);
5288   emith_write_r_r_offs_c(DCOND_EQ, arg3, arg2, 0);
5289 #endif
5290   emith_read_r_r_offs_ptr_c(DCOND_EQ, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
5291   emith_jump_reg_c(DCOND_EQ, RET_REG);
5292   EMITH_SJMP_END(DCOND_NE);
5293 #endif
5294   emith_move_r_r_ptr(arg1, CONTEXT_REG);
5295   emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp));
5296   emith_abicall(dr_lookup_block);
5297   // store PC and block entry ptr (in arg0) in branch target cache
5298   emith_tst_r_r_ptr(RET_REG, RET_REG);
5299   EMITH_SJMP_START(DCOND_EQ);
5300 #if BRANCH_CACHE
5301 #if (DRC_DEBUG & 128)
5302   emith_move_r_ptr_imm(arg2, (uptr)&bcmiss);
5303   emith_read_r_r_offs_c(DCOND_NE, arg3, arg2, 0);
5304   emith_add_r_imm_c(DCOND_NE, arg3, 1);
5305   emith_write_r_r_offs_c(DCOND_NE, arg3, arg2, 0);
5306 #endif
5307   emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4);
5308   emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*8);
5309   emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 1 : 0);
5310   emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache));
5311   emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
5312 #endif
5313   emith_jump_reg_c(DCOND_NE, RET_REG);
5314   EMITH_SJMP_END(DCOND_EQ);
5315   // lookup failed, call sh2_translate()
5316   emith_move_r_r_ptr(arg0, CONTEXT_REG);
5317   emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id
5318   emith_abicall(sh2_translate);
5319   emith_tst_r_r_ptr(RET_REG, RET_REG);
5320   EMITH_SJMP_START(DCOND_EQ);
5321   emith_jump_reg_c(DCOND_NE, RET_REG);
5322   EMITH_SJMP_END(DCOND_EQ);
5323   // XXX: can't translate, fail
5324   emith_abicall(dr_failure);
5325   emith_flush();
5326 
5327 #if CALL_STACK
5328   // pc = sh2_drc_dispatcher_call(u32 pc)
5329   sh2_drc_dispatcher_call = (void *)tcache_ptr;
5330   emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
5331   emith_add_r_imm(arg2, (u32)(2*sizeof(void *)));
5332   emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
5333   emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
5334   emith_add_r_r_r_lsl_ptr(arg3, CONTEXT_REG, arg2, 0);
5335   rcache_get_reg_arg(2, SHR_PR, NULL);
5336   emith_add_r_ret(arg1);
5337   emith_write_r_r_offs_ptr(arg1, arg3, offsetof(SH2, rts_cache)+sizeof(void *));
5338   emith_write_r_r_offs(arg2, arg3, offsetof(SH2, rts_cache));
5339   rcache_flush();
5340   emith_ret();
5341   emith_flush();
5342 
5343   // sh2_drc_dispatcher_return(u32 pc)
5344   sh2_drc_dispatcher_return = (void *)tcache_ptr;
5345   emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
5346   emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0);
5347   emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache));
5348   emith_cmp_r_r(arg0, arg3);
5349 #if (DRC_DEBUG & 128)
5350   EMITH_SJMP_START(DCOND_EQ);
5351   emith_move_r_ptr_imm(arg3, (uptr)&rcmiss);
5352   emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
5353   emith_add_r_imm_c(DCOND_NE, arg1, 1);
5354   emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
5355   emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
5356   EMITH_SJMP_END(DCOND_EQ);
5357 #else
5358   emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
5359 #endif
5360   emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *));
5361   emith_sub_r_imm(arg2, (u32)(2*sizeof(void *)));
5362   emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
5363   emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
5364 #if (DRC_DEBUG & 128)
5365   emith_move_r_ptr_imm(arg3, (uptr)&rchit);
5366   emith_read_r_r_offs(arg1, arg3, 0);
5367   emith_add_r_imm(arg1, 1);
5368   emith_write_r_r_offs(arg1, arg3, 0);
5369 #endif
5370   emith_jump_reg(arg0);
5371   emith_flush();
5372 #endif
5373 
5374   // sh2_drc_test_irq(void)
5375   // assumes it's called from main function (may jump to dispatcher)
5376   sh2_drc_test_irq = (void *)tcache_ptr;
5377   emith_ctx_read(arg1, offsetof(SH2, pending_level));
5378   sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
5379   emith_lsr(arg0, sr, I_SHIFT);
5380   emith_and_r_imm(arg0, 0x0f);
5381   emith_cmp_r_r(arg1, arg0); // pending_level > ((sr >> 4) & 0x0f)?
5382   EMITH_SJMP_START(DCOND_GT);
5383   emith_ret_c(DCOND_LE);     // nope, return
5384   EMITH_SJMP_END(DCOND_GT);
5385   // adjust SP
5386   tmp = rcache_get_reg(SHR_SP, RC_GR_RMW, NULL);
5387   emith_sub_r_imm(tmp, 4*2);
5388   rcache_clean();
5389   // push SR
5390   tmp = rcache_get_reg_arg(0, SHR_SP, &tmp2);
5391   emith_add_r_r_imm(tmp, tmp2, 4);
5392   tmp = rcache_get_reg_arg(1, SHR_SR, NULL);
5393   emith_clear_msb(tmp, tmp, 22);
5394   emith_move_r_r_ptr(arg2, CONTEXT_REG);
5395   rcache_invalidate_tmp();
5396   emith_abicall(p32x_sh2_write32); // XXX: use sh2_drc_write32?
5397   // push PC
5398   rcache_get_reg_arg(0, SHR_SP, NULL);
5399   rcache_get_reg_arg(1, SHR_PC, NULL);
5400   emith_move_r_r_ptr(arg2, CONTEXT_REG);
5401   rcache_invalidate_tmp();
5402   emith_abicall(p32x_sh2_write32);
5403   // update I, cycles, do callback
5404   emith_ctx_read(arg1, offsetof(SH2, pending_level));
5405   sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
5406   emith_bic_r_imm(sr, I);
5407   emith_or_r_r_lsl(sr, arg1, I_SHIFT);
5408   emith_sub_r_imm(sr, 13 << 12); // at least 13 cycles
5409   rcache_flush();
5410   emith_move_r_r_ptr(arg0, CONTEXT_REG);
5411   emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level);
5412   // obtain new PC
5413   tmp = rcache_get_reg_arg(1, SHR_VBR, &tmp2);
5414   emith_add_r_r_r_lsl(arg0, tmp2, RET_REG, 2);
5415   emith_call(sh2_drc_read32);
5416   if (arg0 != RET_REG)
5417     emith_move_r_r(arg0, RET_REG);
5418   emith_call_cleanup();
5419   rcache_invalidate();
5420   emith_jump(sh2_drc_dispatcher);
5421   emith_flush();
5422 
5423   // sh2_drc_entry(SH2 *sh2)
5424   sh2_drc_entry = (void *)tcache_ptr;
5425   emith_sh2_drc_entry();
5426   emith_move_r_r_ptr(CONTEXT_REG, arg0); // move ctx, arg0
5427   emit_do_static_regs(0, arg2);
5428   emith_call(sh2_drc_test_irq);
5429   emith_ctx_read(arg0, SHR_PC * 4);
5430   emith_jump(sh2_drc_dispatcher);
5431   emith_flush();
5432 
5433 #ifdef DRC_SR_REG
5434   // sh2_drc_save_sr(SH2 *sh2)
5435   sh2_drc_save_sr = (void *)tcache_ptr;
5436   tmp = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
5437   emith_write_r_r_offs(tmp, arg0, SHR_SR * 4);
5438   rcache_invalidate();
5439   emith_ret();
5440   emith_flush();
5441 
5442   // sh2_drc_restore_sr(SH2 *sh2)
5443   sh2_drc_restore_sr = (void *)tcache_ptr;
5444   tmp = rcache_get_reg(SHR_SR, RC_GR_WRITE, NULL);
5445   emith_read_r_r_offs(tmp, arg0, SHR_SR * 4);
5446   rcache_flush();
5447   emith_ret();
5448   emith_flush();
5449 #endif
5450 
5451 #ifdef PDB_NET
5452   // debug
5453   #define MAKE_READ_WRAPPER(func) { \
5454     void *tmp = (void *)tcache_ptr; \
5455     emith_push_ret(); \
5456     emith_call(func); \
5457     emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0]));  \
5458     emith_addf_r_r(arg2, arg0);                           \
5459     emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[0])); \
5460     emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1]));  \
5461     emith_adc_r_imm(arg2, 0x01000000);                    \
5462     emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \
5463     emith_pop_and_ret(); \
5464     emith_flush(); \
5465     func = tmp; \
5466   }
5467   #define MAKE_WRITE_WRAPPER(func) { \
5468     void *tmp = (void *)tcache_ptr; \
5469     emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0]));  \
5470     emith_addf_r_r(arg2, arg1);                           \
5471     emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[0])); \
5472     emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1]));  \
5473     emith_adc_r_imm(arg2, 0x01000000);                    \
5474     emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \
5475     emith_move_r_r_ptr(arg2, CONTEXT_REG);                \
5476     emith_jump(func); \
5477     emith_flush(); \
5478     func = tmp; \
5479   }
5480 
5481   MAKE_READ_WRAPPER(sh2_drc_read8);
5482   MAKE_READ_WRAPPER(sh2_drc_read16);
5483   MAKE_READ_WRAPPER(sh2_drc_read32);
5484   MAKE_WRITE_WRAPPER(sh2_drc_write8);
5485   MAKE_WRITE_WRAPPER(sh2_drc_write16);
5486   MAKE_WRITE_WRAPPER(sh2_drc_write32);
5487   MAKE_READ_WRAPPER(sh2_drc_read8_poll);
5488   MAKE_READ_WRAPPER(sh2_drc_read16_poll);
5489   MAKE_READ_WRAPPER(sh2_drc_read32_poll);
5490 #endif
5491 
5492   emith_pool_commit(0);
5493   rcache_invalidate();
5494 #if (DRC_DEBUG & 4)
5495   host_dasm_new_symbol(sh2_drc_entry);
5496   host_dasm_new_symbol(sh2_drc_dispatcher);
5497 #if CALL_STACK
5498   host_dasm_new_symbol(sh2_drc_dispatcher_call);
5499   host_dasm_new_symbol(sh2_drc_dispatcher_return);
5500 #endif
5501   host_dasm_new_symbol(sh2_drc_exit);
5502   host_dasm_new_symbol(sh2_drc_test_irq);
5503   host_dasm_new_symbol(sh2_drc_write8);
5504   host_dasm_new_symbol(sh2_drc_write16);
5505   host_dasm_new_symbol(sh2_drc_write32);
5506   host_dasm_new_symbol(sh2_drc_read8);
5507   host_dasm_new_symbol(sh2_drc_read16);
5508   host_dasm_new_symbol(sh2_drc_read32);
5509   host_dasm_new_symbol(sh2_drc_read8_poll);
5510   host_dasm_new_symbol(sh2_drc_read16_poll);
5511   host_dasm_new_symbol(sh2_drc_read32_poll);
5512 #ifdef DRC_SR_REG
5513   host_dasm_new_symbol(sh2_drc_save_sr);
5514   host_dasm_new_symbol(sh2_drc_restore_sr);
5515 #endif
5516 #endif
5517 
5518 #if DRC_DEBUG
5519   host_insn_count = hic;
5520 #endif
5521 }
5522 
// Self-modifying code handling: invalidate all translated blocks whose code
// or literal area overlaps the written range [a, a+len) in tcache tcache_id,
// then drop the per-CPU jump caches since cached host addresses may be stale.
static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift)
{
  const u32 wt = ~0x20000000;              // mask off the writethrough mirror bit
  u32 page_mask = RAM_SIZE(tcache_id) - 1;
  struct block_list *it, *nxt;
  int i;
#if (DRC_DEBUG & 2)
  int removed = 0;
#endif

  // ignore cache-through
  a &= wt;

  // walk the invalidation page list covering the written address
  for (it = inval_lookup[tcache_id][(a & page_mask) / INVAL_PAGE_SIZE]; it != NULL; it = nxt)
  {
    struct block_desc *blk = it->block;
    u32 b_start = blk->addr & wt;
    u32 b_end   = b_start + blk->size;
    u32 l_start = blk->addr_lit & wt;
    u32 l_end   = l_start + blk->size_lit;

    nxt = it->next; // the list entry may be freed below
    // disable/delete block if it covers the modified address
    if ((b_start < a+len && a < b_end) ||
        (l_start < a+len && a < l_end))
    {
      u32 lit_hit;
      dbg(2, "smc remove @%08x", a);
      // pass the written address only if the block's literal pool was hit
      lit_hit = (l_start < a+len && blk->size_lit ? a : 0);
      dr_rm_block_entry(blk, tcache_id, lit_hit, 0);
#if (DRC_DEBUG & 2)
      removed = 1;
#endif
    }
  }
#if (DRC_DEBUG & 2)
  if (!removed)
    dbg(2, "rm_blocks called @%08x, no work?", a);
#endif
  // tcache 0 (ROM/SDRAM) is shared, so both CPUs' caches must go;
  // tcaches 1/2 are per-CPU, only that CPU's caches are affected
#if BRANCH_CACHE
  for (i = 0; i < 2; i++)
    if (!tcache_id || tcache_id-1 == i)
      memset32(sh2s[i].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
#endif
#if CALL_STACK
  for (i = 0; i < 2; i++)
    if (!tcache_id || tcache_id-1 == i) {
      memset32(sh2s[i].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4);
      sh2s[i].rts_cache_idx = 0;
    }
#endif
}
5584 
// SDRAM write watch callback: remove translated blocks covering [a, a+len).
// SDRAM blocks live in the shared tcache 0.
void sh2_drc_wcheck_ram(u32 a, unsigned len, SH2 *sh2)
{
  sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT);
}
5589 
// data array write watch callback: remove translated blocks covering
// [a, a+len) in the writing CPU's private tcache (1 = master, 2 = slave)
void sh2_drc_wcheck_da(u32 a, unsigned len, SH2 *sh2)
{
  sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT);
}
5594 
// Run the given SH2 under the recompiler for (at least) the given number of
// cycles. Returns the remaining cycle count, which is <= 0 when the budget
// was consumed.
int sh2_execute_drc(SH2 *sh2c, int cycles)
{
  int cycles_left;

  // the cycle budget is kept in the unused upper 20 bits of SR;
  // bit 11 holds T saved for delay slots, the rest are the usual SH2 flags
  sh2c->sr = (sh2c->sr & 0x3f3) | (cycles << 12);

  sh2c->state |= SH2_IN_DRC;
  sh2_drc_entry(sh2c);
  sh2c->state &= ~SH2_IN_DRC;

  // TODO: irq cycles
  cycles_left = (int32_t)sh2c->sr >> 12; // arithmetic shift recovers the signed count
  if (cycles_left > 0)
    dbg(1, "warning: drc returned with cycles: %d, pc %08x", cycles_left, sh2c->pc);

  sh2c->sr &= 0x3f3; // strip the cycle bits again before returning
  return cycles_left;
}
5617 
// Debug statistics: print the 20 most-executed blocks with their share of
// all block executions, then reset all refcounts. No-op unless built with
// DRC_DEBUG & 2.
static void block_stats(void)
{
#if (DRC_DEBUG & 2)
  long total = 0;
  int t, idx, pass;

  printf("block stats:\n");
  for (t = 0; t < ARRAY_SIZE(block_tables); t++)
    for (idx = block_ring[t].first; idx != block_ring[t].next; idx = (idx+1)%block_ring[t].size)
      if (block_tables[t][idx].addr != 0)
        total += block_tables[t][idx].refcount;
  printf("total: %ld\n",total);

  // selection without sorting: find the max, print it, zero it, repeat
  for (pass = 0; pass < 20; pass++) {
    struct block_desc *top = NULL;
    int best = 0;
    for (t = 0; t < ARRAY_SIZE(block_tables); t++)
      for (idx = block_ring[t].first; idx != block_ring[t].next; idx = (idx+1)%block_ring[t].size) {
        struct block_desc *d = &block_tables[t][idx];
        if (d->addr != 0 && d->refcount > best) {
          best = d->refcount;
          top = d;
        }
      }
    if (top == NULL)
      break; // no block executed at all
    printf("%08x %p %9d %2.3f%%\n", top->addr, top->tcache_ptr, top->refcount,
      (double)top->refcount / total * 100.0);
    top->refcount = 0; // so the next pass finds the runner-up
  }

  // reset counters for the next measuring interval
  for (t = 0; t < ARRAY_SIZE(block_tables); t++)
    for (idx = block_ring[t].first; idx != block_ring[t].next; idx = (idx+1)%block_ring[t].size)
      block_tables[t][idx].refcount = 0;
#endif
}
5654 
// Debug statistics: print the 20 hottest block entry points with their share
// of all block entries, then reset all entry counters. No-op unless built
// with DRC_DEBUG & 32.
void entry_stats(void)
{
#if (DRC_DEBUG & 32)
  long total = 0;
  int t, idx, e, pass;

  printf("block entry stats:\n");
  for (t = 0; t < ARRAY_SIZE(block_tables); t++)
    for (idx = block_ring[t].first; idx != block_ring[t].next; idx = (idx+1)%block_ring[t].size)
      for (e = 0; e < block_tables[t][idx].entry_count; e++)
        total += block_tables[t][idx].entryp[e].entry_count;
  printf("total: %ld\n",total);

  // selection without sorting: find the max, print it, zero it, repeat
  for (pass = 0; pass < 20; pass++) {
    struct block_entry *top = NULL;
    int best = 0;
    for (t = 0; t < ARRAY_SIZE(block_tables); t++)
      for (idx = block_ring[t].first; idx != block_ring[t].next; idx = (idx+1)%block_ring[t].size) {
        struct block_desc *d = &block_tables[t][idx];
        for (e = 0; e < d->entry_count; e++)
          if (d->entryp[e].entry_count > best) {
            best = d->entryp[e].entry_count;
            top = &d->entryp[e];
          }
      }
    if (top == NULL)
      break; // nothing was entered at all
    printf("%08x %p %9d %2.3f%%\n", top->pc, top->tcache_ptr, top->entry_count,
      (double)100 * top->entry_count / total);
    top->entry_count = 0; // so the next pass finds the runner-up
  }

  // reset counters for the next measuring interval
  for (t = 0; t < ARRAY_SIZE(block_tables); t++)
    for (idx = block_ring[t].first; idx != block_ring[t].next; idx = (idx+1)%block_ring[t].size)
      for (e = 0; e < block_tables[t][idx].entry_count; e++)
        block_tables[t][idx].entryp[e].entry_count = 0;
#endif
}
5697 
// Dump the recorded execution history of both CPUs via SH2_DUMP.
// No-op unless built with DRC_DEBUG & 1024.
static void backtrace(void)
{
#if (DRC_DEBUG & 1024)
  int i;
  printf("backtrace master:\n");
  for (i = 0; i < ARRAY_SIZE(csh2[0]); i++)
    SH2_DUMP(&csh2[0][i], "bt msh2");
  printf("backtrace slave:\n");
  for (i = 0; i < ARRAY_SIZE(csh2[1]); i++)
    SH2_DUMP(&csh2[1][i], "bt ssh2");
#endif
}
5710 
// Dump the full register state of both CPUs, plus 0x60 longwords of each
// vector table (from VBR) and 0x60 longwords around each stack pointer.
// No-op unless built with DRC_DEBUG & 2048.
static void state_dump(void)
{
#if (DRC_DEBUG & 2048)
  int i;

  SH2_DUMP(&sh2s[0], "master");
  printf("VBR msh2: %x\n", sh2s[0].vbr);
  for (i = 0; i < 0x60; i++) {
    printf("%08x ",p32x_sh2_read32(sh2s[0].vbr + i*4, &sh2s[0]));
    if ((i+1) % 8 == 0) printf("\n"); // 8 longwords per output line
  }
  printf("stack msh2: %x\n", sh2s[0].r[15]);
  for (i = -0x30; i < 0x30; i++) {   // r[15] is the SH2 stack pointer
    printf("%08x ",p32x_sh2_read32(sh2s[0].r[15] + i*4, &sh2s[0]));
    if ((i+1) % 8 == 0) printf("\n");
  }
  SH2_DUMP(&sh2s[1], "slave");
  printf("VBR ssh2: %x\n", sh2s[1].vbr);
  for (i = 0; i < 0x60; i++) {
    printf("%08x ",p32x_sh2_read32(sh2s[1].vbr + i*4, &sh2s[1]));
    if ((i+1) % 8 == 0) printf("\n");
  }
  printf("stack ssh2: %x\n", sh2s[1].r[15]);
  for (i = -0x30; i < 0x30; i++) {
    printf("%08x ",p32x_sh2_read32(sh2s[1].r[15] + i*4, &sh2s[1]));
    if ((i+1) % 8 == 0) printf("\n");
  }
#endif
}
5740 
// Print hit/miss counters and the contents of the return (rts) cache and
// branch target cache of both CPUs. No-op unless built with DRC_DEBUG & 128.
static void bcache_stats(void)
{
#if (DRC_DEBUG & 128)
  int i;
#if CALL_STACK
  // estimate the used depth: first slot still holding the -1 reset value
  // on both CPUs (slot 0 is skipped as it is always written first)
  for (i = 1; i < ARRAY_SIZE(sh2s->rts_cache); i++)
    if (sh2s[0].rts_cache[i].pc == -1 && sh2s[1].rts_cache[i].pc == -1) break;

  printf("return cache hits:%d misses:%d depth: %d index: %d/%d\n", rchit, rcmiss, i,sh2s[0].rts_cache_idx,sh2s[1].rts_cache_idx);
  for (i = 0; i < ARRAY_SIZE(sh2s[0].rts_cache); i++) {
    printf("%08x ",sh2s[0].rts_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n"); // 8 entries per output line
  }
  for (i = 0; i < ARRAY_SIZE(sh2s[1].rts_cache); i++) {
    printf("%08x ",sh2s[1].rts_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n");
  }
#endif
#if BRANCH_CACHE
  printf("branch cache hits:%d misses:%d\n", bchit, bcmiss);
  printf("branch cache master:\n");
  for (i = 0; i < ARRAY_SIZE(sh2s[0].branch_cache); i++) {
    printf("%08x ",sh2s[0].branch_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n");
  }
  printf("branch cache slave:\n");
  for (i = 0; i < ARRAY_SIZE(sh2s[1].branch_cache); i++) {
    printf("%08x ",sh2s[1].branch_cache[i].pc);
    if ((i+1) % 8 == 0) printf("\n");
  }
#endif
#endif
}
5774 
sh2_drc_flush_all(void)5775 void sh2_drc_flush_all(void)
5776 {
5777   backtrace();
5778   state_dump();
5779   block_stats();
5780   entry_stats();
5781   bcache_stats();
5782   dr_flush_tcache(0);
5783   dr_flush_tcache(1);
5784   dr_flush_tcache(2);
5785   Pico32x.emu_flags &= ~P32XF_DRC_ROM_C;
5786 }
5787 
// Initialize the DRC-only convenience pointers in the SH2 context so the
// generated code can reach the SMC detection bitmaps directly.
void sh2_drc_mem_setup(SH2 *sh2)
{
  // fill the DRC-only convenience pointers
  sh2->p_drcblk_da = Pico32xMem->drcblk_da[!!sh2->is_slave];
  sh2->p_drcblk_ram = Pico32xMem->drcblk_ram;
}
5794 
// Initialize the recompiler. The first call allocates all global translator
// state (block/entry/link tables, invalidation lookup, hash tables) and
// emits the utility routines into the tcache; subsequent calls only reset
// the per-CPU caches of the given context.
// Returns 0 on success, -1 on allocation failure (after cleaning up).
int sh2_drc_init(SH2 *sh2)
{
  int i;

  if (block_tables[0] == NULL)
  {
    for (i = 0; i < TCACHE_BUFFERS; i++) {
      block_tables[i] = calloc(BLOCK_MAX_COUNT(i), sizeof(*block_tables[0]));
      if (block_tables[i] == NULL)
        goto fail;
      entry_tables[i] = calloc(ENTRY_MAX_COUNT(i), sizeof(*entry_tables[0]));
      if (entry_tables[i] == NULL)
        goto fail;
      block_link_pool[i] = calloc(BLOCK_LINK_MAX_COUNT(i),
                          sizeof(*block_link_pool[0]));
      if (block_link_pool[i] == NULL)
        goto fail;

      // one list head per invalidation page; use sizeof(*...) like the
      // other allocations (both are pointer-sized, but this names the
      // actual element type)
      inval_lookup[i] = calloc(RAM_SIZE(i) / INVAL_PAGE_SIZE,
                               sizeof(*inval_lookup[0]));
      if (inval_lookup[i] == NULL)
        goto fail;

      hash_tables[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*hash_tables[0]));
      if (hash_tables[i] == NULL)
        goto fail;

      unresolved_links[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*unresolved_links[0]));
      if (unresolved_links[i] == NULL)
        goto fail;

      RING_INIT(&block_ring[i], block_tables[i], BLOCK_MAX_COUNT(i));
      RING_INIT(&entry_ring[i], entry_tables[i], ENTRY_MAX_COUNT(i));
    }

    block_list_pool = calloc(BLOCK_LIST_MAX_COUNT, sizeof(*block_list_pool));
    if (block_list_pool == NULL)
      goto fail;
    block_list_pool_count = 0;
    blist_free = NULL;

    memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts));
    memset(blink_free, 0, sizeof(blink_free));

    drc_cmn_init();
    rcache_init();

    // emit the utility routines at the very start of the tcache
    tcache_ptr = tcache;
    sh2_generate_utils();
    host_instructions_updated(tcache, tcache_ptr, 1);
    emith_update_cache();

    // tcache 0 starts right after the utils; the others follow contiguously
    i = tcache_ptr - tcache;
    RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i);
    for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) {
      RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_ring[i-1].size,
                  tcache_sizes[i]);
    }

#if (DRC_DEBUG & 4)
    for (i = 0; i < ARRAY_SIZE(block_tables); i++)
      tcache_dsm_ptrs[i] = tcache_ring[i].base;
    // disasm the utils
    tcache_dsm_ptrs[0] = tcache;
    do_host_disasm(0);
    fflush(stdout);
#endif
#if (DRC_DEBUG & 1)
    hash_collisions = 0;
#endif
  }

  // per-CPU state: invalidate the jump caches of this context
  memset(sh2->branch_cache, -1, sizeof(sh2->branch_cache));
  memset(sh2->rts_cache, -1, sizeof(sh2->rts_cache));
  sh2->rts_cache_idx = 0;

  return 0;

fail:
  sh2_drc_finish(sh2);
  return -1;
}
5877 
// Tear down the recompiler: print final debug info, flush all translated
// code, and free every buffer allocated by sh2_drc_init. Safe to call when
// init never ran (or already finished); free(NULL) is a no-op, so the
// per-pointer NULL guards of the old version were redundant and are gone.
void sh2_drc_finish(SH2 *sh2)
{
  int i;

  if (block_tables[0] == NULL)
    return; // never initialized, nothing to do

#if (DRC_DEBUG & (256|512))
   if (trace[0]) fclose(trace[0]);
   if (trace[1]) fclose(trace[1]);
   trace[0] = trace[1] = NULL;
#endif

#if (DRC_DEBUG & 4)
  for (i = 0; i < TCACHE_BUFFERS; i++) {
    printf("~~~ tcache %d\n", i);
#if 0
    if (tcache_ring[i].first < tcache_ring[i].next) {
      tcache_dsm_ptrs[i] = tcache_ring[i].first;
      tcache_ptr = tcache_ring[i].next;
      do_host_disasm(i);
    } else if (tcache_ring[i].used) {
      tcache_dsm_ptrs[i] = tcache_ring[i].first;
      tcache_ptr = tcache_ring[i].base + tcache_ring[i].size;
      do_host_disasm(i);
      tcache_dsm_ptrs[i] = tcache_ring[i].base;
      tcache_ptr = tcache_ring[i].next;
      do_host_disasm(i);
    }
#endif
    printf("max links: %d\n", block_link_pool_counts[i]);
  }
  printf("max block list: %d\n", block_list_pool_count);
#endif

  sh2_drc_flush_all();

  // release all per-tcache buffers; NULL the pointers so a later
  // sh2_drc_init() sees a clean slate
  for (i = 0; i < TCACHE_BUFFERS; i++) {
    free(block_tables[i]);
    block_tables[i] = NULL;

    free(entry_tables[i]);
    entry_tables[i] = NULL;

    free(block_link_pool[i]);
    block_link_pool[i] = NULL;
    blink_free[i] = NULL;

    free(inval_lookup[i]);
    inval_lookup[i] = NULL;

    free(hash_tables[i]);
    hash_tables[i] = NULL;

    free(unresolved_links[i]);
    unresolved_links[i] = NULL;
  }

  free(block_list_pool);
  block_list_pool = NULL;
  blist_free = NULL;

  drc_cmn_cleanup();
}
5949 
5950 #endif /* DRC_SH2 */
5951 
// Return a host pointer that, when indexed with a guest PC, yields the
// instruction memory for that PC — i.e. host base minus the non-offset part
// of the address. Returns (void *)-1 if the PC maps to no fetchable memory.
static void *dr_get_pc_base(u32 pc, SH2 *sh2)
{
  u32 mask = 0;
  void *base = p32x_sh2_get_mem_ptr(pc, &mask, sh2);

  if (base != (void *)-1)
    base = (char *)base - (pc & ~mask); // rebase so base[pc] is valid
  return base;
}
5963 
scan_block(u32 base_pc,int is_slave,u8 * op_flags,u32 * end_pc_out,u32 * base_literals_out,u32 * end_literals_out)5964 u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
5965   u32 *base_literals_out, u32 *end_literals_out)
5966 {
5967   u16 *dr_pc_base;
5968   u32 pc, op, tmp;
5969   u32 end_pc, end_literals = 0;
5970   u32 lowest_literal = 0;
5971   u32 lowest_mova = 0;
5972   struct op_data *opd;
5973   int next_is_delay = 0;
5974   int end_block = 0;
5975   int is_divop;
5976   int i, i_end, i_div = -1;
5977   u32 crc = 0;
5978   // 2nd pass stuff
5979   int last_btarget; // loop detector
5980   enum { T_UNKNOWN, T_CLEAR, T_SET } t; // T propagation state
5981 
5982   memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT);
5983   op_flags[0] |= OF_BTARGET; // block start is always a target
5984 
5985   dr_pc_base = dr_get_pc_base(base_pc, &sh2s[!!is_slave]);
5986 
5987   // 1st pass: disassemble
5988   for (i = 0, pc = base_pc; ; i++, pc += 2) {
5989     // we need an ops[] entry after the last one initialized,
5990     // so do it before end_block checks
5991     opd = &ops[i];
5992     opd->op = OP_UNHANDLED;
5993     opd->rm = -1;
5994     opd->source = opd->dest = 0;
5995     opd->cycles = 1;
5996     opd->imm = 0;
5997 
5998     if (next_is_delay) {
5999       op_flags[i] |= OF_DELAY_OP;
6000       next_is_delay = 0;
6001     }
6002     else if (end_block || i >= BLOCK_INSN_LIMIT - 2)
6003       break;
6004     else if ((lowest_mova && lowest_mova <= pc) ||
6005               (lowest_literal && lowest_literal <= pc))
6006       break; // text area collides with data area
6007 
6008     is_divop = 0;
6009     op = FETCH_OP(pc);
6010     switch ((op & 0xf000) >> 12)
6011     {
6012     /////////////////////////////////////////////
6013     case 0x00:
6014       switch (op & 0x0f)
6015       {
6016       case 0x02:
6017         switch (GET_Fx())
6018         {
6019         case 0: // STC SR,Rn  0000nnnn00000010
6020           tmp = BITMASK2(SHR_SR, SHR_T);
6021           break;
6022         case 1: // STC GBR,Rn 0000nnnn00010010
6023           tmp = BITMASK1(SHR_GBR);
6024           break;
6025         case 2: // STC VBR,Rn 0000nnnn00100010
6026           tmp = BITMASK1(SHR_VBR);
6027           break;
6028         default:
6029           goto undefined;
6030         }
6031         opd->op = OP_MOVE;
6032         opd->source = tmp;
6033         opd->dest = BITMASK1(GET_Rn());
6034         break;
6035       case 0x03:
6036         CHECK_UNHANDLED_BITS(0xd0, undefined);
6037         // BRAF Rm    0000mmmm00100011
6038         // BSRF Rm    0000mmmm00000011
6039         opd->op = OP_BRANCH_RF;
6040         opd->rm = GET_Rn();
6041         opd->source = BITMASK2(SHR_PC, opd->rm);
6042         opd->dest = BITMASK1(SHR_PC);
6043         if (!(op & 0x20))
6044           opd->dest |= BITMASK1(SHR_PR);
6045         opd->cycles = 2;
6046         next_is_delay = 1;
6047         if (!(opd->dest & BITMASK1(SHR_PR)))
6048           end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
6049         else
6050           op_flags[i+1+next_is_delay] |= OF_BTARGET;
6051         break;
6052       case 0x04: // MOV.B Rm,@(R0,Rn)   0000nnnnmmmm0100
6053       case 0x05: // MOV.W Rm,@(R0,Rn)   0000nnnnmmmm0101
6054       case 0x06: // MOV.L Rm,@(R0,Rn)   0000nnnnmmmm0110
6055         opd->source = BITMASK3(GET_Rm(), SHR_R0, GET_Rn());
6056         opd->dest = BITMASK1(SHR_MEM);
6057         break;
6058       case 0x07:
6059         // MUL.L     Rm,Rn      0000nnnnmmmm0111
6060         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6061         opd->dest = BITMASK1(SHR_MACL);
6062         opd->cycles = 2;
6063         break;
6064       case 0x08:
6065         CHECK_UNHANDLED_BITS(0xf00, undefined);
6066         switch (GET_Fx())
6067         {
6068         case 0: // CLRT               0000000000001000
6069           opd->op = OP_SETCLRT;
6070           opd->dest = BITMASK1(SHR_T);
6071           opd->imm = 0;
6072           break;
6073         case 1: // SETT               0000000000011000
6074           opd->op = OP_SETCLRT;
6075           opd->dest = BITMASK1(SHR_T);
6076           opd->imm = 1;
6077           break;
6078         case 2: // CLRMAC             0000000000101000
6079           opd->dest = BITMASK2(SHR_MACL, SHR_MACH);
6080           break;
6081         default:
6082           goto undefined;
6083         }
6084         break;
6085       case 0x09:
6086         switch (GET_Fx())
6087         {
6088         case 0: // NOP        0000000000001001
6089           CHECK_UNHANDLED_BITS(0xf00, undefined);
6090           break;
6091         case 1: // DIV0U      0000000000011001
6092           CHECK_UNHANDLED_BITS(0xf00, undefined);
6093           opd->op = OP_DIV0;
6094           opd->source = BITMASK1(SHR_SR);
6095           opd->dest = BITMASK2(SHR_SR, SHR_T);
6096           div(opd) = (struct div){ .rn=SHR_MEM, .rm=SHR_MEM, .ro=SHR_MEM };
6097           i_div = i;
6098           is_divop = 1;
6099           break;
6100         case 2: // MOVT Rn    0000nnnn00101001
6101           opd->source = BITMASK1(SHR_T);
6102           opd->dest = BITMASK1(GET_Rn());
6103           break;
6104         default:
6105           goto undefined;
6106         }
6107         break;
6108       case 0x0a:
6109         switch (GET_Fx())
6110         {
6111         case 0: // STS      MACH,Rn   0000nnnn00001010
6112           tmp = SHR_MACH;
6113           break;
6114         case 1: // STS      MACL,Rn   0000nnnn00011010
6115           tmp = SHR_MACL;
6116           break;
6117         case 2: // STS      PR,Rn     0000nnnn00101010
6118           tmp = SHR_PR;
6119           break;
6120         default:
6121           goto undefined;
6122         }
6123         opd->op = OP_MOVE;
6124         opd->source = BITMASK1(tmp);
6125         opd->dest = BITMASK1(GET_Rn());
6126         break;
6127       case 0x0b:
6128         CHECK_UNHANDLED_BITS(0xf00, undefined);
6129         switch (GET_Fx())
6130         {
6131         case 0: // RTS        0000000000001011
6132           opd->op = OP_BRANCH_R;
6133           opd->rm = SHR_PR;
6134           opd->source = BITMASK1(opd->rm);
6135           opd->dest = BITMASK1(SHR_PC);
6136           opd->cycles = 2;
6137           next_is_delay = 1;
6138           end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
6139           break;
6140         case 1: // SLEEP      0000000000011011
6141           opd->op = OP_SLEEP;
6142           end_block = 1;
6143           break;
6144         case 2: // RTE        0000000000101011
6145           opd->op = OP_RTE;
6146           opd->source = BITMASK1(SHR_SP);
6147           opd->dest = BITMASK4(SHR_SP, SHR_SR, SHR_T, SHR_PC);
6148           opd->cycles = 4;
6149           next_is_delay = 1;
6150           end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
6151           break;
6152         default:
6153           goto undefined;
6154         }
6155         break;
6156       case 0x0c: // MOV.B    @(R0,Rm),Rn      0000nnnnmmmm1100
6157       case 0x0d: // MOV.W    @(R0,Rm),Rn      0000nnnnmmmm1101
6158       case 0x0e: // MOV.L    @(R0,Rm),Rn      0000nnnnmmmm1110
6159         opd->source = BITMASK3(GET_Rm(), SHR_R0, SHR_MEM);
6160         opd->dest = BITMASK1(GET_Rn());
6161         op_flags[i] |= OF_POLL_INSN;
6162         break;
6163       case 0x0f: // MAC.L   @Rm+,@Rn+  0000nnnnmmmm1111
6164         opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM);
6165         opd->dest = BITMASK4(GET_Rm(), GET_Rn(), SHR_MACL, SHR_MACH);
6166         opd->cycles = 3;
6167         break;
6168       default:
6169         goto undefined;
6170       }
6171       break;
6172 
6173     /////////////////////////////////////////////
6174     case 0x01:
6175       // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd
6176       opd->source = BITMASK2(GET_Rm(), GET_Rn());
6177       opd->dest = BITMASK1(SHR_MEM);
6178       opd->imm = (op & 0x0f) * 4;
6179       break;
6180 
6181     /////////////////////////////////////////////
6182     case 0x02:
6183       switch (op & 0x0f)
6184       {
6185       case 0x00: // MOV.B Rm,@Rn        0010nnnnmmmm0000
6186       case 0x01: // MOV.W Rm,@Rn        0010nnnnmmmm0001
6187       case 0x02: // MOV.L Rm,@Rn        0010nnnnmmmm0010
6188         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6189         opd->dest = BITMASK1(SHR_MEM);
6190         break;
6191       case 0x04: // MOV.B Rm,@-Rn       0010nnnnmmmm0100
6192       case 0x05: // MOV.W Rm,@-Rn       0010nnnnmmmm0101
6193       case 0x06: // MOV.L Rm,@-Rn       0010nnnnmmmm0110
6194         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6195         opd->dest = BITMASK2(GET_Rn(), SHR_MEM);
6196         break;
6197       case 0x07: // DIV0S Rm,Rn         0010nnnnmmmm0111
6198         opd->op = OP_DIV0;
6199         opd->source = BITMASK3(SHR_SR, GET_Rm(), GET_Rn());
6200         opd->dest = BITMASK2(SHR_SR, SHR_T);
6201         div(opd) = (struct div){ .rn=GET_Rn(), .rm=GET_Rm(), .ro=SHR_MEM };
6202         i_div = i;
6203         is_divop = 1;
6204         break;
6205       case 0x08: // TST Rm,Rn           0010nnnnmmmm1000
6206         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6207         opd->dest = BITMASK1(SHR_T);
6208         break;
6209       case 0x09: // AND Rm,Rn           0010nnnnmmmm1001
6210       case 0x0a: // XOR Rm,Rn           0010nnnnmmmm1010
6211       case 0x0b: // OR  Rm,Rn           0010nnnnmmmm1011
6212         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6213         opd->dest = BITMASK1(GET_Rn());
6214         break;
6215       case 0x0c: // CMP/STR Rm,Rn       0010nnnnmmmm1100
6216         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6217         opd->dest = BITMASK1(SHR_T);
6218         break;
6219       case 0x0d: // XTRCT  Rm,Rn        0010nnnnmmmm1101
6220         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6221         opd->dest = BITMASK1(GET_Rn());
6222         break;
6223       case 0x0e: // MULU.W Rm,Rn        0010nnnnmmmm1110
6224       case 0x0f: // MULS.W Rm,Rn        0010nnnnmmmm1111
6225         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6226         opd->dest = BITMASK1(SHR_MACL);
6227         break;
6228       default:
6229         goto undefined;
6230       }
6231       break;
6232 
6233     /////////////////////////////////////////////
6234     case 0x03:
6235       switch (op & 0x0f)
6236       {
6237       case 0x00: // CMP/EQ Rm,Rn        0011nnnnmmmm0000
6238       case 0x02: // CMP/HS Rm,Rn        0011nnnnmmmm0010
6239       case 0x03: // CMP/GE Rm,Rn        0011nnnnmmmm0011
6240       case 0x06: // CMP/HI Rm,Rn        0011nnnnmmmm0110
6241       case 0x07: // CMP/GT Rm,Rn        0011nnnnmmmm0111
6242         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6243         opd->dest = BITMASK1(SHR_T);
6244         break;
6245       case 0x04: // DIV1    Rm,Rn       0011nnnnmmmm0100
6246         opd->source = BITMASK4(GET_Rm(), GET_Rn(), SHR_SR, SHR_T);
6247         opd->dest = BITMASK3(GET_Rn(), SHR_SR, SHR_T);
6248         if (i_div >= 0) {
6249           // divide operation: all DIV1 operations must use the same reg pair
6250           if (div(&ops[i_div]).rn == SHR_MEM)
6251             div(&ops[i_div]).rn=GET_Rn(), div(&ops[i_div]).rm=GET_Rm();
6252           if (div(&ops[i_div]).rn == GET_Rn() && div(&ops[i_div]).rm == GET_Rm()) {
6253             div(&ops[i_div]).div1 += 1;
6254             div(&ops[i_div]).state = 0;
6255             is_divop = 1;
6256           } else {
6257             ops[i_div].imm = 0;
6258             i_div = -1;
6259           }
6260         }
6261         break;
6262       case 0x05: // DMULU.L Rm,Rn       0011nnnnmmmm0101
6263       case 0x0d: // DMULS.L Rm,Rn       0011nnnnmmmm1101
6264         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6265         opd->dest = BITMASK2(SHR_MACL, SHR_MACH);
6266         opd->cycles = 2;
6267         break;
6268       case 0x08: // SUB     Rm,Rn       0011nnnnmmmm1000
6269       case 0x0c: // ADD     Rm,Rn       0011nnnnmmmm1100
6270         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6271         opd->dest = BITMASK1(GET_Rn());
6272         break;
6273       case 0x0a: // SUBC    Rm,Rn       0011nnnnmmmm1010
6274       case 0x0e: // ADDC    Rm,Rn       0011nnnnmmmm1110
6275         opd->source = BITMASK3(GET_Rm(), GET_Rn(), SHR_T);
6276         opd->dest = BITMASK2(GET_Rn(), SHR_T);
6277         break;
6278       case 0x0b: // SUBV    Rm,Rn       0011nnnnmmmm1011
6279       case 0x0f: // ADDV    Rm,Rn       0011nnnnmmmm1111
6280         opd->source = BITMASK2(GET_Rm(), GET_Rn());
6281         opd->dest = BITMASK2(GET_Rn(), SHR_T);
6282         break;
6283       default:
6284         goto undefined;
6285       }
6286       break;
6287 
6288     /////////////////////////////////////////////
6289     case 0x04:
6290       switch (op & 0x0f)
6291       {
6292       case 0x00:
6293         switch (GET_Fx())
6294         {
6295         case 0: // SHLL Rn    0100nnnn00000000
6296         case 2: // SHAL Rn    0100nnnn00100000
6297           opd->source = BITMASK1(GET_Rn());
6298           opd->dest = BITMASK2(GET_Rn(), SHR_T);
6299           break;
6300         case 1: // DT Rn      0100nnnn00010000
6301           opd->source = BITMASK1(GET_Rn());
6302           opd->dest = BITMASK2(GET_Rn(), SHR_T);
6303           op_flags[i] |= OF_DELAY_INSN;
6304           break;
6305         default:
6306           goto undefined;
6307         }
6308         break;
6309       case 0x01:
6310         switch (GET_Fx())
6311         {
6312         case 0: // SHLR Rn    0100nnnn00000001
6313         case 2: // SHAR Rn    0100nnnn00100001
6314           opd->source = BITMASK1(GET_Rn());
6315           opd->dest = BITMASK2(GET_Rn(), SHR_T);
6316           break;
6317         case 1: // CMP/PZ Rn  0100nnnn00010001
6318           opd->source = BITMASK1(GET_Rn());
6319           opd->dest = BITMASK1(SHR_T);
6320           break;
6321         default:
6322           goto undefined;
6323         }
6324         break;
6325       case 0x02:
6326       case 0x03:
6327         switch (op & 0x3f)
6328         {
6329         case 0x02: // STS.L    MACH,@-Rn 0100nnnn00000010
6330           tmp = BITMASK1(SHR_MACH);
6331           break;
6332         case 0x12: // STS.L    MACL,@-Rn 0100nnnn00010010
6333           tmp = BITMASK1(SHR_MACL);
6334           break;
6335         case 0x22: // STS.L    PR,@-Rn   0100nnnn00100010
6336           tmp = BITMASK1(SHR_PR);
6337           break;
6338         case 0x03: // STC.L    SR,@-Rn   0100nnnn00000011
6339           tmp = BITMASK2(SHR_SR, SHR_T);
6340           opd->cycles = 2;
6341           break;
6342         case 0x13: // STC.L    GBR,@-Rn  0100nnnn00010011
6343           tmp = BITMASK1(SHR_GBR);
6344           opd->cycles = 2;
6345           break;
6346         case 0x23: // STC.L    VBR,@-Rn  0100nnnn00100011
6347           tmp = BITMASK1(SHR_VBR);
6348           opd->cycles = 2;
6349           break;
6350         default:
6351           goto undefined;
6352         }
6353         opd->source = BITMASK1(GET_Rn()) | tmp;
6354         opd->dest = BITMASK2(GET_Rn(), SHR_MEM);
6355         break;
6356       case 0x04:
6357       case 0x05:
6358         switch (op & 0x3f)
6359         {
6360         case 0x04: // ROTL   Rn          0100nnnn00000100
6361         case 0x05: // ROTR   Rn          0100nnnn00000101
6362           opd->source = BITMASK1(GET_Rn());
6363           opd->dest = BITMASK2(GET_Rn(), SHR_T);
6364           break;
6365         case 0x24: // ROTCL  Rn          0100nnnn00100100
6366           if (i_div >= 0) {
6367             // divide operation: all ROTCL operations must use the same register
6368             if (div(&ops[i_div]).ro == SHR_MEM)
6369               div(&ops[i_div]).ro = GET_Rn();
6370             if (div(&ops[i_div]).ro == GET_Rn() && !div(&ops[i_div]).state) {
6371               div(&ops[i_div]).rotcl += 1;
6372               div(&ops[i_div]).state = 1;
6373               is_divop = 1;
6374             } else {
6375               ops[i_div].imm = 0;
6376               i_div = -1;
6377             }
6378           }
6379         case 0x25: // ROTCR  Rn          0100nnnn00100101
6380           opd->source = BITMASK2(GET_Rn(), SHR_T);
6381           opd->dest = BITMASK2(GET_Rn(), SHR_T);
6382           break;
6383         case 0x15: // CMP/PL Rn          0100nnnn00010101
6384           opd->source = BITMASK1(GET_Rn());
6385           opd->dest = BITMASK1(SHR_T);
6386           break;
6387         default:
6388           goto undefined;
6389         }
6390         break;
6391       case 0x06:
6392       case 0x07:
6393         switch (op & 0x3f)
6394         {
6395         case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110
6396           tmp = BITMASK1(SHR_MACH);
6397           break;
6398         case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110
6399           tmp = BITMASK1(SHR_MACL);
6400           break;
6401         case 0x26: // LDS.L @Rm+,PR   0100mmmm00100110
6402           tmp = BITMASK1(SHR_PR);
6403           break;
6404         case 0x07: // LDC.L @Rm+,SR   0100mmmm00000111
6405           tmp = BITMASK2(SHR_SR, SHR_T);
6406           opd->op = OP_LDC;
6407           opd->cycles = 3;
6408           break;
6409         case 0x17: // LDC.L @Rm+,GBR  0100mmmm00010111
6410           tmp = BITMASK1(SHR_GBR);
6411           opd->op = OP_LDC;
6412           opd->cycles = 3;
6413           break;
6414         case 0x27: // LDC.L @Rm+,VBR  0100mmmm00100111
6415           tmp = BITMASK1(SHR_VBR);
6416           opd->op = OP_LDC;
6417           opd->cycles = 3;
6418           break;
6419         default:
6420           goto undefined;
6421         }
6422         opd->source = BITMASK2(GET_Rn(), SHR_MEM);
6423         opd->dest = BITMASK1(GET_Rn()) | tmp;
6424         break;
6425       case 0x08:
6426       case 0x09:
6427         switch (GET_Fx())
6428         {
6429         case 0:
6430           // SHLL2 Rn        0100nnnn00001000
6431           // SHLR2 Rn        0100nnnn00001001
6432           break;
6433         case 1:
6434           // SHLL8 Rn        0100nnnn00011000
6435           // SHLR8 Rn        0100nnnn00011001
6436           break;
6437         case 2:
6438           // SHLL16 Rn       0100nnnn00101000
6439           // SHLR16 Rn       0100nnnn00101001
6440           break;
6441         default:
6442           goto undefined;
6443         }
6444         opd->source = BITMASK1(GET_Rn());
6445         opd->dest = BITMASK1(GET_Rn());
6446         break;
6447       case 0x0a:
6448         switch (GET_Fx())
6449         {
6450         case 0: // LDS      Rm,MACH   0100mmmm00001010
6451           tmp = SHR_MACH;
6452           break;
6453         case 1: // LDS      Rm,MACL   0100mmmm00011010
6454           tmp = SHR_MACL;
6455           break;
6456         case 2: // LDS      Rm,PR     0100mmmm00101010
6457           tmp = SHR_PR;
6458           break;
6459         default:
6460           goto undefined;
6461         }
6462         opd->op = OP_MOVE;
6463         opd->source = BITMASK1(GET_Rn());
6464         opd->dest = BITMASK1(tmp);
6465         break;
6466       case 0x0b:
6467         switch (GET_Fx())
6468         {
6469         case 0: // JSR  @Rm   0100mmmm00001011
6470           opd->dest = BITMASK1(SHR_PR);
6471         case 2: // JMP  @Rm   0100mmmm00101011
6472           opd->op = OP_BRANCH_R;
6473           opd->rm = GET_Rn();
6474           opd->source = BITMASK1(opd->rm);
6475           opd->dest |= BITMASK1(SHR_PC);
6476           opd->cycles = 2;
6477           next_is_delay = 1;
6478           if (!(opd->dest & BITMASK1(SHR_PR)))
6479             end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
6480           else
6481             op_flags[i+1+next_is_delay] |= OF_BTARGET;
6482           break;
6483         case 1: // TAS.B @Rn  0100nnnn00011011
6484           opd->source = BITMASK2(GET_Rn(), SHR_MEM);
6485           opd->dest = BITMASK2(SHR_T, SHR_MEM);
6486           opd->cycles = 4;
6487           break;
6488         default:
6489           goto undefined;
6490         }
6491         break;
6492       case 0x0e:
6493         switch (GET_Fx())
6494         {
6495         case 0: // LDC Rm,SR   0100mmmm00001110
6496           tmp = BITMASK2(SHR_SR, SHR_T);
6497           break;
6498         case 1: // LDC Rm,GBR  0100mmmm00011110
6499           tmp = BITMASK1(SHR_GBR);
6500           break;
6501         case 2: // LDC Rm,VBR  0100mmmm00101110
6502           tmp = BITMASK1(SHR_VBR);
6503           break;
6504         default:
6505           goto undefined;
6506         }
6507         opd->op = OP_LDC;
6508         opd->source = BITMASK1(GET_Rn());
6509         opd->dest = tmp;
6510         break;
6511       case 0x0f:
6512         // MAC.W @Rm+,@Rn+  0100nnnnmmmm1111
6513         opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM);
6514         opd->dest = BITMASK4(GET_Rm(), GET_Rn(), SHR_MACL, SHR_MACH);
6515         opd->cycles = 3;
6516         break;
6517       default:
6518         goto undefined;
6519       }
6520       break;
6521 
6522     /////////////////////////////////////////////
6523     case 0x05:
6524       // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd
6525       opd->source = BITMASK2(GET_Rm(), SHR_MEM);
6526       opd->dest = BITMASK1(GET_Rn());
6527       opd->imm = (op & 0x0f) * 4;
6528       op_flags[i] |= OF_POLL_INSN;
6529       break;
6530 
6531     /////////////////////////////////////////////
6532     case 0x06:
6533       switch (op & 0x0f)
6534       {
6535       case 0x04: // MOV.B @Rm+,Rn       0110nnnnmmmm0100
6536       case 0x05: // MOV.W @Rm+,Rn       0110nnnnmmmm0101
6537       case 0x06: // MOV.L @Rm+,Rn       0110nnnnmmmm0110
6538         opd->dest = BITMASK2(GET_Rm(), GET_Rn());
6539         opd->source = BITMASK2(GET_Rm(), SHR_MEM);
6540         break;
6541       case 0x00: // MOV.B @Rm,Rn        0110nnnnmmmm0000
6542       case 0x01: // MOV.W @Rm,Rn        0110nnnnmmmm0001
6543       case 0x02: // MOV.L @Rm,Rn        0110nnnnmmmm0010
6544         opd->dest = BITMASK1(GET_Rn());
6545         opd->source = BITMASK2(GET_Rm(), SHR_MEM);
6546         op_flags[i] |= OF_POLL_INSN;
6547         break;
6548       case 0x0a: // NEGC   Rm,Rn        0110nnnnmmmm1010
6549         opd->source = BITMASK2(GET_Rm(), SHR_T);
6550         opd->dest = BITMASK2(GET_Rn(), SHR_T);
6551         break;
6552       case 0x03: // MOV    Rm,Rn        0110nnnnmmmm0011
6553         opd->op = OP_MOVE;
6554         goto arith_rmrn;
6555       case 0x07: // NOT    Rm,Rn        0110nnnnmmmm0111
6556       case 0x08: // SWAP.B Rm,Rn        0110nnnnmmmm1000
6557       case 0x09: // SWAP.W Rm,Rn        0110nnnnmmmm1001
6558       case 0x0b: // NEG    Rm,Rn        0110nnnnmmmm1011
6559       case 0x0c: // EXTU.B Rm,Rn        0110nnnnmmmm1100
6560       case 0x0d: // EXTU.W Rm,Rn        0110nnnnmmmm1101
6561       case 0x0e: // EXTS.B Rm,Rn        0110nnnnmmmm1110
6562       case 0x0f: // EXTS.W Rm,Rn        0110nnnnmmmm1111
6563       arith_rmrn:
6564         opd->source = BITMASK1(GET_Rm());
6565         opd->dest = BITMASK1(GET_Rn());
6566         break;
6567       }
6568       break;
6569 
6570     /////////////////////////////////////////////
6571     case 0x07:
6572       // ADD #imm,Rn  0111nnnniiiiiiii
6573       opd->source = opd->dest = BITMASK1(GET_Rn());
6574       opd->imm = (s8)op;
6575       break;
6576 
6577     /////////////////////////////////////////////
6578     case 0x08:
6579       switch (op & 0x0f00)
6580       {
6581       case 0x0000: // MOV.B R0,@(disp,Rn)  10000000nnnndddd
6582         opd->source = BITMASK2(GET_Rm(), SHR_R0);
6583         opd->dest = BITMASK1(SHR_MEM);
6584         opd->imm = (op & 0x0f);
6585         break;
6586       case 0x0100: // MOV.W R0,@(disp,Rn)  10000001nnnndddd
6587         opd->source = BITMASK2(GET_Rm(), SHR_R0);
6588         opd->dest = BITMASK1(SHR_MEM);
6589         opd->imm = (op & 0x0f) * 2;
6590         break;
6591       case 0x0400: // MOV.B @(disp,Rm),R0  10000100mmmmdddd
6592         opd->source = BITMASK2(GET_Rm(), SHR_MEM);
6593         opd->dest = BITMASK1(SHR_R0);
6594         opd->imm = (op & 0x0f);
6595         op_flags[i] |= OF_POLL_INSN;
6596         break;
6597       case 0x0500: // MOV.W @(disp,Rm),R0  10000101mmmmdddd
6598         opd->source = BITMASK2(GET_Rm(), SHR_MEM);
6599         opd->dest = BITMASK1(SHR_R0);
6600         opd->imm = (op & 0x0f) * 2;
6601         op_flags[i] |= OF_POLL_INSN;
6602         break;
6603       case 0x0800: // CMP/EQ #imm,R0       10001000iiiiiiii
6604         opd->source = BITMASK1(SHR_R0);
6605         opd->dest = BITMASK1(SHR_T);
6606         opd->imm = (s8)op;
6607         break;
6608       case 0x0d00: // BT/S label 10001101dddddddd
6609       case 0x0f00: // BF/S label 10001111dddddddd
6610         next_is_delay = 1;
6611         // fallthrough
6612       case 0x0900: // BT   label 10001001dddddddd
6613       case 0x0b00: // BF   label 10001011dddddddd
6614         opd->op = (op & 0x0200) ? OP_BRANCH_CF : OP_BRANCH_CT;
6615         opd->source = BITMASK2(SHR_PC, SHR_T);
6616         opd->dest = BITMASK1(SHR_PC);
6617         opd->imm = ((signed int)(op << 24) >> 23);
6618         opd->imm += pc + 4;
6619         if (base_pc <= opd->imm && opd->imm < base_pc + BLOCK_INSN_LIMIT * 2)
6620           op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET;
6621         break;
6622       default:
6623         goto undefined;
6624       }
6625       break;
6626 
6627     /////////////////////////////////////////////
6628     case 0x09:
6629       // MOV.W @(disp,PC),Rn  1001nnnndddddddd
6630       opd->op = OP_LOAD_POOL;
6631       tmp = pc + 2;
6632       if (op_flags[i] & OF_DELAY_OP) {
6633         if (ops[i-1].op == OP_BRANCH)
6634           tmp = ops[i-1].imm;
6635         else if (ops[i-1].op != OP_BRANCH_N)
6636           tmp = 0;
6637       }
6638       opd->source = BITMASK2(SHR_PC, SHR_MEM);
6639       opd->dest = BITMASK1(GET_Rn());
6640       if (tmp) {
6641         opd->imm = tmp + 2 + (op & 0xff) * 2;
6642         if (lowest_literal == 0 || opd->imm < lowest_literal)
6643           lowest_literal = opd->imm;
6644       }
6645       opd->size = 1;
6646       break;
6647 
6648     /////////////////////////////////////////////
6649     case 0x0b:
6650       // BSR  label 1011dddddddddddd
6651       opd->dest = BITMASK1(SHR_PR);
6652     case 0x0a:
6653       // BRA  label 1010dddddddddddd
6654       opd->op = OP_BRANCH;
6655       opd->source =  BITMASK1(SHR_PC);
6656       opd->dest |= BITMASK1(SHR_PC);
6657       opd->imm = ((signed int)(op << 20) >> 19);
6658       opd->imm += pc + 4;
6659       opd->cycles = 2;
6660       next_is_delay = 1;
6661       if (!(opd->dest & BITMASK1(SHR_PR))) {
6662         if (base_pc <= opd->imm && opd->imm < base_pc + BLOCK_INSN_LIMIT * 2) {
6663           op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET;
6664           if (opd->imm <= pc)
6665             end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
6666         } else
6667           end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
6668       } else
6669         op_flags[i+1+next_is_delay] |= OF_BTARGET;
6670       break;
6671 
6672     /////////////////////////////////////////////
6673     case 0x0c:
6674       switch (op & 0x0f00)
6675       {
6676       case 0x0000: // MOV.B R0,@(disp,GBR)   11000000dddddddd
6677       case 0x0100: // MOV.W R0,@(disp,GBR)   11000001dddddddd
6678       case 0x0200: // MOV.L R0,@(disp,GBR)   11000010dddddddd
6679         opd->source = BITMASK2(SHR_GBR, SHR_R0);
6680         opd->dest = BITMASK1(SHR_MEM);
6681         opd->size = (op & 0x300) >> 8;
6682         opd->imm = (op & 0xff) << opd->size;
6683         break;
6684       case 0x0400: // MOV.B @(disp,GBR),R0   11000100dddddddd
6685       case 0x0500: // MOV.W @(disp,GBR),R0   11000101dddddddd
6686       case 0x0600: // MOV.L @(disp,GBR),R0   11000110dddddddd
6687         opd->source = BITMASK2(SHR_GBR, SHR_MEM);
6688         opd->dest = BITMASK1(SHR_R0);
6689         opd->size = (op & 0x300) >> 8;
6690         opd->imm = (op & 0xff) << opd->size;
6691         op_flags[i] |= OF_POLL_INSN;
6692         break;
6693       case 0x0300: // TRAPA #imm      11000011iiiiiiii
6694         opd->op = OP_TRAPA;
6695         opd->source = BITMASK4(SHR_SP, SHR_PC, SHR_SR, SHR_T);
6696         opd->dest = BITMASK2(SHR_SP, SHR_PC);
6697         opd->imm = (op & 0xff);
6698         opd->cycles = 8;
6699         op_flags[i+1] |= OF_BTARGET;
6700         break;
6701       case 0x0700: // MOVA @(disp,PC),R0    11000111dddddddd
6702         opd->op = OP_MOVA;
6703         tmp = pc + 2;
6704         if (op_flags[i] & OF_DELAY_OP) {
6705           if (ops[i-1].op == OP_BRANCH)
6706             tmp = ops[i-1].imm;
6707           else if (ops[i-1].op != OP_BRANCH_N)
6708             tmp = 0;
6709         }
6710         opd->dest = BITMASK1(SHR_R0);
6711         if (tmp) {
6712           opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3;
6713           if (opd->imm >= base_pc) {
6714             if (lowest_mova == 0 || opd->imm < lowest_mova)
6715               lowest_mova = opd->imm;
6716           }
6717         }
6718         break;
6719       case 0x0800: // TST #imm,R0           11001000iiiiiiii
6720         opd->source = BITMASK1(SHR_R0);
6721         opd->dest = BITMASK1(SHR_T);
6722         opd->imm = op & 0xff;
6723         break;
6724       case 0x0900: // AND #imm,R0           11001001iiiiiiii
6725         opd->source = opd->dest = BITMASK1(SHR_R0);
6726         opd->imm = op & 0xff;
6727         break;
6728       case 0x0a00: // XOR #imm,R0           11001010iiiiiiii
6729         opd->source = opd->dest = BITMASK1(SHR_R0);
6730         opd->imm = op & 0xff;
6731         break;
6732       case 0x0b00: // OR  #imm,R0           11001011iiiiiiii
6733         opd->source = opd->dest = BITMASK1(SHR_R0);
6734         opd->imm = op & 0xff;
6735         break;
6736       case 0x0c00: // TST.B #imm,@(R0,GBR)  11001100iiiiiiii
6737         opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM);
6738         opd->dest = BITMASK1(SHR_T);
6739         opd->imm = op & 0xff;
6740         op_flags[i] |= OF_POLL_INSN;
6741         opd->cycles = 3;
6742         break;
6743       case 0x0d00: // AND.B #imm,@(R0,GBR)  11001101iiiiiiii
6744       case 0x0e00: // XOR.B #imm,@(R0,GBR)  11001110iiiiiiii
6745       case 0x0f00: // OR.B  #imm,@(R0,GBR)  11001111iiiiiiii
6746         opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM);
6747         opd->dest = BITMASK1(SHR_MEM);
6748         opd->imm = op & 0xff;
6749         opd->cycles = 3;
6750         break;
6751       default:
6752         goto undefined;
6753       }
6754       break;
6755 
6756     /////////////////////////////////////////////
6757     case 0x0d:
6758       // MOV.L @(disp,PC),Rn  1101nnnndddddddd
6759       opd->op = OP_LOAD_POOL;
6760       tmp = pc + 2;
6761       if (op_flags[i] & OF_DELAY_OP) {
6762         if (ops[i-1].op == OP_BRANCH)
6763           tmp = ops[i-1].imm;
6764         else if (ops[i-1].op != OP_BRANCH_N)
6765           tmp = 0;
6766       }
6767       opd->source = BITMASK2(SHR_PC, SHR_MEM);
6768       opd->dest = BITMASK1(GET_Rn());
6769       if (tmp) {
6770         opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3;
6771         if (lowest_literal == 0 || opd->imm < lowest_literal)
6772           lowest_literal = opd->imm;
6773       }
6774       opd->size = 2;
6775       break;
6776 
6777     /////////////////////////////////////////////
6778     case 0x0e:
6779       // MOV #imm,Rn   1110nnnniiiiiiii
6780       opd->op = OP_LOAD_CONST;
6781       opd->dest = BITMASK1(GET_Rn());
6782       opd->imm = (s8)op;
6783       break;
6784 
6785     default:
6786     undefined:
6787       opd->op = OP_UNDEFINED;
6788       // an unhandled instruction is probably not code if it's not the 1st insn
6789       if (!(op_flags[i] & OF_DELAY_OP) && pc != base_pc)
6790         goto end;
6791       break;
6792     }
6793 
6794     if (op_flags[i] & OF_DELAY_OP) {
6795       switch (opd->op) {
6796       case OP_BRANCH:
6797       case OP_BRANCH_N:
6798       case OP_BRANCH_CT:
6799       case OP_BRANCH_CF:
6800       case OP_BRANCH_R:
6801       case OP_BRANCH_RF:
6802         elprintf(EL_ANOMALY, "%csh2 drc: branch in DS @ %08x",
6803           is_slave ? 's' : 'm', pc);
6804         opd->op = OP_UNDEFINED;
6805         op_flags[i] |= OF_B_IN_DS;
6806         next_is_delay = 0;
6807         break;
6808       }
6809     } else if (!is_divop && i_div >= 0)
6810       i_div = -1;       // divide parser stop
6811   }
6812 end:
6813   i_end = i;
6814   end_pc = pc;
6815 
6816   // 2nd pass: some analysis
6817   lowest_literal = end_literals = lowest_mova = 0;
6818   t = T_UNKNOWN; // T flag state
6819   last_btarget = 0;
6820   op = 0; // delay/poll insns counter
6821   is_divop = 0; // divide op insns counter
6822   i_div = -1; // index of current divide op
6823   for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) {
6824     opd = &ops[i];
6825     crc += FETCH_OP(pc);
6826 
6827     // propagate T (TODO: DIV0U)
6828     if (op_flags[i] & OF_BTARGET)
6829       t = T_UNKNOWN;
6830 
6831     if ((opd->op == OP_BRANCH_CT && t == T_SET) ||
6832         (opd->op == OP_BRANCH_CF && t == T_CLEAR)) {
6833       opd->op = OP_BRANCH;
6834       opd->cycles = (op_flags[i + 1] & OF_DELAY_OP) ? 2 : 3;
6835     } else if ((opd->op == OP_BRANCH_CT && t == T_CLEAR) ||
6836                (opd->op == OP_BRANCH_CF && t == T_SET))
6837       opd->op = OP_BRANCH_N;
6838     else if (OP_ISBRACND(opd->op))
6839       t = (opd->op == OP_BRANCH_CF ? T_SET : T_CLEAR);
6840     else if (opd->op == OP_SETCLRT)
6841       t = (opd->imm ? T_SET : T_CLEAR);
6842     else if (opd->dest & BITMASK1(SHR_T))
6843       t = T_UNKNOWN;
6844 
6845     // "overscan" detection: unreachable code after unconditional branch
6846     // this can happen if the insn after a forward branch isn't a local target
6847     if (OP_ISBRAUC(opd->op)) {
6848       if (op_flags[i + 1] & OF_DELAY_OP) {
6849         if (i_end > i + 2 && !(op_flags[i + 2] & OF_BTARGET))
6850           i_end = i + 2;
6851       } else {
6852         if (i_end > i + 1 && !(op_flags[i + 1] & OF_BTARGET))
6853           i_end = i + 1;
6854       }
6855     }
6856 
6857     // divide operation verification:
6858     // 1. there must not be a branch target inside
6859     // 2. nothing is in a delay slot (could only be DIV0)
    // 3. DIV0/n*(ROTCL+DIV1)/ROTCL:
    //     div.div1 > 0 && div.rotcl == div.div1+1 && div.rn != div.ro
    // 4. DIV0/n*DIV1/ROTCL:
6863     //     div.div1 > 0 && div.rotcl == 1 && div.ro == div.rn
6864     if (i_div >= 0) {
6865       if (op_flags[i] & OF_BTARGET) {   // condition 1
6866         ops[i_div].imm = 0;
6867         i_div = -1;
6868       } else if (--is_divop == 0)
6869         i_div = -1;
6870     } else if (opd->op == OP_DIV0) {
6871       struct div *div = &div(opd);
6872       is_divop = div->div1 + div->rotcl;
6873       if (op_flags[i] & OF_DELAY_OP)    // condition 2
6874         opd->imm = 0;
6875       else if (! div->div1 || ! ((div->ro == div->rn && div->rotcl == 1) ||
6876                (div->ro != div->rn && div->rotcl == div->div1+1)))
6877         opd->imm = 0;                   // condition 3+4
6878       else if (is_divop)
6879         i_div = i;
6880     }
6881 
6882     // literal pool size detection
6883     if (opd->op == OP_MOVA && opd->imm >= base_pc)
6884       if (lowest_mova == 0 || opd->imm < lowest_mova)
6885         lowest_mova = opd->imm;
6886     if (opd->op == OP_LOAD_POOL) {
6887       if (opd->imm >= base_pc && opd->imm < end_pc + MAX_LITERAL_OFFSET) {
6888         if (end_literals < opd->imm + opd->size * 2)
6889           end_literals = opd->imm + opd->size * 2;
6890         if (lowest_literal == 0 || lowest_literal > opd->imm)
6891           lowest_literal = opd->imm;
6892         if (opd->size == 2) {
6893           // tweak for NFL: treat a 32bit literal as an address and check if it
6894           // points to the literal space. In that case handle it like MOVA.
6895           tmp = FETCH32(opd->imm) & ~0x20000000; // MUST ignore wt bit here
6896           if (tmp >= end_pc && tmp < end_pc + MAX_LITERAL_OFFSET)
6897             if (lowest_mova == 0 || tmp < lowest_mova)
6898               lowest_mova = tmp;
6899         }
6900       }
6901     }
6902 #if LOOP_DETECTION
6903     // inner loop detection
6904     // 1. a loop always starts with a branch target (for the backwards jump)
6905     // 2. it doesn't contain more than one polling and/or delaying insn
6906     // 3. it doesn't contain unconditional jumps
6907     // 4. no overlapping of loops
6908     if (op_flags[i] & OF_BTARGET) {
6909       last_btarget = i;         // possible loop starting point
6910       op = 0;
6911     }
6912     // XXX let's hope nobody is putting a delay or poll insn in a delay slot :-/
6913     if (OP_ISBRAIMM(opd->op)) {
6914       // BSR, BRA, BT, BF with immediate target
6915       int i_tmp = (opd->imm - base_pc) / 2; // branch target, index in ops
6916       if (i_tmp == last_btarget) // candidate for basic loop optimizer
6917         op_flags[i_tmp] |= OF_BASIC_LOOP;
6918       if (i_tmp == last_btarget && op <= 1) {
6919         op_flags[i_tmp] |= OF_LOOP; // conditions met -> mark loop
6920         last_btarget = i+1;     // condition 4
6921       } else if (opd->op == OP_BRANCH)
6922         last_btarget = i+1;     // condition 3
6923     }
6924     else if (OP_ISBRAIND(opd->op))
6925       // BRAF, BSRF, JMP, JSR, register indirect. treat it as off-limits jump
6926       last_btarget = i+1;       // condition 3
6927     else if (op_flags[i] & (OF_POLL_INSN|OF_DELAY_INSN))
6928       op ++;                    // condition 2
6929 #endif
6930   }
6931   end_pc = pc;
6932 
6933   // end_literals is used to decide to inline a literal or not
6934   // XXX: need better detection if this actually is used in write
6935   if (lowest_literal >= base_pc) {
6936     if (lowest_literal < end_pc) {
6937       dbg(1, "warning: lowest_literal=%08x < end_pc=%08x", lowest_literal, end_pc);
6938       // TODO: does this always mean end_pc covers data?
6939     }
6940   }
6941   if (lowest_mova >= base_pc) {
6942     if (lowest_mova < end_literals) {
6943       dbg(1, "warning: mova=%08x < end_literals=%08x", lowest_mova, end_literals);
6944       end_literals = lowest_mova;
6945     }
6946     if (lowest_mova < end_pc) {
6947       dbg(1, "warning: mova=%08x < end_pc=%08x", lowest_mova, end_pc);
6948       end_literals = end_pc;
6949     }
6950   }
6951   if (lowest_literal >= end_literals)
6952     lowest_literal = end_literals;
6953 
6954   if (lowest_literal && end_literals)
6955     for (pc = lowest_literal; pc < end_literals; pc += 2)
6956       crc += FETCH_OP(pc);
6957 
6958   *end_pc_out = end_pc;
6959   if (base_literals_out != NULL)
6960     *base_literals_out = (lowest_literal ? lowest_literal : end_pc);
6961   if (end_literals_out != NULL)
6962     *end_literals_out = (end_literals ? end_literals : end_pc);
6963 
6964   // crc overflow handling, twice to collect all overflows
6965   crc = (crc & 0xffff) + (crc >> 16);
6966   crc = (crc & 0xffff) + (crc >> 16);
6967   return crc;
6968 }
6969 
6970 // vim:shiftwidth=2:ts=2:expandtab
6971