1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Yabause - sh2_dynarec.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <stdint.h> //include for uint64_t
24 #include <assert.h>
25 #include <string.h> //include for memset
26
27 #include <sys/mman.h>
28
29 #include "../memory.h"
30 #include "../sh2core.h"
31 #include "../yabause.h"
32 #include "sh2_dynarec.h"
33
34 #ifdef __i386__
35 #include "assem_x86.h"
36 #endif
37 #ifdef __x86_64__
38 #include "assem_x64.h"
39 #endif
40 #ifdef __arm__
41 #include "assem_arm.h"
42 #endif
43
44 #define MAXBLOCK 4096
45 #define MAX_OUTPUT_BLOCK_SIZE 262144
46 #define CLOCK_DIVIDER 1
47 #define SH2_REGS 23
48
// Register allocation state kept per instruction (see regs[] and
// branch_regs[]).  The host-register fields are indexed by host register
// number; constmap is sized by SH2_REGS.
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // presumably the mapping on entry to the instruction -- TODO confirm
  signed char regmap[HOST_REGS];       // emulated register held by each host register, negative if free
  u32 wasdirty;                        // presumably the dirty mask before the instruction -- TODO confirm
  u32 dirty;                           // bit hr set: host register hr is dirty (see dirty_reg)
  u64 u;                               // NOTE(review): meaning not visible in this part of the file
  u32 wasdoingcp;                      // presumably isdoingcp before the instruction -- TODO confirm
  u32 isdoingcp;                       // bit hr set: host register hr holds a known constant (see set_const)
  u32 cpmap[HOST_REGS];                // constant value for each host register flagged in isdoingcp
  u32 isconst;                         // presumably one bit per SH2 register, as maintained by sh2_set_const -- TODO confirm
  u32 constmap[SH2_REGS];              // presumably the constant for each register flagged in isconst -- TODO confirm
};
62
// Node of a singly linked list that associates an SH2 virtual address with a
// host address.  The jump_in, jump_out and jump_dirty tables hold lists of
// these nodes.
struct ll_entry
{
  u32 vaddr;             // SH2 virtual address
  u32 reg32;             // set to 0 by ll_add; NOTE(review): no other use visible here
  void *addr;            // host address associated with vaddr
  struct ll_entry *next; // next node, or NULL at the end of the list
};
70
// ---- State of the block currently being compiled ----
u32 start;        // virtual address that branch targets are compared against (see lsn/loop_reg)
u16 *source;
void *alignedsource;
u32 pagelimit;

// ---- Per-instruction tables, indexed by instruction number within the block ----
char insn[MAXBLOCK][10];
unsigned char itype[MAXBLOCK];   // instruction type (NOP, LOAD, STORE, ... see "instruction types")
unsigned char opcode[MAXBLOCK];
unsigned char opcode2[MAXBLOCK];
unsigned char opcode3[MAXBLOCK];
unsigned char addrmode[MAXBLOCK];
unsigned char bt[MAXBLOCK];
signed char rs1[MAXBLOCK];       // source registers, negative if unused
signed char rs2[MAXBLOCK];
signed char rs3[MAXBLOCK];
signed char rt1[MAXBLOCK];       // target registers, negative if unused
signed char rt2[MAXBLOCK];
unsigned char us1[MAXBLOCK];
unsigned char us2[MAXBLOCK];
unsigned char dep1[MAXBLOCK];
unsigned char dep2[MAXBLOCK];
signed char lt1[MAXBLOCK];
int imm[MAXBLOCK];
u32 ba[MAXBLOCK];                // branch target virtual address
char is_ds[MAXBLOCK];
char ooo[MAXBLOCK];
u64 unneeded_reg[MAXBLOCK];      // bit r set: register r is not needed (tested in needed_again/loop_reg)
u64 branch_unneeded_reg[MAXBLOCK];
signed char regmap_pre[MAXBLOCK][HOST_REGS];
u32 cpmap[MAXBLOCK][HOST_REGS];
struct regstat regs[MAXBLOCK];
struct regstat branch_regs[MAXBLOCK];
signed char minimum_free_regs[MAXBLOCK];
u32 needed_reg[MAXBLOCK];
u32 wont_dirty[MAXBLOCK];
u32 will_dirty[MAXBLOCK];
int cycles[MAXBLOCK];
int ccadj[MAXBLOCK];
int slen;                        // upper bound for instruction indices in the block (see lsn)
pointer instr_addr[MAXBLOCK];
u32 link_addr[MAXBLOCK][3];
int linkcount;
u32 stubs[MAXBLOCK*3][8];
int stubcount;
pointer ccstub_return[MAXBLOCK];
u32 literals[1024][2];
int literalcount;
int is_delayslot;
u8 *out;                         // reference point in the translation cache for the expiry checks

// ---- Lookup structures ----
// Per-page lists; pages above 1024 are folded into 1024..2047 (see get_addr).
struct ll_entry *jump_in[2048];
struct ll_entry *jump_out[2048];
struct ll_entry *jump_dirty[2048];
// Each bin holds two (vaddr, host address) pairs: [0],[1] and [2],[3].
ALIGNED(16) u32 hash_table[65536][4];
ALIGNED(16) char shadow[2097152];
char *copy;
int expirep;
unsigned int stop_after_jal;
//char invalid_code[0x100000];
char cached_code[0x20000];         // bitmap, one bit per 4K page (byte = page>>3, bit = page&7)
char cached_code_words[2048*128];  // bitmap, one bit per 32-bit word (byte = index>>5, bit = (index>>2)&7)
u32 recent_writes[8];              // ring buffer of recently written addresses (see invalidate_addr)
u32 recent_write_index=0;
unsigned int slave;
u32 invalidate_count;

// ---- CPU state defined elsewhere ----
extern int master_reg[22];
extern int master_cc;
extern int master_pc; // Virtual PC
extern void * master_ip; // Translated PC
extern int slave_reg[22];
extern int slave_cc;
extern int slave_pc; // Virtual PC
extern void * slave_ip; // Translated PC
extern u8 restore_candidate[512];
143
/* registers that may be allocated */
/* 0-15 gpr */
#define SR 16 // Status register, including T bit
#define GBR 17 // Global base register
#define VBR 18 // Vector base register
#define MACH 19 // MACH
#define MACL 20 // MACL
#define PR 21 // Return address
#define TBIT 22 // T bit, separate from SR

/* internal (non-SH2) values that can also occupy a host register */
#define CCREG 23 // Cycle count
#define MMREG 24 // Pointer to memory_map
#define TEMPREG 25
#define PTEMP 25 // Prefetch temporary register (same number as TEMPREG)
#define MOREG 26 // offset from memory_map
#define RHASH 27 // Return address hash
#define RHTBL 28 // Return address hash table address
#define RTEMP 29 // BRAF/BSRF address register
#define MAXREG 29
#define AGEN1 30 // Address generation temporary register
#define AGEN2 31 // Address generation temporary register
#define MGEN1 32 // Maptable address generation temporary register
#define MGEN2 33 // Maptable address generation temporary register

/* instruction types */
#define NOP 0 // No operation
#define LOAD 1 // Load
#define STORE 2 // Store
#define RMW 3 // Read-Modify-Write
#define PCREL 4 // PC-relative Load
#define MOV 5 // Move
#define ALU 6 // Arithmetic/logic
#define MULTDIV 7 // Multiply/divide
#define SHIFTIMM 8// Shift by immediate
#define IMM8 9 // 8-bit immediate
#define EXT 10 // Sign/Zero Extension
#define FLAGS 11 // SETT/CLRT/MOVT
#define UJUMP 12 // Unconditional jump
#define RJUMP 13 // Unconditional jump to register
#define CJUMP 14 // Conditional branch (BT/BF)
#define SJUMP 15 // Conditional branch with delay slot
#define COMPLEX 16// Complex instructions (function call)
#define SYSTEM 17 // Halt/Trap/Exception
#define SYSCALL 18// SYSCALL (TRAPA)
#define NI 19 // Not implemented
#define DATA 20 // Constant pool data not decoded as instructions
#define BIOS 21 // Emulate BIOS function

/* addressing modes */
#define REGIND 1 // @Rn
#define POSTINC 2 // @Rn+
#define PREDEC 3 // @-Rm
#define DUALIND 4 // @(R0,Rn)
#define GBRIND 5 // @(R0,GBR)
#define GBRDISP 6 // @(disp,GBR)
#define REGDISP 7 // @(disp,Rn)

/* stubs */
#define CC_STUB 1
#define FP_STUB 2
#define LOADB_STUB 3
#define LOADW_STUB 4
#define LOADL_STUB 5
#define LOADS_STUB 6
#define STOREB_STUB 7
#define STOREW_STUB 8
#define STOREL_STUB 9
#define RMWT_STUB 10
#define RMWA_STUB 11
#define RMWX_STUB 12
#define RMWO_STUB 13

/* branch codes */
#define TAKEN 1
#define NOTTAKEN 2
#define NODS 3
220
// asm linkage
// NOTE(review): presumably these are shared with the assembly / generated
// code; several are not defined in this part of the file -- confirm.
int sh2_recompile_block(int addr);
void *get_addr_ht(u32 vaddr);
void get_bounds(pointer addr,u32 *start,u32 *end);
void invalidate_addr(u32 addr);
void remove_hash(int vaddr);
void dyna_linker();
void verify_code();
void cc_interrupt();
void cc_interrupt_master();
void slave_entry();
void div1();
void macl();
void macw();
void master_handle_bios();
void slave_handle_bios();

// Needed by assembler
// (declared before the architecture-specific assem_*.c file is included below)
void wb_register(signed char r,signed char regmap[],u32 dirty);
void wb_dirtys(signed char i_regmap[],u32 i_dirty);
void wb_needed_dirtys(signed char i_regmap[],u32 i_dirty,int addr);
void load_regs(signed char entry[],signed char regmap[],int rs1,int rs2,int rs3);
void load_all_regs(signed char i_regmap[]);
void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
void load_regs_entry(int t);
void load_all_consts(signed char regmap[],u32 dirty,int i);
247
// Debug flag, initially off.  NOTE(review): not read in this part of the file.
int tracedebug=0;

// Uncomment to define DEBUG_CYCLE_COUNT.
//#define DEBUG_CYCLE_COUNT 1
251
// printf-compatible sink that ignores all of its arguments.  Used as the
// expansion of the debug-print macros when debug output is disabled.
void nullf(const char *format, ...)
{
  (void)format;
}
// Debug output: switch a macro to printf to enable the corresponding
// messages; by default both expand to nullf, which discards its arguments.
//#define assem_debug printf
//#define inv_debug printf
#define assem_debug nullf
#define inv_debug nullf
257
258
// Get address from virtual address
// This is called from the recompiled BRAF/BSRF instructions
//
// Returns the host address recorded for vaddr.  Lookup order:
//  1. the jump_in list of the page: on a hit the pair is pushed into the
//     first slot of the hash bin (the previous first slot moves to the second);
//  2. the jump_dirty list of the page: a hit is used only if the block is not
//     about to expire from the cache and verify_dirty() accepts it.  The
//     cached_code and memory_map flags for the page are then set, the page is
//     flagged in restore_candidate, and the words covered by the block are
//     flagged in cached_code_words;
//  3. otherwise sh2_recompile_block() is called and the lookup is repeated.
void *get_addr(u32 vaddr)
{
  struct ll_entry *head;
  // Bit 29 of the address is ignored; pages above 1024 are folded into 1024..2047
  u32 page=(vaddr&0xDFFFFFFF)>>12;
  if(page>1024) page=1024+(page&1023);
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  head=jump_in[page];
  while(head!=NULL) {
    //printf("TRACE: (get_addr check %x: %x)\n",vaddr,(int)head->addr);
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      //printf("TRACE: (get_addr match %x: %x)\n",vaddr,(int)head->addr);
      // Most recently used entry goes first; the old first entry becomes second
      u32 *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(int)head->addr;
      ht_bin[0]=vaddr;
      //printf("TRACE: get_addr clean (%x,%x)\n",vaddr,(int)head->addr);
      return head->addr;
    }
    head=head->next;
  }
  head=jump_dirty[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u32)head->addr-(u32)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty((pointer)head->addr)) {
        u32 start,end;
        u32 *ht_bin;
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,(cached_code[vaddr>>15]>>((vaddr>>12)&7))&1);
        //invalid_code[vaddr>>12]=0;
        // Flag the page, and the page at the same address with bit 29 toggled
        cached_code[vaddr>>15]|=1<<((vaddr>>12)&7);
        cached_code[(vaddr^0x20000000)>>15]|=1<<((vaddr>>12)&7);
        #ifdef POINTERS_64BIT
        memory_map[vaddr>>12]|=0x4000000000000000LL;
        memory_map[(vaddr^0x20000000)>>12]|=0x4000000000000000LL;
        #else
        memory_map[vaddr>>12]|=0x40000000;
        memory_map[(vaddr^0x20000000)>>12]|=0x40000000;
        #endif
        restore_candidate[page>>3]|=1<<(page&7);
        // start/end are host addresses; translate them back to virtual
        // addresses when they fall inside HighWram or LowWram
        get_bounds((pointer)head->addr,&start,&end);
        if(start-(u32)HighWram<0x100000) {
          u32 vstart=start-(u32)HighWram+0x6000000;
          u32 vend=end-(u32)HighWram+0x6000000;
          int i;
          //printf("write protect: start=%x, end=%x\n",vstart,vend);
          // Set one bit per 32-bit word of the block
          for(i=0;i<vend-vstart;i+=4) {
            cached_code_words[((vstart<4194304?vstart:((vstart|0x400000)&0x7fffff))+i)>>5]|=1<<(((vstart+i)>>2)&7);
          }
        }
        if(start-(u32)LowWram<0x100000) {
          u32 vstart=start-(u32)LowWram+0x200000;
          u32 vend=end-(u32)LowWram+0x200000;
          int i;
          //printf("write protect: start=%x, end=%x\n",vstart,vend);
          // Set one bit per 32-bit word of the block
          for(i=0;i<vend-vstart;i+=4) {
            cached_code_words[((vstart<4194304?vstart:((vstart|0x400000)&0x7fffff))+i)>>5]|=1<<(((vstart+i)>>2)&7);
          }
        }
        ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr; // Replace existing entry
        }
        else
        {
          ht_bin[3]=ht_bin[1];
          ht_bin[2]=ht_bin[0];
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }
        //printf("TRACE: get_addr dirty (%x,%x)\n",vaddr,(int)head->addr);
        return head->addr;
      }
    }
    head=head->next;
  }
  // Nothing usable was found: compile the block, then look it up again
  sh2_recompile_block(vaddr);
  return get_addr(vaddr);
}
343 // Look up address in hash table first
get_addr_ht(u32 vaddr)344 void *get_addr_ht(u32 vaddr)
345 {
346 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
347 //if(vaddr>>12==0x60a0) printf("TRACE: (get_addr_ht %x)\n",vaddr);
348 u32 *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
349 //if(vaddr>>12==0x60a0) printf("%x %x %x %x\n",ht_bin[0],ht_bin[1],ht_bin[2],ht_bin[3]);
350 if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
351 if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
352 return get_addr(vaddr);
353 }
354
clear_all_regs(signed char regmap[])355 void clear_all_regs(signed char regmap[])
356 {
357 int hr;
358 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
359 }
360
get_reg(signed char regmap[],int r)361 signed char get_reg(signed char regmap[],int r)
362 {
363 int hr;
364 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&®map[hr]==r) return hr;
365 return -1;
366 }
367
368 // Get a second temporary register (hopefully different from the first)
get_alt_reg(signed char regmap[],int r)369 signed char get_alt_reg(signed char regmap[],int r)
370 {
371 int hr;
372 for (hr=HOST_REGS-1;hr>=0;hr--) if(hr!=EXCLUDE_REG&®map[hr]==r) return hr;
373 return -1;
374 }
375
376 // Find a register that is available for two consecutive cycles
get_reg2(signed char regmap1[],signed char regmap2[],int r)377 signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
378 {
379 int hr;
380 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&®map1[hr]==r&®map2[hr]==r) return hr;
381 return -1;
382 }
383
count_free_regs(signed char regmap[])384 int count_free_regs(signed char regmap[])
385 {
386 int count=0;
387 int hr;
388 for(hr=0;hr<HOST_REGS;hr++)
389 {
390 if(hr!=EXCLUDE_REG) {
391 if(regmap[hr]<0) count++;
392 }
393 }
394 return count;
395 }
396
dirty_reg(struct regstat * cur,signed char reg)397 void dirty_reg(struct regstat *cur,signed char reg)
398 {
399 int hr;
400 if(reg<0) return;
401 for (hr=0;hr<HOST_REGS;hr++) {
402 if((cur->regmap[hr]&63)==reg) {
403 cur->dirty|=1<<hr;
404 }
405 }
406 }
407
// Record that emulated register reg holds the constant value.
// A host register mapped to reg receives the low 32 bits; a host register
// mapped to reg with bit 6 toggled (reg^64) receives the high 32 bits.  The
// matching isdoingcp bits are set.  Negative reg is ignored.
void set_const(struct regstat *cur,signed char reg,u64 value)
{
  int hr;
  if(reg<0) return;
  for (hr=0;hr<HOST_REGS;hr++) {
    const signed char mapped=cur->regmap[hr];
    u32 part;
    if(mapped==reg) {
      part=value;
    }
    else if((mapped^64)==reg) {
      part=value>>32;
    }
    else {
      continue;
    }
    cur->isdoingcp|=1<<hr;
    cur->cpmap[hr]=part;
  }
}
423
clear_const(struct regstat * cur,signed char reg)424 void clear_const(struct regstat *cur,signed char reg)
425 {
426 int hr;
427 if(reg<0) return;
428 for (hr=0;hr<HOST_REGS;hr++) {
429 if((cur->regmap[hr]&63)==reg) {
430 cur->isdoingcp&=~(1<<hr);
431 }
432 }
433 }
434
is_const(struct regstat * cur,signed char reg)435 int is_const(struct regstat *cur,signed char reg)
436 {
437 int hr;
438 if(reg<0) return 0;
439 for (hr=0;hr<HOST_REGS;hr++) {
440 if((cur->regmap[hr]&63)==reg) {
441 return (cur->isdoingcp>>hr)&1;
442 }
443 }
444 return 0;
445 }
get_const(struct regstat * cur,signed char reg)446 u64 get_const(struct regstat *cur,signed char reg)
447 {
448 int hr;
449 if(reg<0) return 0;
450 for (hr=0;hr<HOST_REGS;hr++) {
451 if(cur->regmap[hr]==reg) {
452 return cur->cpmap[hr];
453 }
454 }
455 printf("Unknown constant in r%d\n",reg);
456 exit(1);
457 }
458
// Record that SH2 register reg holds the constant value: set bit reg in
// *isconst and store the value (truncated to 32 bits) in constmap[reg].
// A negative reg is ignored, matching sh2_clear_const(); without the guard it
// would be an undefined shift and a write before the start of constmap.
void sh2_set_const(u32 *isconst,u32 *constmap,signed char reg,u64 value)
{
  if(reg<0) return;
  *isconst|=1<<reg;
  constmap[reg]=value;
}
464
// Clear bit reg in *isconst.  constmap is not touched, and a negative reg is
// ignored.
void sh2_clear_const(u32 *isconst,u32 *constmap,signed char reg)
{
  (void)constmap;
  if(reg>=0) {
    *isconst&=~(1<<reg);
  }
}
470
471
472 // Least soon needed registers
473 // Look at the next ten instructions and see which registers
474 // will be used. Try not to reallocate these.
lsn(unsigned char hsn[],int i,int * preferred_reg)475 void lsn(unsigned char hsn[], int i, int *preferred_reg)
476 {
477 int j;
478 int b=-1;
479 for(j=0;j<9;j++)
480 {
481 if(i+j>=slen) {
482 j=slen-i-1;
483 break;
484 }
485 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP)
486 {
487 // Don't go past an unconditonal jump
488 j++;
489 break;
490 }
491 }
492 for(;j>=0;j--)
493 {
494 if(rs1[i+j]>=0) hsn[rs1[i+j]]=j;
495 if(rs2[i+j]>=0) hsn[rs2[i+j]]=j;
496 if(rs3[i+j]>=0) hsn[rs3[i+j]]=j;
497 if(rt1[i+j]>=0) hsn[rt1[i+j]]=j;
498 if(rt2[i+j]>=0) hsn[rt2[i+j]]=j;
499 if(rs1[i+j]==TBIT) hsn[SR]=j;
500 if(rs2[i+j]==TBIT) hsn[SR]=j;
501 if(rs3[i+j]==TBIT) hsn[SR]=j;
502 if(rt1[i+j]==TBIT) hsn[SR]=j;
503 if(rt2[i+j]==TBIT) hsn[SR]=j;
504 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP))
505 {
506 hsn[CCREG]=j;
507 b=j;
508 }
509 }
510 if(b>=0)
511 {
512 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
513 {
514 // Follow first branch
515 int t=(ba[i+b]-start)>>2;
516 j=7-b;if(t+j>=slen) j=slen-t-1;
517 for(;j>=0;j--)
518 {
519 if(rs1[t+j]>=0) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
520 if(rs2[t+j]>=0) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
521 if(rs3[t+j]>=0) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
522 //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
523 //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
524 }
525 }
526 // TODO: preferred register based on backward branch
527 }
528 // Delay slot should preferably not overwrite branch conditions or cycle count
529 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==SJUMP)) {
530 if(rs1[i-1]>=0) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
531 if(rs2[i-1]>=0) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
532 if(rs3[i-1]>=0) if(hsn[rs3[i-1]]>1) hsn[rs3[i-1]]=1;
533 if(itype[i-1]==SJUMP) if(hsn[SR]>1) hsn[SR]=1;
534 hsn[CCREG]=1;
535 // ...or hash tables
536 hsn[RHASH]=1;
537 hsn[RHTBL]=1;
538 // .. or branch target
539 hsn[RTEMP]=1;
540 }
541 // If reading/writing T bit, need SR
542 if(rs1[i]==TBIT||rs2[i]==TBIT||rt1[i]==TBIT||rt2[i]==TBIT) {
543 hsn[SR]=0;
544 }
545 // Don't remove the memory_map registers either
546 if(itype[i]==LOAD || itype[i]==STORE || itype[i]==RMW || itype[i]==PCREL) {
547 hsn[MOREG]=0;
548 }
549 if(itype[i]==UJUMP || itype[i]==RJUMP || itype[i]==SJUMP)
550 {
551 if(itype[i+1]==LOAD || itype[i+1]==STORE || itype[i+1]==RMW || itype[i+1]==PCREL) {
552 hsn[MOREG]=0;
553 }
554 }
555 if(itype[i]==SYSTEM && opcode[i]==12) { // TRAPA
556 hsn[MOREG]=0;
557 }
558 // Don't remove the miniht registers
559 if(itype[i]==UJUMP||itype[i]==RJUMP)
560 {
561 hsn[RHASH]=0;
562 hsn[RHTBL]=0;
563 // or branch target
564 hsn[RTEMP]=0;
565 }
566 }
567
568 // We only want to allocate registers if we're going to use them again soon
needed_again(int r,int i)569 int needed_again(int r, int i)
570 {
571 int j;
572 int b=-1;
573 int rn=10;
574
575 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP))
576 {
577 if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
578 return 0; // Don't need any registers if exiting the block
579 }
580 for(j=0;j<9;j++)
581 {
582 if(i+j>=slen) {
583 j=slen-i-1;
584 break;
585 }
586 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP)
587 {
588 // Don't go past an unconditonal jump
589 j++;
590 break;
591 }
592 if(itype[i+j]==SYSCALL||itype[i+j]==SYSTEM)
593 {
594 break;
595 }
596 }
597 for(;j>=1;j--)
598 {
599 if(rs1[i+j]==r) rn=j;
600 if(rs2[i+j]==r) rn=j;
601 if((unneeded_reg[i+j]>>r)&1) rn=10;
602 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP))
603 {
604 b=j;
605 }
606 }
607 /*
608 if(b>=0)
609 {
610 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
611 {
612 // Follow first branch
613 int o=rn;
614 int t=(ba[i+b]-start)>>2;
615 j=7-b;if(t+j>=slen) j=slen-t-1;
616 for(;j>=0;j--)
617 {
618 if(!((unneeded_reg[t+j]>>r)&1)) {
619 if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
620 if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
621 }
622 else rn=o;
623 }
624 }
625 }*/
626 if(rn<10) return 1;
627 return 0;
628 }
629
630 // Try to match register allocations at the end of a loop with those
631 // at the beginning
loop_reg(int i,int r,int hr)632 int loop_reg(int i, int r, int hr)
633 {
634 int j,k;
635 for(j=0;j<9;j++)
636 {
637 if(i+j>=slen) {
638 j=slen-i-1;
639 break;
640 }
641 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP)
642 {
643 // Don't go past an unconditonal jump
644 j++;
645 break;
646 }
647 }
648 k=0;
649 if(i>0){
650 if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)
651 k--;
652 }
653 for(;k<j;k++)
654 {
655 if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
656 if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP))
657 {
658 if(ba[i+k]>=start && ba[i+k]<(start+i*2))
659 {
660 int t=(ba[i+k]-start)>>1;
661 int reg=get_reg(regs[t].regmap_entry,r);
662 if(reg>=0) return reg;
663 //reg=get_reg(regs[t+1].regmap_entry,r);
664 //if(reg>=0) return reg;
665 }
666 }
667 }
668 return hr;
669 }
670
671
672 // Allocate every register, preserving source/target regs
alloc_all(struct regstat * cur,int i)673 void alloc_all(struct regstat *cur,int i)
674 {
675 int hr;
676
677 for(hr=0;hr<HOST_REGS;hr++) {
678 if(hr!=EXCLUDE_REG) {
679 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&((cur->regmap[hr]&63)!=rs3[i])&&
680 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
681 {
682 cur->regmap[hr]=-1;
683 cur->dirty&=~(1<<hr);
684 }
685 }
686 }
687 }
688
// Return 1 if address lies in one of the three regions that map_address()
// can translate (bit 29 of the address is ignored):
//   (address & 0xDFF00000) == 0x00200000
//   (address & 0xDE000000) == 0x06000000
//   (address & 0xDFF00000) == 0
// and 0 otherwise.
int can_direct_read(int address)
{
  const unsigned int addr=(unsigned int)address;
  const unsigned int megabyte=addr&0xDFF00000u;
  return megabyte==0x200000u
      || (addr&0xDE000000u)==0x6000000u
      || megabyte==0u;
}
696
// Return 1 if address lies in one of the two writable regions handled
// directly (bit 29 of the address is ignored):
//   (address & 0xDFF00000) == 0x00200000
//   (address & 0xDE000000) == 0x06000000
// and 0 otherwise.
int can_direct_write(int address)
{
  const unsigned int addr=(unsigned int)address;
  if((addr&0xDFF00000u)==0x200000u) {
    return 1;
  }
  return (addr&0xDE000000u)==0x6000000u ? 1 : 0;
}
703
map_address(u32 address)704 static pointer map_address(u32 address)
705 {
706 if((address&0xDFF00000)==0x200000) return (pointer)LowWram+(address&0xFFFFF);
707 if((address&0xDE000000)==0x6000000) return (pointer)HighWram+(address&0xFFFFF);
708 assert((address&0xDFF00000)==0);
709 return (pointer)BiosRom+(address&0x8FFFF);
710 }
711
712 #ifdef __i386__
713 #include "assem_x86.c"
714 #endif
715 #ifdef __x86_64__
716 #include "assem_x64.c"
717 #endif
718 #ifdef __arm__
719 #include "assem_arm.c"
720 #endif
721
722 // Add virtual address mapping to linked list
ll_add(struct ll_entry ** head,int vaddr,void * addr)723 void ll_add(struct ll_entry **head,int vaddr,void *addr)
724 {
725 struct ll_entry *new_entry;
726 new_entry=malloc(sizeof(struct ll_entry));
727 assert(new_entry!=NULL);
728 new_entry->vaddr=vaddr;
729 new_entry->reg32=0;
730 new_entry->addr=addr;
731 new_entry->next=*head;
732 *head=new_entry;
733 }
734
735 // Add to linked list only if there is not an existing record
ll_add_nodup(struct ll_entry ** head,int vaddr,void * addr)736 void ll_add_nodup(struct ll_entry **head,int vaddr,void *addr)
737 {
738 struct ll_entry *ptr;
739 ptr=*head;
740 while(ptr!=NULL) {
741 if(ptr->vaddr==vaddr) {
742 return;
743 }
744 ptr=ptr->next;
745 }
746 ll_add(head,vaddr,addr);
747 }
748
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
//
// Returns the host address for vaddr, or 0 if no usable compiled block is
// found.  The hash bin is consulted first (an entry is accepted only if it
// passes the expiry test and isclean()), then the jump_in list of the page.
// Unlike get_addr(), this function never triggers a recompile and never looks
// at jump_dirty.
void *check_addr(u32 vaddr)
{
  struct ll_entry *head;
  u32 page;
  u32 *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) {
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u32)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u32)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Bit 29 of the address is ignored; pages above 1024 are folded into 1024..2047
  page=(vaddr&0xDFFFFFFF)>>12;
  if(page>1024) page=1024+(page&1023);
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      if((((u32)head->addr-(u32)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        // (-1 marks an empty slot, see remove_hash)
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
796
remove_hash(int vaddr)797 void remove_hash(int vaddr)
798 {
799 //printf("remove hash: %x\n",vaddr);
800 u32 *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
801 if(ht_bin[2]==vaddr) {
802 ht_bin[2]=ht_bin[3]=-1;
803 }
804 if(ht_bin[0]==vaddr) {
805 ht_bin[0]=ht_bin[2];
806 ht_bin[1]=ht_bin[3];
807 ht_bin[2]=ht_bin[3]=-1;
808 }
809 }
810
ll_remove_matching_addrs(struct ll_entry ** head,int addr,int shift)811 void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
812 {
813 struct ll_entry *next;
814 while(*head) {
815 if(((u32)((*head)->addr)>>shift)==(addr>>shift) ||
816 ((u32)(((char *)(*head)->addr)-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
817 {
818 inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
819 remove_hash((*head)->vaddr);
820 next=(*head)->next;
821 free(*head);
822 *head=next;
823 }
824 else
825 {
826 head=&((*head)->next);
827 }
828 }
829 }
830
831 // Remove all entries from linked list
ll_clear(struct ll_entry ** head)832 void ll_clear(struct ll_entry **head)
833 {
834 struct ll_entry *cur;
835 struct ll_entry *next;
836 if((cur=*head)) {
837 *head=0;
838 while(cur) {
839 next=cur->next;
840 free(cur);
841 cur=next;
842 }
843 }
844 }
845
846 // Dereference the pointers and remove if it matches
ll_kill_pointers(struct ll_entry * head,int addr,int shift)847 void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
848 {
849 while(head) {
850 int ptr=get_pointer(head->addr);
851 inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
852 if(((ptr>>shift)==(addr>>shift)) ||
853 (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
854 {
855 u32 host_addr;
856 inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
857 host_addr=(u32)kill_pointer(head->addr);
858 #ifdef __arm__
859 needs_clear_cache[(host_addr-(u32)BASE_ADDR)>>17]|=1<<(((host_addr-(u32)BASE_ADDR)>>12)&31);
860 #endif
861 }
862 head=head->next;
863 }
864 }
865
866 // This is called when we write to a compiled block
invalidate_page(u32 page)867 void invalidate_page(u32 page)
868 {
869 struct ll_entry *head;
870 struct ll_entry *next;
871 head=jump_in[page];
872 jump_in[page]=0;
873 while(head!=NULL) {
874 inv_debug("INVALIDATE: %x\n",head->vaddr);
875 remove_hash(head->vaddr);
876 next=head->next;
877 free(head);
878 head=next;
879 }
880 head=jump_out[page];
881 jump_out[page]=0;
882 while(head!=NULL) {
883 u32 host_addr;
884 inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
885 host_addr=(u32)kill_pointer(head->addr);
886 #ifdef __arm__
887 needs_clear_cache[(host_addr-(u32)BASE_ADDR)>>17]|=1<<(((host_addr-(u32)BASE_ADDR)>>12)&31);
888 #endif
889 next=head->next;
890 free(head);
891 head=next;
892 }
893 }
894
// Invalidate the compiled code for the 4K blocks firstblock..lastblock
// (inclusive).  The range of pages to invalidate is first widened using the
// bounds of the dirty blocks found in those pages, then each page is
// invalidated, and finally the cached_code bits, memory_map entries and
// cached_code_words bits of the requested blocks are reset.
void invalidate_blocks(u32 firstblock,u32 lastblock)
{
  u32 page;
  int block;
  u32 first,last;
  // Page indices: values of 1024 and above are folded into 1024..2047
  first=firstblock<1024?firstblock:1024+(firstblock&1023);
  last=lastblock<1024?lastblock:1024+(lastblock&1023);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  for(block=firstblock;block<=lastblock;block++) {
    struct ll_entry *head;
    page=block&0xDFFFF;
    if(page>1024) page=1024+(page&1023);
    inv_debug("INVALIDATE: %x..%x (%d)\n",firstblock<<12,lastblock<<12,page);
    //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
    head=jump_dirty[page];
    //printf("page=%d vpage=%d\n",page,vpage);
    while(head!=NULL) {
      u32 start,end;
      if((head->vaddr>>12)==block) { // Ignore vaddr hash collision
        // start/end are host addresses of the source range covered by the block
        get_bounds((pointer)head->addr,&start,&end);
        //printf("start: %x end: %x\n",start,end);
        if(start>=(u32)LowWram&&end<(u32)LowWram+1048576) {
          if(((start-(u32)LowWram)>>12)<=page&&((end-1-(u32)LowWram)>>12)>=page) {
            if((((start-(u32)LowWram)>>12)+512)<first) first=((start-(u32)LowWram)>>12)&1023;
            if((((end-1-(u32)LowWram)>>12)+512)>last) last=((end-1-(u32)LowWram)>>12)&1023;
          }
        }
        // FIXME: Aliasing/mirroring is wrong here
        if(start>=(u32)HighWram&&end<(u32)HighWram+1048576) {
          if(((start-(u32)HighWram)>>12)<=page-1024&&((end-1-(u32)HighWram)>>12)>=page-1024) {
            if((((start-(u32)HighWram)>>12)&255)<first-1024) first=(((start-(u32)HighWram)>>12)&255)+1024;
            if((((end-1-(u32)HighWram)>>12)&255)>last-1024) last=(((end-1-(u32)HighWram)>>12)&255)+1024;
          }
        }
      }
      head=head->next;
    }
  }
  //printf("first=%d last=%d\n",first,last);
  while(first<=last) {
    invalidate_page(first);
    first++;
  }
  #ifdef __arm__
    do_clear_cache();
  #endif

  for(block=firstblock;block<=lastblock;block++) {
    // Don't trap writes
    // (clear the flag for the block and for the block with bit 0x20000 toggled)
    cached_code[block>>3]&=~(1<<(block&7));
    cached_code[(block^0x20000)>>3]&=~(1<<(block&7));

    // Reset the memory_map entries of LowWram/HighWram blocks to the plain
    // host pointer form (the same expressions in 64-bit and 32-bit variants)
    #ifdef POINTERS_64BIT
    if((block>=0x0200&&block<0x0300)||(block>=0x20200&&block<0x20300)) {
      memory_map[block]=((u64)LowWram-((block<<12)&0xFFF00000))>>2;
      memory_map[block^0x20000]=((u64)LowWram-(((block^0x20000)<<12)&0xFFF00000))>>2;
    }
    if((block>=0x6000&&block<0x8000)||(block>=0x26000&&block<0x28000)) {
      memory_map[block]=((u64)HighWram-((block<<12)&0xFFF00000))>>2;
      memory_map[block^0x20000]=((u64)HighWram-(((block^0x20000)<<12)&0xFFF00000))>>2;
    }
    #else
    if((block>=0x0200&&block<0x0300)||(block>=0x20200&&block<0x20300)) {
      memory_map[block]=((u32)LowWram-((block<<12)&0xFFF00000))>>2;
      memory_map[block^0x20000]=((u32)LowWram-(((block^0x20000)<<12)&0xFFF00000))>>2;
    }
    if((block>=0x6000&&block<0x8000)||(block>=0x26000&&block<0x28000)) {
      memory_map[block]=((u32)HighWram-((block<<12)&0xFFF00000))>>2;
      memory_map[block^0x20000]=((u32)HighWram-(((block^0x20000)<<12)&0xFFF00000))>>2;
    }
    #endif
    page=block&0xDFFFF;
    if(page>1024) page=1024+(page&1023);
    // 128 bytes of cached_code_words cover one 4K page (one bit per 32-bit word)
    memset(cached_code_words+(page<<7),0,128);
  }
  #ifdef USE_MINI_HT
  memset(mini_ht_master,-1,sizeof(mini_ht_master));
  memset(mini_ht_slave,-1,sizeof(mini_ht_slave));
  #endif
}
// Handle a write to addr in a page that holds compiled code.
// If the written word is not flagged in cached_code_words, the write is
// ignored for the first 500 such calls (it is only remembered in
// recent_writes when the page is a restore candidate).  Otherwise the 4K
// block containing addr is invalidated and the write is remembered.
void invalidate_addr(u32 addr)
{
  const u32 masked=addr&0xDFFFFFFF;
  const u32 index=(masked>4194304)?((addr|0x400000)&0x7fffff):masked;
  const int word_has_code=(cached_code_words[index>>5]>>((index>>2)&7))&1;

  // If we get an excessive number of these,
  // then we probably do want to invalidate the page
  if(!word_has_code && invalidate_count++<500) {
    if((restore_candidate[index>>15]>>((index>>12)&7))&1) {
      recent_writes[recent_write_index]=addr;
      recent_write_index=(recent_write_index+1)&7;
    }
    return;
  }
  //printf("invalidate_count: %d\n",invalidate_count);
  //printf("invalidate_addr(%x)\n",addr);
  //invalidate_block(addr>>12);
  invalidate_blocks(addr>>12,addr>>12);
  assert(!((cached_code_words[index>>5]>>((index>>2)&7))&1));

  // Keep track of recent writes that invalidated the cache, so we don't
  // attempt constant propagation in areas that are frequently written
  recent_writes[recent_write_index]=addr;
  recent_write_index=(recent_write_index+1)&7;
}
1001 // This is called when loading a save state.
1002 // Anything could have changed, so invalidate everything.
invalidate_all_pages()1003 void invalidate_all_pages()
1004 {
1005 u32 page;
1006 for(page=0;page<2048;page++)
1007 invalidate_page(page);
1008 for(page=0;page<256;page++) {
1009 if(cached_code[page]) {
1010 restore_candidate[page]|=cached_code[page]; // LowWram/bios
1011 }
1012 if(cached_code[3072+page]) {
1013 restore_candidate[page+256]|=cached_code[3072+page]; // HighWram
1014 }
1015 }
1016 memset(cached_code_words,0,262144);
1017 #ifdef __arm__
1018 __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
1019 #endif
1020 #ifdef USE_MINI_HT
1021 memset(mini_ht_master,-1,sizeof(mini_ht_master));
1022 memset(mini_ht_slave,-1,sizeof(mini_ht_slave));
1023 #endif
1024 }
1025
1026 // Add an entry to jump_out after making a link
add_link(u32 vaddr,void * src)1027 void add_link(u32 vaddr,void *src)
1028 {
1029 u32 page=(vaddr&0xDFFFFFFF)>>12;
1030 if(page>1024) page=1024+(page&1023);
1031 inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
1032 ll_add(jump_out+page,vaddr,src);
1033 //int ptr=get_pointer(src);
1034 //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
1035 }
1036
1037 // If a code block was found to be unmodified (bit was set in
1038 // restore_candidate) and it remains unmodified (bit is set
1039 // in cached_code) then move the entries for that 4K page from
1040 // the dirty list to the clean list.
clean_blocks(u32 page)1041 void clean_blocks(u32 page)
1042 {
1043 struct ll_entry *head;
1044 inv_debug("INV: clean_blocks page=%d\n",page);
1045 head=jump_dirty[page];
1046 while(head!=NULL) {
1047 if((cached_code[head->vaddr>>15]>>((head->vaddr>>12)&7))&1) {;
1048 // Don't restore blocks which are about to expire from the cache
1049 if((((u32)head->addr-(u32)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
1050 u32 start,end,vstart=0,vend;
1051 if(verify_dirty((int)head->addr)) {
1052 //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
1053 u32 i;
1054 u32 inv=0;
1055 get_bounds((pointer)head->addr,&start,&end);
1056 if(start-(u32)HighWram<0x100000) {
1057 vstart=start-(u32)HighWram+0x6000000;
1058 vend=end-(u32)HighWram+0x6000000;
1059 for(i=(start-(u32)HighWram+0x6000000)>>12;i<=(end-1-(u32)HighWram+0x6000000)>>12;i++) {
1060 // Check that all the pages are write-protected
1061 if(!((cached_code[i>>3]>>(i&7))&1)) inv=1;
1062 }
1063 }
1064 if(start-(u32)LowWram<0x100000) {
1065 vstart=start-(u32)LowWram+0x200000;
1066 vend=end-(u32)LowWram+0x200000;
1067 for(i=(start-(u32)LowWram+0x200000)>>12;i<=(end-1-(u32)LowWram+0x200000)>>12;i++) {
1068 // Check that all the pages are write-protected
1069 if(!((cached_code[i>>3]>>(i&7))&1)) inv=1;
1070 }
1071 }
1072 // Don't restore stuff that recently got hit, it will probably get hit again
1073 if(vstart) for(i=0;i<8;i++) {
1074 if(recent_writes[i]>=vstart&&recent_writes[i]<vend) {
1075 //printf("recent write: %x\n",recent_writes[i]);
1076 inv=1;
1077 }
1078 }
1079 if(!inv) {
1080 void * clean_addr=(void *)get_clean_addr((int)head->addr);
1081 if((((u32)clean_addr-(u32)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
1082 u32 *ht_bin;
1083 inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
1084 //printf("page=%x, addr=%x\n",page,head->vaddr);
1085 //assert(head->vaddr>>12==(page|0x80000));
1086 ll_add_nodup(jump_in+page,head->vaddr,clean_addr);
1087 ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
1088 if(ht_bin[0]==head->vaddr) {
1089 ht_bin[1]=(int)clean_addr; // Replace existing entry
1090 }
1091 if(ht_bin[2]==head->vaddr) {
1092 ht_bin[3]=(int)clean_addr; // Replace existing entry
1093 }
1094 }
1095 if(vstart) {
1096 //printf("start=%x, end=%x\n",vstart,vend);
1097 for(i=0;i<vend-vstart;i+=4) {
1098 cached_code_words[((vstart<4194304?vstart:((vstart|0x400000)&0x7fffff))+i)>>5]|=1<<(((vstart+i)>>2)&7);
1099 }
1100 }
1101 }
1102 }
1103 }
1104 }
1105 head=head->next;
1106 }
1107 }
1108
1109
// Update the constant-tracking state for instruction i.
//   isconst  - bitmask with one bit per SH2 register; a set bit means the
//              register currently holds a known constant
//   constmap - the known value for each register (indexed by register number)
// Instructions whose result is computable from known inputs call
// sh2_set_const(); everything else that writes a register calls
// sh2_clear_const().  SYSTEM and COMPLEX instructions drop all constants.
void do_consts(int i,u32 *isconst,u32 *constmap)
{
  switch(itype[i]) {
    case LOAD:
      sh2_clear_const(isconst,constmap,rt1[i]);
      if(addrmode[i]==POSTINC) {
        // Post-increment advances the address register by the access size
        int size=(opcode[i]==4)?2:(opcode2[i]&3);
        constmap[rt2[i]]+=1<<size;
      }
      break;
    case STORE:
      if(addrmode[i]==PREDEC) {
        // Pre-decrement moves the address register back by the access size
        int size=(opcode[i]==4)?2:(opcode2[i]&3);
        constmap[rt1[i]]-=1<<size;
      }
      break;
    case RMW:
      break;
    case PCREL:
      if(opcode[i]==12) sh2_set_const(isconst,constmap,rt1[i],((start+i*2+4)&~3)+imm[i]); // MOVA
      else { // PC-relative load (constant pool)
        u32 addr=((start+i*2+4)&~3)+imm[i];
        if((u32)((addr-start)>>1)<slen) {
          int value;
          if(opcode[i]==9) value=(s16)source[((start+i*2+4)+imm[i]-start)>>1]; // MOV.W
          else { // MOV.L
            // Combine the two halfwords in unsigned arithmetic: shifting a
            // promoted u16 >= 0x8000 left by 16 as a signed int is undefined.
            u32 hi=source[(addr-start)>>1];
            u32 lo=source[(addr+2-start)>>1];
            value=(int)((hi<<16)+lo);
          }
          sh2_set_const(isconst,constmap,rt1[i],value);
        }
        else sh2_clear_const(isconst,constmap,rt1[i]);
      }
      break;
    case MOV:
      if(((*isconst)>>rs1[i])&1) {
        int v=constmap[rs1[i]];
        sh2_set_const(isconst,constmap,rt1[i],v);
      }
      else sh2_clear_const(isconst,constmap,rt1[i]);
      break;
    case IMM8:
      if(opcode[i]==0x7) { // ADD
        if(((*isconst)>>rs1[i])&1) {
          int v=constmap[rs1[i]];
          sh2_set_const(isconst,constmap,rt1[i],v+imm[i]);
        }
        else sh2_clear_const(isconst,constmap,rt1[i]);
      }
      else if(opcode[i]==0x8) { // CMP/EQ
        // No general register is written
      }
      else if(opcode[i]==12) {
        if(opcode2[i]==8) { // TST
          // No general register is written
        }
        else if(((*isconst)>>rs1[i])&1) { // AND/XOR/OR
          int v=constmap[rs1[i]];
          if(opcode2[i]==0x09) sh2_set_const(isconst,constmap,rt1[i],v&imm[i]);
          if(opcode2[i]==0x0a) sh2_set_const(isconst,constmap,rt1[i],v^imm[i]);
          if(opcode2[i]==0x0b) sh2_set_const(isconst,constmap,rt1[i],v|imm[i]);
        }
        else sh2_clear_const(isconst,constmap,rt1[i]);
      }
      else { // opcode[i]==0xE
        assert(opcode[i]==0xE);
        sh2_set_const(isconst,constmap,rt1[i],imm[i]); // MOV
      }
      break;
    case FLAGS:
      if(opcode2[i]==9) { // MOVT
        sh2_clear_const(isconst,constmap,rt1[i]);
      }
      break;
    case ALU:
      sh2_clear_const(isconst,constmap,rt1[i]);
      break;
    case EXT:
      sh2_clear_const(isconst,constmap,rt1[i]);
      break;
    case MULTDIV:
      if(opcode[i]==0) {
        if(opcode2[i]==7) // MUL.L
        {
          sh2_clear_const(isconst,constmap,MACL);
        }
        if(opcode2[i]==8) // CLRMAC
        {
          sh2_clear_const(isconst,constmap,MACH);
          sh2_clear_const(isconst,constmap,MACL);
        }
        // opcode2[i]==9 (DIV0U) writes no tracked register
      }
      if(opcode[i]==2) {
        // opcode2[i]==7 (DIV0S) writes no tracked register
        if(opcode2[i]==14||opcode2[i]==15) // MULU.W / MULS.W
        {
          sh2_clear_const(isconst,constmap,MACL);
        }
      }
      if(opcode[i]==3) {
        // DMULU.L / DMULS.L
        sh2_clear_const(isconst,constmap,MACH);
        sh2_clear_const(isconst,constmap,MACL);
      }
      break;
    case SHIFTIMM:
      sh2_clear_const(isconst,constmap,rt1[i]);
      break;
    case UJUMP:
    case RJUMP:
    case SJUMP:
    case CJUMP:
      break;
    case SYSTEM:
      *isconst=0;
      break;
    case COMPLEX:
      *isconst=0;
      break;
    default:
      break;
  }
}
1232
mov_alloc(struct regstat * current,int i)1233 void mov_alloc(struct regstat *current,int i)
1234 {
1235 // Note: Don't need to actually alloc the source registers
1236 // TODO: Constant propagation
1237 //alloc_reg(current,i,rs1[i]);
1238 alloc_reg(current,i,rt1[i]);
1239 clear_const(current,rs1[i]);
1240 clear_const(current,rt1[i]);
1241 dirty_reg(current,rt1[i]);
1242 }
1243
shiftimm_alloc(struct regstat * current,int i)1244 void shiftimm_alloc(struct regstat *current,int i)
1245 {
1246 clear_const(current,rs1[i]);
1247 clear_const(current,rt1[i]);
1248 alloc_reg(current,i,rs1[i]);
1249 alloc_reg(current,i,rt1[i]);
1250 dirty_reg(current,rt1[i]);
1251 if(opcode[i]==4) {
1252 if(opcode2[i]<6) { // SHLL/SHAL/SHLR/SHAR/ROTL/ROTCL/ROTR/ROTCR
1253 if(opcode2[i]<4||opcode3[i]<2) {
1254 // SHL/SHA/ROT don't need T bit as a source, only a destination
1255 if(!(current->u&(1LL<<TBIT))) {
1256 alloc_reg(current,i,SR);
1257 dirty_reg(current,SR);
1258 }
1259 }
1260 else {
1261 alloc_reg(current,i,SR); // ROTCL/ROTCR always need T bit
1262 dirty_reg(current,SR);
1263 }
1264 }
1265 }
1266 if(opcode[i]==2&opcode2[i]==13) { // XTRCT
1267 clear_const(current,rs2[i]);
1268 alloc_reg(current,i,rs2[i]);
1269 }
1270 }
1271
alu_alloc(struct regstat * current,int i)1272 void alu_alloc(struct regstat *current,int i)
1273 {
1274 if(opcode[i]==2) {
1275 alloc_reg(current,i,rs1[i]);
1276 alloc_reg(current,i,rs2[i]);
1277 clear_const(current,rs2[i]);
1278 if(opcode2[i]>8&&opcode2[i]<=11) { // AND/XOR/OR
1279 alloc_reg(current,i,rt1[i]);
1280 }
1281 else // TST or CMP/STR
1282 {
1283 alloc_reg(current,i,SR); // Liveness analysis on TBIT?
1284 dirty_reg(current,SR);
1285 //#ifdef __x86__ ?
1286 //#ifdef NEEDS_TEMP
1287 if(opcode2[i]==8) { // TST
1288 alloc_reg_temp(current,i,-1);
1289 minimum_free_regs[i]=1;
1290 }
1291 if(opcode2[i]==12) { // CMP/STR
1292 alloc_reg_temp(current,i,-1);
1293 minimum_free_regs[i]=1;
1294 }
1295 }
1296 }
1297 if(opcode[i]==3) {
1298 alloc_reg(current,i,rs1[i]);
1299 alloc_reg(current,i,rs2[i]);
1300 clear_const(current,rs2[i]);
1301 if(opcode2[i]<8) { // CMP intructions
1302 alloc_reg(current,i,SR); // Liveness analysis on TBIT?
1303 dirty_reg(current,SR);
1304 alloc_reg_temp(current,i,-1);
1305 minimum_free_regs[i]=1;
1306 }else{ // ADD/SUB
1307 alloc_reg(current,i,rt1[i]);
1308 if(opcode2[i]&3) {
1309 alloc_reg(current,i,SR);
1310 dirty_reg(current,SR);
1311 //#ifdef NEEDS_TEMP
1312 if((opcode2[i]&3)==3) {
1313 // Need a temporary register for ADDV/SUBV on x86
1314 alloc_reg_temp(current,i,-1);
1315 minimum_free_regs[i]=1;
1316 }
1317 }
1318 }
1319 }
1320 if(opcode[i]==4) { // DT/CMPPZ/CMPPL
1321 // Single operand forms
1322 alloc_reg(current,i,rs1[i]);
1323 if(opcode2[i]==0) dirty_reg(current,rt1[i]); // DT
1324 alloc_reg(current,i,SR); // Liveness analysis on TBIT?
1325 dirty_reg(current,SR);
1326 if(opcode2[i]>0) {
1327 alloc_reg_temp(current,i,-1);
1328 minimum_free_regs[i]=1;
1329 }
1330 }
1331 if(opcode[i]==6) { // NOT/NEG/NEGC
1332 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1333 alloc_reg(current,i,rt1[i]);
1334 if(opcode2[i]==8||opcode2[i]==9) { // SWAP needs temp (?)
1335 alloc_reg_temp(current,i,-1);
1336 minimum_free_regs[i]=1;
1337 }
1338 if(opcode2[i]==10) {
1339 // NEGC sets T bit
1340 alloc_reg(current,i,SR); // Liveness analysis on TBIT?
1341 dirty_reg(current,SR);
1342 }
1343 }
1344 clear_const(current,rs1[i]);
1345 clear_const(current,rt1[i]);
1346 dirty_reg(current,rt1[i]);
1347 }
1348
imm8_alloc(struct regstat * current,int i)1349 void imm8_alloc(struct regstat *current,int i)
1350 {
1351 //if(rs1[i]>=0&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1352 //else lt1[i]=rs1[i];
1353 alloc_reg(current,i,rs1[i]);
1354 if(rt1[i]>=0&&rt1[i]!=TBIT) alloc_reg(current,i,rt1[i]);
1355 if(opcode[i]==0x7) { // ADD
1356 if(is_const(current,rs1[i])) {
1357 int v=get_const(current,rs1[i]);
1358 set_const(current,rt1[i],v+imm[i]);
1359 }
1360 else clear_const(current,rt1[i]);
1361 }
1362 else if(opcode[i]==0x8) { // CMP/EQ
1363 alloc_reg(current,i,SR); // Liveness analysis on TBIT?
1364 dirty_reg(current,SR);
1365 alloc_reg_temp(current,i,-1);
1366 minimum_free_regs[i]=1;
1367 }
1368 else if(opcode[i]==12) {
1369 if(opcode2[i]==8) { // TST
1370 alloc_reg(current,i,SR); // Liveness analysis on TBIT?
1371 dirty_reg(current,SR);
1372 alloc_reg_temp(current,i,-1);
1373 minimum_free_regs[i]=1;
1374 }else
1375 // AND/XOR/OR
1376 if(is_const(current,rs1[i])) {
1377 int v=get_const(current,rs1[i]);
1378 if(opcode2[i]==0x09) set_const(current,rt1[i],v&imm[i]);
1379 if(opcode2[i]==0x0a) set_const(current,rt1[i],v^imm[i]);
1380 if(opcode2[i]==0x0b) set_const(current,rt1[i],v|imm[i]);
1381 }
1382 else clear_const(current,rt1[i]);
1383 }
1384 else { // opcode[i]==0xE
1385 assert(opcode[i]==0xE);
1386 set_const(current,rt1[i],imm[i]); // MOV
1387 }
1388 if(rt1[i]>=0&&rt1[i]!=TBIT) dirty_reg(current,rt1[i]);
1389 }
1390
ext_alloc(struct regstat * current,int i)1391 void ext_alloc(struct regstat *current,int i)
1392 {
1393 // Note: Don't need to actually alloc the source registers
1394 // FIXME: Constant propagation
1395 //alloc_reg(current,i,rs1[i]);
1396 alloc_reg(current,i,rt1[i]);
1397 clear_const(current,rs1[i]);
1398 clear_const(current,rt1[i]);
1399 dirty_reg(current,rt1[i]);
1400 }
1401
flags_alloc(struct regstat * current,int i)1402 void flags_alloc(struct regstat *current,int i)
1403 {
1404 if(opcode2[i]==8) { // CLRT/SETT
1405 alloc_reg(current,i,SR);
1406 dirty_reg(current,SR);
1407 }else
1408 if(opcode2[i]==9) { // MOVT
1409 alloc_reg(current,i,SR);
1410 alloc_reg(current,i,rt1[i]);
1411 clear_const(current,rt1[i]);
1412 dirty_reg(current,rt1[i]);
1413 }
1414 }
1415
load_alloc(struct regstat * current,int i)1416 void load_alloc(struct regstat *current,int i)
1417 {
1418 int hr;
1419 clear_const(current,rt1[i]);
1420 //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
1421 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1422 // if(rs2[i]>=0) alloc_reg(current,i,rs2[i]);
1423 alloc_reg(current,i,rt1[i]==TBIT?SR:rt1[i]);
1424 if(addrmode[i]==DUALIND||addrmode[i]==GBRIND) {
1425 alloc_reg(current,i,rs1[i]);
1426 alloc_reg(current,i,rs2[i]);
1427 if(!is_const(current,rs1[i])||!is_const(current,rs2[i])) {
1428 // Both must be constants to propagate the sum
1429 clear_const(current,rs1[i]);
1430 clear_const(current,rs2[i]);
1431 }
1432 }
1433 else
1434 if(addrmode[i]==POSTINC) {
1435 if(is_const(current,rt2[i])) {
1436 int v=get_const(current,rt2[i]);
1437 set_const(current,rt2[i],v+(1<<((opcode[i]==4)?2:(opcode2[i]&3))));
1438 // Note: constant is preincremented, address_generation corrects the offset
1439 }
1440 else {
1441 alloc_reg(current,i,rt2[i]);
1442 dirty_reg(current,rt2[i]);
1443 }
1444 }
1445
1446 // Need a register to load from memory_map
1447 alloc_reg(current,i,MOREG);
1448 if(rt1[i]==TBIT||get_reg(current->regmap,rt1[i])<0) {
1449 // dummy load, but we still need a register to calculate the address
1450 alloc_reg_temp(current,i,-1);
1451 minimum_free_regs[i]=1;
1452 }
1453 if(rt1[i]==TBIT) dirty_reg(current,SR);
1454 else dirty_reg(current,rt1[i]);
1455
1456 // Make MOREG a temporary, give pass 5 another register to work with
1457 hr=get_reg(current->regmap,MOREG);
1458 assert(hr>=0);
1459 assert(current->regmap[hr]==MOREG);
1460 current->regmap[hr]=-1;
1461 minimum_free_regs[i]++;
1462 }
1463
store_alloc(struct regstat * current,int i)1464 void store_alloc(struct regstat *current,int i)
1465 {
1466 int hr;
1467 //printf("%x: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",start+i*2,current->regmap[0],current->regmap[1],current->regmap[2],current->regmap[3],current->regmap[5],current->regmap[6],current->regmap[7]);
1468 if(addrmode[i]==DUALIND) {
1469 alloc_reg(current,i,rs2[i]);
1470 alloc_reg(current,i,0); // rs3[i]
1471 if(!is_const(current,rs2[i])||!is_const(current,rs3[i])) {
1472 // Both must be constants to propagate the sum
1473 clear_const(current,rs2[i]);
1474 clear_const(current,rs3[i]);
1475 }
1476 }
1477 if(addrmode[i]==PREDEC) {
1478 if(is_const(current,rt1[i])) {
1479 int v=get_const(current,rt1[i]);
1480 set_const(current,rt1[i],v-(1<<((opcode[i]==4)?2:(opcode2[i]&3))));
1481 }
1482 else {
1483 alloc_reg(current,i,rt1[i]);
1484 dirty_reg(current,rt1[i]);
1485 }
1486 }
1487 if(needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
1488 clear_const(current,rs1[i]);
1489 alloc_reg(current,i,rs1[i]);
1490 // Need a register to load from memory_map
1491 alloc_reg(current,i,MOREG);
1492
1493 // We need a temporary register for address generation
1494 alloc_reg_temp(current,i,-1);
1495 minimum_free_regs[i]=1;
1496
1497 // Make MOREG a temporary, give pass 5 another register to work with
1498 hr=get_reg(current->regmap,MOREG);
1499 assert(hr>=0);
1500 assert(current->regmap[hr]==MOREG);
1501 current->regmap[hr]=-1;
1502 minimum_free_regs[i]++;
1503 }
1504
rmw_alloc(struct regstat * current,int i)1505 void rmw_alloc(struct regstat *current,int i)
1506 {
1507 //printf("%x: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",start+i*2,current->regmap[0],current->regmap[1],current->regmap[2],current->regmap[3],current->regmap[5],current->regmap[6],current->regmap[7]);
1508 if(addrmode[i]==GBRIND) {
1509 alloc_reg(current,i,GBR);
1510 alloc_reg(current,i,0);
1511 if(!is_const(current,rs2[i])||!is_const(current,rs3[i])) {
1512 // Both must be constants to propagate the sum
1513 clear_const(current,rs2[i]);
1514 clear_const(current,rs3[i]);
1515 }
1516 }
1517 if(addrmode[i]==REGIND&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1518 if(rt1[i]==TBIT) {
1519 alloc_reg(current,i,SR);
1520 dirty_reg(current,SR);
1521 }
1522
1523 // Need a register to load from memory_map
1524 alloc_reg(current,i,MOREG);
1525
1526 // We need a temporary register for address generation
1527 alloc_reg_temp(current,i,-1);
1528 // And one for the read-modify-write
1529 //alloc_reg_temp(current,i,-2); // Can re-use mapping reg for this
1530 minimum_free_regs[i]=1;
1531 }
1532
pcrel_alloc(struct regstat * current,int i)1533 void pcrel_alloc(struct regstat *current,int i)
1534 {
1535 u32 addr;
1536 alloc_reg(current,i,rt1[i]);
1537 addr=((start+i*2+4)&~3)+imm[i];
1538 if(opcode[i]==12) { // MOVA, address generation only
1539 set_const(current,rt1[i],addr);
1540 }else if((unsigned)((addr-start)>>1)<slen) {
1541 if(opcode[i]==9) { // MOV.W
1542 addr=(start+i*2+4)+imm[i];
1543 set_const(current,rt1[i],(s16)source[(addr-start)>>1]);
1544 }
1545 else // MOV.L
1546 set_const(current,rt1[i],(source[(addr-start)>>1]<<16)+source[(addr+2-start)>>1]);
1547 }
1548 else {
1549 // Do actual load
1550 //alloc_reg(current,i,MOREG);
1551 clear_const(current,rt1[i]);
1552 }
1553 dirty_reg(current,rt1[i]);
1554 }
1555
1556 #ifndef multdiv_alloc
multdiv_alloc(struct regstat * current,int i)1557 void multdiv_alloc(struct regstat *current,int i)
1558 {
1559 //printf("%x: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",start+i*2,current->regmap[0],current->regmap[1],current->regmap[2],current->regmap[3],current->regmap[5],current->regmap[6],current->regmap[7]);
1560 if(opcode[i]==0) {
1561 if(opcode2[i]==7) // MUL.L
1562 {
1563 clear_const(current,rs1[i]);
1564 clear_const(current,rs2[i]);
1565 clear_const(current,MACL);
1566 alloc_reg(current,i,rs1[i]);
1567 alloc_reg(current,i,rs2[i]);
1568 alloc_reg(current,i,MACL);
1569 dirty_reg(current,MACL);
1570 }
1571 if(opcode2[i]==8) // CLRMAC
1572 {
1573 clear_const(current,MACH);
1574 clear_const(current,MACL);
1575 alloc_reg(current,i,MACH);
1576 alloc_reg(current,i,MACL);
1577 dirty_reg(current,MACH);
1578 dirty_reg(current,MACL);
1579 }
1580 if(opcode2[i]==9) // DIV0U
1581 {
1582 alloc_reg(current,i,SR);
1583 dirty_reg(current,SR);
1584 }
1585 }
1586 if(opcode[i]==2) {
1587 if(opcode2[i]==7) // DIV0S
1588 {
1589 clear_const(current,rs1[i]); // Is this necessary?
1590 clear_const(current,rs2[i]); // Is this necessary?
1591 alloc_reg(current,i,rs1[i]);
1592 alloc_reg(current,i,rs2[i]);
1593 alloc_reg(current,i,SR);
1594 dirty_reg(current,SR);
1595 #if defined(__i386__) || defined(__x86_64__)
1596 //#ifdef NEEDS_TEMP
1597 alloc_reg_temp(current,i,-1);
1598 minimum_free_regs[i]=1;
1599 #endif
1600 }
1601 if(opcode2[i]==14||opcode2[i]==15) // MULU.W / MULS.W
1602 {
1603 clear_const(current,rs1[i]);
1604 clear_const(current,rs2[i]);
1605 clear_const(current,MACL);
1606 alloc_reg(current,i,rs1[i]);
1607 alloc_reg(current,i,rs2[i]);
1608 alloc_reg(current,i,MACL);
1609 dirty_reg(current,MACL);
1610 //#ifdef NEEDS_TEMP
1611 alloc_reg_temp(current,i,-1);
1612 minimum_free_regs[i]=1;
1613 }
1614 }
1615 if(opcode[i]==3) {
1616 // DMULU.L / DMULS.L
1617 #if defined(__i386__) || defined(__x86_64__)
1618 if(!(current->u&(1LL<<MACH))) {
1619 alloc_x86_reg(current,i,MACH,EDX); // Don't need to alloc MACH if it's unneeded
1620 current->u&=~(1LL<<MACL); // But if it is, then assume MACL is needed since it will be overwritten
1621 }
1622 alloc_x86_reg(current,i,MACL,EAX);
1623 #else
1624 if(!(current->u&(1LL<<MACH))) {
1625 alloc_reg(current,i,MACH);
1626 current->u&=~(1LL<<MACL);
1627 }
1628 alloc_reg(current,i,MACL);
1629 #endif
1630 clear_const(current,rs1[i]);
1631 clear_const(current,rs2[i]);
1632 clear_const(current,MACH);
1633 clear_const(current,MACL);
1634 alloc_reg(current,i,rs1[i]);
1635 alloc_reg(current,i,rs2[i]);
1636 dirty_reg(current,MACH);
1637 dirty_reg(current,MACL);
1638 }
1639 }
1640 #endif
1641
complex_alloc(struct regstat * current,int i)1642 void complex_alloc(struct regstat *current,int i)
1643 {
1644 if(opcode[i]==3&&opcode2[i]==4) { // DIV1
1645 #if defined(__i386__) || defined(__x86_64__)
1646 alloc_x86_reg(current,i,rs1[i],ECX);
1647 alloc_x86_reg(current,i,rs2[i],EAX);
1648 alloc_x86_reg(current,i,SR,EDX);
1649 alloc_all(current,i);
1650 #else
1651 #if defined(__arm__)
1652 alloc_arm_reg(current,i,rs1[i],1);
1653 alloc_arm_reg(current,i,rs2[i],0);
1654 alloc_arm_reg(current,i,SR,2);
1655 alloc_all(current,i);
1656 #else
1657 // FIXME
1658 assert(0);
1659 #endif
1660 #endif
1661 dirty_reg(current,rs2[i]);
1662 dirty_reg(current,SR);
1663 }
1664 if(opcode[i]==0&&opcode2[i]==15) { // MAC.L
1665 #if defined(__i386__) || defined(__x86_64__)
1666 alloc_x86_reg(current,i,rs1[i],EBP);
1667 alloc_x86_reg(current,i,rs2[i],EDI);
1668 alloc_x86_reg(current,i,SR,EBX);
1669 alloc_all(current,i);
1670 alloc_x86_reg(current,i,MACL,EAX);
1671 alloc_x86_reg(current,i,MACH,EDX);
1672 #else
1673 #if defined(__arm__)
1674 alloc_arm_reg(current,i,rs1[i],5);
1675 alloc_arm_reg(current,i,rs2[i],6);
1676 alloc_arm_reg(current,i,SR,4);
1677 alloc_all(current,i);
1678 alloc_arm_reg(current,i,MACL,0);
1679 alloc_arm_reg(current,i,MACH,1);
1680 #else
1681 // FIXME
1682 assert(0);
1683 #endif
1684 #endif
1685 dirty_reg(current,rs1[i]);
1686 dirty_reg(current,rs2[i]);
1687 dirty_reg(current,MACH);
1688 dirty_reg(current,MACL);
1689 clear_const(current,MACH);
1690 clear_const(current,MACL);
1691 }
1692 if(opcode[i]==4&&opcode2[i]==15) { // MAC.W
1693 #if defined(__i386__) || defined(__x86_64__)
1694 alloc_x86_reg(current,i,rs1[i],EBP);
1695 alloc_x86_reg(current,i,rs2[i],EDI);
1696 alloc_x86_reg(current,i,SR,EBX);
1697 alloc_all(current,i);
1698 alloc_x86_reg(current,i,MACL,EAX);
1699 alloc_x86_reg(current,i,MACH,EDX);
1700 #else
1701 #if defined(__arm__)
1702 alloc_arm_reg(current,i,rs1[i],5);
1703 alloc_arm_reg(current,i,rs2[i],6);
1704 alloc_arm_reg(current,i,SR,4);
1705 alloc_all(current,i);
1706 alloc_arm_reg(current,i,MACL,0);
1707 alloc_arm_reg(current,i,MACH,1);
1708 #else
1709 // FIXME
1710 assert(0);
1711 #endif
1712 #endif
1713 dirty_reg(current,rs1[i]);
1714 dirty_reg(current,rs2[i]);
1715 dirty_reg(current,MACH);
1716 dirty_reg(current,MACL);
1717 clear_const(current,MACH);
1718 clear_const(current,MACL);
1719 }
1720 clear_const(current,rs1[i]);
1721 clear_const(current,rs2[i]);
1722 minimum_free_regs[i]=HOST_REGS;
1723 }
1724
system_alloc(struct regstat * current,int i)1725 void system_alloc(struct regstat *current,int i)
1726 {
1727 alloc_cc(current,i);
1728 dirty_reg(current,CCREG);
1729 if(opcode[i]==12) { // TRAPA
1730 alloc_reg(current,i,15); // Stack reg
1731 dirty_reg(current,15);
1732 alloc_reg(current,i,SR); // Status/flags
1733 alloc_reg(current,i,VBR);
1734 alloc_reg(current,i,MOREG); // memory_map offset
1735 alloc_reg_temp(current,i,-1);
1736 minimum_free_regs[i]=1;
1737 }
1738 current->isdoingcp=0;
1739 }
1740
delayslot_alloc(struct regstat * current,int i)1741 void delayslot_alloc(struct regstat *current,int i)
1742 {
1743 switch(itype[i]) {
1744 case UJUMP:
1745 case CJUMP:
1746 case SJUMP:
1747 case RJUMP:
1748 case SYSCALL:
1749 assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
1750 printf("Disabled speculative precompilation\n");
1751 stop_after_jal=1;
1752 break;
1753 case IMM8:
1754 imm8_alloc(current,i);
1755 break;
1756 case LOAD:
1757 load_alloc(current,i);
1758 break;
1759 case STORE:
1760 store_alloc(current,i);
1761 break;
1762 case RMW:
1763 rmw_alloc(current,i);
1764 break;
1765 case PCREL:
1766 pcrel_alloc(current,i);
1767 break;
1768 case ALU:
1769 alu_alloc(current,i);
1770 break;
1771 case MULTDIV:
1772 multdiv_alloc(current,i);
1773 break;
1774 case SHIFTIMM:
1775 shiftimm_alloc(current,i);
1776 break;
1777 case MOV:
1778 mov_alloc(current,i);
1779 break;
1780 case EXT:
1781 ext_alloc(current,i);
1782 break;
1783 case FLAGS:
1784 flags_alloc(current,i);
1785 break;
1786 case COMPLEX:
1787 complex_alloc(current,i);
1788 break;
1789 }
1790 }
1791
add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)1792 void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
1793 {
1794 stubs[stubcount][0]=type;
1795 stubs[stubcount][1]=addr;
1796 stubs[stubcount][2]=retaddr;
1797 stubs[stubcount][3]=a;
1798 stubs[stubcount][4]=b;
1799 stubs[stubcount][5]=c;
1800 stubs[stubcount][6]=d;
1801 stubs[stubcount][7]=e;
1802 stubcount++;
1803 }
1804
1805 // Write out a single register
wb_register(signed char r,signed char regmap[],u32 dirty)1806 void wb_register(signed char r,signed char regmap[],u32 dirty)
1807 {
1808 int hr;
1809 for(hr=0;hr<HOST_REGS;hr++) {
1810 if(hr!=EXCLUDE_REG) {
1811 if((regmap[hr]&63)==r) {
1812 if((dirty>>hr)&1) {
1813 emit_storereg(r,hr);
1814 }
1815 }
1816 }
1817 }
1818 }
1819
1820 /*int mchecksum()
1821 {
1822 //if(!tracedebug) return 0;
1823 int i;
1824 int sum=0;
1825 for(i=0;i<2097152;i++) {
1826 unsigned int temp=sum;
1827 sum<<=1;
1828 sum|=(~temp)>>31;
1829 sum^=((u_int *)rdram)[i];
1830 }
1831 return sum;
1832 }
1833 int rchecksum()
1834 {
1835 int i;
1836 int sum=0;
1837 for(i=0;i<64;i++)
1838 sum^=((u_int *)reg)[i];
1839 return sum;
1840 }
1841 int fchecksum()
1842 {
1843 int i;
1844 int sum=0;
1845 for(i=0;i<64;i++)
1846 sum^=((u_int *)reg_cop1_fgr_64)[i];
1847 return sum;
1848 }
1849 void rlist()
1850 {
1851 int i;
1852 printf("TRACE: ");
1853 for(i=0;i<32;i++)
1854 printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
1855 printf("\n");
1856 //printf("TRACE: ");
1857 //for(i=0;i<32;i++)
1858 // printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
1859 //printf("\n");
1860 }*/
1861
enabletrace()1862 void enabletrace()
1863 {
1864 tracedebug=1;
1865 }
1866
#if 0
// Disabled debugging aid: prints a memory checksum and, per architecture,
// a few words read relative to a local variable's address.  It is compiled
// out, and the mchecksum() helper it calls is itself commented out above.
void memdebug(int i)
{
  printf("TRACE: (checksum %x)\n",mchecksum());
#ifdef __i386__
  printf("TRACE: %x\n",(&i)[-1]);
#endif
#ifdef __arm__
  int j;
  printf("TRACE: %x \n",(&j)[10]);
  printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
#endif
}
#endif
1895
alu_assemble(int i,struct regstat * i_regs)1896 void alu_assemble(int i,struct regstat *i_regs)
1897 {
1898 if(opcode[i]==2) {
1899 if(opcode2[i]>=9&&opcode2[i]<=11) { // AND/XOR/OR
1900 signed char s,t;
1901 s=get_reg(i_regs->regmap,rs1[i]);
1902 t=get_reg(i_regs->regmap,rt1[i]);
1903 //assert(s>=0);
1904 if(t>=0) {
1905 if(opcode2[i]==9) emit_and(s,t,t);
1906 if(opcode2[i]==10) emit_xor(rs1[i]>=0?s:t,t,t);
1907 if(opcode2[i]==11) emit_or(s,t,t);
1908 }
1909 }
1910 else
1911 {
1912 signed char s1,s2,sr,temp;
1913 s1=get_reg(i_regs->regmap,rs1[i]);
1914 s2=get_reg(i_regs->regmap,rs2[i]);
1915 sr=get_reg(i_regs->regmap,SR);
1916 temp=get_reg(i_regs->regmap,-1);
1917 assert(s1>=0);
1918 assert(s2>=0);
1919 assert(sr>=0);
1920 assert(temp>=0); // Not needed for TST on ARM?
1921 if(opcode2[i]==8) { // TST
1922 emit_sh2tst(s1,s2,sr,temp);
1923 }
1924 else if(opcode2[i]==12) { // CMP/STR
1925 emit_cmpstr(s1,s2,sr,temp);
1926 }
1927 }
1928 }
1929 if(opcode[i]==3) { // ADD/SUB
1930 if(opcode2[i]<8) { // CMP
1931 signed char s1,s2,sr,temp;
1932 s1=get_reg(i_regs->regmap,rs1[i]);
1933 s2=get_reg(i_regs->regmap,rs2[i]);
1934 sr=get_reg(i_regs->regmap,SR);
1935 temp=get_reg(i_regs->regmap,-1);
1936 assert(s1>=0);
1937 assert(s2>=0);
1938 assert(temp>=0);
1939 if(opcode2[i]==0) emit_cmpeq(s1,s2,sr,temp);
1940 if(opcode2[i]==2) emit_cmphs(s1,s2,sr,temp);
1941 if(opcode2[i]==3) emit_cmpge(s1,s2,sr,temp);
1942 if(opcode2[i]==6) emit_cmphi(s1,s2,sr,temp);
1943 if(opcode2[i]==7) emit_cmpgt(s1,s2,sr,temp);
1944 }
1945 else
1946 {
1947 signed char s,t,sr,temp;
1948 t=get_reg(i_regs->regmap,rt1[i]);
1949 if(t>=0) {
1950 s=get_reg(i_regs->regmap,rs1[i]);
1951 sr=get_reg(i_regs->regmap,SR);
1952 temp=get_reg(i_regs->regmap,-1);
1953 assert(s>=0);
1954 //assert(s2==t);
1955 if(opcode2[i]==8) emit_sub(t,s,t);
1956 if(opcode2[i]==10) emit_subc(s,t,sr);
1957 //if(opcode2[i]==11) emit_subv(s,sr,temp);
1958 assert(opcode2[i]!=11);
1959 if(opcode2[i]==12) emit_add(s,t,t);
1960 if(opcode2[i]==14) emit_addc(s,t,sr);
1961 //if(opcode2[i]==15) emit_addv(s,sr,temp);
1962 assert(opcode2[i]!=15);
1963 }
1964 }
1965 }
1966 if(opcode[i]==4) { // DT/CMPPZ/CMPPL
1967 signed char s,t,sr,temp;
1968 s=get_reg(i_regs->regmap,rs1[i]);
1969 sr=get_reg(i_regs->regmap,SR);
1970 assert(s>=0);
1971 assert(sr>=0);
1972 if(opcode2[i]==0) {
1973 t=get_reg(i_regs->regmap,rt1[i]);
1974 assert(t>=0); // FIXME - Liveness analysis
1975 assert(s==t);
1976 emit_dt(s,sr);
1977 }
1978 else if(opcode2[i]==1) emit_cmppz(s,sr);
1979 else if(opcode2[i]==5)
1980 {
1981 temp=get_reg(i_regs->regmap,-1);
1982 emit_cmppl(s,sr,temp);
1983 }
1984 }
1985 if(opcode[i]==6) { // NOT/SWAP/NEG
1986 int s=get_reg(i_regs->regmap,rs1[i]);
1987 int t=get_reg(i_regs->regmap,rt1[i]);
1988 if(s<0) {
1989 // FIXME: Preload?
1990 emit_loadreg(rs1[i],t);
1991 s=t;
1992 }
1993 if(t>=0) {
1994 if(opcode2[i]==7) emit_not(s,t);
1995 if(opcode2[i]==8) emit_swapb(s,t);
1996 if(opcode2[i]==9) emit_rorimm(s,16,t);
1997 if(opcode2[i]==11) emit_neg(s,t);
1998 }
1999 if(opcode2[i]==10) { // NEGC
2000 int sr=get_reg(i_regs->regmap,SR);
2001 if(i_regs->u&(1LL<<rt1[i])) t=-1;
2002 assert(sr>=0);
2003 emit_negc(s,t,sr);
2004 }
2005 }
2006 }
2007
imm8_assemble(int i,struct regstat * i_regs)2008 void imm8_assemble(int i,struct regstat *i_regs)
2009 {
2010 if(opcode[i]==0x7) { // ADD
2011 signed char s,t;
2012 t=get_reg(i_regs->regmap,rt1[i]);
2013 s=get_reg(i_regs->regmap,rs1[i]);
2014 //assert(t>=0);
2015 assert(s>=0);
2016 if(t>=0) {
2017 if(!((i_regs->isdoingcp>>t)&1)) {
2018 if(s<0) {
2019 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2020 emit_addimm(t,imm[i],t);
2021 }else{
2022 if(!((i_regs->wasdoingcp>>s)&1))
2023 emit_addimm(s,imm[i],t);
2024 else
2025 emit_movimm(cpmap[i][s]+imm[i],t);
2026 }
2027 }
2028 }
2029 }
2030 else if(opcode[i]==0x8) { // CMP/EQ
2031 signed char s,sr,temp;
2032 s=get_reg(i_regs->regmap,rs1[i]);
2033 sr=get_reg(i_regs->regmap,SR);
2034 temp=get_reg(i_regs->regmap,-1);
2035 assert(s>=0);
2036 assert(sr>=0); // Liveness analysis?
2037 assert(temp>=0);
2038 emit_cmpeqimm(s,imm[i],sr,temp);
2039 }
2040 else if(opcode[i]==12) {
2041 if(opcode2[i]==8) { // TST
2042 signed char s,sr,temp;
2043 s=get_reg(i_regs->regmap,rs1[i]);
2044 sr=get_reg(i_regs->regmap,SR);
2045 temp=get_reg(i_regs->regmap,-1);
2046 assert(s>=0);
2047 assert(sr>=0); // Liveness analysis?
2048 assert(temp>=0);
2049 emit_sh2tstimm(s,imm[i],sr,temp);
2050 }else{
2051 signed char s,t;
2052 t=get_reg(i_regs->regmap,rt1[i]);
2053 s=get_reg(i_regs->regmap,rs1[i]);
2054 if(t>=0 && !((i_regs->isdoingcp>>t)&1)) {
2055 if(opcode2[i]==9) //AND
2056 {
2057 if(s<0) {
2058 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2059 emit_andimm(t,imm[i],t);
2060 }else{
2061 if(!((i_regs->wasdoingcp>>s)&1))
2062 emit_andimm(s,imm[i],t);
2063 else
2064 emit_movimm(cpmap[i][s]&imm[i],t);
2065 }
2066 }
2067 else
2068 if(opcode2[i]==10) //XOR
2069 {
2070 if(s<0) {
2071 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2072 emit_xorimm(t,imm[i],t);
2073 }else{
2074 if(!((i_regs->wasdoingcp>>s)&1))
2075 emit_xorimm(s,imm[i],t);
2076 else
2077 emit_movimm(cpmap[i][s]^imm[i],t);
2078 }
2079 }
2080 else
2081 if(opcode2[i]==11) //OR
2082 {
2083 if(s<0) {
2084 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2085 emit_orimm(t,imm[i],t);
2086 }else{
2087 if(!((i_regs->wasdoingcp>>s)&1))
2088 emit_orimm(s,imm[i],t);
2089 else
2090 emit_movimm(cpmap[i][s]|imm[i],t);
2091 }
2092 }
2093 }
2094 }
2095 }
2096 else { // opcode[i]==0xE
2097 signed char t;
2098 assert(opcode[i]==0xE);
2099 t=get_reg(i_regs->regmap,rt1[i]);
2100 //assert(t>=0);
2101 if(t>=0) {
2102 if(!((i_regs->isdoingcp>>t)&1))
2103 emit_movimm(imm[i]<<16,t);
2104 }
2105 }
2106 }
2107
shiftimm_assemble(int i,struct regstat * i_regs)2108 void shiftimm_assemble(int i,struct regstat *i_regs)
2109 {
2110 if(opcode[i]==4) // SHL/SHR
2111 {
2112 if(opcode2[i]<8) {
2113 signed char s,t,sr;
2114 s=get_reg(i_regs->regmap,rs1[i]);
2115 t=get_reg(i_regs->regmap,rt1[i]);
2116 sr=get_reg(i_regs->regmap,SR);
2117 assert(s==t);
2118 if(opcode2[i]==0) // SHLL/SHAL
2119 {
2120 if(i_regs->u&(1LL<<TBIT)) emit_shlimm(s,1,s);
2121 else emit_shlsr(s,sr); // Is there any difference between SHLL and SHAL?
2122 }
2123 else if(opcode2[i]==1) // SHLR/SHAR
2124 {
2125 if(i_regs->u&(1LL<<TBIT)) {
2126 // Skip T bit if unneeded
2127 if(opcode3[i]==0) emit_shrimm(s,1,s);
2128 if(opcode3[i]==2) emit_sarimm(s,1,s);
2129 }else{
2130 // Set T bit
2131 if(opcode3[i]==0) emit_shrsr(s,sr);
2132 if(opcode3[i]==2) emit_sarsr(s,sr);
2133 }
2134 }
2135 else if(opcode2[i]==4) {// ROTL/ROTCL
2136 if(opcode3[i]==0) {
2137 if(i_regs->u&(1LL<<TBIT)) {
2138 emit_rotl(s); // Skip T bit if unneeded
2139 }else{
2140 emit_rotlsr(s,sr);
2141 }
2142 }
2143 if(opcode3[i]==2) emit_rotclsr(s,sr);
2144 }
2145 else {
2146 assert(opcode2[i]==5); // ROTR/ROTCR
2147 if(opcode3[i]==0) {
2148 if(i_regs->u&(1LL<<TBIT)) {
2149 emit_rotr(s); // Skip T bit if unneeded
2150 }else{
2151 emit_rotrsr(s,sr);
2152 }
2153 }
2154 if(opcode3[i]==2) emit_rotcrsr(s,sr);
2155 }
2156 }else{
2157 signed char s,t;
2158 s=get_reg(i_regs->regmap,rs1[i]);
2159 t=get_reg(i_regs->regmap,rt1[i]);
2160 //assert(t>=0);
2161 if(t>=0){
2162 if(opcode2[i]==8) // SHLL
2163 {
2164 if(opcode3[i]==0) emit_shlimm(s,2,t);
2165 if(opcode3[i]==1) emit_shlimm(s,8,t);
2166 if(opcode3[i]==2) emit_shlimm(s,16,t);
2167 }
2168 if(opcode2[i]==9) // SHLR
2169 {
2170 if(opcode3[i]==0) emit_shrimm(s,2,t);
2171 if(opcode3[i]==1) emit_shrimm(s,8,t);
2172 if(opcode3[i]==2) emit_shrimm(s,16,t);
2173 }
2174 }
2175 }
2176 }
2177 else if(opcode[i]==2) // XTRCT
2178 {
2179 signed char s,t,sr;
2180 s=get_reg(i_regs->regmap,rs1[i]);
2181 t=get_reg(i_regs->regmap,rt1[i]);
2182 assert(rs2[i]==rt1[i]);
2183 emit_shrdimm(t,s,16,t);
2184 }
2185 }
2186
load_assemble(int i,struct regstat * i_regs)2187 void load_assemble(int i,struct regstat *i_regs)
2188 {
2189 int dummy;
2190 int s,o,t,addr,map=-1,cache=-1;
2191 int offset;
2192 int jaddr=0;
2193 int memtarget,c=0;
2194 int dualindex=(addrmode[i]==DUALIND||addrmode[i]==GBRIND);
2195 int size=(opcode[i]==4)?2:(opcode2[i]&3);
2196 unsigned int hr;
2197 u32 reglist=0;
2198 pointer constaddr;
2199 t=get_reg(i_regs->regmap,rt1[i]==TBIT?-1:rt1[i]);
2200 s=get_reg(i_regs->regmap,rs1[i]);
2201 o=get_reg(i_regs->regmap,rs2[i]);
2202 offset=imm[i];
2203 for(hr=0;hr<HOST_REGS;hr++) {
2204 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2205 }
2206 //if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2207 if(s>=0) {
2208 if(dualindex)
2209 c=(i_regs->wasdoingcp>>s)&(i_regs->wasdoingcp>>o)&1;
2210 else
2211 c=(i_regs->wasdoingcp>>s)&1;
2212 if(c) {
2213 if(dualindex)
2214 constaddr=cpmap[i][s]+cpmap[i][o];
2215 else
2216 constaddr=cpmap[i][s]+offset;
2217 //if(dualindex) {
2218 // if((i_regs->isconst>>rs1[i])&(i_regs->isconst>>rs2[i])&1)
2219 // assert(constaddr==i_regs->constmap[rs1[i]]+i_regs->constmap[rs2[i]]);
2220 //}else
2221 // if((i_regs->isconst>>rs1[i])&1)
2222 // assert(constaddr==i_regs->constmap[rs1[i]]+offset);
2223 if(addrmode[i]==POSTINC) constaddr-=1<<size;
2224 //printf("constaddr=%x offset=%x\n",constaddr,offset);
2225 memtarget=can_direct_read(constaddr);
2226 }
2227 }
2228 if(t<0) t=get_reg(i_regs->regmap,-1);
2229 if(!c) {
2230 if(dualindex) {
2231 c=(i_regs->isconst>>rs1[i])&(i_regs->isconst>>rs2[i])&1;
2232 } else {
2233 c=(i_regs->isconst>>rs1[i])&1;
2234 }
2235 if(c) {
2236 if(dualindex)
2237 constaddr=i_regs->constmap[rs1[i]]+i_regs->constmap[rs2[i]];
2238 else
2239 constaddr=i_regs->constmap[rs1[i]]+offset;
2240 if(addrmode[i]==POSTINC) constaddr-=1<<size;
2241 //printf("constaddr=%x offset=%x\n",constaddr,offset);
2242 memtarget=can_direct_read(constaddr);
2243 #ifndef HOST_IMM_ADDR32
2244 // In this case, the constant is not already loaded into a register
2245 if(can_direct_read(constaddr))
2246 emit_movimm(map_address(constaddr^(!size)),t);
2247 #endif
2248 }
2249 }
2250 if(offset||dualindex||s<0||c) addr=t;
2251 else addr=s;
2252 //printf("load_assemble: c=%d\n",c);
2253 //if(c) printf("load_assemble: const=%x\n",(int)constaddr);
2254 assert(t>=0); // Even if the load is a NOP, we must check for I/O
2255 reglist&=~(1<<t);
2256 if(!c)
2257 {
2258 int x=0;
2259 if (!c&&size==0) x=1; // MOV.B
2260 cache=get_reg(i_regs->regmap,MMREG);
2261 map=get_reg(i_regs->regmap,MOREG);
2262 if(map<0) map=get_alt_reg(i_regs->regmap,-1);
2263 assert(map>=0);
2264 assert(map!=s);
2265 assert(map!=t);
2266 reglist&=~(1<<map);
2267 map=do_map_r(addr,t,map,cache,x,-1,-1,c,constaddr);
2268 if (!c&&size==0) addr=t; // MOV.B
2269 do_map_r_branch(map,c,constaddr,&jaddr);
2270 //jaddr=(int)out;emit_jmp(0); // for debugging
2271 }
2272 else
2273 {
2274 if(can_direct_read(constaddr)) constaddr=map_address(constaddr);
2275 }
2276 dummy=(t!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to unneeded reg
2277 if(opcode[i]==12&&opcode2[i]==12) // TST.B
2278 dummy=i_regs->u&(1LL<<TBIT);
2279 if (size==0) { // MOV.B
2280 if(!c||memtarget) {
2281 if(!dummy) {
2282 #ifdef HOST_IMM_ADDR32
2283 if(c)
2284 emit_movsbl(constaddr^1,t);
2285 else
2286 #endif
2287 {
2288 int x=0;
2289 emit_movsbl_indexed_map(x,t,map,t);
2290 }
2291 }
2292 if(jaddr)
2293 add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2294 }
2295 else
2296 inline_readstub(LOADB_STUB,i,constaddr,i_regs->regmap,rt1[i],ccadj[i],reglist);
2297 if(rt1[i]==TBIT&&!dummy) { // TST.B
2298 signed char sr;
2299 sr=get_reg(i_regs->regmap,SR);
2300 assert(sr>=0); // Liveness analysis?
2301 emit_sh2tstimm(t,imm[i],sr,t);
2302 }
2303 }
2304 if (size==1) { // MOV.W
2305 if(!c||memtarget) {
2306 if(!dummy) {
2307 #ifdef HOST_IMM_ADDR32
2308 if(c)
2309 emit_movswl(constaddr,t);
2310 else
2311 #endif
2312 {
2313 int x=0;
2314 emit_movswl_indexed_map(0,addr,map,t);
2315 }
2316 }
2317 if(jaddr)
2318 add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2319 }
2320 else
2321 inline_readstub(LOADW_STUB,i,constaddr,i_regs->regmap,rt1[i],ccadj[i],reglist);
2322 }
2323 if (size==2) { // MOV.L
2324 if(!c||memtarget) {
2325 if(!dummy) {
2326 #ifdef HOST_IMM_ADDR32
2327 if(c)
2328 emit_readword(constaddr,t);
2329 else
2330 #endif
2331 emit_readword_indexed_map(0,addr,map,t);
2332 emit_rorimm(t,16,t);
2333 }
2334 if(jaddr)
2335 add_stub(LOADL_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2336 }
2337 else
2338 inline_readstub(LOADL_STUB,i,constaddr,i_regs->regmap,rt1[i],ccadj[i],reglist);
2339 }
2340 if(addrmode[i]==POSTINC) {
2341 if(!((i_regs->wasdoingcp>>s)&1)) {
2342 if(!(i_regs->u&(1LL<<rt2[i]))&&rt1[i]!=rt2[i])
2343 emit_addimm(s,1<<size,s);
2344 }
2345 }
2346 //emit_storereg(rt1[i],tl); // DEBUG
2347 //if(opcode[i]==0x23)
2348 //if(opcode[i]==0x24)
2349 //if(opcode[i]==0x23||opcode[i]==0x24)
2350 /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
2351 {
2352 //emit_pusha();
2353 save_regs(0x100f);
2354 emit_readword((int)&last_count,ECX);
2355 #ifdef __i386__
2356 if(get_reg(i_regs->regmap,CCREG)<0)
2357 emit_loadreg(CCREG,HOST_CCREG);
2358 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2359 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
2360 emit_writeword(HOST_CCREG,(int)&Count);
2361 #endif
2362 #ifdef __arm__
2363 if(get_reg(i_regs->regmap,CCREG)<0)
2364 emit_loadreg(CCREG,0);
2365 else
2366 emit_mov(HOST_CCREG,0);
2367 emit_add(0,ECX,0);
2368 emit_addimm(0,2*ccadj[i],0);
2369 emit_writeword(0,(int)&Count);
2370 #endif
2371 emit_call((int)memdebug);
2372 //emit_popa();
2373 restore_regs(0x100f);
2374 }*/
2375 }
2376
store_assemble(int i,struct regstat * i_regs)2377 void store_assemble(int i,struct regstat *i_regs)
2378 {
2379 int s,t,o,map=-1,cache=-1;
2380 int addr,temp;
2381 int offset;
2382 int jaddr=0,jaddr2,type;
2383 int memtarget,c=0,constaddr;
2384 int dualindex=(addrmode[i]==DUALIND);
2385 int size=(opcode[i]==4)?2:(opcode2[i]&3);
2386 int agr=AGEN1+(i&1);
2387 unsigned int hr;
2388 u32 reglist=0;
2389 t=get_reg(i_regs->regmap,rs1[i]);
2390 s=get_reg(i_regs->regmap,rs2[i]);
2391 o=get_reg(i_regs->regmap,rs3[i]);
2392 temp=get_reg(i_regs->regmap,agr);
2393 if(temp<0) temp=get_reg(i_regs->regmap,-1);
2394 offset=imm[i];
2395 for(hr=0;hr<HOST_REGS;hr++) {
2396 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2397 }
2398 //if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2399 if(s>=0) {
2400 if(dualindex)
2401 c=(i_regs->wasdoingcp>>s)&(i_regs->wasdoingcp>>o)&1;
2402 else
2403 c=(i_regs->wasdoingcp>>s)&1;
2404 if(c) {
2405 if(dualindex)
2406 constaddr=cpmap[i][s]+cpmap[i][o];
2407 else
2408 constaddr=cpmap[i][s]+offset;
2409 }
2410 //printf("constaddr=%x offset=%x\n",constaddr,offset);
2411 memtarget=can_direct_write(constaddr);
2412 }
2413 if(!c) {
2414 if(dualindex) {
2415 c=(i_regs->isconst>>rs2[i])&(i_regs->isconst>>rs3[i])&1;
2416 } else {
2417 c=(i_regs->isconst>>rs2[i])&1;
2418 }
2419 if(c) {
2420 if(dualindex)
2421 constaddr=i_regs->constmap[rs2[i]]+i_regs->constmap[rs3[i]];
2422 else
2423 constaddr=i_regs->constmap[rs2[i]]+offset;
2424 //printf("constaddr=%x offset=%x\n",constaddr,offset);
2425 memtarget=can_direct_write(constaddr);
2426 // In this case, the constant is not already loaded into a register
2427 if(can_direct_write(constaddr)) {
2428 emit_movimm(constaddr^(!size),temp);
2429 map=get_reg(i_regs->regmap,MOREG);
2430 if(map<0) map=get_alt_reg(i_regs->regmap,-1);
2431 generate_map_const(constaddr,map);
2432 }
2433 }
2434 }
2435 assert(t>=0);
2436 assert(temp>=0);
2437 if(offset||dualindex||s<0||c) addr=temp;
2438 else addr=s;
2439 //printf("store_assemble: c=%d\n",c);
2440 if(addrmode[i]==PREDEC&&!c&&rt1[i]==rs1[i]) addr=temp; // Old value is written, so decremented address is in a temporary register
2441 if(addrmode[i]==REGIND&&!c&&rs1[i]==rs2[i]) {// Swapped value is written, so unswapped value must be used as the address
2442 emit_mov(addr,temp);addr=temp;
2443 }
2444 if(!c||memtarget)
2445 {
2446 int x=0;
2447 if (!c&&size==0) x=1; // MOV.B
2448 cache=get_reg(i_regs->regmap,MMREG);
2449 map=get_reg(i_regs->regmap,MOREG);
2450 if(map<0) map=get_alt_reg(i_regs->regmap,-1);
2451 assert(map>=0);
2452 assert(map!=temp);
2453 assert(map!=s);
2454 reglist&=~(1<<map);
2455 //if(x) emit_xorimm(addr,x,temp); // for debugging
2456 map=do_map_w(addr,temp,map,cache,x,c,constaddr);
2457 if (!c&&size==0) addr=temp; // MOV.B
2458 do_map_w_branch(map,c,constaddr,&jaddr);
2459 //jaddr=(int)out;emit_jmp(0); // for debugging
2460 }
2461
2462 if (size==0) { // MOV.B
2463 if(!c||memtarget) {
2464 int x=0;
2465 emit_writebyte_indexed_map(t,x,temp,map,temp);
2466 }
2467 type=STOREB_STUB;
2468 }
2469 if (size==1) { // MOV.W
2470 if(!c||memtarget) {
2471 emit_writehword_indexed_map(t,0,addr,map,temp);
2472 }
2473 type=STOREW_STUB;
2474 }
2475 if (size==2) { // MOV.L
2476 if(!c||memtarget) {
2477 emit_rorimm(t,16,t);
2478 emit_writeword_indexed_map(t,0,addr,map,temp);
2479 if(!(i_regs->u&(1LL<<rs1[i])))
2480 emit_rorimm(t,16,t);
2481 }
2482 type=STOREL_STUB;
2483 }
2484 if(jaddr) {
2485 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2486 } else if(c&&!memtarget) {
2487 inline_writestub(type,i,constaddr,i_regs->regmap,rs1[i],ccadj[i],reglist);
2488 }
2489 if(addrmode[i]==PREDEC) {
2490 assert(s>=0);
2491 if(!((i_regs->wasdoingcp>>s)&1)&&rt1[i]==rs1[i]) emit_addimm(s,-(1<<size),s); // Old value is written, so this "pre-decrement" is really post-decrement
2492 }
2493 //if(opcode[i]==0x2B || opcode[i]==0x3F)
2494 //if(opcode[i]==0x2B || opcode[i]==0x28)
2495 //if(opcode[i]==0x2B || opcode[i]==0x29)
2496 //if(opcode[i]==0x2B)
2497 /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
2498 {
2499 //emit_pusha();
2500 save_regs(0x100f);
2501 emit_readword((int)&last_count,ECX);
2502 #ifdef __i386__
2503 if(get_reg(i_regs->regmap,CCREG)<0)
2504 emit_loadreg(CCREG,HOST_CCREG);
2505 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2506 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
2507 emit_writeword(HOST_CCREG,(int)&Count);
2508 #endif
2509 #ifdef __arm__
2510 if(get_reg(i_regs->regmap,CCREG)<0)
2511 emit_loadreg(CCREG,0);
2512 else
2513 emit_mov(HOST_CCREG,0);
2514 emit_add(0,ECX,0);
2515 emit_addimm(0,2*ccadj[i],0);
2516 emit_writeword(0,(int)&Count);
2517 #endif
2518 emit_call((int)memdebug);
2519 //emit_popa();
2520 restore_regs(0x100f);
2521 }*/
2522 }
2523
rmw_assemble(int i,struct regstat * i_regs)2524 void rmw_assemble(int i,struct regstat *i_regs)
2525 {
2526 int s,o,t,addr,map=-1,cache=-1;
2527 int jaddr=0;
2528 int type;
2529 int memtarget,c=0,constaddr;
2530 int dualindex=(addrmode[i]==GBRIND);
2531 unsigned int hr;
2532 u32 reglist=0;
2533 t=get_reg(i_regs->regmap,-1);
2534 s=get_reg(i_regs->regmap,rs1[i]);
2535 o=get_reg(i_regs->regmap,rs2[i]);
2536 for(hr=0;hr<HOST_REGS;hr++) {
2537 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2538 }
2539 if(s>=0) {
2540 if(dualindex)
2541 c=(i_regs->wasdoingcp>>s)&(i_regs->wasdoingcp>>o)&1;
2542 else
2543 c=(i_regs->wasdoingcp>>s)&1;
2544 if(c) {
2545 if(dualindex)
2546 constaddr=cpmap[i][s]+cpmap[i][o];
2547 else
2548 constaddr=cpmap[i][s];
2549 }
2550 //printf("constaddr=%x offset=%x\n",constaddr,offset);
2551 memtarget=1; // FIXME
2552 }
2553 if(dualindex||s<0||c) addr=t;
2554 else addr=s;
2555 assert(t>=0);
2556 reglist&=~(1<<t);
2557 {
2558 int x=0;
2559 if (!c) x=1; // MOV.B
2560 map=get_reg(i_regs->regmap,MOREG);
2561 cache=get_reg(i_regs->regmap,MMREG);
2562 assert(map>=0);
2563 reglist&=~(1<<map);
2564 map=do_map_w(addr,t,map,cache,x,c,constaddr);
2565 if (!c) addr=t; // MOV.B
2566 do_map_w_branch(map,c,constaddr,&jaddr);
2567 }
2568 if(opcode2[i]==11) type=RMWT_STUB; // TAS.B
2569 if(opcode2[i]==13) type=RMWA_STUB; // AND.B
2570 if(opcode2[i]==14) type=RMWX_STUB; // XOR.B
2571 if(opcode2[i]==15) type=RMWO_STUB; // OR.B
2572 if(!c||memtarget) {
2573 if(opcode2[i]==11) { // TAS.B
2574 signed char sr;
2575 sr=get_reg(i_regs->regmap,SR);
2576 assert(sr>=0); // Liveness analysis?
2577 assert(rt1[i]==TBIT);
2578 if(sr>=0&&!(i_regs->u&(1LL<<TBIT))) emit_sh2tas(addr,map,sr);
2579 else emit_rmw_orimm(addr,map,0x80); // T ignored, set only
2580 }
2581 if(opcode2[i]==13) emit_rmw_andimm(addr,map,imm[i]); // AND.B
2582 if(opcode2[i]==14) emit_rmw_xorimm(addr,map,imm[i]); // XOR.B
2583 if(opcode2[i]==15) emit_rmw_orimm(addr,map,imm[i]); // OR.B
2584 }
2585 if(jaddr)
2586 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2587 }
2588
pcrel_assemble(int i,struct regstat * i_regs)2589 void pcrel_assemble(int i,struct regstat *i_regs)
2590 {
2591 int t,addr,map=-1,cache=-1;
2592 int offset;
2593 int jaddr=0;
2594 int memtarget,c=0,constaddr;
2595 unsigned int hr;
2596 u32 reglist=0;
2597 t=get_reg(i_regs->regmap,rt1[i]);
2598 offset=imm[i];
2599 for(hr=0;hr<HOST_REGS;hr++) {
2600 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2601 }
2602 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2603 if(t>=0) {
2604 if(!((i_regs->isdoingcp>>t)&1)) {
2605 int jaddr=0;
2606 // This is to handle the exceptional case where we can not do constant propagation
2607 assert(opcode[i]!=12); // MOVA should always be able to do constant propagation
2608 constaddr=((start+i*2+4)&~3)+imm[i];
2609 if(opcode[i]==9) constaddr=(start+i*2+4)+imm[i]; // MOV.W
2610 assem_debug("Can't do constant propagation, doing PC-relatve load\n");
2611 //int map=get_reg(i_regs->regmap,MOREG);
2612 //int cache=get_reg(i_regs->regmap,MMREG);
2613 //assert(map>=0);
2614 reglist&=~(1<<t);
2615 //reglist&=~(1<<map);
2616 assert(can_direct_read(constaddr));
2617 #ifndef HOST_IMM_ADDR32
2618 emit_movimm(map_address(constaddr),t);
2619 #endif
2620 //map=do_map_r(t,-1,map,cache,0,-1,-1,0,0);
2621 //do_map_r_branch(map,0,0,&jaddr);
2622 //assert(jaddr);
2623 if(opcode[i]==9) { // MOV.W
2624 // direct load
2625 #ifdef HOST_IMM_ADDR32
2626 emit_movswl(map_address(constaddr),t);
2627 #else
2628 //emit_movswl_indexed_map(0,t,map,t);
2629 emit_movswl_indexed(0,t,t);
2630 #endif
2631 //add_stub(LOADW_STUB,jaddr,(int)out,i,t,(int)(i_regs),ccadj[i],reglist);
2632 }
2633 else { // MOV.L
2634 // direct load
2635 #ifdef HOST_IMM_ADDR32
2636 emit_readword(map_address(constaddr),t);
2637 #else
2638 //emit_readword_indexed_map(0,t,map,t);
2639 emit_readword_indexed(0,t,t);
2640 #endif
2641 emit_rorimm(t,16,t);
2642 //add_stub(LOADL_STUB,jaddr,(int)out,i,t,(int)(i_regs),ccadj[i],reglist);
2643 }
2644 }
2645 }
2646 }
2647
2648 //extern void debug_multiplication(int m,int n,int h,int l);
2649 #ifndef multdiv_assemble
multdiv_assemble(int i,struct regstat * i_regs)2650 void multdiv_assemble(int i,struct regstat *i_regs)
2651 {
2652 if(opcode[i]==0) {
2653 if(opcode2[i]==7) // MUL.L
2654 {
2655 int s1=get_reg(i_regs->regmap,rs1[i]);
2656 int s2=get_reg(i_regs->regmap,rs2[i]);
2657 int t=get_reg(i_regs->regmap,MACL);
2658 if(t>=0) emit_multiply(s1,s2,t);
2659 }
2660 if(opcode2[i]==8) // CLRMAC
2661 {
2662 int t1=get_reg(i_regs->regmap,rt1[i]);
2663 int t2=get_reg(i_regs->regmap,rt2[i]);
2664 if(!(i_regs->u&(1LL<<MACH)))
2665 emit_zeroreg(t1);
2666 if(!(i_regs->u&(1LL<<MACL)))
2667 emit_zeroreg(t2);
2668 }
2669 if(opcode2[i]==9) // DIV0U
2670 {
2671 int sr=get_reg(i_regs->regmap,SR);
2672 emit_andimm(sr,0xfe,sr);
2673 }
2674 }
2675 if(opcode[i]==2) {
2676 if(opcode2[i]==7) // DIV0S
2677 {
2678 int s1=get_reg(i_regs->regmap,rs1[i]);
2679 int s2=get_reg(i_regs->regmap,rs2[i]);
2680 int sr=get_reg(i_regs->regmap,SR);
2681 int temp=get_reg(i_regs->regmap,-1);
2682 assert(s1>=0);
2683 assert(s2>=0);
2684 assert(sr>=0);
2685 emit_div0s(s1,s2,sr,temp);
2686 }
2687 if(opcode2[i]==14||opcode2[i]==15) // MULU.W / MULS.W
2688 {
2689 int s1=get_reg(i_regs->regmap,rs1[i]);
2690 int s2=get_reg(i_regs->regmap,rs2[i]);
2691 int t=get_reg(i_regs->regmap,MACL);
2692 #ifdef HOST_TEMPREG
2693 int temp=HOST_TEMPREG;
2694 #else
2695 int temp=get_reg(i_regs->regmap,-1);
2696 #endif
2697 if(t>=0) {
2698 assert(temp>=0);
2699 if(opcode2[i]==14) { // MULU.W
2700 emit_movzwl_reg(s1,t);
2701 emit_movzwl_reg(s2,temp);
2702 }else{ // MULS.W
2703 emit_movswl_reg(s1,t);
2704 emit_movswl_reg(s2,temp);
2705 }
2706 emit_multiply(t,temp,t);
2707 }
2708 /* DEBUG
2709 emit_pusha();
2710 emit_pushreg(t);
2711 emit_pushreg(t);
2712 emit_pushreg(s2);
2713 emit_pushreg(s1);
2714 emit_call((int)debug_multiplication);
2715 emit_addimm(ESP,16,ESP);
2716 emit_popa();*/
2717 }
2718 }
2719 if(opcode[i]==3) {
2720 int s1=get_reg(i_regs->regmap,rs1[i]);
2721 int s2=get_reg(i_regs->regmap,rs2[i]);
2722 int th=get_reg(i_regs->regmap,MACH);
2723 int tl=get_reg(i_regs->regmap,MACL);
2724 if(th>=0) {
2725 // DMULU.L / DMULS.L
2726 #if defined(__i386__) || defined(__x86_64__)
2727 assert(tl==EAX);
2728 assert(th==EDX);
2729 assert(s1!=EAX); // This would work only if s1 is clean or dead
2730 if(s1!=EAX) emit_mov(s1,EAX);
2731 if(opcode2[i]==5) emit_mul(s2); // DMULU.L
2732 if(opcode2[i]==13) emit_imul(s2); // DMULS.L
2733 #else
2734 if(opcode2[i]==5) emit_umull(s1,s2,th,tl); // DMULU.L
2735 if(opcode2[i]==13) emit_smull(s1,s2,th,tl); // DMULS.L
2736 #endif
2737 }else if(tl>=0) {
2738 // MACH is unneeded, 32-bit result only
2739 emit_multiply(s1,s2,tl);
2740 }
2741 /* DEBUG
2742 emit_pusha();
2743 emit_pushreg(tl);
2744 emit_pushreg(th);
2745 emit_pushreg(s2);
2746 emit_pushreg(s1);
2747 emit_call((int)debug_multiplication);
2748 emit_addimm(ESP,16,ESP);
2749 emit_popa();*/
2750 }
2751 }
2752 #endif
2753
mov_assemble(int i,struct regstat * i_regs)2754 void mov_assemble(int i,struct regstat *i_regs)
2755 {
2756 signed char s,t;
2757 t=get_reg(i_regs->regmap,rt1[i]);
2758 //assert(t>=0);
2759 if(t>=0) {
2760 s=get_reg(i_regs->regmap,rs1[i]);
2761 if(s>=0) {if(s!=t) emit_mov(s,t);}
2762 else emit_loadreg(rs1[i],t);
2763 }
2764 }
2765
ext_assemble(int i,struct regstat * i_regs)2766 void ext_assemble(int i,struct regstat *i_regs)
2767 {
2768 signed char s,t;
2769 t=get_reg(i_regs->regmap,rt1[i]);
2770 //assert(t>=0);
2771 if(t>=0) {
2772 s=get_reg(i_regs->regmap,rs1[i]);
2773 if(s>=0) {
2774 if(opcode2[i]==12) emit_movzbl_reg(s,t);
2775 if(opcode2[i]==13) emit_movzwl_reg(s,t);
2776 if(opcode2[i]==14) emit_movsbl_reg(s,t);
2777 if(opcode2[i]==15) emit_movswl_reg(s,t);
2778 }
2779 else
2780 {
2781 emit_loadreg(rs1[i],t); // Fix - do byte/halfword loads?
2782 if(opcode2[i]==12) emit_movzbl_reg(t,t);
2783 if(opcode2[i]==13) emit_movzwl_reg(t,t);
2784 if(opcode2[i]==14) emit_movsbl_reg(t,t);
2785 if(opcode2[i]==15) emit_movswl_reg(t,t);
2786 }
2787 }
2788 }
2789
flags_assemble(int i,struct regstat * i_regs)2790 void flags_assemble(int i,struct regstat *i_regs)
2791 {
2792 signed char sr,t;
2793 sr=get_reg(i_regs->regmap,SR);
2794 if(opcode2[i]==8) { // CLRT/SETT
2795 if(opcode3[i]==0) emit_andimm(sr,~1,sr);
2796 if(opcode3[i]==1) emit_orimm(sr,1,sr);
2797 }else
2798 if(opcode2[i]==9) { // MOVT
2799 t=get_reg(i_regs->regmap,rt1[i]);
2800 if(t>=0)
2801 emit_andimm(sr,1,t);
2802 }
2803 }
2804
complex_assemble(int i,struct regstat * i_regs)2805 void complex_assemble(int i,struct regstat *i_regs)
2806 {
2807 if(opcode[i]==3&&opcode2[i]==4) { // DIV1
2808 emit_call((pointer)div1);
2809 }
2810 if(opcode[i]==0&&opcode2[i]==15) { // MAC.L
2811 load_regs(i_regs->regmap_entry,i_regs->regmap,MACL,MACH,MACH);
2812 // If both registers are the same, the register is incremented twice.
2813 // Pre-increment one of the function arguments.
2814 #if defined(__i386__) || defined(__x86_64__)
2815 if(rs1[i]==rs2[i]) {emit_mov(EDI,EBP);emit_addimm(EDI,4,EDI);}
2816 #else
2817 #if defined(__arm__)
2818 if(rs1[i]==rs2[i]) {emit_mov(6,5);emit_addimm(6,4,6);}
2819 #else
2820 // FIXME
2821 assert(0);
2822 #endif
2823 #endif
2824 /* DEBUG
2825 //if(i_regmap[HOST_CCREG]!=CCREG) {
2826 emit_loadreg(CCREG,ECX);
2827 emit_addimm(ECX,CLOCK_DIVIDER*(ccadj[i]),ECX);
2828 output_byte(0x03);
2829 output_modrm(1,4,ECX);
2830 output_sib(0,4,4);
2831 output_byte(4);
2832 emit_writeword(ECX,slave?(int)&SSH2->cycles:(int)&MSH2->cycles);
2833 // }*/
2834 emit_call((pointer)macl);
2835 }
2836 if(opcode[i]==4&&opcode2[i]==15) { // MAC.W
2837 load_regs(i_regs->regmap_entry,i_regs->regmap,MACL,MACH,MACH);
2838 // If both registers are the same, the register is incremented twice.
2839 // Pre-increment one of the function arguments.
2840 #if defined(__i386__) || defined(__x86_64__)
2841 if(rs1[i]==rs2[i]) {emit_mov(EDI,EBP);emit_addimm(EDI,2,EDI);}
2842 #else
2843 #if defined(__arm__)
2844 if(rs1[i]==rs2[i]) {emit_mov(6,5);emit_addimm(6,2,6);}
2845 #else
2846 // FIXME
2847 assert(0);
2848 #endif
2849 #endif
2850 /* DEBUG
2851 //if(i_regmap[HOST_CCREG]!=CCREG) {
2852 emit_loadreg(CCREG,ECX);
2853 emit_addimm(ECX,CLOCK_DIVIDER*(ccadj[i]),ECX);
2854 output_byte(0x03);
2855 output_modrm(1,4,ECX);
2856 output_sib(0,4,4);
2857 output_byte(4);
2858 emit_writeword(ECX,slave?(int)&SSH2->cycles:(int)&MSH2->cycles);
2859 // }*/
2860 emit_call((pointer)macw);
2861 }
2862 }
2863
ds_assemble(int i,struct regstat * i_regs)2864 void ds_assemble(int i,struct regstat *i_regs)
2865 {
2866 is_delayslot=1;
2867 switch(itype[i]) {
2868 case ALU:
2869 alu_assemble(i,i_regs);break;
2870 case IMM8:
2871 imm8_assemble(i,i_regs);break;
2872 case SHIFTIMM:
2873 shiftimm_assemble(i,i_regs);break;
2874 case LOAD:
2875 load_assemble(i,i_regs);break;
2876 case STORE:
2877 store_assemble(i,i_regs);break;
2878 case RMW:
2879 rmw_assemble(i,i_regs);break;
2880 case PCREL:
2881 pcrel_assemble(i,i_regs);break;
2882 case MULTDIV:
2883 multdiv_assemble(i,i_regs);break;
2884 case MOV:
2885 mov_assemble(i,i_regs);break;
2886 case EXT:
2887 ext_assemble(i,i_regs);break;
2888 case FLAGS:
2889 flags_assemble(i,i_regs);break;
2890 case COMPLEX:
2891 complex_assemble(i,i_regs);break;
2892 case SYSTEM:
2893 case SYSCALL:
2894 case UJUMP:
2895 case RJUMP:
2896 case CJUMP:
2897 case SJUMP:
2898 printf("Jump in the delay slot. This is probably a bug.\n");
2899 }
2900 is_delayslot=0;
2901 }
2902
2903 // Is the branch target a valid internal jump?
internal_branch(int addr)2904 int internal_branch(int addr)
2905 {
2906 if(addr&1) return 0; // Indirect (register) jump
2907 if(addr>=start && addr<start+slen*2-2)
2908 {
2909 return 1;
2910 }
2911 return 0;
2912 }
2913
2914 #ifndef wb_invalidate
wb_invalidate(signed char pre[],signed char entry[],u32 dirty,u64 u)2915 void wb_invalidate(signed char pre[],signed char entry[],u32 dirty, u64 u)
2916 {
2917 int hr;
2918 for(hr=0;hr<HOST_REGS;hr++) {
2919 if(hr!=EXCLUDE_REG) {
2920 if(pre[hr]!=entry[hr]) {
2921 if(pre[hr]>=0) {
2922 if((dirty>>hr)&1) {
2923 if(!((u>>pre[hr])&1)) {
2924 int nr;
2925 if((nr=get_reg(entry,pre[hr]))<0) {
2926 emit_storereg(pre[hr],hr);
2927 }else{
2928 // Register move would overwrite another register, so write back
2929 if(pre[nr]>=0)
2930 if(get_reg(entry,pre[nr])>=0)
2931 emit_storereg(pre[hr],hr);
2932 }
2933 }
2934 }
2935 }
2936 }
2937 }
2938 }
2939 // Move from one register to another (no writeback)
2940 for(hr=0;hr<HOST_REGS;hr++) {
2941 if(hr!=EXCLUDE_REG) {
2942 if(pre[hr]!=entry[hr]) {
2943 if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
2944 int nr;
2945 if((nr=get_reg(entry,pre[hr]))>=0) {
2946 if(pre[nr]<0||get_reg(entry,pre[nr])<0) {
2947 emit_mov(hr,nr);
2948 }
2949 }
2950 }
2951 }
2952 }
2953 }
2954 // Reload registers that couldn't be directly moved
2955 for(hr=0;hr<HOST_REGS;hr++) {
2956 if(hr!=EXCLUDE_REG) {
2957 if(pre[hr]!=entry[hr]) {
2958 if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
2959 int nr;
2960 if((nr=get_reg(entry,pre[hr]))>=0) {
2961 if(pre[nr]>=0) {
2962 if(get_reg(entry,pre[nr])>=0) {
2963 emit_loadreg(pre[hr],nr);
2964 }
2965 }
2966 }
2967 }
2968 }
2969 }
2970 }
2971 }
2972 #endif
2973
2974 // Load the specified registers
2975 // This only loads the registers given as arguments because
2976 // we don't want to load things that will be overwritten
load_regs(signed char entry[],signed char regmap[],int rs1,int rs2,int rs3)2977 void load_regs(signed char entry[],signed char regmap[],int rs1,int rs2,int rs3)
2978 {
2979 int hr;
2980 if(rs1==TBIT) rs1=SR;
2981 if(rs2==TBIT) rs2=SR;
2982 if(rs3==TBIT) rs3=SR;
2983 // Load 32-bit regs
2984 for(hr=0;hr<HOST_REGS;hr++) {
2985 if(hr!=EXCLUDE_REG&®map[hr]>=0) {
2986 if(entry[hr]!=regmap[hr]) {
2987 if(regmap[hr]==rs1||regmap[hr]==rs2||regmap[hr]==rs3)
2988 {
2989 emit_loadreg(regmap[hr],hr);
2990 }
2991 }
2992 }
2993 }
2994 }
2995
2996 // Load registers prior to the start of a loop
2997 // so that they are not loaded within the loop
loop_preload(signed char pre[],signed char entry[])2998 static void loop_preload(signed char pre[],signed char entry[])
2999 {
3000 int hr;
3001 for(hr=0;hr<HOST_REGS;hr++) {
3002 if(hr!=EXCLUDE_REG) {
3003 if(pre[hr]!=entry[hr]) {
3004 if(entry[hr]>=0) {
3005 if(get_reg(pre,entry[hr])<0) {
3006 assem_debug("loop preload:\n");
3007 //printf("loop preload: %d\n",hr);
3008 if(entry[hr]<TEMPREG)
3009 {
3010 emit_loadreg(entry[hr],hr);
3011 }
3012 }
3013 }
3014 }
3015 }
3016 }
3017 }
3018
3019 // Generate address for load/store instruction
address_generation(int i,struct regstat * i_regs,signed char entry[])3020 void address_generation(int i,struct regstat *i_regs,signed char entry[])
3021 {
3022 if(itype[i]==LOAD||itype[i]==STORE||itype[i]==RMW) {
3023 int rs,ri;
3024 int rm;
3025 int ra;
3026 int agr=AGEN1+(i&1);
3027 int mgr=MGEN1+(i&1);
3028 if(itype[i]==LOAD) {
3029 ra=get_reg(i_regs->regmap,rt1[i]);
3030 if(ra<0||rt1[i]==TBIT) ra=get_reg(i_regs->regmap,-1);
3031 assert(ra>=0);
3032 }
3033 if(itype[i]==STORE||itype[i]==RMW) {
3034 ra=get_reg(i_regs->regmap,agr);
3035 if(ra<0) ra=get_reg(i_regs->regmap,-1);
3036 assert(ra>=0);
3037 }
3038 if(itype[i]==STORE) {
3039 rs=get_reg(i_regs->regmap,rs2[i]);
3040 ri=get_reg(i_regs->regmap,rs3[i]);
3041 }else{
3042 rs=get_reg(i_regs->regmap,rs1[i]);
3043 ri=get_reg(i_regs->regmap,rs2[i]);
3044 }
3045 rm=get_reg(i_regs->regmap,MOREG);
3046 if(rm<0) rm=get_alt_reg(i_regs->regmap,-1);
3047 if(ra>=0) {
3048 int offset=imm[i];
3049 int c;
3050 u32 constaddr;
3051 if(addrmode[i]==DUALIND||addrmode[i]==GBRIND) {
3052 c=(i_regs->wasdoingcp>>rs)&(i_regs->wasdoingcp>>ri)&1;
3053 constaddr=cpmap[i][rs]+cpmap[i][ri];
3054 }else{
3055 c=(i_regs->wasdoingcp>>rs)&1;
3056 constaddr=cpmap[i][rs]+offset;
3057 if(addrmode[i]==POSTINC) constaddr-=1<<((opcode[i]==4)?2:(opcode2[i]&3));
3058 }
3059 if(addrmode[i]==PREDEC&&!c) {
3060 if(rt1[i]!=rs1[i]) emit_addimm(rs,-(1<<((opcode[i]==4)?2:(opcode2[i]&3))),rs);
3061 else offset=-(1<<((opcode[i]==4)?2:(opcode2[i]&3)));
3062 }
3063 if(rs<0) {
3064 if(itype[i]==LOAD) {
3065 if(!entry||entry[ra]!=rs1[i])
3066 emit_loadreg(rs1[i],ra);
3067 }
3068 if(itype[i]==STORE) {
3069 if(!entry||entry[ra]!=rs2[i])
3070 emit_loadreg(rs2[i],ra);
3071 }
3072 //if(!entry||entry[ra]!=rs1[i])
3073 // printf("poor load scheduling!\n");
3074 }
3075 else if(c) {
3076 // Stores to memory go thru the mapper to detect self-modifying
3077 // code, loads don't.
3078 if(rm>=0) {
3079 if(!entry||entry[rm]!=mgr) {
3080 if(itype[i]==STORE) {
3081 if(can_direct_write(constaddr))
3082 generate_map_const(constaddr,rm);
3083 }
3084 if(itype[i]==RMW) {
3085 generate_map_const(constaddr,rm);
3086 }
3087 }
3088 }
3089 if((opcode2[i]&3)==0||itype[i]==RMW) constaddr^=1; // byteswap for little-endian
3090 if(rs1[i]!=rt1[i]||itype[i]!=LOAD||addrmode[i]==DUALIND||addrmode[i]==GBRIND) {
3091 if(!entry||entry[ra]!=agr) {
3092 #ifdef HOST_IMM_ADDR32
3093 if(itype[i]==RMW || (itype[i]==STORE && can_direct_write(constaddr)))
3094 #endif
3095 {
3096 if(itype[i]==LOAD&&can_direct_read(constaddr))
3097 emit_movimm(map_address(constaddr),ra);
3098 else
3099 emit_movimm(constaddr,ra);
3100 }
3101 } // else did it in the previous cycle
3102 } // else load_consts already did it
3103 }
3104 if(!c) {
3105 if(rs>=0) {
3106 if(addrmode[i]==DUALIND||addrmode[i]==GBRIND)
3107 emit_add(rs,ri,ra);
3108 else
3109 if(offset) emit_addimm(rs,offset,ra);
3110 }else{
3111 if(addrmode[i]==DUALIND||addrmode[i]==GBRIND)
3112 emit_add(ra,ri,ra);
3113 else
3114 if(offset) emit_addimm(ra,offset,ra);
3115 }
3116 }
3117 }
3118 }
3119 // Preload constants for next instruction
3120 if(itype[i+1]==LOAD||itype[i+1]==STORE||itype[i+1]==RMW) {
3121 int agr,ra,rm;
3122 #ifndef HOST_IMM_ADDR32
3123 // Mapper entry
3124 agr=MGEN1+((i+1)&1);
3125 rm=get_reg(i_regs->regmap,agr);
3126 if(rm>=0) {
3127 int rs,ri;
3128 if(itype[i+1]==STORE) {
3129 rs=get_reg(regs[i+1].regmap,rs2[i+1]);
3130 ri=get_reg(regs[i+1].regmap,rs3[i+1]);
3131 }else{
3132 rs=get_reg(regs[i+1].regmap,rs1[i+1]);
3133 ri=get_reg(regs[i+1].regmap,rs2[i+1]);
3134 }
3135 //int rm=get_reg(i_regs->regmap,MOREG);
3136 int offset=imm[i+1];
3137 int c;
3138 u32 constaddr;
3139 if(addrmode[i+1]==DUALIND||addrmode[i+1]==GBRIND) {
3140 c=(regs[i+1].wasdoingcp>>rs)&(regs[i+1].wasdoingcp>>ri)&1;
3141 constaddr=cpmap[i+1][rs]+cpmap[i+1][ri];
3142 }else{
3143 c=(regs[i+1].wasdoingcp>>rs)&1;
3144 constaddr=cpmap[i+1][rs]+offset;
3145 if(addrmode[i+1]==POSTINC) constaddr-=1<<((opcode[i+1]==4)?2:(opcode2[i+1]&3));
3146 }
3147 if((opcode2[i+1]&3)==0||itype[i+1]==RMW) constaddr^=1; // byteswap for little-endian
3148 if(c) {
3149 // Stores to memory go thru the mapper to detect self-modifying
3150 // code, loads don't.
3151 if(itype[i+1]==STORE) {
3152 if(can_direct_write(constaddr))
3153 generate_map_const(constaddr,rm);
3154 }
3155 if(itype[i+1]==RMW) {
3156 generate_map_const(constaddr,rm);
3157 }
3158 }
3159 }
3160 #endif
3161 // Actual address
3162 agr=AGEN1+((i+1)&1);
3163 ra=get_reg(i_regs->regmap,agr);
3164 if(ra>=0) {
3165 int c;
3166 int offset;
3167 int rs,ri;
3168 u32 constaddr;
3169 if(itype[i+1]==STORE) {
3170 rs=get_reg(regs[i+1].regmap,rs2[i+1]);
3171 ri=get_reg(regs[i+1].regmap,rs3[i+1]);
3172 }else{
3173 rs=get_reg(regs[i+1].regmap,rs1[i+1]);
3174 ri=get_reg(regs[i+1].regmap,rs2[i+1]);
3175 }
3176 offset=imm[i+1];
3177 if(addrmode[i+1]==DUALIND||addrmode[i+1]==GBRIND) {
3178 c=(regs[i+1].wasdoingcp>>rs)&(regs[i+1].wasdoingcp>>ri)&1;
3179 constaddr=cpmap[i+1][rs]+cpmap[i+1][ri];
3180 }else{
3181 c=(regs[i+1].wasdoingcp>>rs)&1;
3182 constaddr=cpmap[i+1][rs]+offset;
3183 if(addrmode[i+1]==POSTINC) constaddr-=1<<((opcode[i+1]==4)?2:(opcode2[i+1]&3));
3184 }
3185 if((opcode2[i+1]&3)==0||itype[i+1]==RMW) constaddr^=1; // byteswap for little-endian
3186 if(c&&(rs1[i+1]!=rt1[i+1]||itype[i+1]!=LOAD||addrmode[i+1]==DUALIND||addrmode[i+1]==GBRIND)) {
3187 //if(c&&(rs1[i+1]!=rt1[i+1]||itype[i+1]!=LOAD)) {
3188 #ifdef HOST_IMM_ADDR32
3189 if(itype[i+1]==RMW || (itype[i+1]==STORE && can_direct_write(constaddr)))
3190 #endif
3191 {
3192 if(itype[i+1]==LOAD&&can_direct_read(constaddr))
3193 emit_movimm(map_address(constaddr),ra);
3194 else
3195 emit_movimm(constaddr,ra);
3196 }
3197 }
3198 }
3199 }
3200 }
3201
get_final_value(int hr,int i,int * value)3202 int get_final_value(int hr, int i, int *value)
3203 {
3204 int reg=regs[i].regmap[hr];
3205 while(i<slen-1) {
3206 if(regs[i+1].regmap[hr]!=reg) break;
3207 if(!((regs[i+1].isdoingcp>>hr)&1)) break;
3208 if(bt[i+1]) break;
3209 i++;
3210 }
3211 if(i<slen-1) {
3212 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP) {
3213 *value=cpmap[i][hr];
3214 return 1;
3215 }
3216 if(!bt[i+1]) {
3217 if(itype[i+1]==UJUMP||itype[i+1]==RJUMP||itype[i+1]==SJUMP) {
3218 // Load in delay slot, out-of-order execution
3219 if(itype[i+2]==LOAD&&rs1[i+2]==reg&&rt1[i+2]==reg&&((regs[i+1].wasdoingcp>>hr)&1))
3220 {
3221 if(addrmode[i+2]==DUALIND||addrmode[i+2]==GBRIND) {
3222 *value=cpmap[i][hr];
3223 return 1;
3224 }
3225 // Don't load address if can_direct_read and HOST_IMM_ADDR32
3226 #ifdef HOST_IMM_ADDR32
3227 if(can_direct_read(cpmap[i][hr]+imm[i+2])) return 0;
3228 #endif
3229 // Precompute load address
3230 *value=cpmap[i][hr]+imm[i+2];
3231 if(can_direct_read(*value)) *value=map_address(*value);
3232 if((opcode2[i+2]&3)==0) *value^=1; // byteswap for little-endian
3233 return 1;
3234 }
3235 }
3236 if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg)
3237 {
3238 if(addrmode[i+1]==DUALIND||addrmode[i+1]==GBRIND) {
3239 *value=cpmap[i][hr];
3240 return 1;
3241 }
3242 // Don't load address if can_direct_read and HOST_IMM_ADDR32
3243 #ifdef HOST_IMM_ADDR32
3244 if(can_direct_read(cpmap[i][hr]+imm[i+1])) return 0;
3245 #endif
3246 // Precompute load address
3247 *value=cpmap[i][hr]+imm[i+1];
3248 if(can_direct_read(*value)) *value=map_address(*value);
3249 if((opcode2[i+1]&3)==0) *value^=1; // byteswap for little-endian
3250 //printf("c=%x imm=%x\n",(int)cpmap[i][hr],imm[i+1]);
3251 return 1;
3252 }
3253 }
3254 }
3255 *value=cpmap[i][hr];
3256 //printf("c=%x\n",(int)cpmap[i][hr]);
3257 if(i==slen-1) return 1;
3258 return !((unneeded_reg[i+1]>>reg)&1);
3259 }
3260
3261 // Load registers with known constants
load_consts(signed char pre[],signed char regmap[],int i)3262 void load_consts(signed char pre[],signed char regmap[],int i)
3263 {
3264 int hr;
3265 // Load 32-bit regs
3266 for(hr=0;hr<HOST_REGS;hr++) {
3267 if(hr!=EXCLUDE_REG&®map[hr]>=0) {
3268 if(i==0||!((regs[i-1].isdoingcp>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) {
3269 if(((regs[i].isdoingcp>>hr)&1)&®map[hr]<64&®map[hr]>=0) {
3270 int value;
3271 if(get_final_value(hr,i,&value)) {
3272 emit_movimm(value,hr);
3273 }
3274 }
3275 }
3276 }
3277 }
3278 }
/*
 * Load every dirty host register that currently holds a propagated constant.
 *
 * regmap[] - host register map to consult
 * dirty    - bitmask of dirty host registers; only these are loaded
 * i        - instruction index whose isdoingcp flags and cpmap values are used
 *
 * Unlike load_consts(), the value comes straight from cpmap[i][hr] with no
 * look-ahead.
 */
void load_all_consts(signed char regmap[],u32 dirty,int i)
{
  int hr;
  // Load 32-bit regs
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((dirty>>hr)&1)) {
      if(((regs[i].isdoingcp>>hr)&1)&&regmap[hr]<64) {
        int value=cpmap[i][hr];
        emit_movimm(value,hr);
      }
    }
  }
}
3292
3293 // Write out all dirty registers (except cycle count)
wb_dirtys(signed char i_regmap[],u32 i_dirty)3294 void wb_dirtys(signed char i_regmap[],u32 i_dirty)
3295 {
3296 int hr;
3297 for(hr=0;hr<HOST_REGS;hr++) {
3298 if(hr!=EXCLUDE_REG) {
3299 if(i_regmap[hr]>=0) {
3300 if(i_regmap[hr]!=CCREG) {
3301 if((i_dirty>>hr)&1) {
3302 emit_storereg(i_regmap[hr],hr);
3303 }
3304 }
3305 }
3306 }
3307 }
3308 }
3309 // Write out dirty registers that we need to reload (pair with load_needed_regs)
3310 // This writes the registers not written by store_regs_bt
wb_needed_dirtys(signed char i_regmap[],u32 i_dirty,int addr)3311 void wb_needed_dirtys(signed char i_regmap[],u32 i_dirty,int addr)
3312 {
3313 int hr;
3314 int t=(addr-start)>>1;
3315 for(hr=0;hr<HOST_REGS;hr++) {
3316 if(hr!=EXCLUDE_REG) {
3317 if(i_regmap[hr]>=0) {
3318 if(i_regmap[hr]!=CCREG) {
3319 if((i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1)) || i_regmap[hr]==SR || i_regmap[hr]==15) {
3320 if((i_dirty>>hr)&1) {
3321 emit_storereg(i_regmap[hr],hr);
3322 }
3323 }
3324 }
3325 }
3326 }
3327 }
3328 }
3329
3330 // Load all registers (except cycle count)
load_all_regs(signed char i_regmap[])3331 void load_all_regs(signed char i_regmap[])
3332 {
3333 int hr;
3334 for(hr=0;hr<HOST_REGS;hr++) {
3335 if(hr!=EXCLUDE_REG) {
3336 if(i_regmap[hr]>=0 && i_regmap[hr]<TEMPREG && i_regmap[hr]!=CCREG)
3337 {
3338 emit_loadreg(i_regmap[hr],hr);
3339 }
3340 }
3341 }
3342 }
3343
3344 // Load all current registers also needed by next instruction
load_needed_regs(signed char i_regmap[],signed char next_regmap[])3345 void load_needed_regs(signed char i_regmap[],signed char next_regmap[])
3346 {
3347 int hr;
3348 for(hr=0;hr<HOST_REGS;hr++) {
3349 if(hr!=EXCLUDE_REG) {
3350 if(get_reg(next_regmap,i_regmap[hr])>=0) {
3351 if(i_regmap[hr]>=0 && i_regmap[hr]<TEMPREG && i_regmap[hr]!=CCREG)
3352 {
3353 emit_loadreg(i_regmap[hr],hr);
3354 }
3355 }
3356 }
3357 }
3358 }
3359
3360 // Load all regs, storing cycle count if necessary
load_regs_entry(int t)3361 void load_regs_entry(int t)
3362 {
3363 int hr;
3364 if(is_ds[t]) emit_addimm(HOST_CCREG,CLOCK_DIVIDER,HOST_CCREG);
3365 else if(ccadj[t]) emit_addimm(HOST_CCREG,-ccadj[t]*CLOCK_DIVIDER,HOST_CCREG);
3366 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
3367 emit_storereg(CCREG,HOST_CCREG);
3368 }
3369 // Load 32-bit regs
3370 for(hr=0;hr<HOST_REGS;hr++) {
3371 if(regs[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]<TEMPREG) {
3372 if(regs[t].regmap_entry[hr]!=CCREG)
3373 {
3374 emit_loadreg(regs[t].regmap_entry[hr],hr);
3375 }
3376 }
3377 }
3378 }
3379
3380 // Store dirty registers prior to branch
store_regs_bt(signed char i_regmap[],u32 i_dirty,int addr)3381 void store_regs_bt(signed char i_regmap[],u32 i_dirty,int addr)
3382 {
3383 if(internal_branch(addr))
3384 {
3385 int t=(addr-start)>>1;
3386 int hr;
3387 for(hr=0;hr<HOST_REGS;hr++) {
3388 if(hr!=EXCLUDE_REG) {
3389 if(i_regmap[hr]>=0 && i_regmap[hr]!=CCREG) {
3390 if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1) ) {
3391 if((i_dirty>>hr)&1) {
3392 if(!((unneeded_reg[t]>>i_regmap[hr])&1)) {
3393 emit_storereg(i_regmap[hr],hr);
3394 }
3395 }
3396 }
3397 }
3398 }
3399 }
3400 }
3401 else
3402 {
3403 // Branch out of this block, write out all dirty regs
3404 wb_dirtys(i_regmap,i_dirty);
3405 }
3406 }
3407
3408 // Load all needed registers for branch target
load_regs_bt(signed char i_regmap[],u32 i_dirty,int addr)3409 void load_regs_bt(signed char i_regmap[],u32 i_dirty,int addr)
3410 {
3411 //if(addr>=start && addr<(start+slen*4))
3412 if(internal_branch(addr))
3413 {
3414 int t=(addr-start)>>1;
3415 int hr;
3416 // Store the cycle count before loading something else
3417 if(i_regmap[HOST_CCREG]!=CCREG) {
3418 assert(i_regmap[HOST_CCREG]==-1);
3419 }
3420 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
3421 emit_storereg(CCREG,HOST_CCREG);
3422 }
3423 // Load 32-bit regs
3424 for(hr=0;hr<HOST_REGS;hr++) {
3425 if(hr!=EXCLUDE_REG&®s[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]<TEMPREG) {
3426 if(i_regmap[hr]!=regs[t].regmap_entry[hr] ) {
3427 if(regs[t].regmap_entry[hr]!=CCREG)
3428 {
3429 emit_loadreg(regs[t].regmap_entry[hr],hr);
3430 }
3431 }
3432 }
3433 }
3434 }
3435 }
3436
/*
 * Check whether a branch to addr can be taken directly, i.e. without any
 * register stores or loads in between.
 *
 * i_regmap[] - host register map at the branch
 * i_dirty    - bitmask of dirty host registers at the branch
 * addr       - branch target address
 *
 * Returns 1 if the current register state is compatible with the target,
 * 0 otherwise.  For a target inside the block (addr in
 * [start, start+slen*2-2)) this requires that:
 *   - the target expects the cycle counter in HOST_CCREG,
 *   - no host register would have to be loaded,
 *   - no dirty register that the target still needs would have to be
 *     written back,
 *   - the target is not a delay slot.
 * For any other address it requires that nothing except the cycle counter in
 * HOST_CCREG is dirty.
 */
int match_bt(signed char i_regmap[],u32 i_dirty,int addr)
{
  int hr;
  if(addr>=start && addr<start+slen*2-2)
  {
    int t=(addr-start)>>1;
    if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) return 0;
    for(hr=0;hr<HOST_REGS;hr++)
    {
      signed char wanted;
      if(hr==EXCLUDE_REG) continue;
      wanted=regs[t].regmap_entry[hr];
      if(i_regmap[hr]!=wanted)
      {
        // Target wants a real guest register here that we do not have
        if(wanted>=0&&wanted<TEMPREG) return 0;
        // Our register would be dropped while dirty and still needed
        if(((i_dirty>>hr)&1)&&!((unneeded_reg[t]>>i_regmap[hr])&1)) return 0;
      }
      else if(i_regmap[hr]>=0) // Same register but is it dirty?
      {
        if(!((regs[t].dirty>>hr)&1)&&((i_dirty>>hr)&1)
           &&!((unneeded_reg[t]>>i_regmap[hr])&1))
        {
          //printf("%x: dirty no match\n",addr);
          return 0;
        }
      }
    }
    // Delay slots require additional processing, so do not match
    if(is_ds[t]) return 0;
  }
  else
  {
    for(hr=0;hr<HOST_REGS;hr++)
    {
      if(hr==EXCLUDE_REG) continue;
      if(i_regmap[hr]<0) continue;
      // The cycle counter in its home register may stay dirty
      if(hr==HOST_CCREG&&i_regmap[hr]==CCREG) continue;
      if((i_dirty>>hr)&1) return 0;
    }
  }
  return 1;
}
3503
3504 // Used when a branch jumps into the delay slot of another branch
ds_assemble_entry(int i)3505 void ds_assemble_entry(int i)
3506 {
3507 int t=(ba[i]-start)>>1;
3508 if(!instr_addr[t]) instr_addr[t]=(pointer)out;
3509 assem_debug("Assemble delay slot at %x\n",ba[i]);
3510 assem_debug("<->\n");
3511 if(regs[t].regmap_entry[HOST_CCREG]==CCREG&®s[t].regmap[HOST_CCREG]!=CCREG)
3512 wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty);
3513 load_regs(regs[t].regmap_entry,regs[t].regmap,rs1[t],rs2[t],rs3[t]);
3514 address_generation(t,®s[t],regs[t].regmap_entry);
3515 if(itype[t]==LOAD||itype[t]==STORE)
3516 load_regs(regs[t].regmap_entry,regs[t].regmap,MMREG,MMREG,MMREG);
3517 is_delayslot=0;
3518 switch(itype[t]) {
3519 case ALU:
3520 alu_assemble(t,®s[t]);break;
3521 case IMM8:
3522 imm8_assemble(t,®s[t]);break;
3523 case SHIFTIMM:
3524 shiftimm_assemble(t,®s[t]);break;
3525 case LOAD:
3526 load_assemble(t,®s[t]);break;
3527 case STORE:
3528 store_assemble(t,®s[t]);break;
3529 case RMW:
3530 rmw_assemble(t,®s[t]);break;
3531 case PCREL:
3532 pcrel_assemble(t,®s[t]);break;
3533 case MULTDIV:
3534 multdiv_assemble(t,®s[t]);break;
3535 case MOV:
3536 mov_assemble(t,®s[t]);break;
3537 case EXT:
3538 ext_assemble(i,®s[t]);break;
3539 case FLAGS:
3540 flags_assemble(i,®s[t]);break;
3541 case COMPLEX:
3542 complex_assemble(i,®s[t]);break;
3543 case SYSTEM:
3544 case SYSCALL:
3545 case UJUMP:
3546 case RJUMP:
3547 case CJUMP:
3548 case SJUMP:
3549 printf("Jump in the delay slot. This is probably a bug.\n");
3550 }
3551 store_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+2);
3552 load_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+2);
3553 if(internal_branch(ba[i]+2))
3554 assem_debug("branch: internal\n");
3555 else
3556 assem_debug("branch: external\n");
3557 assert(internal_branch(ba[i]+2));
3558 add_to_linker((int)out,ba[i]+2,internal_branch(ba[i]+2));
3559 emit_jmp(0);
3560 }
3561
do_cc(int i,signed char i_regmap[],int * adj,int addr,int taken,int invert)3562 void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert)
3563 {
3564 int count;
3565 int jaddr;
3566 int idle=0;
3567 if(itype[i]==RJUMP)
3568 {
3569 *adj=0;
3570 }
3571 //if(ba[i]>=start && ba[i]<(start+slen*4))
3572 if(internal_branch(ba[i]))
3573 {
3574 int t=(ba[i]-start)>>1;
3575 if(is_ds[t]) *adj=ccadj[t+1]-cycles[t]; // Branch into delay slot adds an extra cycle
3576 else *adj=ccadj[t];
3577 }
3578 else
3579 {
3580 *adj=0;
3581 }
3582 if(itype[i]==CJUMP) *adj-=2+cycles[i]; // Two extra cycles for taken BT/BF
3583 if(itype[i]==SJUMP) *adj-=1+cycles[i]+cycles[i+1]; // One extra cycle for taken BT/BF with delay slot
3584 count=ccadj[i]+((taken==NODS)?0:cycles[i]+cycles[i+1]);
3585 if(taken==TAKEN && i==(ba[i]-start)>>1 && source[i+1]==0) {
3586 // Idle loop
3587 // FIXME
3588 //if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG);
3589 idle=(int)out;
3590 //emit_subfrommem(&idlecount,HOST_CCREG); // Count idle cycles
3591 emit_andimm(HOST_CCREG,3,HOST_CCREG);
3592 jaddr=(int)out;
3593 emit_jmp(0);
3594 }
3595 else if(*adj==0||invert) {
3596 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG);
3597 jaddr=(int)out;
3598 emit_jns(0);
3599 }
3600 else
3601 {
3602 emit_cmpimm(HOST_CCREG,-CLOCK_DIVIDER*count);
3603 jaddr=(int)out;
3604 emit_jns(0);
3605 }
3606 add_stub(CC_STUB,jaddr,idle?idle:(int)out,(*adj==0||invert||idle)?0:count,i,addr,taken,0);
3607 }
3608
/*
 * Emit the out-of-line body of cycle-count stub n.
 *
 * The stub fields read here appear to mirror the argument order used when
 * do_cc() calls add_stub() (TODO confirm against add_stub):
 *   stubs[n][1] - location of the jump that enters the stub
 *   stubs[n][2] - address to return to
 *   stubs[n][3] - cycle count to add before the call (0 if already added)
 *   stubs[n][4] - instruction index i
 *   stubs[n][5] - PC to save, or -1 when the PC is the branch target register
 *   stubs[n][6] - NODS / TAKEN / NOTTAKEN
 *
 * Sequence: write back dirty registers, save the PC, adjust the cycle count,
 * transfer to cc_interrupt (slave) or slave_entry (master), then restore the
 * registers needed at the return point and jump back.
 */
void do_ccstub(int n)
{
  int i;
  literal_pool(256);
  assem_debug("do_ccstub %x\n",start+stubs[n][4]*2);
  set_jump_target(stubs[n][1],(pointer)out);
  i=stubs[n][4];
  if(stubs[n][6]==NODS) {
    // If the next instruction is a load that overwrites its own address
    // register, put the plain constant back before writing registers out
    if(itype[i+1]==LOAD&&rs1[i+1]==rt1[i+1]&&addrmode[i+1]!=DUALIND&&addrmode[i+1]!=GBRIND) {
      int hr=get_reg(regs[i].regmap,rs1[i+1]);
      if(hr>=0&&((regs[i].wasdoingcp>>hr)&1))
      {
        emit_movimm(cpmap[i][hr],hr);
      }
    }
    wb_dirtys(regs[i].regmap_entry,regs[i].dirty);
  }
  else if(stubs[n][6]!=TAKEN) {
    wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty);
  }
  else {
    // Taken branch: only internal targets get a (partial) writeback here
    if(internal_branch(ba[i]))
      wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
  }
  if(stubs[n][5]!=-1)
  {
    // Save PC as return address
    emit_movimm(stubs[n][5],0);
    emit_writeword(0,slave?(int)&slave_pc:(int)&master_pc);
  }
  else
  {
    // Return address is branch target
    if(itype[i]==RJUMP)
    {
      int r=get_reg(branch_regs[i].regmap,rs1[i]);
      // In these three cases the target lives in RTEMP rather than in rs1
      if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
        r=get_reg(branch_regs[i].regmap,RTEMP);
      }
      else if(opcode[i]==0&&opcode2[i]==3) { // BSRF/BRAF
        r=get_reg(branch_regs[i].regmap,RTEMP);
      }
      else if(opcode[i]==0&&opcode2[i]==11&&opcode3[i]==2) { // RTE
        r=get_reg(branch_regs[i].regmap,RTEMP);
      }
      emit_writeword(r,slave?(int)&slave_pc:(int)&master_pc);
    }
    else {printf("Unknown branch type in do_ccstub\n");exit(1);}
  }
  // Update cycle count
  if(stubs[n][6]==NODS) assert(regs[i].regmap[HOST_CCREG]==CCREG||regs[i].regmap[HOST_CCREG]==-1);
  else assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1);
  if(stubs[n][3]) emit_addimm(HOST_CCREG,CLOCK_DIVIDER*stubs[n][3],HOST_CCREG);
  if(slave) {
    emit_load_return_address(SLAVERA_REG);
    emit_jmp((pointer)cc_interrupt);
  }
  else {
    emit_call((pointer)slave_entry);
  }
  // Execution resumes here: undo the temporary cycle adjustment
  if(stubs[n][3]&&stubs[n][6]!=NODS) emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*stubs[n][3],HOST_CCREG);
  if(stubs[n][6]==TAKEN) {
    if(internal_branch(ba[i]))
      load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>1].regmap_entry);
    else if(itype[i]==RJUMP) {
      // Reload the jump target: from the saved PC if it was held in RTEMP,
      // otherwise from the guest register itself
      if(get_reg(branch_regs[i].regmap,RTEMP)>=0)
        emit_readword(slave?(int)&slave_pc:(int)&master_pc,get_reg(branch_regs[i].regmap,RTEMP));
      else
        emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i]));
    }
  }else if(stubs[n][6]==NOTTAKEN) {
    if(i<slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
    else load_all_regs(branch_regs[i].regmap);
  }else{
    if(stubs[n][6]==NODS) {
      // At a branch target (or the first instruction) only record where the
      // stub's return path begins; no register reload is emitted here
      if(bt[i]||i==0) ccstub_return[i]=(pointer)out;
      else {
        if(stubs[n][3]) emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*stubs[n][3],HOST_CCREG);
        load_all_regs(regs[i].regmap);
        load_consts(regmap_pre[i],regs[i].regmap,i);
        // Redo the precomputed load address that was replaced by the plain
        // constant at the top of the stub
        if(itype[i+1]==LOAD&&rs1[i+1]==rt1[i+1]&&addrmode[i+1]!=DUALIND&&addrmode[i+1]!=GBRIND) {
          int hr=get_reg(regs[i].regmap,rs1[i+1]);
          if(hr>=0&&((regs[i].wasdoingcp>>hr)&1))
          {
            #ifdef HOST_IMM_ADDR32
            if(!can_direct_read(cpmap[i][hr]+imm[i+1]))
            #endif
            {
              int value=cpmap[i][hr]+imm[i+1];
              if(can_direct_read(value)) value=map_address(value);
              if((opcode2[i+1]&3)==0) value^=1; // byteswap for little-endian
              emit_movimm(value,hr);
            }
          }
        }
        ccstub_return[i]=0;
      }
    }
    else load_all_regs(branch_regs[i].regmap);
  }
  emit_jmp(stubs[n][2]); // return address
}
3711
add_to_linker(int addr,int target,int ext)3712 void add_to_linker(int addr,int target,int ext)
3713 {
3714 link_addr[linkcount][0]=addr;
3715 link_addr[linkcount][1]=target|slave;
3716 link_addr[linkcount][2]=ext;
3717 linkcount++;
3718 }
3719
/*
 * Assemble an unconditional branch with a fixed target (itype UJUMP) at
 * index i, together with its delay slot at i+1.
 *
 * i      - index of the branch instruction
 * i_regs - register state for instruction i
 *
 * Order of emitted code: address generation for the delay slot, optional
 * early PR write, the delay-slot instruction, register writeback and
 * invalidation, the PR write (when the branch sets PR), dirty-register
 * stores for the target, the cycle-count check, target register loads and
 * finally the jump itself (or a re-assembled delay slot when the target is
 * one).
 */
void ujump_assemble(int i,struct regstat *i_regs)
{
  u64 bc_unneeded;
  int cc,adj;
  signed char *i_regmap=i_regs->regmap;
  if(i==(ba[i]-start)>>1) assem_debug("idle loop\n");
  address_generation(i+1,i_regs,regs[i].regmap_entry);
  #ifdef REG_PREFETCH
  int temp=get_reg(branch_regs[i].regmap,PTEMP);
  if(rt1[i]==PR&&temp>=0)
  {
    int return_address=start+i*2+4;
    if(get_reg(branch_regs[i].regmap,PR)>0)
    if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
  }
  #endif
  if(rt1[i]==PR) {
    if(rt1[i+1]==PR||rt2[i+1]==PR) {
      // Delay slot abuse, set PR before executing delay slot
      int rt;
      unsigned int return_address;
      rt=get_reg(regs[i].regmap,PR);
      return_address=start+i*2+4;
      assert(rt>=0);
      if(rt>=0) {
        #ifdef REG_PREFETCH
        if(temp>=0)
        {
          if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
        }
        #endif
        emit_movimm(return_address,rt); // PC into link register
      }
    }
  }
  ds_assemble(i+1,i_regs);
  // Registers not needed after the branch; the branch's own destination is
  // rewritten below, so its old value is unneeded too.
  // NOTE(review): if rt1[i] can be negative for a branch that writes no
  // register, this shift count is invalid - confirm how rt1 is initialised.
  bc_unneeded=regs[i].u;
  bc_unneeded|=1LL<<rt1[i];
  wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,
                bc_unneeded);
  load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG,CCREG);
  if(rt1[i]==PR) {
    int rt;
    unsigned int return_address;
    assert(rs1[i+1]!=PR);
    assert(rs2[i+1]!=PR);
    assert(rs3[i+1]!=PR);
    rt=get_reg(branch_regs[i].regmap,PR);
    assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
    //assert(rt>=0);
    return_address=start+i*2+4;
    // Skip the write when the delay slot itself wrote PR (handled above)
    if(rt>=0&&rt1[i+1]!=PR&&rt2[i+1]!=PR) {
      #ifdef USE_MINI_HT
      if(internal_branch(return_address)) {
        // Pick a scratch register: prefer rt+1, else any free one
        int temp=rt+1;
        if(temp==EXCLUDE_REG||temp>=HOST_REGS||
           branch_regs[i].regmap[temp]>=0)
        {
          temp=get_reg(branch_regs[i].regmap,-1);
        }
        #ifdef HOST_TEMPREG
        if(temp<0) temp=HOST_TEMPREG;
        #endif
        if(temp>=0) do_miniht_insert(return_address,rt,temp);
        else emit_movimm(return_address,rt);
      }
      else
      #endif
      {
        #ifdef REG_PREFETCH
        if(temp>=0)
        {
          if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
        }
        #endif
        emit_movimm(return_address,rt); // PC into link register
        #ifdef IMM_PREFETCH
        emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
        #endif
      }
    }
  }
  cc=get_reg(branch_regs[i].regmap,CCREG);
  assert(cc==HOST_CCREG);
  store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
  #ifdef REG_PREFETCH
  if(rt1[i]==PR&&temp>=0) emit_prefetchreg(temp);
  #endif
  do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
  // With a non-zero adjustment do_cc only compared; add the cycles here
  if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+cycles[i]+cycles[i+1]-adj),cc);
  load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
  if(internal_branch(ba[i]))
    assem_debug("branch: internal\n");
  else
    assem_debug("branch: external\n");
  if(internal_branch(ba[i])&&is_ds[(ba[i]-start)>>1]) {
    // Target is another branch's delay slot: assemble a private copy
    ds_assemble_entry(i);
  }
  else {
    add_to_linker((int)out,ba[i],internal_branch(ba[i]));
    emit_jmp(0);
  }
}
3823
rjump_assemble(int i,struct regstat * i_regs)3824 void rjump_assemble(int i,struct regstat *i_regs)
3825 {
3826 signed char *i_regmap=i_regs->regmap;
3827 int temp;
3828 int rs,cc,adj,rh,ht;
3829 u64 bc_unneeded;
3830 rs=get_reg(branch_regs[i].regmap,rs1[i]);
3831 assert(rs>=0);
3832 if(!((i_regs->wasdoingcp>>rs)&1)) {
3833 if(opcode[i]==0&&opcode2[i]==3) {
3834 // PC-relative branch, put PC in a temporary register
3835 temp=get_reg(branch_regs[i].regmap,RTEMP);
3836 assert(temp>=0);
3837 if(regs[i].regmap[temp]==RTEMP)
3838 emit_movimm(start+i*2+4,temp);
3839 }
3840 if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
3841 // Delay slot abuse, make a copy of the branch address register
3842 temp=get_reg(branch_regs[i].regmap,RTEMP);
3843 assert(temp>=0);
3844 assert(regs[i].regmap[temp]==RTEMP);
3845 if(opcode[i]==0&&opcode2[i]==3)
3846 emit_add(rs,temp,temp);
3847 else
3848 emit_mov(rs,temp);
3849 rs=temp;
3850 }
3851 }
3852 address_generation(i+1,i_regs,regs[i].regmap_entry);
3853 #ifdef REG_PREFETCH
3854 if(rt1[i]==PR)
3855 {
3856 if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) {
3857 int return_address=start+i*2+4;
3858 if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
3859 }
3860 }
3861 #endif
3862 #ifdef USE_MINI_HT
3863 if(rs1[i]==PR) {
3864 int rh=get_reg(regs[i].regmap,RHASH);
3865 if(rh>=0) do_preload_rhash(rh);
3866 }
3867 #endif
3868 if(rt1[i]==PR) {
3869 if(rt1[i+1]==PR||rt2[i+1]==PR) {
3870 // Delay slot abuse, set PR before executing delay slot
3871 int rt,return_address;
3872 rt=get_reg(regs[i].regmap,rt1[i]);
3873 assert(rt>=0);
3874 if(rt>=0) {
3875 return_address=start+i*2+4;
3876 #ifdef REG_PREFETCH
3877 if(temp>=0)
3878 {
3879 if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
3880 }
3881 #endif
3882 emit_movimm(return_address,rt); // PC into link register
3883 }
3884 }
3885 }
3886 ds_assemble(i+1,i_regs);
3887 bc_unneeded=regs[i].u;
3888 bc_unneeded|=1LL<<rt1[i];
3889 bc_unneeded&=~(1LL<<rs1[i]);
3890 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,
3891 bc_unneeded);
3892 load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i],CCREG,CCREG);
3893 if(rt1[i]==PR) {
3894 int rt,return_address;
3895 assert(rs1[i+1]!=PR);
3896 assert(rs2[i+1]!=PR);
3897 assert(rs3[i+1]!=PR);
3898 rt=get_reg(branch_regs[i].regmap,rt1[i]);
3899 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
3900 if(rt>=0&&rt1[i+1]!=PR&&rt2[i+1]!=PR) {
3901 return_address=start+i*2+4;
3902 #ifdef REG_PREFETCH
3903 if(temp>=0)
3904 {
3905 if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
3906 }
3907 #endif
3908 emit_movimm(return_address,rt); // PC into link register
3909 #ifdef IMM_PREFETCH
3910 emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
3911 #endif
3912 }
3913 }
3914 cc=get_reg(branch_regs[i].regmap,CCREG);
3915 assert(cc==HOST_CCREG);
3916 #ifdef USE_MINI_HT
3917 rh=get_reg(branch_regs[i].regmap,RHASH);
3918 ht=get_reg(branch_regs[i].regmap,RHTBL);
3919 if(rs1[i]==PR) {
3920 if(regs[i].regmap[rh]!=RHASH) do_preload_rhash(rh);
3921 do_preload_rhtbl(ht);
3922 do_rhash(rs,rh);
3923 }
3924 #endif
3925 if(opcode[i]==0&&opcode2[i]==11&&opcode3[i]==2) {
3926 // Return From Exception (RTE) - pop PC and SR from stack
3927 //printf("RTE\n");
3928 int map=get_reg(branch_regs[i].regmap,MOREG);
3929 int cache=get_reg(branch_regs[i].regmap,MMREG);
3930 int sp=get_reg(branch_regs[i].regmap,15);
3931 int sr=get_reg(branch_regs[i].regmap,SR);
3932 int jaddr=0;
3933 unsigned int hr;
3934 u32 reglist=0;
3935 temp=get_reg(branch_regs[i].regmap,RTEMP);
3936 for(hr=0;hr<HOST_REGS;hr++) {
3937 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3938 }
3939 assert(sp>=0);
3940 assert(sr>=0);
3941 assert(temp>=0);
3942 assert(map>=0);
3943 reglist&=~(1<<sr);
3944 reglist&=~(1<<temp);
3945 reglist&=~(1<<map);
3946 map=do_map_r(sp,-1,map,cache,0,-1,-1,0,0);
3947 do_map_r_branch(map,0,0,&jaddr);
3948 // direct load
3949 emit_readword_indexed_map(0,sp,map,temp);
3950 emit_addimm(sp,4,sp);
3951 emit_rorimm(temp,16,temp);
3952 emit_readword_indexed_map(0,sp,map,sr);
3953 emit_addimm(sp,4,sp);
3954 emit_rorimm(sr,16,sr);
3955 assert(jaddr);
3956 add_stub(LOADS_STUB,jaddr,(int)out,i,sp,(int)(&branch_regs[i]),ccadj[i],reglist);
3957 store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1);
3958 emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+cycles[i]+cycles[i+1]),HOST_CCREG);
3959 add_stub(CC_STUB,(int)out,jump_vaddr_reg[slave][temp],0,i,-1,TAKEN,0);
3960 emit_jns(0);
3961 emit_jmp(jump_vaddr_reg[slave][temp]);
3962 }
3963 else {
3964 if((((i_regs->wasdoingcp>>rs)&1)&®s[i].regmap[rs]==branch_regs[i].regmap[rs])
3965 ||((i_regs->isconst>>rs1[i])&1)) {
3966 // Do constant propagation, branch to fixed address
3967 u32 constaddr;
3968 if(((i_regs->wasdoingcp>>rs)&1)&®s[i].regmap[rs]==branch_regs[i].regmap[rs])
3969 constaddr=cpmap[i][rs];
3970 else
3971 constaddr=i_regs->constmap[rs1[i]];
3972 if(opcode[i]==0&&opcode2[i]==3) {
3973 // PC-relative branch, add PC+4
3974 constaddr+=start+i*2+4;
3975 }
3976 assert(ba[i]==constaddr);
3977 store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
3978 //emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+cycles[i]+cycles[i+1]),HOST_CCREG);
3979 //add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0);
3980 //emit_jns(0);
3981 do_cc(i,branch_regs[i].regmap,&adj,constaddr,TAKEN,0);
3982 if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+cycles[i]+cycles[i+1]-adj),cc);
3983 load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
3984
3985 if(internal_branch(constaddr)) assert(bt[(constaddr-start)>>1]);
3986 if(internal_branch(constaddr)&&bt[(constaddr-start)>>1]) {
3987 assem_debug("branch: internal (constant address)\n");
3988 if(is_ds[(constaddr-start)>>1]) {
3989 ds_assemble_entry(i);
3990 }
3991 else {
3992 add_to_linker((int)out,constaddr,1/*internal_branch*/);
3993 emit_jmp(0);
3994 }
3995 }
3996 else
3997 {
3998 assem_debug("branch: external (constant address)\n");
3999 add_to_linker((int)out,constaddr,0/*internal_branch*/);
4000 emit_jmp(0);
4001 }
4002 }
4003 else {
4004 ba[i]=-1;
4005 store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1);
4006 #ifdef REG_PREFETCH
4007 if(rt1[i]==PR&&temp>=0) emit_prefetchreg(temp);
4008 #endif
4009 #ifdef USE_MINI_HT
4010 if(rs1[i]==PR) {
4011 do_miniht_load(ht,rh);
4012 }
4013 #endif
4014 //#ifdef HOST_IMM_ADDR32 alternative using lea?
4015 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
4016 if(opcode[i]==0&&opcode2[i]==3) {
4017 // PC-relative branch, add offset to PC
4018 temp=get_reg(branch_regs[i].regmap,RTEMP);
4019 if(regs[i].regmap[temp]!=RTEMP) {
4020 // Load PC if necessary
4021 emit_movimm(start+i*2+4,temp);
4022 }
4023 emit_add(rs,temp,temp);
4024 rs=temp;
4025 }
4026 }
4027 //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN);
4028 //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen
4029 //assert(adj==0);
4030 emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+cycles[i]+cycles[i+1]),HOST_CCREG);
4031 add_stub(CC_STUB,(int)out,jump_vaddr_reg[slave][rs],0,i,-1,TAKEN,0);
4032 emit_jns(0);
4033 //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1);
4034 #ifdef USE_MINI_HT
4035 if(rs1[i]==PR) {
4036 do_miniht_jump(rs,rh,ht);
4037 }
4038 else
4039 #endif
4040 {
4041 emit_jmp(jump_vaddr_reg[slave][rs]);
4042 }
4043 }
4044 }
4045 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4046 if(rt1[i]!=PR&&i<slen-2&&(((u32)out)&7)) emit_mov(13,13);
4047 #endif
4048 }
4049
cjump_assemble(int i,struct regstat * i_regs)4050 void cjump_assemble(int i,struct regstat *i_regs)
4051 {
4052 signed char *i_regmap=i_regs->regmap;
4053 int cc;
4054 int match;
4055 int sr;
4056 int unconditional=0,nop=0;
4057 int adj;
4058 int invert=0;
4059 int internal;
4060 match=match_bt(regs[i].regmap,regs[i].dirty,ba[i]);
4061 assem_debug("match=%d\n",match);
4062 internal=internal_branch(ba[i]);
4063 if(i==(ba[i]-start)>>1) assem_debug("idle loop\n");
4064 if(!match) invert=1;
4065 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4066 if(i>(ba[i]-start)>>1) invert=1;
4067 #endif
4068 sr=get_reg(i_regmap,SR);
4069 assert(sr>=0);
4070 cc=get_reg(i_regmap,CCREG);
4071 assert(cc==HOST_CCREG);
4072 do_cc(i,regs[i].regmap,&adj,start+i*2,NODS,invert);
4073 if(unconditional)
4074 store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4075 if(unconditional) {
4076 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
4077 if(i!=(ba[i]-start)>>1 || source[i+1]!=0) {
4078 if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc);
4079 load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4080 if(internal)
4081 assem_debug("branch: internal\n");
4082 else
4083 assem_debug("branch: external\n");
4084 if(internal&&is_ds[(ba[i]-start)>>1]) {
4085 ds_assemble_entry(i);
4086 }
4087 else {
4088 add_to_linker((int)out,ba[i],internal);
4089 emit_jmp(0);
4090 }
4091 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4092 if(((u32)out)&7) emit_addnop(0);
4093 #endif
4094 }
4095 }
4096 else if(nop) {
4097 int jaddr;
4098 emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc);
4099 jaddr=(int)out;
4100 emit_jns(0);
4101 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*2+4,NOTTAKEN,0);
4102 }
4103 else {
4104 pointer taken=0,nottaken=0,nottaken1=0;
4105 //do_cc(i,regs[i].regmap,&adj,-1,0,invert);
4106 if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]-adj),cc);
4107
4108 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4109 emit_testimm(sr,1);
4110 if(opcode2[i]==9) // BT
4111 {
4112 if(invert){
4113 nottaken=(pointer)out;
4114 emit_jeq(1);
4115 }else{
4116 add_to_linker((int)out,ba[i],internal);
4117 emit_jne(0);
4118 }
4119 }
4120 if(opcode2[i]==11) // BF
4121 {
4122 if(invert){
4123 nottaken=(pointer)out;
4124 emit_jne(1);
4125 }else{
4126 add_to_linker((int)out,ba[i],internal);
4127 emit_jeq(0);
4128 }
4129 }
4130 if(invert) {
4131 if(taken) set_jump_target(taken,(pointer)out);
4132 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4133 if(match&&(!internal||!is_ds[(ba[i]-start)>>1])) {
4134 if(adj) {
4135 emit_addimm(cc,-CLOCK_DIVIDER*adj,cc);
4136 add_to_linker((int)out,ba[i],internal);
4137 }else{
4138 emit_addnop(13);
4139 add_to_linker((int)out,ba[i],internal*2);
4140 }
4141 emit_jmp(0);
4142 }else
4143 #endif
4144 {
4145 if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc);
4146 store_regs_bt(regs[i].regmap,regs[i].dirty,ba[i]);
4147 load_regs_bt(regs[i].regmap,regs[i].dirty,ba[i]);
4148 if(internal)
4149 assem_debug("branch: internal\n");
4150 else
4151 assem_debug("branch: external\n");
4152 if(internal&&is_ds[(ba[i]-start)>>1]) {
4153 ds_assemble_entry(i);
4154 }
4155 else {
4156 add_to_linker((int)out,ba[i],internal);
4157 emit_jmp(0);
4158 }
4159 }
4160 set_jump_target(nottaken,(pointer)out);
4161 }
4162
4163 //if(nottaken1) set_jump_target(nottaken1,(int)out);
4164 if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc);
4165 } // (!unconditional)
4166 }
4167
sjump_assemble(int i,struct regstat * i_regs)4168 void sjump_assemble(int i,struct regstat *i_regs)
4169 {
4170 signed char *i_regmap=i_regs->regmap;
4171 int cc;
4172 int adj;
4173 int match;
4174 int sr;
4175 int unconditional=0,nop=0;
4176 int invert=0;
4177 int internal=internal_branch(ba[i]);
4178 match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4179 assem_debug("match=%d\n",match);
4180 internal=internal_branch(ba[i]);
4181 if(i==(ba[i]-start)>>1) assem_debug("idle loop\n");
4182 if(!match) invert=1;
4183 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4184 if(i>(ba[i]-start)>>1) invert=1;
4185 #endif
4186
4187 if(ooo[i]) {
4188 sr=get_reg(branch_regs[i].regmap,SR);
4189 }
4190 else {
4191 sr=get_reg(i_regmap,SR);
4192 }
4193
4194 cc=get_reg(i_regmap,CCREG);
4195 assert(cc==HOST_CCREG);
4196
4197 if(ooo[i]) {
4198 u64 bc_unneeded;
4199 // Out of order execution (delay slot first)
4200 //printf("OOOE\n");
4201 do_cc(i,regs[i].regmap,&adj,start+i*2,NODS,invert);
4202 address_generation(i+1,i_regs,regs[i].regmap_entry);
4203 ds_assemble(i+1,i_regs);
4204 bc_unneeded=regs[i].u;
4205 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
4206 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,
4207 bc_unneeded);
4208 load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,SR,SR);
4209 cc=get_reg(branch_regs[i].regmap,CCREG);
4210 assert(cc==HOST_CCREG);
4211 if(unconditional)
4212 store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4213 if(unconditional) {
4214 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
4215 if(i!=(ba[i]-start)>>1 || source[i+1]!=0) {
4216 if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc);
4217 load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4218 if(internal)
4219 assem_debug("branch: internal\n");
4220 else
4221 assem_debug("branch: external\n");
4222 if(internal&&is_ds[(ba[i]-start)>>1]) {
4223 ds_assemble_entry(i);
4224 }
4225 else {
4226 add_to_linker((int)out,ba[i],internal);
4227 emit_jmp(0);
4228 }
4229 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4230 if(((u32)out)&7) emit_addnop(0);
4231 #endif
4232 }
4233 }
4234 else if(nop) {
4235 int jaddr;
4236 emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc);
4237 jaddr=(int)out;
4238 emit_jns(0);
4239 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*2+4,NOTTAKEN,0);
4240 }
4241 else {
4242 pointer taken=0,nottaken=0,nottaken1=0;
4243 //do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
4244 if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]-adj),cc);
4245
4246 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4247 assert(sr>=0);
4248 emit_testimm(sr,1);
4249 if(opcode2[i]==13) // BT/S
4250 {
4251 if(invert){
4252 nottaken=(pointer)out;
4253 emit_jeq(1);
4254 }else{
4255 add_to_linker((int)out,ba[i],internal);
4256 emit_jne(0);
4257 }
4258 }
4259 if(opcode2[i]==15) // BF/S
4260 {
4261 if(invert){
4262 nottaken=(pointer)out;
4263 emit_jne(1);
4264 }else{
4265 add_to_linker((int)out,ba[i],internal);
4266 emit_jeq(0);
4267 }
4268 }
4269 if(invert) {
4270 if(taken) set_jump_target(taken,(pointer)out);
4271 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4272 if(match&&(!internal||!is_ds[(ba[i]-start)>>1])) {
4273 if(adj) {
4274 emit_addimm(cc,-CLOCK_DIVIDER*adj,cc);
4275 add_to_linker((int)out,ba[i],internal);
4276 }else{
4277 emit_addnop(13);
4278 add_to_linker((int)out,ba[i],internal*2);
4279 }
4280 emit_jmp(0);
4281 }else
4282 #endif
4283 {
4284 if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc);
4285 store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4286 load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4287 if(internal)
4288 assem_debug("branch: internal\n");
4289 else
4290 assem_debug("branch: external\n");
4291 if(internal&&is_ds[(ba[i]-start)>>1]) {
4292 ds_assemble_entry(i);
4293 }
4294 else {
4295 add_to_linker((int)out,ba[i],internal);
4296 emit_jmp(0);
4297 }
4298 }
4299 set_jump_target(nottaken,(pointer)out);
4300 }
4301
4302 if(nottaken1) set_jump_target(nottaken1,(pointer)out);
4303 if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc);
4304 } // (!unconditional)
4305 } // if(ooo)
4306 else
4307 {
4308 // In-order execution (branch first)
4309 //printf("IOE\n");
4310 u64 ds_unneeded;
4311 pointer taken=0,nottaken=0,nottaken1=0;
4312 do_cc(i,regs[i].regmap,&adj,start+i*2,NODS,1);
4313 if(!unconditional&&!nop) {
4314 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4315 assert(sr>=0);
4316 emit_testimm(sr,1);
4317 if(opcode2[i]==13) // BT/S
4318 {
4319 nottaken=(pointer)out;
4320 emit_jeq(2);
4321 }
4322 if(opcode2[i]==15) // BF/S
4323 {
4324 nottaken=(pointer)out;
4325 emit_jne(2);
4326 }
4327 } // if(!unconditional)
4328 ds_unneeded=regs[i].u;
4329 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1])|(1LL<<rs3[i+1]));
4330 // branch taken
4331 if(!nop) {
4332 if(taken) set_jump_target(taken,(int)out);
4333 assem_debug("1:\n");
4334 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,
4335 ds_unneeded);
4336 // load regs
4337 load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1],rs3[i+1]);
4338 address_generation(i+1,&branch_regs[i],0);
4339 if(itype[i+1]==COMPLEX) {
4340 if((opcode[i+1]|4)==4&&opcode2[i+1]==15) { // MAC.W/MAC.L
4341 load_regs(regs[i].regmap,branch_regs[i].regmap,MACL,MACH,MACH);
4342 }
4343 }
4344 load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG,CCREG);
4345 ds_assemble(i+1,&branch_regs[i]);
4346 cc=get_reg(branch_regs[i].regmap,CCREG);
4347 if(cc==-1) {
4348 emit_loadreg(CCREG,cc=HOST_CCREG);
4349 // CHECK: Is the following instruction (fall thru) allocated ok?
4350 }
4351 assert(cc==HOST_CCREG);
4352 store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4353 //do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
4354 assem_debug("cycle count (adj)\n");
4355 /*if(adj)*/ //emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+cycles[i]+cycles[i+1]-adj),cc);
4356 if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc);
4357 load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]);
4358 if(internal)
4359 assem_debug("branch: internal\n");
4360 else
4361 assem_debug("branch: external\n");
4362 if(internal&&is_ds[(ba[i]-start)>>1]) {
4363 ds_assemble_entry(i);
4364 }
4365 else {
4366 add_to_linker((int)out,ba[i],internal);
4367 emit_jmp(0);
4368 }
4369 }
4370 // branch not taken
4371 if(!unconditional) {
4372 if(nottaken1) set_jump_target(nottaken1,(int)out);
4373 set_jump_target(nottaken,(int)out);
4374 assem_debug("2:\n");
4375 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,
4376 ds_unneeded);
4377 load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1],rs3[i+1]);
4378 address_generation(i+1,&branch_regs[i],0);
4379 if(itype[i+1]==COMPLEX) {
4380 if((opcode[i+1]|4)==4&&opcode2[i+1]==15) { // MAC.W/MAC.L
4381 load_regs(regs[i].regmap,branch_regs[i].regmap,MACL,MACH,MACH);
4382 }
4383 }
4384 load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG,CCREG);
4385 ds_assemble(i+1,&branch_regs[i]);
4386 }
4387 }
4388 }
4389
system_assemble(int i,struct regstat * i_regs)4390 void system_assemble(int i,struct regstat *i_regs)
4391 {
4392 signed char ccreg=get_reg(i_regs->regmap,CCREG);
4393 assert(ccreg==HOST_CCREG);
4394 assert(!is_delayslot);
4395 if(opcode[i]==0&&opcode2[i]==11&&opcode3[i]==1) { // SLEEP
4396 pointer jaddr, return_address;
4397 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4398 jaddr=(pointer)out;
4399 emit_jns(0);
4400 return_address=(pointer)out;
4401 emit_zeroreg(HOST_CCREG);
4402 set_jump_target(jaddr,(pointer)out);
4403 add_stub(CC_STUB,(int)out,return_address,0,i,start+i*2,TAKEN,0);
4404 emit_jmp(0);
4405 // DEBUG: Count in multiples of three to match interpreter
4406 //emit_addimm_and_set_flags(CLOCK_DIVIDER*3,HOST_CCREG);
4407 //add_stub(CC_STUB,(int)out,return_address,0,i,start+i*2,TAKEN,0);
4408 //emit_jns(0);
4409 emit_jmp(return_address);
4410 }
4411 else {
4412 int b,t,sr,st,map=-1,cache=-1;
4413 int jaddr=0;
4414 unsigned int hr;
4415 u32 reglist=0;
4416 assert(opcode[i]==12); // TRAPA
4417 t=get_reg(i_regs->regmap,-1);
4418 b=get_reg(i_regs->regmap,VBR);
4419 sr=get_reg(i_regs->regmap,SR);
4420 st=get_reg(i_regs->regmap,15); // STACK
4421 for(hr=0;hr<HOST_REGS;hr++) {
4422 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4423 }
4424 assert(t>=0);
4425 assert(b>=0);
4426 assert(sr>=0);
4427 assert(st>=0);
4428 emit_addimm(st,-4,st);
4429 map=get_reg(i_regs->regmap,MOREG);
4430 cache=get_reg(i_regs->regmap,MMREG);
4431 assert(map>=0);
4432 reglist&=~(1<<map);
4433 map=do_map_w(st,st,map,cache,0,0,0);
4434 do_map_w_branch(map,0,0,&jaddr);
4435 // Save SR
4436 emit_rorimm(sr,16,sr);
4437 emit_writeword_indexed_map(sr,0,st,map,map);
4438 emit_rorimm(sr,16,sr);
4439 if(jaddr) {
4440 add_stub(STOREL_STUB,jaddr,(int)out,i,st,(int)i_regs,ccadj[i],reglist);
4441 }
4442 emit_addimm(st,-4,st);
4443 store_regs_bt(i_regs->regmap,i_regs->dirty,-1);
4444 emit_movimm(start+i*2+2,sr);
4445 emit_addimm(b,imm[i]<<2,b);
4446 map=do_map_w(st,st,map,cache,0,0,0);
4447 do_map_w_branch(map,0,0,&jaddr);
4448 // Save PC
4449 emit_rorimm(sr,16,sr);
4450 emit_writeword_indexed_map(sr,0,st,map,map);
4451 if(jaddr) {
4452 add_stub(STOREL_STUB,jaddr,(int)out,i,st,(int)i_regs,ccadj[i],reglist);
4453 }
4454 // Load PC
4455 map=do_map_r(b,b,map,cache,0,-1,-1,0,0);
4456 do_map_r_branch(map,0,0,&jaddr);
4457 emit_readword_indexed_map(0,b,map,t);
4458 emit_rorimm(t,16,t);
4459 if(jaddr)
4460 add_stub(LOADL_STUB,jaddr,(int)out,i,t,(int)i_regs,ccadj[i],reglist);
4461 if(i_regs->regmap[HOST_CCREG]!=CCREG) {
4462 emit_loadreg(CCREG,HOST_CCREG);
4463 }
4464 emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+cycles[i]),HOST_CCREG);
4465 //add_stub(CC_STUB,(int)out,jump_vaddr_reg[slave][t],0,i,-1,TAKEN,0); // FIXME
4466 //emit_jns(0);
4467 emit_jmp(jump_vaddr_reg[slave][t]);
4468 }
4469 }
4470
bios_assemble(int i,struct regstat * i_regs)4471 void bios_assemble(int i,struct regstat *i_regs)
4472 {
4473 signed char ccreg=get_reg(i_regs->regmap,CCREG);
4474 assert(ccreg==HOST_CCREG);
4475 assert(!is_delayslot);
4476 emit_movimm(start+i*2,0);
4477 //emit_writeword(0,slave?(int)&slave_pc:(int)&master_pc);
4478 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4479 if(slave)
4480 emit_call((pointer)slave_handle_bios); // Probably doesn't work
4481 else
4482 emit_call((pointer)master_handle_bios);
4483 }
4484
4485 // Basic liveness analysis for SH2 registers
unneeded_registers(int istart,int iend,int r)4486 void unneeded_registers(int istart,int iend,int r)
4487 {
4488 int i;
4489 u64 u,uu,b,bu;
4490 u64 temp_u,temp_uu;
4491 u64 tdep;
4492 if(iend==slen-1) {
4493 u=0;
4494 }else{
4495 u=unneeded_reg[iend+1];
4496 u=0;
4497 }
4498 for (i=iend;i>=istart;i--)
4499 {
4500 //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
4501 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP)
4502 {
4503 if(ba[i]<start || ba[i]>=(start+slen*2))
4504 {
4505 // Branch out of this block, flush all regs
4506 u=0;
4507 branch_unneeded_reg[i]=u;
4508 if(itype[i]!=CJUMP) {
4509 // Merge in delay slot
4510 if(rt1[i+1]>=0) u|=1LL<<rt1[i+1];
4511 if(rt2[i+1]>=0) u|=1LL<<rt2[i+1];
4512 if(rs1[i+1]>=0) u&=~(1LL<<rs1[i+1]);
4513 if(rs2[i+1]>=0) u&=~(1LL<<rs2[i+1]);
4514 if(rs3[i+1]>=0) u&=~(1LL<<rs3[i+1]);
4515 }
4516 }
4517 else
4518 {
4519 if(ba[i]<=start+i*2) {
4520 // Backward branch
4521 if(itype[i]==UJUMP||itype[i]==RJUMP)
4522 {
4523 // Unconditional branch
4524 temp_u=0;
4525 } else if(itype[i]==CJUMP) {
4526 // Conditional branch (not taken case)
4527 temp_u=unneeded_reg[i+1];
4528 } else {
4529 // Conditional branch (not taken case)
4530 temp_u=unneeded_reg[i+2];
4531 }
4532 if(itype[i]!=CJUMP) {
4533 // Merge in delay slot
4534 if(rt1[i+1]>=0) temp_u|=1LL<<rt1[i+1];
4535 if(rt2[i+1]>=0) temp_u|=1LL<<rt2[i+1];
4536 if(rs1[i+1]>=0) temp_u&=~(1LL<<rs1[i+1]);
4537 if(rs2[i+1]>=0) temp_u&=~(1LL<<rs2[i+1]);
4538 if(rs3[i+1]>=0) temp_u&=~(1LL<<rs3[i+1]);
4539 }
4540 if(rt1[i]>=0) temp_u|=1LL<<rt1[i];
4541 if(rt2[i]>=0) temp_u|=1LL<<rt2[i];
4542 if(rs1[i]>=0) temp_u&=~(1LL<<rs1[i]);
4543 if(rs2[i]>=0) temp_u&=~(1LL<<rs2[i]);
4544 if(rs3[i]>=0) temp_u&=~(1LL<<rs3[i]);
4545 unneeded_reg[i]=temp_u;
4546 // Only go three levels deep. This recursion can take an
4547 // excessive amount of time if there are a lot of nested loops.
4548 if(r<2) {
4549 unneeded_registers((ba[i]-start)>>1,i-1,r+1);
4550 }else{
4551 unneeded_reg[(ba[i]-start)>>1]=0;
4552 }
4553 } /*else*/ if(1) {
4554 if(itype[i]==UJUMP||itype[i]==RJUMP)
4555 {
4556 // Unconditional branch
4557 u=unneeded_reg[(ba[i]-start)>>1];
4558 // Always need stack and status in case of interrupt
4559 u&=~((1LL<<15)|(1LL<<SR));
4560 branch_unneeded_reg[i]=u;
4561 //u=0; // for debugging
4562 //branch_unneeded_reg[i]=u; // for debugging
4563 // Merge in delay slot
4564 if(rt1[i+1]>=0) u|=1LL<<rt1[i+1];
4565 if(rt2[i+1]>=0) u|=1LL<<rt2[i+1];
4566 if(rs1[i+1]>=0) u&=~(1LL<<rs1[i+1]);
4567 if(rs2[i+1]>=0) u&=~(1LL<<rs2[i+1]);
4568 if(rs3[i+1]>=0) u&=~(1LL<<rs3[i+1]);
4569 } else {
4570 // Conditional branch
4571 b=unneeded_reg[(ba[i]-start)>>1];
4572 branch_unneeded_reg[i]=b;
4573 //b=0; // for debugging
4574 //branch_unneeded_reg[i]=b; // for debugging
4575 // Branch delay slot
4576 if(itype[i]!=CJUMP) {
4577 if(rt1[i+1]>=0) b|=1LL<<rt1[i+1];
4578 if(rt2[i+1]>=0) b|=1LL<<rt2[i+1];
4579 if(rs1[i+1]>=0) b&=~(1LL<<rs1[i+1]);
4580 if(rs2[i+1]>=0) b&=~(1LL<<rs2[i+1]);
4581 if(rs3[i+1]>=0) b&=~(1LL<<rs3[i+1]);
4582 }
4583 u&=b;
4584 // Always need stack and status in case of interrupt
4585 u&=~((1LL<<15)|(1LL<<SR));
4586 //u=0; // for debugging
4587 if(itype[i]!=CJUMP) {
4588 if(i<slen-1) {
4589 branch_unneeded_reg[i]&=unneeded_reg[i+2];
4590 } else {
4591 branch_unneeded_reg[i]=0;
4592 }
4593 }else{
4594 if(i<slen) {
4595 branch_unneeded_reg[i]&=unneeded_reg[i+1];
4596 } else {
4597 branch_unneeded_reg[i]=0;
4598 }
4599 }
4600 //branch_unneeded_reg[i]=0; // for debugging
4601 }
4602 }
4603 }
4604 }
4605 else if(itype[i]==RJUMP && source[i]==0x2b)
4606 {
4607 // RTE instruction (return from exception)
4608 u=(1<<SR);
4609 }
4610 else if(itype[i]==SYSTEM && opcode[i]==12)
4611 {
4612 // TRAPA instruction (syscall)
4613 u=0;
4614 }
4615 //u=uu=0; // DEBUG
4616 //tdep=(~uu>>rt1[i])&1;
4617 // Written registers are unneeded
4618 if(rt1[i]>=0) u|=1LL<<rt1[i];
4619 if(rt2[i]>=0) u|=1LL<<rt2[i];
4620 // Accessed registers are needed
4621 if(rs1[i]>=0) u&=~(1LL<<rs1[i]);
4622 if(rs2[i]>=0) u&=~(1LL<<rs2[i]);
4623 if(rs3[i]>=0) u&=~(1LL<<rs3[i]);
4624 // Source-target dependencies
4625 //uu&=~(tdep<<dep1[i]);
4626 //uu&=~(tdep<<dep2[i]);
4627 if(u&(1<<SR)) u|=(1<<TBIT);
4628 // Save it
4629 unneeded_reg[i]=u;
4630 }
4631 }
4632
// Write back dirty registers as soon as we will no longer modify them,
// so that we don't end up with lots of writes at the branches.
//
// Walks instructions iend..istart backwards and computes two per-instruction
// bitmasks over host registers (bit r corresponds to host register r),
// stored in will_dirty[i] and wont_dirty[i].
//
//   istart,iend - inclusive range of instruction indices to process.
//   wr          - nonzero: apply the masks to the dirty/wasdirty fields of
//                 regs[] and branch_regs[], and re-scan the body of each
//                 backward branch with a recursive call (wr=0).
//                 zero: only compute will_dirty[]/wont_dirty[]; at a
//                 backward branch the target's entries are reset instead
//                 of recursing.
//
// How the masks are applied when wr is set: the dirty field is OR-ed with
// the will_dirty mask and AND-ed with the wont_dirty mask, so a clear
// wont_dirty bit clears the corresponding dirty bit.
void clean_registers(int istart,int iend,int wr)
{
  int i;
  int r;
  u32 will_dirty_i,will_dirty_next,temp_will_dirty;
  u32 wont_dirty_i,wont_dirty_next,temp_wont_dirty;
  // Seed from the instruction following the range, or from zero when the
  // range ends at the last instruction of the block.
  if(iend==slen-1) {
    will_dirty_i=will_dirty_next=0;
    wont_dirty_i=wont_dirty_next=0;
  }else{
    will_dirty_i=will_dirty_next=will_dirty[iend+1];
    wont_dirty_i=wont_dirty_next=wont_dirty[iend+1];
  }
  for (i=iend;i>=istart;i--)
  {
    if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP)
    {
      if(ba[i]<start || ba[i]>=(start+slen*2))
      {
        // Branch out of this block, flush all regs
        if(itype[i]==RJUMP||itype[i]==UJUMP)
        {
          // Unconditional branch
          will_dirty_i=0;
          wont_dirty_i=0;
          // Merge in delay slot (will dirty)
          // The statements below are order dependent: later clears
          // override earlier sets for the same host register.
          for(r=0;r<HOST_REGS;r++) {
            if(r!=EXCLUDE_REG) {
              if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
              if(branch_regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
              if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
              if(regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
              if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
              // A write to TBIT counts as a write to the register holding SR
              if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                if(regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
                if(branch_regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
              }
            }
          }
        }
        else
        {
          // Conditional branch
          will_dirty_i=0;
          wont_dirty_i=wont_dirty_next;
          // Merge in delay slot (will dirty)
          for(r=0;r<HOST_REGS;r++) {
            if(r!=EXCLUDE_REG) {
              if(itype[i]==SJUMP) {
                // Only conditional branches with delay slots
                if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
                if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
                //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
                //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
                if(regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
                if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
                if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                  if(regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
                  if(branch_regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
                }
              }
            }
          }
        }
        // Merge in delay slot (wont dirty)
        for(r=0;r<HOST_REGS;r++) {
          if(r!=EXCLUDE_REG) {
            if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
            if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
            if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
            if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
            if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
            if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
            if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
            if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
            if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
            if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
            if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
              if(regs[i].regmap[r]==SR) wont_dirty_i|=1<<r;
              if(branch_regs[i].regmap[r]==SR) wont_dirty_i|=1<<r;
            }
            if(opcode[i]==0&&opcode2[i]==11&&opcode3[i]==2)
            {
              // RTE instruction (return from interrupt)
              // Registers holding r15 (stack) and SR are set in both masks.
              if(regs[i].regmap[r]==15||branch_regs[i].regmap[r]==15) {
                wont_dirty_i|=1<<r;
                will_dirty_i|=1<<r;
              }
              if(regs[i].regmap[r]==SR||branch_regs[i].regmap[r]==SR) {
                wont_dirty_i|=1<<r;
                will_dirty_i|=1<<r;
              }
            }
          }
        }
        if(wr) {
          //#ifndef DESTRUCTIVE_WRITEBACK
          branch_regs[i].dirty&=wont_dirty_i;
          //#endif
          branch_regs[i].dirty|=will_dirty_i;
        }
      }
      else
      {
        // Internal branch
        if(ba[i]<=start+i*2) {
          // Recursively evaluate backward branches
          if(itype[i]==RJUMP||itype[i]==UJUMP)
          {
            // Unconditional branch
            temp_will_dirty=0;
            temp_wont_dirty=0;
            // Merge in delay slot (will dirty)
            for(r=0;r<HOST_REGS;r++) {
              if(r!=EXCLUDE_REG) {
                if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
                if((branch_regs[i].regmap[r]&63)>TBIT) temp_will_dirty&=~(1<<r);
                if(branch_regs[i].regmap[r]<0) temp_will_dirty&=~(1<<r);
                if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
                if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
                if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
                if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
                if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
                if((regs[i].regmap[r]&63)>TBIT) temp_will_dirty&=~(1<<r);
                if(regs[i].regmap[r]<0) temp_will_dirty&=~(1<<r);
                if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
                if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                  if(regs[i].regmap[r]==SR) temp_will_dirty|=1<<r;
                  if(branch_regs[i].regmap[r]==SR) temp_will_dirty|=1<<r;
                }
              }
            }
          } else {
            // Conditional branch (not taken case)
            temp_will_dirty=will_dirty_next;
            temp_wont_dirty=wont_dirty_next;
            // Merge in delay slot (will dirty)
            for(r=0;r<HOST_REGS;r++) {
              if(r!=EXCLUDE_REG) {
                if(itype[i]==SJUMP) {
                  // Only /S instructions have a delay slot
                  if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
                  if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
                  if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
                  if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
                  if((branch_regs[i].regmap[r]&63)>TBIT) temp_will_dirty&=~(1<<r);
                  if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
                  //if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
                  //if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
                  if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
                  if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
                  if((regs[i].regmap[r]&63)>TBIT) temp_will_dirty&=~(1<<r);
                  if(regs[i].regmap[r]<0) temp_will_dirty&=~(1<<r);
                  if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
                  if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                    if(regs[i].regmap[r]==SR) temp_will_dirty|=1<<r;
                    if(branch_regs[i].regmap[r]==SR) temp_will_dirty|=1<<r;
                  }
                }
              }
            }
          }
          // Merge in delay slot (wont dirty)
          for(r=0;r<HOST_REGS;r++) {
            if(r!=EXCLUDE_REG) {
              if((regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
              if((regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
              if((regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
              if((regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
              if(regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
              if(branch_regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
              if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                if(regs[i].regmap[r]==SR) temp_wont_dirty|=1<<r;
                if(branch_regs[i].regmap[r]==SR) temp_wont_dirty|=1<<r;
              }
            }
          }
          // Deal with changed mappings
          if(i<iend) {
            for(r=0;r<HOST_REGS;r++) {
              if(r!=EXCLUDE_REG) {
                if(regs[i].regmap[r]!=regmap_pre[i][r]) {
                  temp_will_dirty&=~(1<<r);
                  temp_wont_dirty&=~(1<<r);
                  // NOTE(review): the >=0 test is always true after masking
                  // with 63; a negative mapping becomes 63 and falls into
                  // the else branch.
                  if((regmap_pre[i][r]&63)>=0 && (regmap_pre[i][r]&63)<TBIT) {
                    temp_will_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
                    temp_wont_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
                  } else {
                    temp_will_dirty|=1<<r;
                    temp_wont_dirty|=1<<r;
                  }
                }
              }
            }
          }
          if(wr) {
            // Publish the provisional masks for this branch, then re-scan
            // the loop body (target..i-1) in compute-only mode.
            will_dirty[i]=temp_will_dirty;
            wont_dirty[i]=temp_wont_dirty;
            clean_registers((ba[i]-start)>>1,i-1,0);
          }else{
            // Limit recursion.  It can take an excessive amount
            // of time if there are a lot of nested loops.
            will_dirty[(ba[i]-start)>>1]=0;
            wont_dirty[(ba[i]-start)>>1]=-1;
          }
        }
        // Runs for forward and backward internal branches alike
        /*else*/ if(1)
        {
          if(itype[i]==RJUMP||itype[i]==UJUMP)
          {
            // Unconditional branch
            will_dirty_i=0;
            wont_dirty_i=0;
          //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
            // Import the target's masks for host registers whose mapping
            // after the branch equals the target's entry mapping, plus
            // registers the target does not need.
            for(r=0;r<HOST_REGS;r++) {
              if(r!=EXCLUDE_REG) {
                if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>1].regmap_entry[r]) {
                  will_dirty_i|=will_dirty[(ba[i]-start)>>1]&(1<<r);
                  wont_dirty_i|=wont_dirty[(ba[i]-start)>>1]&(1<<r);
                }
                if(branch_regs[i].regmap[r]>=0) {
                  will_dirty_i|=((unneeded_reg[(ba[i]-start)>>1]>>branch_regs[i].regmap[r])&1)<<r;
                  wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>1]>>branch_regs[i].regmap[r])&1)<<r;
                }
              }
            }
          //}
            // Merge in delay slot
            for(r=0;r<HOST_REGS;r++) {
              if(r!=EXCLUDE_REG) {
                if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
                if((branch_regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
                if(branch_regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
                if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
                if((regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
                if(regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
                if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
                if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                  if(regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
                  if(branch_regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
                }
              }
            }
          } else {
            // Conditional branch
            will_dirty_i=will_dirty_next;
            wont_dirty_i=wont_dirty_next;
          //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
            for(r=0;r<HOST_REGS;r++) {
              if(r!=EXCLUDE_REG) {
                // CJUMP has no delay slot, so the pre-branch mapping is used
                signed char target_reg=(itype[i]==CJUMP)?regs[i].regmap[r]:branch_regs[i].regmap[r];
                // NOTE(review): the &= below ANDs the whole accumulator with
                // a value that has at most bit r set, so it also clears every
                // other bit of will_dirty_i.  Left as is.
                if(target_reg==regs[(ba[i]-start)>>1].regmap_entry[r]) {
                  will_dirty_i&=will_dirty[(ba[i]-start)>>1]&(1<<r);
                  wont_dirty_i|=wont_dirty[(ba[i]-start)>>1]&(1<<r);
                }
                else if(target_reg>=0) {
                  will_dirty_i&=((unneeded_reg[(ba[i]-start)>>1]>>target_reg)&1)<<r;
                  wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>1]>>target_reg)&1)<<r;
                }
              }
            }
          //}
            // Merge in delay slot
            for(r=0;r<HOST_REGS;r++) {
              if(r!=EXCLUDE_REG) {
                if(itype[i]==SJUMP) {
                  // Only /S branches have delay slots
                  if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
                  if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
                  if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
                  if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
                  if((branch_regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
                  if(branch_regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
                  if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
                  //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
                  //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
                  if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
                  if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
                  if((regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
                  if(regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
                  if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
                  if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                    if(regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
                    if(branch_regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
                  }
                }
              }
            }
          }
          // Merge in delay slot (won't dirty)
          for(r=0;r<HOST_REGS;r++) {
            if(r!=EXCLUDE_REG) {
              if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
              if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
              if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
              if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
              if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
              if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
                if(regs[i].regmap[r]==SR) wont_dirty_i|=1<<r;
                if(branch_regs[i].regmap[r]==SR) wont_dirty_i|=1<<r;
              }
            }
          }
          if(wr) {
            //#ifndef DESTRUCTIVE_WRITEBACK
            branch_regs[i].dirty&=wont_dirty_i;
            //#endif
            branch_regs[i].dirty|=will_dirty_i;
          }
        }
      }
    }
    // NOTE(review): the liveness pass in this file tests itype SYSTEM with
    // opcode 12 for TRAPA; this tests itype SYSCALL (see the FIXME below).
    else if(itype[i]==SYSCALL) // FIXME
    {
      // SYSCALL instruction (software interrupt)
      will_dirty_i=0;
      wont_dirty_i=0;
    }
    // Remember the masks as they stand after this instruction; a
    // conditional branch earlier in the block uses them as its
    // "not taken" state.
    will_dirty_next=will_dirty_i;
    wont_dirty_next=wont_dirty_i;
    // Fold in the registers written by instruction i itself
    for(r=0;r<HOST_REGS;r++) {
      if(r!=EXCLUDE_REG) {
        if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
        if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
        if(rt1[i]==TBIT||rt2[i]==TBIT)
          if(regs[i].regmap[r]==SR) will_dirty_i|=1<<r;
        if((regs[i].regmap[r]&63)>TBIT) will_dirty_i&=~(1<<r);
        if(regs[i].regmap[r]<0) will_dirty_i&=~(1<<r);
        if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
        if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
        if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
        if(rt1[i]==TBIT||rt2[i]==TBIT)
          if(regs[i].regmap[r]==SR) wont_dirty_i|=1<<r;
        if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
        if(itype[i]==COMPLEX)
        {
          if((opcode[i]|4)==4&&opcode2[i]==15) { // MAC.L/MAC.W
            if(regs[i].regmap[r]==MACL||regs[i].regmap[r]==MACH) {
              wont_dirty_i|=1<<r;
              will_dirty_i|=1<<r;
            }
          }
        }
        if(i>istart) {
          if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP)
          {
            // Don't store a register immediately after writing it,
            // may prevent dual-issue.
            if((regs[i].regmap[r]&63)==rt1[i-1]) wont_dirty_i|=1<<r;
            if((regs[i].regmap[r]&63)==rt2[i-1]) wont_dirty_i|=1<<r;
          }
        }
      }
    }
    // Save it
    will_dirty[i]=will_dirty_i;
    wont_dirty[i]=wont_dirty_i;
    // Mark registers that won't be dirtied as not dirty
    if(wr) {
      /*printf("wr (%d,%d) %x will:",istart,iend,start+i*4);
      for(r=0;r<HOST_REGS;r++) {
        if((will_dirty_i>>r)&1) {
          printf(" r%d",r);
        }
      }
      printf("\n");*/

      regs[i].dirty|=will_dirty_i;
      //#ifndef DESTRUCTIVE_WRITEBACK
      regs[i].dirty&=wont_dirty_i;
      // Propagate to the wasdirty field of the instruction that follows:
      // i+2 for SJUMP (skipping the delay slot), i+1 for non-branches.
      // RJUMP/UJUMP propagate nothing because of the inner test.
      if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==SJUMP)
      {
        if(i<iend-1&&itype[i]!=RJUMP&&itype[i]!=UJUMP) {
          for(r=0;r<HOST_REGS;r++) {
            if(r!=EXCLUDE_REG) {
              if(regs[i].regmap[r]==regmap_pre[i+2][r]) {
                regs[i+2].wasdirty&=wont_dirty_i|~(1<<r);
              }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
            }
          }
        }
      }
      else
      {
        if(i<iend) {
          for(r=0;r<HOST_REGS;r++) {
            if(r!=EXCLUDE_REG) {
              if(regs[i].regmap[r]==regmap_pre[i+1][r]) {
                regs[i+1].wasdirty&=wont_dirty_i|~(1<<r);
              }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
            }
          }
        }
      }
      //#endif
    }
    // Deal with changed mappings
    // Translate the masks from the mapping after instruction i
    // (regs[i].regmap) to the mapping before it (regmap_pre[i]), so they
    // are valid for the next, earlier loop iteration.
    temp_will_dirty=will_dirty_i;
    temp_wont_dirty=wont_dirty_i;
    for(r=0;r<HOST_REGS;r++) {
      if(r!=EXCLUDE_REG) {
        int nr;
        if(regs[i].regmap[r]==regmap_pre[i][r]) {
          // Mapping unchanged: the bit carries over as is
          if(wr) {
            //#ifndef DESTRUCTIVE_WRITEBACK
            regs[i].wasdirty&=wont_dirty_i|~(1<<r);
            //#endif
            regs[i].wasdirty|=will_dirty_i&(1<<r);
          }
        }
        else if(regmap_pre[i][r]>=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
          // Register moved to a different register
          will_dirty_i&=~(1<<r);
          wont_dirty_i&=~(1<<r);
          will_dirty_i|=((temp_will_dirty>>nr)&1)<<r;
          wont_dirty_i|=((temp_wont_dirty>>nr)&1)<<r;
          if(wr) {
            //#ifndef DESTRUCTIVE_WRITEBACK
            regs[i].wasdirty&=wont_dirty_i|~(1<<r);
            //#endif
            regs[i].wasdirty|=will_dirty_i&(1<<r);
          }
        }
        else {
          // The previous occupant is no longer mapped anywhere
          will_dirty_i&=~(1<<r);
          wont_dirty_i&=~(1<<r);
          if((regmap_pre[i][r]&63)>=0 && (regmap_pre[i][r]&63)<TBIT) {
            will_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
            wont_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
          } else {
            wont_dirty_i|=1<<r;
            /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);assert(!((will_dirty>>r)&1));*/
          }
        }
      }
    }
  }
}
5108
5109 /* disassembly */
disassemble_inst(int i)5110 void disassemble_inst(int i)
5111 {
5112 if (bt[i]) printf("*"); else printf(" ");
5113 switch(itype[i]) {
5114 case UJUMP:
5115 case CJUMP:
5116 case SJUMP:
5117 printf (" %x: %s %8x\n",start+i*2,insn[i],ba[i]);break;
5118 case RJUMP:
5119 printf (" %x: %s r%d\n",start+i*2,insn[i],rs1[i]);break;
5120 case IMM8:
5121 printf (" %x: %s #%d,r%d\n",start+i*2,insn[i],imm[i],opcode[i]==14?rt1[i]:rs1[i]);
5122 break;
5123 case LOAD:
5124 switch(addrmode[i])
5125 {
5126 case REGIND:
5127 printf (" %x: %s @r%d,r%d\n",start+i*2,insn[i],rs1[i],rt1[i]);
5128 break;
5129 case POSTINC:
5130 printf (" %x: %s @r%d+,r%d\n",start+i*2,insn[i],rs1[i],rt1[i]);
5131 break;
5132 case PREDEC:
5133 printf (" %x: %s @-r%d,r%d\n",start+i*2,insn[i],rs1[i],rt1[i]);
5134 break;
5135 case DUALIND:
5136 printf (" %x: %s @(R0,r%d),r%d\n",start+i*2,insn[i],rs1[i],rt1[i]);
5137 break;
5138 case GBRIND:
5139 printf (" %x: %s #%d,@(R0,GBR)\n",start+i*2,insn[i],imm[i]);
5140 break;
5141 case GBRDISP:
5142 printf (" %x: %s @(%d,GBR),r%d\n",start+i*2,insn[i],imm[i],rt1[i]);
5143 break;
5144 case REGDISP:
5145 printf (" %x: %s @(%d,r%d),r%d\n",start+i*2,insn[i],imm[i],rs1[i],rt1[i]);
5146 break;
5147 }
5148 break;
5149 case STORE:
5150 switch(addrmode[i])
5151 {
5152 case REGIND:
5153 printf (" %x: %s r%d,@r%d\n",start+i*2,insn[i],rs1[i],rs2[i]);
5154 break;
5155 case POSTINC:
5156 printf (" %x: %s r%d,@r%d+\n",start+i*2,insn[i],rs1[i],rs2[i]);
5157 break;
5158 case PREDEC:
5159 printf (" %x: %s r%d,@-r%d\n",start+i*2,insn[i],rs1[i],rs2[i]);
5160 break;
5161 case DUALIND:
5162 printf (" %x: %s r%d,@(R0,r%d)\n",start+i*2,insn[i],rs1[i],rs2[i]);
5163 break;
5164 case GBRDISP:
5165 printf (" %x: %s r%d,@(%d,GBR)\n",start+i*2,insn[i],rs1[i],imm[i]);
5166 break;
5167 case REGDISP:
5168 printf (" %x: %s r%d,@(%d,r%d)\n",start+i*2,insn[i],rs1[i],imm[i],rs2[i]);
5169 break;
5170 }
5171 break;
5172 case RMW:
5173 switch(addrmode[i])
5174 {
5175 case REGIND:
5176 printf (" %x: %s @r%d\n",start+i*2,insn[i],rs1[i]);
5177 break;
5178 case GBRIND:
5179 printf (" %x: %s #%d,@(R0,GBR)\n",start+i*2,insn[i],imm[i]);
5180 break;
5181 }
5182 break;
5183 case PCREL:
5184 printf (" %x: %s @(%x,PC),r%d (PC+%d=%x)",start+i*2,insn[i],imm[i],rt1[i],imm[i],((start+i*2+4)&(opcode[i]==9?~1:~3))+imm[i]);
5185 if (opcode[i]==9 && (unsigned)(i+(imm[i]>>1))<slen)
5186 printf(" [%x]\n",(s16)source[((start+i*2+4)+imm[i]-start)>>1]); // MOV.W
5187 else if (opcode[i]==13 && (unsigned)(i+(imm[i]>>1))<slen)
5188 printf(" [%8x]\n",(source[(((start+i*2+4)&~3)+imm[i]-start)>>1]<<16)+source[(((start+i*2+4)&~3)+imm[i]+2-start)>>1]); // MOV.L
5189 else printf("\n");
5190 if (opcode[i]==13 && (unsigned)(i+(imm[i]>>1))<slen)
5191 if((source[(((start+i*2+4)&~3)+imm[i]-start)>>1]<<16)+source[(((start+i*2+4)&~3)+imm[i]+2-start)>>1]-(start+i*2)<(unsigned)1024)
5192 printf("Within 1024\n");
5193 break;
5194 case ALU:
5195 if(rs1[i]<0&&rs2[i]<0) // XOR reg,reg case
5196 printf (" %x: %s r%d,r%d\n",start+i*2,insn[i],rt1[i],rt1[i]);
5197 else if(rs2[i]>=0&&rs2[i]!=TBIT)
5198 printf (" %x: %s r%d,r%d\n",start+i*2,insn[i],rs1[i],rs2[i]);
5199 else if(rt1[i]!=rs1[i])
5200 printf (" %x: %s r%d,r%d\n",start+i*2,insn[i],rs1[i],rt1[i]);
5201 else
5202 printf (" %x: %s r%d\n",start+i*2,insn[i],rs1[i]);
5203 break;
5204 case MULTDIV:
5205 //printf (" %x: %s rt1=%d rt2=%d\n",start+i*2,insn[i],rt1[i],rt2[i]);
5206 printf (" %x: %s r%d,r%d\n",start+i*2,insn[i],rs1[i],rs2[i]);
5207 break;
5208 case SHIFTIMM:
5209 if(rs2[i]>=0) printf (" %x: %s r%d,r%d #%d\n",start+i*2,insn[i],rs1[i],rs2[i],imm[i]);
5210 else printf (" %x: %s r%d #%d\n",start+i*2,insn[i],rt1[i],imm[i]);
5211 break;
5212 case MOV:
5213 printf (" %x: %s r%d,r%d\n",start+i*2,insn[i],rs1[i],rt1[i]);
5214 break;
5215 case EXT:
5216 printf (" %x: %s r%d,r%d\n",start+i*2,insn[i],rs1[i],rt1[i]);
5217 break;
5218 case FLAGS:
5219 if(opcode2[i]==9) printf (" %x: %s r%d\n",start+i*2,insn[i],rt1[i]);
5220 else printf (" %x: %s\n",start+i*2,insn[i]);
5221 break;
5222 case COMPLEX:
5223 printf (" %x: %s r%d,r%d\n",start+i*2,insn[i],rs1[i],rs2[i]);
5224 break;
5225 case DATA:
5226 printf (" %x: WORD %4x\n",start+i*2,source[i]&0xFFFF); // Constant data
5227 break;
5228 default:
5229 //printf (" %s %8x\n",insn[i],source[i]);
5230 printf (" %x: %s\n",start+i*2,insn[i]);
5231 }
5232 }
5233
sh2_dynarec_init()5234 void sh2_dynarec_init()
5235 {
5236 int n;
5237 //printf("Init new dynarec\n");
5238 out=(u8 *)BASE_ADDR;
5239 if (mmap (out, 1<<TARGET_SIZE_2,
5240 PROT_READ | PROT_WRITE | PROT_EXEC,
5241 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
5242 -1, 0) <= 0) {printf("mmap() failed\n");}
5243 //for(n=0x80000;n<0x80800;n++)
5244 // invalid_code[n]=1;
5245 for(n=0;n<131072;n++)
5246 cached_code[n]=0;
5247 for(n=0;n<262144;n++)
5248 cached_code_words[n]=0;
5249 for(n=0;n<65536;n++)
5250 hash_table[n][0]=hash_table[n][2]=-1;
5251 memset(mini_ht_master,-1,sizeof(mini_ht_master));
5252 memset(mini_ht_slave,-1,sizeof(mini_ht_slave));
5253 memset(restore_candidate,0,sizeof(restore_candidate));
5254 copy=shadow;
5255 expirep=16384; // Expiry pointer, +2 blocks
5256 literalcount=0;
5257 stop_after_jal=0;
5258 if (mmap ((void *)0x80000000, 4194304,
5259 PROT_READ | PROT_WRITE,
5260 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
5261 -1, 0) <= 0) {printf("mmap() failed\n");}
5262
5263 // This has to be done after BiosRom etc are allocated
5264 for(n=0;n<1048576;n++) {
5265 if(n<0x100) {
5266 #ifdef POINTERS_64BIT
5267 memory_map[n]=(((u64)BiosRom-((n<<12)&0x80000))>>2)|0x4000000000000000LL;
5268 #else
5269 memory_map[n]=(((u32)BiosRom-((n<<12)&0x80000))>>2)|0x40000000;
5270 #endif
5271 }else
5272 if(n>=0x0200&&n<0x0300) {
5273 #ifdef POINTERS_64BIT
5274 memory_map[n]=((u64)LowWram-((n<<12)&0xFFF00000))>>2;
5275 #else
5276 memory_map[n]=((u32)LowWram-((n<<12)&0xFFF00000))>>2;
5277 #endif
5278 }else
5279 if(n>=0x6000&&n<0x8000) {
5280 #ifdef POINTERS_64BIT
5281 memory_map[n]=((u64)HighWram-((n<<12)&0xFFF00000))>>2;
5282 #else
5283 memory_map[n]=((u32)HighWram-((n<<12)&0xFFF00000))>>2;
5284 #endif
5285 }else
5286 if(n>=0x20200&&n<0x20300) {
5287 #ifdef POINTERS_64BIT
5288 memory_map[n]=((u64)LowWram-((n<<12)&0xFFF00000))>>2;
5289 #else
5290 memory_map[n]=((u32)LowWram-((n<<12)&0xFFF00000))>>2;
5291 #endif
5292 }else
5293 if(n>=0x26000&&n<0x28000) {
5294 #ifdef POINTERS_64BIT
5295 memory_map[n]=((u64)HighWram-((n<<12)&0xFFF00000))>>2;
5296 #else
5297 memory_map[n]=((u32)HighWram-((n<<12)&0xFFF00000))>>2;
5298 #endif
5299 }else
5300 memory_map[n]=-1LL;
5301 }
5302
5303 master_cc=slave_cc=0;
5304 slave_ip=(void *)0; // Slave not running, go directly to interrupt handler
5305
5306 arch_init();
5307 }
5308
/* Reset the dynarec's per-CPU bookkeeping for the given SH2 context.
 * For the master CPU the cycle counter is zeroed; for the slave CPU the
 * saved instruction pointer is cleared and the cycle counter is zeroed. */
void SH2DynarecReset(SH2_struct *context)
{
  if(context==MSH2) {
    master_cc=0;
  }
  if(context==SSH2) {
    slave_ip=(void *)0;
    slave_cc=0;
  }
}
5315
sh2_dynarec_cleanup()5316 void sh2_dynarec_cleanup()
5317 {
5318 int n;
5319 if (munmap ((void *)BASE_ADDR, 1<<TARGET_SIZE_2) < 0) {printf("munmap() failed\n");}
5320 for(n=0;n<2048;n++) ll_clear(jump_in+n);
5321 for(n=0;n<2048;n++) ll_clear(jump_out+n);
5322 for(n=0;n<2048;n++) ll_clear(jump_dirty+n);
5323 }
5324
sh2_recompile_block(int addr)5325 int sh2_recompile_block(int addr)
5326 {
5327 pointer beginning;
5328 int hr;
5329 int ds=0;
5330 int i,j;
5331 int done=0;
5332 unsigned int type,mode,op,op2,op3;
5333 unsigned int lastconst=0;
5334 unsigned int writelimit=0xFFFFFFFF;
5335 u32 p_constmap[SH2_REGS];
5336 u32 p_isconst=0;
5337 int cached_addr;
5338
5339 //if(Count==365117028) tracedebug=1;
5340 assem_debug("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
5341 //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
5342 //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr);
5343 //if(debug)
5344 //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
5345 //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29);
5346 /*if(Count>=312978186) {
5347 rlist();
5348 }*/
5349 //rlist();
5350 start = (u32)addr&~1;
5351 slave = (u32)addr&1;
5352 cached_addr = start&~0x20000000;
5353 //assert(((u32)addr&1)==0);
5354 if (cached_addr >= 0x00000000 && cached_addr < 0x00100000) {
5355 source = (u16 *)((char *)BiosRom+(start & 0x7FFFF));
5356 pagelimit = (addr|0x7FFFF) + 1;
5357 }
5358 else if (cached_addr >= 0x00200000 && cached_addr < 0x00300000) {
5359 source = (u16 *)((char *)LowWram+(start & 0xFFFFF));
5360 pagelimit = (addr|0xFFFFF) + 1;
5361 }
5362 else if (cached_addr >= 0x06000000 && cached_addr < 0x08000000) {
5363 source = (u16 *)((char *)HighWram+(start & 0xFFFFF));
5364 pagelimit = (addr|0xFFFFF) + 1;
5365 }
5366 else {
5367 printf("Compile at bogus memory address: %x \n", (int)addr);
5368 exit(1);
5369 }
5370 //printf("source= %x\n",(int)source);
5371
5372 alignedsource=(void *)(((pointer)source)&~3);
5373
5374 /* Pass 1: disassemble */
5375 /* Pass 2: register dependencies, branch targets */
5376 /* Pass 3: register allocation */
5377 /* Pass 4: branch dependencies */
5378 /* Pass 5: pre-alloc */
5379 /* Pass 6: optimize clean/dirty state */
5380 /* Pass 7: identify interrupt return locations */
5381 /* Pass 8: assembly */
5382 /* Pass 9: linker */
5383 /* Pass 10: garbage collection / free memory */
5384
5385 slen=MAXBLOCK;
5386
5387 //printf("addr = %x source = %x %x\n", addr,source,source[0]);
5388
5389 /* Pass 1 disassembly */
5390
5391 for(i=0;i<8;i++) {
5392 //printf("recent write: %x\n",recent_writes[i]);
5393 if(recent_writes[i]<writelimit) {
5394 if(recent_writes[i]>start) writelimit=recent_writes[i];
5395 }
5396 }
5397
5398 for(i=0;!done;i++) {
5399 bt[i]=0;ooo[i]=0;op2=0;op3=0;mode=0;
5400 minimum_free_regs[i]=0;
5401 opcode[i]=op=source[i]>>12;
5402 strcpy(insn[i],"???"); type=NI;
5403 switch(op)
5404 {
5405 case 0x00:
5406 op2=source[i]&0xf;
5407 op3=(source[i]>>4)&0xf;
5408 switch(op2)
5409 {
5410 case 0x02: strcpy(insn[i],"STC"); type=MOV; break;
5411 case 0x03:
5412 switch(op3)
5413 {
5414 case 0x00: strcpy(insn[i],"BSRF"); type=RJUMP; break;
5415 case 0x02: strcpy(insn[i],"BRAF"); type=RJUMP; break;
5416 }
5417 break;
5418 case 0x04: strcpy(insn[i],"MOV.B"); type=STORE;mode=DUALIND; break;
5419 case 0x05: strcpy(insn[i],"MOV.W"); type=STORE;mode=DUALIND; break;
5420 case 0x06: strcpy(insn[i],"MOV.L"); type=STORE;mode=DUALIND; break;
5421 case 0x07: strcpy(insn[i],"MUL.L"); type=MULTDIV; break;
5422 case 0x08:
5423 switch(op3)
5424 {
5425 case 0x00: strcpy(insn[i],"CLRT"); type=FLAGS; break;
5426 case 0x01: strcpy(insn[i],"SETT"); type=FLAGS; break;
5427 case 0x02: strcpy(insn[i],"CLRMAC"); type=MULTDIV; break;
5428 }
5429 break;
5430 case 0x09:
5431 switch(op3)
5432 {
5433 case 0x00: strcpy(insn[i],"NOP"); type=NOP; break;
5434 case 0x01: strcpy(insn[i],"DIV0U"); type=MULTDIV; break;
5435 case 0x02: strcpy(insn[i],"MOVT"); type=FLAGS; break;
5436 }
5437 break;
5438 case 0x0A: strcpy(insn[i],"STS"); type=MOV; break;
5439 case 0x0B:
5440 switch(op3)
5441 {
5442 case 0x00: strcpy(insn[i],"RTS"); type=RJUMP; break;
5443 case 0x01: strcpy(insn[i],"SLEEP"); type=SYSTEM; break;
5444 case 0x02: strcpy(insn[i],"RTE"); type=RJUMP; break;
5445 }
5446 break;
5447 case 0x0C: strcpy(insn[i],"MOV.B"); type=LOAD;mode=DUALIND; break;
5448 case 0x0D: strcpy(insn[i],"MOV.W"); type=LOAD;mode=DUALIND; break;
5449 case 0x0E: strcpy(insn[i],"MOV.L"); type=LOAD;mode=DUALIND; break;
5450 case 0x0F: strcpy(insn[i],"MAC.L"); type=COMPLEX; break;
5451 }
5452 break;
5453 case 0x01: strcpy(insn[i],"MOV.L"); type=STORE;mode=REGDISP;op2=2; break;
5454 case 0x02:
5455 op2=source[i]&0xf;
5456 switch(op2)
5457 {
5458 case 0x00: strcpy(insn[i],"MOV.B"); type=STORE;mode=REGIND; break;
5459 case 0x01: strcpy(insn[i],"MOV.W"); type=STORE;mode=REGIND; break;
5460 case 0x02: strcpy(insn[i],"MOV.L"); type=STORE;mode=REGIND; break;
5461 case 0x04: strcpy(insn[i],"MOV.B"); type=STORE;mode=PREDEC; break;
5462 case 0x05: strcpy(insn[i],"MOV.W"); type=STORE;mode=PREDEC; break;
5463 case 0x06: strcpy(insn[i],"MOV.L"); type=STORE;mode=PREDEC; break;
5464 case 0x07: strcpy(insn[i],"DIV0S"); type=MULTDIV; break;
5465 case 0x08: strcpy(insn[i],"TST"); type=ALU; break;
5466 case 0x09: strcpy(insn[i],"AND"); type=ALU; break;
5467 case 0x0A: strcpy(insn[i],"XOR"); type=ALU; break;
5468 case 0x0B: strcpy(insn[i],"OR"); type=ALU; break;
5469 case 0x0C: strcpy(insn[i],"CMP/ST"); type=ALU; break;
5470 case 0x0D: strcpy(insn[i],"XTRCT"); type=SHIFTIMM; break;
5471 case 0x0E: strcpy(insn[i],"MULU.W"); type=MULTDIV; break;
5472 case 0x0F: strcpy(insn[i],"MULS.W"); type=MULTDIV; break;
5473 }
5474 break;
5475 case 0x03:
5476 op2=source[i]&0xf;
5477 switch(op2)
5478 {
5479 case 0x00: strcpy(insn[i],"CMP/EQ"); type=ALU; break;
5480 case 0x02: strcpy(insn[i],"CMP/HS"); type=ALU; break;
5481 case 0x03: strcpy(insn[i],"CMP/GE"); type=ALU; break;
5482 case 0x04: strcpy(insn[i],"DIV1"); type=COMPLEX; break;
5483 case 0x05: strcpy(insn[i],"DMULU.L"); type=MULTDIV; break;
5484 case 0x06: strcpy(insn[i],"CMP/HI"); type=ALU; break;
5485 case 0x07: strcpy(insn[i],"CMP/GT"); type=ALU; break;
5486 case 0x08: strcpy(insn[i],"SUB"); type=ALU; break;
5487 case 0x0A: strcpy(insn[i],"SUBC"); type=ALU; break;
5488 case 0x0B: strcpy(insn[i],"SUBV"); type=ALU; break;
5489 case 0x0C: strcpy(insn[i],"ADD"); type=ALU; break;
5490 case 0x0D: strcpy(insn[i],"DMULS.L"); type=MULTDIV; break;
5491 case 0x0E: strcpy(insn[i],"ADDC"); type=ALU; break;
5492 case 0x0F: strcpy(insn[i],"ADDV"); type=ALU; break;
5493 }
5494 break;
5495 case 0x04:
5496 op2=source[i]&0xf;
5497 op3=(source[i]>>4)&0xf;
5498 switch(op2)
5499 {
5500 case 0x00:
5501 switch(op3)
5502 {
5503 case 0x00: strcpy(insn[i],"SHLL"); type=SHIFTIMM; break;
5504 case 0x01: strcpy(insn[i],"DT"); type=ALU; break;
5505 case 0x02: strcpy(insn[i],"SHAL"); type=SHIFTIMM; break;
5506 }
5507 break;
5508 case 0x01:
5509 switch(op3)
5510 {
5511 case 0x00: strcpy(insn[i],"SHLR"); type=SHIFTIMM; break;
5512 case 0x01: strcpy(insn[i],"CMP/PZ"); type=ALU; break;
5513 case 0x02: strcpy(insn[i],"SHAR"); type=SHIFTIMM; break;
5514 }
5515 break;
5516 case 0x02: strcpy(insn[i],"STS.L"); type=STORE;mode=PREDEC; break;
5517 case 0x03: strcpy(insn[i],"STC.L"); type=STORE;mode=PREDEC; break;
5518 case 0x04:
5519 switch(op3)
5520 {
5521 case 0x00: strcpy(insn[i],"ROTL"); type=SHIFTIMM; break;
5522 case 0x02: strcpy(insn[i],"ROTCL"); type=SHIFTIMM; break;
5523 }
5524 break;
5525 case 0x05:
5526 switch(op3)
5527 {
5528 case 0x00: strcpy(insn[i],"ROTR"); type=SHIFTIMM; break;
5529 case 0x01: strcpy(insn[i],"CMP/PL"); type=ALU; break;
5530 case 0x02: strcpy(insn[i],"ROTCR"); type=SHIFTIMM; break;
5531 }
5532 break;
5533 case 0x06: strcpy(insn[i],"LDS.L"); type=LOAD;mode=POSTINC; break;
5534 case 0x07: strcpy(insn[i],"LDC.L"); type=LOAD;mode=POSTINC; break;
5535 case 0x08:
5536 switch(op3)
5537 {
5538 case 0x00: strcpy(insn[i],"SHLL2"); type=SHIFTIMM; break;
5539 case 0x01: strcpy(insn[i],"SHLL8"); type=SHIFTIMM; break;
5540 case 0x02: strcpy(insn[i],"SHLL16"); type=SHIFTIMM; break;
5541 }
5542 break;
5543 case 0x09:
5544 switch(op3)
5545 {
5546 case 0x00: strcpy(insn[i],"SHLR2"); type=SHIFTIMM; break;
5547 case 0x01: strcpy(insn[i],"SHLR8"); type=SHIFTIMM; break;
5548 case 0x02: strcpy(insn[i],"SHLR16"); type=SHIFTIMM; break;
5549 }
5550 break;
5551 case 0x0A: strcpy(insn[i],"LDS"); type=MOV; break;
5552 case 0x0B:
5553 switch(op3)
5554 {
5555 case 0x00: strcpy(insn[i],"JSR"); type=RJUMP; break;
5556 case 0x01: strcpy(insn[i],"TAS.B"); type=RMW;mode=REGIND; break;
5557 case 0x02: strcpy(insn[i],"JMP"); type=RJUMP; break;
5558 }
5559 break;
5560 case 0x0E: strcpy(insn[i],"LDC"); type=MOV; break;
5561 case 0x0F: strcpy(insn[i],"MAC.W"); type=COMPLEX; break;
5562 }
5563 break;
5564 case 0x05: strcpy(insn[i],"MOV.L"); type=LOAD;mode=REGDISP;op2=2; break;
5565 case 0x06:
5566 op2=source[i]&0xf;
5567 switch(op2)
5568 {
5569 case 0x00: strcpy(insn[i],"MOV.B"); type=LOAD;mode=REGIND; break;
5570 case 0x01: strcpy(insn[i],"MOV.W"); type=LOAD;mode=REGIND; break;
5571 case 0x02: strcpy(insn[i],"MOV.L"); type=LOAD;mode=REGIND; break;
5572 case 0x03: strcpy(insn[i],"MOV"); type=MOV; break;
5573 case 0x04: strcpy(insn[i],"MOV.B"); type=LOAD;mode=POSTINC; break;
5574 case 0x05: strcpy(insn[i],"MOV.W"); type=LOAD;mode=POSTINC; break;
5575 case 0x06: strcpy(insn[i],"MOV.L"); type=LOAD;mode=POSTINC; break;
5576 case 0x07: strcpy(insn[i],"NOT"); type=ALU; break;
5577 case 0x08: strcpy(insn[i],"SWAP.B"); type=ALU; break;
5578 case 0x09: strcpy(insn[i],"SWAP.W"); type=ALU; break;
5579 case 0x0A: strcpy(insn[i],"NEGC"); type=ALU; break;
5580 case 0x0B: strcpy(insn[i],"NEG"); type=ALU; break;
5581 case 0x0C: strcpy(insn[i],"EXTU.B"); type=EXT; break;
5582 case 0x0D: strcpy(insn[i],"EXTU.W"); type=EXT; break;
5583 case 0x0E: strcpy(insn[i],"EXTS.B"); type=EXT; break;
5584 case 0x0F: strcpy(insn[i],"EXTS.W"); type=EXT; break;
5585 }
5586 break;
5587 case 0x07: strcpy(insn[i],"ADD"); type=IMM8; break;
5588 case 0x08:
5589 op2=(source[i]>>8)&0xf;
5590 switch(op2)
5591 {
5592 case 0x00: strcpy(insn[i],"MOV.B"); type=STORE;mode=REGDISP; break;
5593 case 0x01: strcpy(insn[i],"MOV.W"); type=STORE;mode=REGDISP; break;
5594 case 0x04: strcpy(insn[i],"MOV.B"); type=LOAD;mode=REGDISP; break;
5595 case 0x05: strcpy(insn[i],"MOV.W"); type=LOAD;mode=REGDISP; break;
5596 case 0x08: strcpy(insn[i],"CMP/EQ"); type=IMM8; break;
5597 case 0x09: strcpy(insn[i],"BT"); type=CJUMP; break;
5598 case 0x0B: strcpy(insn[i],"BF"); type=CJUMP; break;
5599 case 0x0D: strcpy(insn[i],"BT/S"); type=SJUMP; break;
5600 case 0x0F: strcpy(insn[i],"BF/S"); type=SJUMP; break;
5601 }
5602 break;
5603 case 0x09: strcpy(insn[i],"MOV.W"); type=PCREL; break;
5604 case 0x0A: strcpy(insn[i],"BRA"); type=UJUMP; break;
5605 case 0x0B: strcpy(insn[i],"BSR"); type=UJUMP; break;
5606 case 0x0C:
5607 op2=(source[i]>>8)&0xf;
5608 switch(op2)
5609 {
5610 case 0x00: strcpy(insn[i],"MOV.B"); type=STORE;mode=GBRDISP; break;
5611 case 0x01: strcpy(insn[i],"MOV.W"); type=STORE;mode=GBRDISP; break;
5612 case 0x02: strcpy(insn[i],"MOV.L"); type=STORE;mode=GBRDISP; break;
5613 case 0x03: strcpy(insn[i],"TRAPA"); type=SYSTEM; break;
5614 case 0x04: strcpy(insn[i],"MOV.B"); type=LOAD;mode=GBRDISP; break;
5615 case 0x05: strcpy(insn[i],"MOV.W"); type=LOAD;mode=GBRDISP; break;
5616 case 0x06: strcpy(insn[i],"MOV.L"); type=LOAD;mode=GBRDISP; break;
5617 case 0x07: strcpy(insn[i],"MOVA"); type=PCREL; break;
5618 case 0x08: strcpy(insn[i],"TST"); type=IMM8; break;
5619 case 0x09: strcpy(insn[i],"AND"); type=IMM8; break;
5620 case 0x0A: strcpy(insn[i],"XOR"); type=IMM8; break;
5621 case 0x0B: strcpy(insn[i],"OR"); type=IMM8; break;
5622 case 0x0C: strcpy(insn[i],"TST.B"); type=LOAD;mode=GBRIND; break;
5623 case 0x0D: strcpy(insn[i],"AND.B"); type=RMW;mode=GBRIND; break;
5624 case 0x0E: strcpy(insn[i],"XOR.B"); type=RMW;mode=GBRIND; break;
5625 case 0x0F: strcpy(insn[i],"OR.B"); type=RMW;mode=GBRIND; break;
5626 }
5627 break;
5628 case 0x0D: strcpy(insn[i],"MOV.L"); type=PCREL; break;
5629 case 0x0E: strcpy(insn[i],"MOV"); type=IMM8; break;
5630 default: strcpy(insn[i],"???"); type=NI; break;
5631 }
5632 itype[i]=type;
5633 addrmode[i]=mode;
5634 opcode2[i]=op2;
5635 opcode3[i]=op3;
5636 /* Get registers/immediates */
5637 rs1[i]=-1;
5638 rs2[i]=-1;
5639 rs3[i]=-1;
5640 rt1[i]=-1;
5641 rt2[i]=-1;
5642 lt1[i]=-1;
5643 cycles[i]=1;
5644 switch(type) {
5645 case LOAD:
5646 if(mode==GBRDISP||mode==GBRIND) rs1[i]=GBR;
5647 else rs1[i]=(source[i]>>4)&0xf;
5648 if(mode==DUALIND||mode==GBRIND) rs2[i]=0;
5649 if(op==4) {
5650 // LDS/LDC
5651 rs1[i]=(source[i]>>8)&0xf;
5652 if(op2==6) rt1[i]=((source[i]>>4)&0xf)+MACH;
5653 if(op2==7) {rt1[i]=((source[i]>>4)&0xf)+SR;cycles[i]=3;}
5654 if(rt1[i]==SR) rt2[i]=TBIT;
5655 }
5656 else if(op==8)
5657 rt1[i]=0; // (@disp,rm),r0
5658 else if(op==12) {
5659 if(op2!=12)
5660 rt1[i]=0; // (@disp,GBR),r0
5661 else {
5662 imm[i]=(unsigned int)((unsigned char)source[i]);
5663 rt1[i]=TBIT; // TST.B
5664 cycles[i]=3;
5665 }
5666 }
5667 else {
5668 rt1[i]=(source[i]>>8)&0xf;
5669 }
5670 if(mode==REGDISP) {
5671 imm[i]=(unsigned int)source[i]&0xF;
5672 if(op==5) imm[i]<<=2; // MOV.L
5673 if(op==8&&op2==5) imm[i]<<=1; // MOV.W
5674 }
5675 else if(mode==GBRDISP) {
5676 imm[i]=(unsigned int)((unsigned char)source[i])<<(op2&3);
5677 }
5678 else if(mode!=GBRIND) imm[i]=0;
5679 if(mode==POSTINC) rt2[i]=rs1[i];
5680 break;
5681 case STORE:
5682 if(op==4) {
5683 // STS/STC
5684 if(op2==2) rs1[i]=((source[i]>>4)&0xf)+MACH;
5685 if(op2==3) {rs1[i]=((source[i]>>4)&0xf)+SR;cycles[i]=2;}
5686 if(rs1[i]==SR) rs3[i]=TBIT;
5687 }
5688 else
5689 if(op==8)
5690 rs1[i]=0; // r0,(@disp,rn)
5691 else if(op==12)
5692 rs1[i]=0; // r0,(@disp,GBR)
5693 else
5694 rs1[i]=(source[i]>>4)&0xf;
5695 if(mode==GBRDISP) rs2[i]=GBR;
5696 else if(op==8) rs2[i]=(source[i]>>4)&0xf; // r0,(@disp,rn)
5697 else rs2[i]=(source[i]>>8)&0xf;
5698 if(mode==DUALIND) rs3[i]=0;
5699 if(mode==REGDISP) {
5700 imm[i]=(unsigned int)source[i]&0xF;
5701 if(op==1) imm[i]<<=2; // MOV.L
5702 if(op==8&&op2==1) imm[i]<<=1; // MOV.W
5703 }
5704 else if(mode==GBRDISP) {
5705 imm[i]=(unsigned int)((unsigned char)source[i])<<(op2&3);
5706 }
5707 else imm[i]=0;
5708 if(mode==PREDEC) rt1[i]=rs2[i];
5709 if( (mode==DUALIND&&((p_isconst>>rs2[i])&(p_isconst>>rs3[i])&1)) ||
5710 (mode!=DUALIND&&((p_isconst>>rs2[i])&1)) )
5711 {
5712 u32 addr;
5713 if(mode==DUALIND) addr=p_constmap[rs2[i]]+p_constmap[rs3[i]];
5714 if(mode==REGDISP||mode==GBRDISP) addr=p_constmap[rs2[i]]+imm[i];
5715 if(mode==PREDEC) addr=(p_constmap[rs2[i]]-=4);
5716 if(mode==REGIND) addr=p_constmap[rs2[i]];
5717 if(addr>start+i*2&&addr<writelimit) writelimit=addr;
5718 assem_debug("Instruction at %x possibly writes %x (limit=%x)\n",start+i*2,addr,writelimit);
5719 }
5720 break;
5721 case RMW:
5722 if(op==4) // TAS.B
5723 {
5724 rs1[i]=(source[i]>>8)&0xf;
5725 rt1[i]=TBIT;
5726 imm[i]=0;
5727 cycles[i]=4;
5728 }
5729 if(op==12) // AND.B/XOR.B/OR.B
5730 {
5731 rs1[i]=GBR;
5732 rs2[i]=0;
5733 imm[i]=(unsigned int)((unsigned char)source[i]);
5734 cycles[i]=3;
5735 }
5736 break;
5737 case PCREL:
5738 imm[i]=(signed int)((unsigned char)source[i]);
5739 if(op==12) rt1[i]=0; // MOVA
5740 else rt1[i]=(source[i]>>8)&0xf;
5741 if(op==9) imm[i]<<=1; // MOV.W
5742 else imm[i]<<=2;
5743 // Extend block to include consts
5744 // FIXME: Don't go past limit
5745 if (op==9 && lastconst < (start+i*2+4)+imm[i]) // MOV.W
5746 lastconst = (start+i*2+4)+imm[i];
5747 if (op==13 && lastconst < ((start+i*2+4)&~3)+imm[i]+2) // MOV.L
5748 lastconst = ((start+i*2+4)&~3)+imm[i]+2;
5749 //printf("lastconst=%x\n",lastconst);
5750 break;
5751 case MOV:
5752 if(op==6) {
5753 rs1[i]=(source[i]>>4)&0xf;
5754 rt1[i]=(source[i]>>8)&0xf;
5755 }
5756 if(op==0) { // STC/STS
5757 if(op2==2) rs1[i]=((source[i]>>4)&0xf)+SR; //STC
5758 if(op2==10) rs1[i]=((source[i]>>4)&0xf)+MACH; //STS
5759 rt1[i]=(source[i]>>8)&0xf;
5760 if(rs1[i]==SR) rs2[i]=TBIT; // For liveness analysis
5761 }
5762 if(op==4) { // LDC/LDS
5763 if(op2==14) rt1[i]=((source[i]>>4)&0xf)+SR; //LDC
5764 if(op2==10) rt1[i]=((source[i]>>4)&0xf)+MACH; //LDS
5765 rs1[i]=(source[i]>>8)&0xf;
5766 if(rt1[i]==SR) rt2[i]=TBIT; // For liveness analysis
5767 }
5768 break;
5769 case IMM8:
5770 if(op==8) { // CMP/EQ r0
5771 rs1[i]=0;
5772 rt1[i]=TBIT;
5773 imm[i]=(signed int)((signed char)source[i]);
5774 }else
5775 if(op==12) {
5776 rs1[i]=0;
5777 if(op2==8)
5778 rt1[i]=TBIT; // TST
5779 else
5780 rt1[i]=0; // AND/XOR/OR
5781 imm[i]=(unsigned int)((unsigned char)source[i]);
5782 }else{ // ADD/MOV
5783 if(op==7) rs1[i]=(source[i]>>8)&0xf; // ADD
5784 rt1[i]=(source[i]>>8)&0xf;
5785 imm[i]=(signed int)((signed char)source[i]);
5786 }
5787 break;
5788 case FLAGS:
5789 if(op2==8) rt1[i]=TBIT; // CLRT/SETT
5790 if(op2==9) {rs1[i]=TBIT;rt1[i]=(source[i]>>8)&0xf;} // MOVT
5791 break;
5792 case ALU:
5793 if(op==2) {
5794 if(op2==8||op2==12) { // TST or CMP/STR
5795 rs1[i]=(source[i]>>4)&0xf;
5796 rs2[i]=(source[i]>>8)&0xf;
5797 rt1[i]=TBIT;
5798 }
5799 else
5800 { // AND/OR/XOR
5801 rs1[i]=(source[i]>>4)&0xf;
5802 rs2[i]=(source[i]>>8)&0xf;
5803 rt1[i]=(source[i]>>8)&0xf;
5804 if(op2==10&&rs1[i]==rs2[i]) {
5805 rs1[i]=-1;rs2[i]=-1; // Optimize XOR reg,reg
5806 }
5807 }
5808 }
5809 if(op==3) {
5810 if(op2<8) { // CMP
5811 rs1[i]=(source[i]>>4)&0xf;
5812 rs2[i]=(source[i]>>8)&0xf;
5813 rt1[i]=TBIT;
5814 }
5815 else
5816 { // ADD/SUB
5817 rs1[i]=(source[i]>>4)&0xf;
5818 rs2[i]=(source[i]>>8)&0xf;
5819 rt1[i]=(source[i]>>8)&0xf;
5820 if(op2==10||op2==14) rs3[i]=TBIT; // ADDC/SUBC read T bit
5821 if(op2!=8&&op2!=12) // ADDC/ADDV/SUBC/SUBV set T bit
5822 rt2[i]=TBIT;
5823 }
5824 }
5825 if(op==4) { // DT and compare with zero
5826 rs1[i]=(source[i]>>8)&0xf;
5827 if(op2==0) rt1[i]=(source[i]>>8)&0xf; // DT
5828 rt2[i]=TBIT;
5829 }
5830 if(op==6) { // NOT/NEG/NEGC/SWAP
5831 rs1[i]=(source[i]>>4)&0xf;
5832 rt1[i]=(source[i]>>8)&0xf;
5833 if(op2==10)
5834 rs2[i]=rt2[i]=TBIT; // NEGC sets T bit
5835 }
5836 break;
5837 case EXT:
5838 rs1[i]=(source[i]>>4)&0xf;
5839 rt1[i]=(source[i]>>8)&0xf;
5840 break;
5841 case MULTDIV:
5842 if(op==0) {
5843 if(op2==7) // MUL.L
5844 {
5845 rs1[i]=(source[i]>>4)&0xf;
5846 rs2[i]=(source[i]>>8)&0xf;
5847 rt1[i]=MACL;
5848 cycles[i]=2;
5849 }
5850 if(op2==8) // CLRMAC
5851 {
5852 rt1[i]=MACH;
5853 rt2[i]=MACL;
5854 }
5855 if(op2==9) // DIV0U
5856 {
5857 rs1[i]=SR;
5858 rt1[i]=SR;
5859 rt2[i]=TBIT;
5860 }
5861 }
5862 if(op==2) {
5863 if(op2==7) // DIV0S
5864 {
5865 rs1[i]=(source[i]>>4)&0xf;
5866 rs2[i]=(source[i]>>8)&0xf;
5867 rs3[i]=SR;
5868 rt1[i]=SR;
5869 rt2[i]=TBIT;
5870 }
5871 if(op2==14) // MULU.W
5872 {
5873 rs1[i]=(source[i]>>4)&0xf;
5874 rs2[i]=(source[i]>>8)&0xf;
5875 rt1[i]=MACL;
5876 }
5877 if(op2==15) // MULS.W
5878 {
5879 rs1[i]=(source[i]>>4)&0xf;
5880 rs2[i]=(source[i]>>8)&0xf;
5881 rt1[i]=MACL;
5882 }
5883 }
5884 if(op==3) {
5885 if(op2==5) // DMULU.L
5886 {
5887 rs1[i]=(source[i]>>4)&0xf;
5888 rs2[i]=(source[i]>>8)&0xf;
5889 rt1[i]=MACH;
5890 rt2[i]=MACL;
5891 cycles[i]=2;
5892 }
5893 if(op2==13) // DMULS.L
5894 {
5895 rs1[i]=(source[i]>>4)&0xf;
5896 rs2[i]=(source[i]>>8)&0xf;
5897 rt1[i]=MACH;
5898 rt2[i]=MACL;
5899 cycles[i]=2;
5900 }
5901 }
5902 break;
5903 case SHIFTIMM:
5904 rs1[i]=(source[i]>>8)&0xf;
5905 rt1[i]=(source[i]>>8)&0xf;
5906 if(op==4) {
5907 if(op2<6) rt2[i]=TBIT;
5908 if(op2==4||op2==5) {if(op3==2) rs2[i]=TBIT;} // ROTCL/ROTCR
5909 }
5910 if(op==2&op2==13) { // XTRCT
5911 rs1[i]=(source[i]>>4)&0xf;
5912 rs2[i]=(source[i]>>8)&0xf;
5913 }
5914 break;
5915 case UJUMP:
5916 rs2[i]=CCREG;
5917 if(op==11) rt1[i]=PR; // BSR
5918 cycles[i]=2;
5919 break;
5920 case RJUMP:
5921 rs1[i]=(source[i]>>8)&0xf;
5922 if (op==0&&op2==11&&op3==0) rs1[i]=PR; // RTS
5923 if ((op==0&&op2==3)||(op==4&&op2==11)) { // BSRF/JSR
5924 if(op3==0) rt1[i]=PR;
5925 }
5926 rs2[i]=CCREG;
5927 cycles[i]=2;
5928 if(op==0&&op2==11&&op3==2) { // RTE
5929 rs1[i]=15; // Stack pointer
5930 rs2[i]=CCREG;
5931 rt1[i]=SR;
5932 rt2[i]=15;
5933 cycles[i]=4;
5934 }
5935 break;
5936 case CJUMP:
5937 rs1[i]=TBIT;
5938 rs2[i]=CCREG;
5939 //cycles[i]=3; // Will be adjusted if branch is taken
5940 break;
5941 case SJUMP:
5942 rs1[i]=TBIT;
5943 rs2[i]=CCREG;
5944 //cycles[i]=2; // Will be adjusted if branch is taken
5945 break;
5946 case SYSTEM:
5947 if(op2==11&&op3==2) { // RTE
5948 rs1[i]=15; // Stack pointer
5949 rs2[i]=CCREG;
5950 rt1[i]=SR;
5951 rt2[i]=TBIT;
5952 cycles[i]=4;
5953 }
5954 else if(op==12) { // TRAPA
5955 rs1[i]=SR; // Status/flags
5956 //rs2[i]=CCREG;
5957 rs2[i]=VBR;
5958 rs3[i]=15; // Stack pointer
5959 imm[i]=(unsigned int)((unsigned char)source[i]);
5960 cycles[i]=8;
5961 }
5962 else { // SLEEP
5963 rs2[i]=CCREG;
5964 cycles[i]=8;
5965 }
5966 break;
5967 case COMPLEX:
5968 if(op==3&&op2==4) { // DIV1
5969 rs1[i]=(source[i]>>4)&0xf;
5970 rs2[i]=(source[i]>>8)&0xf;
5971 rs3[i]=SR;
5972 rt1[i]=(source[i]>>8)&0xf;
5973 rt2[i]=SR;
5974 }
5975 if(op==0&&op2==15) { // MAC.L
5976 rs1[i]=(source[i]>>4)&0xf;
5977 rs2[i]=(source[i]>>8)&0xf;
5978 rs3[i]=SR;
5979 rt1[i]=(source[i]>>4)&0xf;
5980 rt2[i]=(source[i]>>8)&0xf;
5981 cycles[i]=3;
5982 }
5983 if(op==4&&op2==15) { // MAC.W
5984 rs1[i]=(source[i]>>4)&0xf;
5985 rs2[i]=(source[i]>>8)&0xf;
5986 rs3[i]=SR;
5987 rt1[i]=(source[i]>>4)&0xf;
5988 rt2[i]=(source[i]>>8)&0xf;
5989 cycles[i]=3;
5990 }
5991 break;
5992 }
5993 // Do preliminary constant propagation
5994 do_consts(i,&p_isconst,p_constmap);
5995 /* Calculate branch target addresses */
5996 if(type==UJUMP)
5997 ba[i]=start+i*2+4+((((signed int)source[i])<<20)>>19);
5998 else if(type==CJUMP||type==SJUMP)
5999 ba[i]=start+i*2+4+((((signed int)source[i])<<24)>>23);
6000 else
6001 {
6002 ba[i]=-1;
6003 if(type==RJUMP) {
6004 if(op!=0||op2!=11||op3!=2) { // !RTE
6005 if((p_isconst>>rs1[i])&1)
6006 {
6007 u32 constaddr=p_constmap[rs1[i]];
6008 if(op==0&&op2==3) {
6009 // PC-relative branch, add PC+4
6010 constaddr+=start+i*2+4;
6011 }
6012 ba[i]=constaddr;
6013 }
6014 }
6015 }
6016 }
6017
6018 // If the branch target was previously identified as data, back up
6019 if(ba[i]>start&&ba[i]<start+i*2) {
6020 //assert(itype[(ba[i]-start)>>1]!=DATA);
6021 if(itype[(ba[i]-start)>>1]==DATA||itype[(ba[i]+2-start)>>1]==DATA) {
6022 //printf("back up and redecode %x\n",ba[i]);
6023 i=(ba[i]-2-start)>>1;
6024 continue;
6025 }
6026 }
6027 /* Is this the end of the block? */
6028 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP)) {
6029 if(rt1[i-1]!=PR) { // Continue past subroutine call (BSR/JSR)
6030 unsigned int firstbt=0xFFFFFFFF;
6031 done=1;
6032 // Find next branch target (if any)
6033 for(j=i-1;j>=0;j--)
6034 {
6035 if(ba[j]>start+i*2-2&&ba[j]<firstbt) firstbt=ba[j];
6036 }
6037 // See if there are any backward branches following that one
6038 //printf("firstbt=%x diff=%d\n",firstbt,firstbt-(start+i*2));
6039 if(firstbt-(start+i*2)<(unsigned)4096) {
6040 u32 branch_addr;
6041 for(j=(firstbt-start)>>1;j<MAXBLOCK;j++) {
6042 if((source[j]&0xF900)==0x8900) { //BT(S)/BF(S)
6043 branch_addr=start+j*2+4+((((signed int)source[j])<<24)>>23);
6044 if(branch_addr>start+i*2&&branch_addr<firstbt) firstbt=branch_addr;
6045 //printf("firstbt=%x\n",firstbt);
6046 }
6047 if((source[j]&0xE000)==0xA000) { //BRA/BSR
6048 branch_addr=start+j*2+4+((((signed int)source[j])<<20)>>19);
6049 if(branch_addr>start+i*2&&branch_addr<firstbt) firstbt=branch_addr;
6050 //printf("firstbt=%x\n",firstbt);
6051 if((source[j]&0xF000)==0xA000) break; //BRA (stop after unconditional branch)
6052 }
6053 if((source[j]&0xF007)==0x0003) break; //BRAF/BSRF/RTS/RTE (stop after unconditional branch)
6054 }
6055 }
6056 // Skip constant pool
6057 // FIXME: check pagelimit
6058 while(start+i*2+2<=lastconst&&start+i*2+2<firstbt&&start+i*2+1024<writelimit&&i<MAXBLOCK-1) {
6059 i++;
6060 rs1[i]=-1;
6061 rs2[i]=-1;
6062 rs3[i]=-1;
6063 rt1[i]=-1;
6064 rt2[i]=-1;
6065 lt1[i]=-1;
6066 itype[i]=DATA;
6067 bt[i]=0;ba[i]=-1;
6068 ooo[i]=0;cycles[i]=0;is_ds[i]=0;
6069 }
6070 // Does the block continue due to a branch?
6071 if(firstbt==start+i*2) done=j=0; // Branch into delay slot
6072 if(firstbt==start+i*2+2) done=j=0;
6073 if(firstbt==start+i*2+4) done=j=0; // CHECK: Is this useful?
6074 }
6075 else {
6076 if(stop_after_jal) done=1;
6077 // Stop on BREAK
6078 //if((source[i+1]&0xfc00003f)==0x0d) done=1;
6079 }
6080 // Don't recompile stuff that's already compiled
6081 if(check_addr(start+i*2+2+slave)) done=1;
6082 // Don't get too close to the limit
6083 if(i>MAXBLOCK/2) done=1;
6084 }
6085 if(yabsys.emulatebios) {
6086 if(start+i*2>=0x200&&start+i*2<0x600) {
6087 strcpy(insn[i],"(BIOS)");
6088 itype[i]=BIOS;
6089 done=1;
6090 }
6091 }
6092 //if(i>0&&itype[i-1]==SYSCALL&&stop_after_jal) done=1;
6093 //if(i>0&&itype[i-1]==SYSTEM&&source[i-1]==0x002B) done=1; // RTE
6094 //assert(i<MAXBLOCK-1);
6095 if(start+i*2==pagelimit-2) done=1;
6096 assert(start+i*2<pagelimit);
6097 if (i==MAXBLOCK-1) done=1;
6098 // Stop if we're compiling junk
6099 if(itype[i]==NI&&opcode[i]==0x11) {
6100 done=stop_after_jal=1;
6101 printf("Disabled speculative precompilation\n");
6102 }
6103 if(!done&&i<MAXBLOCK-1) {
6104 // Constant propagation
6105 //if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP)) isconst[i+1]=0;
6106 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP)) p_isconst=0;
6107 }
6108 }
6109 slen=i;
6110 assert(slen>0);
6111
6112 /* Pass 2 - Register dependencies and branch targets */
6113
6114 // Flag branch targets
6115 for(i=0;i<slen;i++)
6116 {
6117 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP)
6118 {
6119 // If subroutine call, flag return address as a possible branch target
6120 if(rt1[i]==PR && i<slen-2) bt[i+2]=1;
6121
6122 if(ba[i]>=start && ba[i]<(start+slen*2) ) {
6123 // Possibly internal branch, flag target
6124 bt[(ba[i]-start)>>1]=1;
6125 }
6126 }
6127 }
6128
6129 // Do constant propagation
6130 p_isconst=0;
6131 for(i=0;i<slen;i++)
6132 {
6133 if(bt[i])
6134 {
6135 // Can't do constant propagation if a branch target intervenes
6136 p_isconst=0;
6137 }
6138 if(i>1&&(itype[i-2]==UJUMP||itype[i-2]==RJUMP)) p_isconst=0;
6139 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP)) p_isconst=0;
6140 if(i>0&&(itype[i-1]==CJUMP||itype[i-1]==SJUMP)) p_isconst=0;
6141 do_consts(i,&p_isconst,p_constmap);
6142 if(itype[i]==RJUMP) {
6143 if(opcode[i]!=0||opcode2[i]!=11||opcode3[i]!=2) { // Not RTE
6144 if((p_isconst>>rs1[i])&1) {
6145 // Do constant propagation, branch to fixed address
6146 u32 constaddr=p_constmap[rs1[i]];
6147 if(opcode[i]==0&&opcode2[i]==3) {
6148 // PC-relative branch, add PC+4
6149 constaddr+=start+i*2+4;
6150 }
6151 ba[i]=constaddr;
6152 //if(internal_branch(constaddr))
6153 // if(!bt[(constaddr-start)>>1]) printf("oops: %x\n",constaddr);
6154 //assert(bt[(constaddr-start)>>1]);
6155 }
6156 }
6157 }
6158 // No stack-based addressing modes in the delay slot,
6159 // to avoid incorrect constants due to pre-incrementing.
6160 // TODO: This really should only drop the address register
6161 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==SJUMP) {
6162 if((source[i+1]&0xF00A)==0x4002) p_isconst=0;
6163 if((source[i+1]&0xB00E)==0x2004) p_isconst=0;
6164 if((source[i+1]&0xB00F)==0x2006) p_isconst=0;
6165 }
6166 memcpy(regs[i].constmap,p_constmap,sizeof(u32)*SH2_REGS);
6167 regs[i].isconst=p_isconst;
6168 }
6169 unneeded_registers(0,slen-1,0);
6170
6171 /* Pass 3 - Register allocation */
6172
6173 {
6174 struct regstat current; // Current register allocations/status
6175 int cc=0;
6176 current.dirty=0;
6177 current.u=unneeded_reg[0];
6178 clear_all_regs(current.regmap);
6179 alloc_reg(¤t,0,CCREG);
6180 dirty_reg(¤t,CCREG);
6181 current.isdoingcp=0;
6182 current.wasdoingcp=0;
6183
6184 for(i=0;i<slen;i++)
6185 {
6186 if(bt[i])
6187 {
6188 // Can't do constant propagation if a branch target intervenes
6189 current.isdoingcp=0;
6190 }
6191 memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap));
6192 //printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]);
6193 regs[i].wasdoingcp=current.isdoingcp;
6194 regs[i].wasdirty=current.dirty;
6195 if(itype[i]==UJUMP||itype[i]==SJUMP||itype[i]==RJUMP) {
6196 if(i+1<slen) {
6197 //current.u=branch_unneeded_reg[i]&~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6198 current.u=branch_unneeded_reg[i];
6199 //if(rt1[i+1]>=0) current.u|=1LL<<rt1[i+1];
6200 //if(rt2[i+1]>=0) current.u|=1LL<<rt2[i+1];
6201 if(rs1[i+1]>=0) current.u&=~(1LL<<rs1[i+1]);
6202 if(rs2[i+1]>=0) current.u&=~(1LL<<rs2[i+1]);
6203 if(rs3[i+1]>=0) current.u&=~(1LL<<rs3[i+1]);
6204 if(rs1[i+1]==TBIT||rs2[i+1]==TBIT) current.u&=~(1LL<<SR);
6205 if(rt1[i+1]==TBIT||rt2[i+1]==TBIT) current.u&=~(1LL<<SR);
6206 //current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
6207 if(rs1[i]>=0) current.u&=~(1LL<<rs1[i]);
6208 if(rs2[i]>=0) current.u&=~(1LL<<rs2[i]); // CCREG
6209 if(rs1[i]==TBIT||rs2[i]==TBIT) current.u&=~(1LL<<SR); // BT/S BF/S
6210 regs[i].u=current.u;
6211 } else { printf("oops, branch at end of block with no delay slot\n");exit(1); }
6212 }else if(itype[i]==CJUMP) {
6213 current.u=branch_unneeded_reg[i];
6214 regs[i].u=current.u;
6215 if(rs1[i]>=0) current.u&=~(1LL<<rs1[i]);
6216 if(rs2[i]>=0) current.u&=~(1LL<<rs2[i]); // CCREG
6217 if(rs1[i]==TBIT||rs2[i]==TBIT) current.u&=~(1LL<<SR); // BT BF
6218 } else {
6219 if(i+1<slen) {
6220 regs[i].u=unneeded_reg[i+1];
6221 //current.u=unneeded_reg[i+1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
6222 //current.u=unneeded_reg[i+1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rs3[i]));
6223 current.u=unneeded_reg[i+1];
6224 if(rs1[i]>=0) current.u&=~(1LL<<rs1[i]);
6225 if(rs2[i]>=0) current.u&=~(1LL<<rs2[i]);
6226 if(rs3[i]>=0) current.u&=~(1LL<<rs3[i]);
6227 if(rs1[i]==TBIT||rs2[i]==TBIT) current.u&=~(1LL<<SR);
6228 if(rt1[i]==TBIT||rt2[i]==TBIT) current.u&=~(1LL<<SR);
6229 } else {
6230 current.u=0;
6231 }
6232 }
6233 is_ds[i]=ds;
6234 if(ds) {
6235 struct regstat temp;
6236 ds=0; // Skip delay slot, already allocated as part of branch
6237 // ...but we need to alloc it in case something jumps here
6238 if(i+1<slen) {
6239 current.u=branch_unneeded_reg[i-1]&unneeded_reg[i+1];
6240 }else{
6241 current.u=branch_unneeded_reg[i-1];
6242 }
6243 current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
6244 memcpy(&temp,¤t,sizeof(current));
6245 temp.wasdirty=temp.dirty;
6246 // TODO: Take into account unconditional branches, as below
6247 delayslot_alloc(&temp,i);
6248 memcpy(regs[i].regmap,temp.regmap,sizeof(temp.regmap));
6249 regs[i].wasdirty=temp.wasdirty;
6250 regs[i].dirty=temp.dirty;
6251 regs[i].isdoingcp=0;
6252 regs[i].wasdoingcp=0;
6253 current.isdoingcp=0;
6254 // Create entry (branch target) regmap
6255 for(hr=0;hr<HOST_REGS;hr++)
6256 {
6257 int r=temp.regmap[hr];
6258 if(r>=0) {
6259 if(r!=regmap_pre[i][hr]) {
6260 regs[i].regmap_entry[hr]=-1;
6261 }
6262 else
6263 {
6264 if((current.u>>r)&1) {
6265 regs[i].regmap_entry[hr]=-1;
6266 regs[i].regmap[hr]=-1;
6267 //Don't clear regs in the delay slot as the branch might need them
6268 //current.regmap[hr]=-1;
6269 }else
6270 regs[i].regmap_entry[hr]=r;
6271 }
6272 } else {
6273 // First instruction expects CCREG to be allocated
6274 if(i==0&&hr==HOST_CCREG)
6275 regs[i].regmap_entry[hr]=CCREG;
6276 else
6277 regs[i].regmap_entry[hr]=-1;
6278 }
6279 }
6280 }
6281 else { // Not delay slot
6282 switch(itype[i]) {
6283 case UJUMP:
6284 //current.isdoingcp=0; // DEBUG
6285 //current.wasdoingcp=0; // DEBUG
6286 //regs[i].wasdoingcp=0; // DEBUG
6287 clear_const(¤t,rt1[i]);
6288 alloc_cc(¤t,i);
6289 dirty_reg(¤t,CCREG);
6290 if (rt1[i]==PR) {
6291 alloc_reg(¤t,i,PR);
6292 dirty_reg(¤t,PR);
6293 assert(rs1[i+1]!=PR&&rs2[i+1]!=PR);
6294 #ifdef REG_PREFETCH
6295 alloc_reg(¤t,i,PTEMP);
6296 #endif
6297 }
6298 ooo[i]=1;
6299 delayslot_alloc(¤t,i+1);
6300 //current.isdoingcp=0; // DEBUG
6301 ds=1;
6302 //printf("i=%d, isdoingcp=%x\n",i,current.isdoingcp);
6303 break;
6304 case RJUMP:
6305 //current.isdoingcp=0;
6306 //current.wasdoingcp=0;
6307 //regs[i].wasdoingcp=0;
6308 clear_const(¤t,rs1[i]);
6309 clear_const(¤t,rt1[i]);
6310 alloc_cc(¤t,i);
6311 dirty_reg(¤t,CCREG);
6312 if(opcode[i]==0&&opcode2[i]==11&&opcode3[i]==2) { // RTE
6313 alloc_reg(¤t,i,15); // Stack reg
6314 dirty_reg(¤t,15);
6315 alloc_reg(¤t,i,SR); // SR will be loaded from stack
6316 dirty_reg(¤t,SR);
6317 assert(rt1[i+1]!=15&&rt2[i+1]!=15);
6318 assert(rt1[i+1]!=SR&&rt2[i+1]!=SR);
6319 assert(rt1[i+1]!=TBIT&&rt2[i+1]!=TBIT);
6320 delayslot_alloc(¤t,i+1);
6321 }
6322 else
6323 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
6324 alloc_reg(¤t,i,rs1[i]);
6325 if (rt1[i]==PR) {
6326 alloc_reg(¤t,i,rt1[i]);
6327 dirty_reg(¤t,rt1[i]);
6328 assert(rs1[i+1]!=PR&&rs2[i+1]!=PR);
6329 if(rs1[i+1]==PR||rs2[i+1]==PR) {printf("OOPS\n");}
6330 #ifdef REG_PREFETCH
6331 alloc_reg(¤t,i,PTEMP);
6332 #endif
6333 }
6334 #ifdef USE_MINI_HT
6335 if(rs1[i]==PR) { // BSRF/JSR
6336 alloc_reg(¤t,i,RHASH);
6337 #ifndef HOST_IMM_ADDR32
6338 alloc_reg(¤t,i,RHTBL);
6339 #endif
6340 }
6341 #endif
6342 // PC-relative branch needs a temporary register to add PC
6343 if(opcode[i]==0&&opcode2[i]==3) alloc_reg(¤t,i,RTEMP);
6344 delayslot_alloc(¤t,i+1);
6345 } else {
6346 // The delay slot overwrites our source register,
6347 // allocate a temporary register to hold the old value.
6348 current.isdoingcp=0;
6349 current.wasdoingcp=0;
6350 regs[i].wasdoingcp=0;
6351 delayslot_alloc(¤t,i+1);
6352 current.isdoingcp=0;
6353 alloc_reg(¤t,i,RTEMP);
6354 }
6355 //current.isdoingcp=0; // DEBUG
6356 ooo[i]=1;
6357 ds=1;
6358 break;
6359 case CJUMP:
6360 //current.isdoingcp=0;
6361 //current.wasdoingcp=0;
6362 //regs[i].wasdoingcp=0;
6363 clear_const(¤t,rs1[i]);
6364 clear_const(¤t,rs2[i]);
6365 alloc_cc(¤t,i);
6366 dirty_reg(¤t,CCREG);
6367 alloc_reg(¤t,i,SR);
6368 // No delay slot, don't do constant propagation
6369 current.isdoingcp=0;
6370 current.wasdoingcp=0;
6371 regs[i].wasdoingcp=0;
6372 //ds=1; // BT/BF don't have delay slots
6373 break;
6374 case SJUMP:
6375 //current.isdoingcp=0;
6376 //current.wasdoingcp=0;
6377 //regs[i].wasdoingcp=0;
6378 clear_const(¤t,rs1[i]);
6379 clear_const(¤t,rt1[i]);
6380 alloc_cc(¤t,i);
6381 dirty_reg(¤t,CCREG);
6382 alloc_reg(¤t,i,SR);
6383 if(rt1[i+1]==TBIT||rt2[i+1]==TBIT||rt1[i+1]==SR||rt2[i+1]==SR) {
6384 // The delay slot overwrites the branch condition.
6385 // Allocate the branch condition registers instead.
6386 current.isdoingcp=0;
6387 current.wasdoingcp=0;
6388 regs[i].wasdoingcp=0;
6389 }
6390 else
6391 if(itype[i+1]==COMPLEX) {
6392 // The MAC and DIV instructions make function calls which
6393 // do not save registers. Do the branch and update the
6394 // cycle count first.
6395 current.isdoingcp=0;
6396 current.wasdoingcp=0;
6397 regs[i].wasdoingcp=0;
6398 }
6399 else
6400 {
6401 ooo[i]=1;
6402 delayslot_alloc(¤t,i+1);
6403 }
6404 ds=1;
6405 //current.isdoingcp=0;
6406 break;
6407 case IMM8:
6408 imm8_alloc(¤t,i);
6409 break;
6410 case LOAD:
6411 load_alloc(¤t,i);
6412 break;
6413 case STORE:
6414 store_alloc(¤t,i);
6415 break;
6416 case RMW:
6417 rmw_alloc(¤t,i);
6418 break;
6419 case PCREL:
6420 pcrel_alloc(¤t,i);
6421 break;
6422 case ALU:
6423 alu_alloc(¤t,i);
6424 break;
6425 case MULTDIV:
6426 multdiv_alloc(¤t,i);
6427 break;
6428 case SHIFTIMM:
6429 shiftimm_alloc(¤t,i);
6430 break;
6431 case MOV:
6432 mov_alloc(¤t,i);
6433 break;
6434 case EXT:
6435 ext_alloc(¤t,i);
6436 break;
6437 case FLAGS:
6438 flags_alloc(¤t,i);
6439 break;
6440 case COMPLEX:
6441 complex_alloc(¤t,i);
6442 break;
6443 case SYSTEM:
6444 system_alloc(¤t,i);
6445 break;
6446 }
6447
6448 //printf("xxx: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",current.regmap[0],current.regmap[1],current.regmap[2],current.regmap[3],current.regmap[5],current.regmap[6],current.regmap[7]);
6449
6450 // Create entry (branch target) regmap
6451 for(hr=0;hr<HOST_REGS;hr++)
6452 {
6453 int r,or,er;
6454 r=current.regmap[hr];
6455 if(r>=0) {
6456 if(r!=regmap_pre[i][hr]) {
6457 // TODO: delay slot (?)
6458 or=get_reg(regmap_pre[i],r); // Get old mapping for this register
6459 if(or<0||(r&63)>=TEMPREG){
6460 regs[i].regmap_entry[hr]=-1;
6461 }
6462 else
6463 {
6464 // Just move it to a different register
6465 regs[i].regmap_entry[hr]=r;
6466 // If it was dirty before, it's still dirty
6467 if((regs[i].wasdirty>>or)&1) dirty_reg(¤t,r&63);
6468 }
6469 }
6470 else
6471 {
6472 if(r<64){
6473 if((current.u>>r)&1) {
6474 regs[i].regmap_entry[hr]=-1;
6475 //regs[i].regmap[hr]=-1;
6476 current.regmap[hr]=-1;
6477 }else
6478 regs[i].regmap_entry[hr]=r;
6479 }
6480 }
6481 } else {
6482 // Branches expect CCREG to be allocated at the target
6483 if(regmap_pre[i][hr]==CCREG)
6484 regs[i].regmap_entry[hr]=CCREG;
6485 else
6486 regs[i].regmap_entry[hr]=-1;
6487 }
6488 }
6489 memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap));
6490 }
6491 /* Branch post-alloc */
6492 if(i>0)
6493 {
6494 current.wasdirty=current.dirty;
6495 switch(itype[i-1]) {
6496 case UJUMP:
6497 memcpy(&branch_regs[i-1],¤t,sizeof(current));
6498 branch_regs[i-1].isdoingcp=0;
6499 branch_regs[i-1].wasdoingcp=0;
6500 branch_regs[i-1].isconst=0;
6501 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
6502 alloc_cc(&branch_regs[i-1],i-1);
6503 dirty_reg(&branch_regs[i-1],CCREG);
6504 if(rt1[i-1]==PR) { // BSR
6505 alloc_reg(&branch_regs[i-1],i-1,PR);
6506 dirty_reg(&branch_regs[i-1],PR);
6507 }
6508 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
6509 memcpy(cpmap[i],cpmap[i-1],sizeof(current.cpmap));
6510 break;
6511 case RJUMP:
6512 memcpy(&branch_regs[i-1],¤t,sizeof(current));
6513 branch_regs[i-1].isdoingcp=0;
6514 branch_regs[i-1].wasdoingcp=0;
6515 branch_regs[i-1].isconst=0;
6516 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
6517 alloc_cc(&branch_regs[i-1],i-1);
6518 dirty_reg(&branch_regs[i-1],CCREG);
6519 alloc_reg(&branch_regs[i-1],i-1,rs1[i-1]);
6520 if(rt1[i-1]==PR) { // BSRF/JSR
6521 alloc_reg(&branch_regs[i-1],i-1,rt1[i-1]);
6522 dirty_reg(&branch_regs[i-1],rt1[i-1]);
6523 }
6524 #ifdef USE_MINI_HT
6525 if(rs1[i-1]==PR) { // RTS
6526 alloc_reg(&branch_regs[i-1],i-1,RHASH);
6527 #ifndef HOST_IMM_ADDR32
6528 alloc_reg(&branch_regs[i-1],i-1,RHTBL);
6529 #endif
6530 }
6531 #endif
6532 if(opcode[i-1]==0&&opcode2[i-1]==11&&opcode3[i-1]==2) { // RTE
6533 alloc_reg(&branch_regs[i-1],i-1,SR); // SR will be loaded from stack
6534 dirty_reg(&branch_regs[i-1],SR);
6535 alloc_reg(&branch_regs[i-1],i-1,RTEMP);
6536 alloc_reg(&branch_regs[i-1],i-1,MOREG);
6537 }
6538 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
6539 memcpy(cpmap[i],cpmap[i-1],sizeof(current.cpmap));
6540 break;
6541 case SJUMP:
6542 alloc_cc(¤t,i-1);
6543 dirty_reg(¤t,CCREG);
6544 if(rt1[i]==TBIT||rt2[i]==TBIT||rt1[i]==SR||rt2[i]==SR||itype[i]==COMPLEX) {
6545 // The delay slot overwrote the branch condition
6546 // Delay slot goes after the test (in order)
6547 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
6548 delayslot_alloc(¤t,i);
6549 current.isdoingcp=0;
6550 }
6551 else
6552 {
6553 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
6554 // Alloc the branch condition register
6555 alloc_reg(¤t,i-1,SR);
6556 }
6557 memcpy(&branch_regs[i-1],¤t,sizeof(current));
6558 branch_regs[i-1].isdoingcp=0;
6559 branch_regs[i-1].wasdoingcp=0;
6560 branch_regs[i-1].isconst=0;
6561 memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap));
6562 memcpy(cpmap[i],cpmap[i-1],sizeof(current.cpmap));
6563 break;
6564 }
6565
6566 if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||itype[i]==DATA)
6567 {
6568 if(rt1[i-1]==PR&&itype[i]!=DATA) // BSR/JSR
6569 {
6570 // Subroutine call will return here, don't alloc any registers
6571 current.dirty=0;
6572 clear_all_regs(current.regmap);
6573 alloc_reg(¤t,i,CCREG);
6574 dirty_reg(¤t,CCREG);
6575 }
6576 else if(i+1<slen)
6577 {
6578 // Internal branch will jump here, match registers to caller
6579 current.dirty=0;
6580 clear_all_regs(current.regmap);
6581 alloc_reg(¤t,i,CCREG);
6582 dirty_reg(¤t,CCREG);
6583 for(j=i-1;j>=0;j--)
6584 {
6585 if(ba[j]==start+i*2+2) {
6586 if(itype[j]==CJUMP) {
6587 memcpy(current.regmap,regs[j].regmap,sizeof(current.regmap));
6588 current.dirty=regs[j].dirty;
6589 }else{
6590 memcpy(current.regmap,branch_regs[j].regmap,sizeof(current.regmap));
6591 current.dirty=branch_regs[j].dirty;
6592 }
6593 break;
6594 }
6595 }
6596 while(j>=0) {
6597 if(ba[j]==start+i*2+2) {
6598 for(hr=0;hr<HOST_REGS;hr++) {
6599 if(itype[j]==CJUMP) {
6600 if(current.regmap[hr]!=regs[j].regmap[hr]) {
6601 current.regmap[hr]=-1;
6602 }
6603 current.dirty&=regs[j].dirty;
6604 }else{
6605 if(current.regmap[hr]!=branch_regs[j].regmap[hr]) {
6606 current.regmap[hr]=-1;
6607 }
6608 current.dirty&=branch_regs[j].dirty;
6609 }
6610 }
6611 }
6612 j--;
6613 }
6614 }
6615 }
6616 }
6617
6618 // Count cycles in between branches
6619 ccadj[i]=cc;
6620 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP))
6621 {
6622 cc=0;
6623 }
6624 else
6625 if(itype[i]==CJUMP||itype[i]==SJUMP)
6626 {
6627 cc=1;
6628 }
6629 else
6630 {
6631 cc+=cycles[i];
6632 }
6633
6634 if(!is_ds[i]) {
6635 regs[i].dirty=current.dirty;
6636 regs[i].isdoingcp=current.isdoingcp;
6637 memcpy(cpmap[i],current.cpmap,sizeof(current.cpmap));
6638 }
6639 for(hr=0;hr<HOST_REGS;hr++) {
6640 if(hr!=EXCLUDE_REG&®s[i].regmap[hr]>=0) {
6641 if(regmap_pre[i][hr]!=regs[i].regmap[hr]) {
6642 regs[i].wasdoingcp&=~(1<<hr);
6643 }
6644 }
6645 }
6646 }
6647 }
6648
6649 /* Pass 4 - Cull unused host registers */
6650
6651 {
6652 u64 nr=0;
6653
6654 for (i=slen-1;i>=0;i--)
6655 {
6656 int hr;
6657 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP)
6658 {
6659 if(ba[i]<start || ba[i]>=(start+slen*2))
6660 {
6661 // Branch out of this block, don't need anything
6662 nr=0;
6663 }
6664 else
6665 {
6666 // Internal branch
6667 // Need whatever matches the target
6668 int t=(ba[i]-start)>>1;
6669 nr=0;
6670 for(hr=0;hr<HOST_REGS;hr++)
6671 {
6672 if(regs[i].regmap_entry[hr]>=0) {
6673 if(regs[i].regmap_entry[hr]==regs[t].regmap_entry[hr]) nr|=1<<hr;
6674 }
6675 }
6676 }
6677 // Conditional branch may need registers for following instructions
6678 if(itype[i]==SJUMP)
6679 {
6680 if(i<slen-2) {
6681 nr|=needed_reg[i+2];
6682 for(hr=0;hr<HOST_REGS;hr++)
6683 {
6684 if(regmap_pre[i+2][hr]>=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1<<hr);
6685 //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",start+i*2,hr,regmap_entry[i+2][hr]);
6686 }
6687 }
6688 }
6689 else if(itype[i]==CJUMP)
6690 {
6691 if(i<slen-2) {
6692 nr|=needed_reg[i+1];
6693 for(hr=0;hr<HOST_REGS;hr++)
6694 {
6695 if(regmap_pre[i+1][hr]>=0&&get_reg(regs[i+1].regmap_entry,regmap_pre[i+1][hr])<0) nr&=~(1<<hr);
6696 //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",start+i*2,hr,regmap_entry[i+2][hr]);
6697 }
6698 }
6699 }
6700 // Don't need stuff which is overwritten
6701 for(hr=0;hr<HOST_REGS;hr++) {
6702 if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
6703 if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
6704 }
6705 // Merge in delay slot
6706 if(itype[i]!=CJUMP)
6707 for(hr=0;hr<HOST_REGS;hr++)
6708 {
6709 // These are overwritten by the delay slot
6710 if(rt1[i+1]>=0&&rt1[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
6711 if(rt2[i+1]>=0&&rt2[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
6712 if(rs1[i+1]>=0&&rs1[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
6713 if(rs2[i+1]>=0&&rs2[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
6714 if(rs3[i+1]>=0&&rs3[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
6715 if(rs1[i+1]>=0&&rs1[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
6716 if(rs2[i+1]>=0&&rs2[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
6717 if(rs3[i+1]>=0&&rs3[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
6718 //if(dep1[i+1]&&!((unneeded_reg_upper[i]>>dep1[i+1])&1)) {
6719 // if(dep1[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
6720 // if(dep2[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
6721 //}
6722 //if(dep2[i+1]&&!((unneeded_reg_upper[i]>>dep2[i+1])&1)) {
6723 // if(dep1[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
6724 // if(dep2[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
6725 //}
6726 if(regs[i].regmap_entry[hr]==SR) nr|=1<<hr;
6727 if(regs[i].regmap[hr]==SR) nr|=1<<hr;
6728 if(regmap_pre[i][hr]==SR) nr|=1<<hr;
6729 }
6730 }
6731 else if(itype[i]==SYSTEM)
6732 {
6733 // TRAPA instruction (software interrupt)
6734 nr=0;
6735 for(hr=0;hr<HOST_REGS;hr++)
6736 {
6737 // Source registers are needed
6738 if(regmap_pre[i][hr]==15) nr|=1<<hr;
6739 if(regmap_pre[i][hr]==SR) nr|=1<<hr;
6740 if(regmap_pre[i][hr]==VBR) nr|=1<<hr;
6741 if(regmap_pre[i][hr]==CCREG) nr|=1<<hr;
6742 if(regs[i].regmap_entry[hr]==15) nr|=1<<hr;
6743 if(regs[i].regmap_entry[hr]==SR) nr|=1<<hr;
6744 if(regs[i].regmap_entry[hr]==VBR) nr|=1<<hr;
6745 if(regs[i].regmap_entry[hr]==CCREG) nr|=1<<hr;
6746 }
6747 }
6748 else // Non-branch
6749 {
6750 if(i<slen-1) {
6751 for(hr=0;hr<HOST_REGS;hr++) {
6752 if(regmap_pre[i+1][hr]>=0&&get_reg(regs[i+1].regmap_entry,regmap_pre[i+1][hr])<0) nr&=~(1<<hr);
6753 if(regs[i].regmap[hr]!=regmap_pre[i+1][hr]) nr&=~(1<<hr);
6754 if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
6755 if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
6756 }
6757 }
6758 }
6759 for(hr=0;hr<HOST_REGS;hr++)
6760 {
6761 // Overwritten registers are not needed
6762 if(rt1[i]>=0&&rt1[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
6763 if(rt2[i]>=0&&rt2[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
6764 // Source registers are needed
6765 if(rs1[i]>=0&&rs1[i]==regmap_pre[i][hr]) nr|=1<<hr;
6766 if(rs2[i]>=0&&rs2[i]==regmap_pre[i][hr]) nr|=1<<hr;
6767 if(rs3[i]>=0&&rs3[i]==regmap_pre[i][hr]) nr|=1<<hr;
6768 if(rs1[i]>=0&&rs1[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
6769 if(rs2[i]>=0&&rs2[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
6770 if(rs3[i]>=0&&rs3[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
6771 //if(dep1[i]&&!((unneeded_reg_upper[i]>>dep1[i])&1)) {
6772 // if(dep1[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
6773 // if(dep1[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
6774 //}
6775 //if(dep2[i]&&!((unneeded_reg_upper[i]>>dep2[i])&1)) {
6776 // if(dep2[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
6777 // if(dep2[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
6778 //}
6779 if(regs[i].regmap_entry[hr]==SR) nr|=1<<hr;
6780 if(regs[i].regmap[hr]==SR) nr|=1<<hr;
6781 if(regmap_pre[i][hr]==SR) nr|=1<<hr;
6782 // Don't store a register immediately after writing it,
6783 // may prevent dual-issue.
6784 // But do so if this is a branch target, otherwise we
6785 // might have to load the register before the branch.
6786 if(i>0&&!bt[i]&&((regs[i].wasdirty>>hr)&1)) {
6787 if(regmap_pre[i][hr]>=0&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1)) {
6788 if(rt1[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
6789 if(rt2[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
6790 }
6791 if(regs[i].regmap_entry[hr]>=0&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1)) {
6792 if(rt1[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
6793 if(rt2[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
6794 }
6795 }
6796 }
6797 // Cycle count is needed at branches. Assume it is needed at the target too.
6798 if(i==0||bt[i]||itype[i]==CJUMP||itype[i]==SJUMP) {
6799 if(regmap_pre[i][HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
6800 if(regs[i].regmap_entry[HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
6801 }
6802 // Save it
6803 needed_reg[i]=nr;
6804
6805 // Deallocate unneeded registers
6806 for(hr=0;hr<HOST_REGS;hr++)
6807 {
6808 if(!((nr>>hr)&1)) {
6809 if(regs[i].regmap_entry[hr]!=CCREG) regs[i].regmap_entry[hr]=-1;
6810 if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] &&
6811 (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
6812 (regs[i].regmap[hr]&63)!=PTEMP && (regs[i].regmap[hr]&63)!=CCREG)
6813 {
6814 if(itype[i]==CJUMP) {
6815 regs[i].regmap[hr]=-1;
6816 regs[i].isdoingcp&=~(1<<hr);
6817 if(i<slen-1) {
6818 regmap_pre[i+1][hr]=-1;
6819 regs[i+1].wasdoingcp&=~(1<<hr);
6820 }
6821 }
6822 }
6823 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==SJUMP)
6824 {
6825 int temp1=-1,temp2=-1;
6826 //if(get_reg(regs[i].regmap,rt1[i+1]|64)>=0||get_reg(branch_regs[i].regmap,rt1[i+1]|64)>=0)
6827 //{
6828 // d1=dep1[i+1];
6829 // d2=dep2[i+1];
6830 //}
6831 if(itype[i+1]==LOAD || itype[i+1]==STORE ||
6832 itype[i+1]==RMW || itype[i+1]==PCREL ||
6833 itype[i+1]==SYSTEM || source[i]==0x002B /* RTE */ )
6834 temp1=MOREG;
6835 if(itype[i+1]==COMPLEX) {
6836 temp1=MACH;
6837 temp2=MACL;
6838 }
6839 if(regs[i].regmap[hr]!=rs1[i] && regs[i].regmap[hr]!=rs2[i] && regs[i].regmap[hr]!=rs3[i] &&
6840 regs[i].regmap[hr]!=rt1[i] && regs[i].regmap[hr]!=rt2[i] &&
6841 regs[i].regmap[hr]!=rs1[i+1] && regs[i].regmap[hr]!=rs2[i+1] && regs[i].regmap[hr]!=rs3[i+1] &&
6842 regs[i].regmap[hr]!=rt1[i+1] && regs[i].regmap[hr]!=rt2[i+1] &&
6843 regs[i].regmap[hr]!=RHASH && regs[i].regmap[hr]!=RHTBL &&
6844 regs[i].regmap[hr]!=RTEMP && regs[i].regmap[hr]!=PTEMP &&
6845 regs[i].regmap[hr]!=CCREG &&
6846 regs[i].regmap[hr]!=temp1 && regs[i].regmap[hr]!=temp2 )
6847 {
6848 regs[i].regmap[hr]=-1;
6849 regs[i].isdoingcp&=~(1<<hr);
6850 if(branch_regs[i].regmap[hr]!=rs1[i] && branch_regs[i].regmap[hr]!=rs2[i] && branch_regs[i].regmap[hr]!=rs3[i] &&
6851 branch_regs[i].regmap[hr]!=rt1[i] && branch_regs[i].regmap[hr]!=rt2[i] &&
6852 branch_regs[i].regmap[hr]!=rs1[i+1] && branch_regs[i].regmap[hr]!=rs2[i+1] && branch_regs[i].regmap[hr]!=rs3[i+1] &&
6853 branch_regs[i].regmap[hr]!=rt1[i+1] && branch_regs[i].regmap[hr]!=rt2[i+1] &&
6854 branch_regs[i].regmap[hr]!=RHASH && branch_regs[i].regmap[hr]!=RHTBL &&
6855 branch_regs[i].regmap[hr]!=RTEMP && branch_regs[i].regmap[hr]!=PTEMP &&
6856 branch_regs[i].regmap[hr]!=CCREG &&
6857 branch_regs[i].regmap[hr]!=temp1 && branch_regs[i].regmap[hr]!=temp2)
6858 {
6859 branch_regs[i].regmap[hr]=-1;
6860 branch_regs[i].regmap_entry[hr]=-1;
6861 if(itype[i]!=RJUMP&&itype[i]!=UJUMP)
6862 {
6863 if(i<slen-2) {
6864 regmap_pre[i+2][hr]=-1;
6865 regs[i+2].wasdoingcp&=~(1<<hr);
6866 }
6867 }
6868 }
6869 }
6870 }
6871 else
6872 {
6873 // Non-branch
6874 if(i>0)
6875 {
6876 int temp1=-1,temp2=-1;
6877 //if(get_reg(regs[i].regmap,rt1[i]|64)>=0)
6878 //{
6879 // d1=dep1[i];
6880 // d2=dep2[i];
6881 //}
6882 if(itype[i]==LOAD || itype[i]==STORE || itype[i]==RMW ||
6883 itype[i]==PCREL || itype[i]==SYSTEM )
6884 temp1=MOREG;
6885 if(itype[i]==COMPLEX) {
6886 temp1=MACH;
6887 temp2=MACL;
6888 }
6889 else if(itype[i]==SYSTEM) {
6890 temp2=CCREG;
6891 }
6892 if(regs[i].regmap[hr]!=rt1[i] && regs[i].regmap[hr]!=rt2[i] &&
6893 regs[i].regmap[hr]!=rs1[i] && regs[i].regmap[hr]!=rs2[i] &&
6894 regs[i].regmap[hr]!=rs3[i] &&
6895 regs[i].regmap[hr]!=temp1 && regs[i].regmap[hr]!=temp2 &&
6896 regs[i].regmap[hr]!=CCREG)
6897 {
6898 if(i<slen-1&&!is_ds[i]) {
6899 if(regmap_pre[i+1][hr]!=-1 || regs[i].regmap[hr]!=-1)
6900 if(regmap_pre[i+1][hr]!=regs[i].regmap[hr])
6901 {
6902 printf("fail: %x (%d %d!=%d)\n",start+i*2,hr,regmap_pre[i+1][hr],regs[i].regmap[hr]);
6903 assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]);
6904 }
6905 regmap_pre[i+1][hr]=-1;
6906 if(regs[i+1].regmap_entry[hr]==CCREG) regs[i+1].regmap_entry[hr]=-1;
6907 regs[i+1].wasdoingcp&=~(1<<hr);
6908 }
6909 regs[i].regmap[hr]=-1;
6910 regs[i].isdoingcp&=~(1<<hr);
6911 }
6912 }
6913 }
6914 }
6915 }
6916 }
6917 }
6918
6919 /* Pass 5 - Pre-allocate registers */
6920
6921 // If a register is allocated during a loop, try to allocate it for the
6922 // entire loop, if possible. This avoids loading/storing registers
6923 // inside of the loop.
6924 {
6925 signed char f_regmap[HOST_REGS];
6926 clear_all_regs(f_regmap);
6927 for(i=0;i<slen-1;i++)
6928 {
6929 if(itype[i]==UJUMP||itype[i]==SJUMP||itype[i]==CJUMP)
6930 {
6931 if(ba[i]>=start && ba[i]<(start+i*2))
6932 if(itype[i]==CJUMP||itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU
6933 ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM8||itype[i+1]==LOAD
6934 ||itype[i+1]==STORE||itype[i+1]==RMW||itype[i+1]==PCREL||itype[i+1]==EXT||itype[i+1]==FLAGS)
6935 {
6936 // Track register allocation
6937 int t=(ba[i]-start)>>1;
6938 if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=SJUMP)) // loop_preload can't handle jumps into delay slots
6939 if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=PR) // call/ret assumes no registers allocated
6940 for(hr=0;hr<HOST_REGS;hr++)
6941 {
6942 if(regs[i].regmap[hr]>=0) {
6943 if(f_regmap[hr]!=regs[i].regmap[hr]) {
6944 // dealloc old register
6945 int n;
6946 for(n=0;n<HOST_REGS;n++)
6947 {
6948 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
6949 }
6950 // and alloc new one
6951 f_regmap[hr]=regs[i].regmap[hr];
6952 }
6953 }
6954 if(branch_regs[i].regmap[hr]>=0) {
6955 if(f_regmap[hr]!=branch_regs[i].regmap[hr]) {
6956 // dealloc old register
6957 int n;
6958 for(n=0;n<HOST_REGS;n++)
6959 {
6960 if(f_regmap[n]==branch_regs[i].regmap[hr]) {f_regmap[n]=-1;}
6961 }
6962 // and alloc new one
6963 f_regmap[hr]=branch_regs[i].regmap[hr];
6964 }
6965 }
6966 if(ooo[i]) {
6967 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1])
6968 f_regmap[hr]=branch_regs[i].regmap[hr];
6969 }else{
6970 if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1])
6971 f_regmap[hr]=branch_regs[i].regmap[hr];
6972 }
6973 // Avoid dirty->clean transition
6974 //if(t>0) if(get_reg(regmap_pre[t],f_regmap[hr])>=0) if((regs[t].wasdirty>>get_reg(regmap_pre[t],f_regmap[hr]))&1) f_regmap[hr]=-1;
6975 // This check isn't required, but it's a good idea. We can't hoist
6976 // the load if the register was already allocated, so there's no
6977 // point wasting time analyzing most of these cases. It only
6978 // "succeeds" when the mapping was different and the load can be
6979 // replaced with a mov, which is of negligible benefit. So such
6980 // cases are skipped below.
6981 if(f_regmap[hr]>=0) {
6982 if(regs[t].regmap[hr]==f_regmap[hr]||(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0)) {
6983 int r=f_regmap[hr];
6984 for(j=t;j<=i;j++)
6985 {
6986 //printf("Test %x -> %x, %x %d/%d\n",start+i*2,ba[i],start+j*2,hr,r);
6987 if(r<TBIT&&((unneeded_reg[j]>>r)&1)) break;
6988 if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63)<TEMPREG) {
6989 //printf("Hit %x -> %x, %x %d/%d\n",start+i*2,ba[i],start+j*2,hr,r);
6990 int k;
6991 if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) {
6992 if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break;
6993 k=i;
6994 while(k>1&®s[k-1].regmap[hr]==-1) {
6995 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
6996 //printf("no free regs for store %x\n",start+(k-1)*4);
6997 break;
6998 }
6999 if(get_reg(regs[k-1].regmap,f_regmap[hr])>=0) {
7000 //printf("no-match due to different register\n");
7001 break;
7002 }
7003 if(itype[k-2]==UJUMP||itype[k-2]==RJUMP||itype[k-2]==CJUMP||itype[k-2]==SJUMP) {
7004 //printf("no-match due to branch\n");
7005 break;
7006 }
7007 // call/ret fast path assumes no registers allocated
7008 if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==PR) {
7009 break;
7010 }
7011 k--;
7012 }
7013 if(regs[k-1].regmap[hr]==f_regmap[hr]&®map_pre[k][hr]==f_regmap[hr]) {
7014 //printf("Extend r%d, %x ->\n",hr,start+k*4);
7015 while(k<i) {
7016 regs[k].regmap_entry[hr]=f_regmap[hr];
7017 regs[k].regmap[hr]=f_regmap[hr];
7018 regmap_pre[k+1][hr]=f_regmap[hr];
7019 regs[k].wasdirty&=~(1<<hr);
7020 regs[k].dirty&=~(1<<hr);
7021 regs[k].wasdirty|=(1<<hr)®s[k-1].dirty;
7022 regs[k].dirty|=(1<<hr)®s[k].wasdirty;
7023 regs[k].wasdoingcp&=~(1<<hr);
7024 regs[k].isdoingcp&=~(1<<hr);
7025 k++;
7026 }
7027 }
7028 else {
7029 //printf("Fail Extend r%d, %x ->\n",hr,start+k*4);
7030 break;
7031 }
7032 assert(regs[i-1].regmap[hr]==f_regmap[hr]);
7033 if(regs[i-1].regmap[hr]==f_regmap[hr]&®map_pre[i][hr]==f_regmap[hr]) {
7034 //printf("OK fill %x (r%d)\n",start+i*4,hr);
7035 regs[i].regmap_entry[hr]=f_regmap[hr];
7036 regs[i].regmap[hr]=f_regmap[hr];
7037 regs[i].wasdirty&=~(1<<hr);
7038 regs[i].dirty&=~(1<<hr);
7039 regs[i].wasdirty|=(1<<hr)®s[i-1].dirty;
7040 regs[i].dirty|=(1<<hr)®s[i-1].dirty;
7041 regs[i].wasdoingcp&=~(1<<hr);
7042 regs[i].isdoingcp&=~(1<<hr);
7043 branch_regs[i].regmap_entry[hr]=f_regmap[hr];
7044 branch_regs[i].wasdirty&=~(1<<hr);
7045 branch_regs[i].wasdirty|=(1<<hr)®s[i].dirty;
7046 branch_regs[i].regmap[hr]=f_regmap[hr];
7047 branch_regs[i].dirty&=~(1<<hr);
7048 branch_regs[i].dirty|=(1<<hr)®s[i].dirty;
7049 branch_regs[i].wasdoingcp&=~(1<<hr);
7050 branch_regs[i].isdoingcp&=~(1<<hr);
7051 if(itype[i]==CJUMP) {
7052 regmap_pre[i+1][hr]=f_regmap[hr];
7053 regs[i+1].wasdirty&=~(1<<hr);
7054 regs[i+1].wasdirty|=(1<<hr)®s[i].dirty;
7055 }
7056 else if(itype[i]!=RJUMP&&itype[i]!=UJUMP) {
7057 regmap_pre[i+2][hr]=f_regmap[hr];
7058 regs[i+2].wasdirty&=~(1<<hr);
7059 regs[i+2].wasdirty|=(1<<hr)®s[i].dirty;
7060 }
7061 }
7062 }
7063 for(k=t;k<j;k++) {
7064 // Alloc register clean at beginning of loop,
7065 // but may dirty it in pass 6
7066 regs[k].regmap_entry[hr]=f_regmap[hr];
7067 regs[k].regmap[hr]=f_regmap[hr];
7068 regs[k].dirty&=~(1<<hr);
7069 regs[k].wasdoingcp&=~(1<<hr);
7070 regs[k].isdoingcp&=~(1<<hr);
7071 if(itype[k]==UJUMP||itype[k]==RJUMP||itype[k]==SJUMP) {
7072 branch_regs[k].regmap_entry[hr]=f_regmap[hr];
7073 branch_regs[k].regmap[hr]=f_regmap[hr];
7074 branch_regs[k].dirty&=~(1<<hr);
7075 branch_regs[k].wasdoingcp&=~(1<<hr);
7076 branch_regs[k].isdoingcp&=~(1<<hr);
7077 if(itype[k]!=RJUMP&&itype[k]!=UJUMP) {
7078 regmap_pre[k+2][hr]=f_regmap[hr];
7079 regs[k+2].wasdirty&=~(1<<hr);
7080 }
7081 }
7082 else
7083 {
7084 regmap_pre[k+1][hr]=f_regmap[hr];
7085 regs[k+1].wasdirty&=~(1<<hr);
7086 }
7087 }
7088 if(regs[j].regmap[hr]==f_regmap[hr])
7089 regs[j].regmap_entry[hr]=f_regmap[hr];
7090 break;
7091 }
7092 if(j==i) break;
7093 if(regs[j].regmap[hr]>=0)
7094 break;
7095 if(get_reg(regs[j].regmap,f_regmap[hr])>=0) {
7096 //printf("no-match due to different register\n");
7097 break;
7098 }
7099 if(itype[j]==UJUMP||itype[j]==RJUMP)
7100 {
7101 // Stop on unconditional branch
7102 break;
7103 }
7104 if(itype[j]==SJUMP)
7105 {
7106 if(ooo[j]) {
7107 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1])
7108 break;
7109 }else{
7110 if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1])
7111 break;
7112 }
7113 if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) {
7114 //printf("no-match due to different register (branch)\n");
7115 break;
7116 }
7117 }
7118 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
7119 //printf("No free regs for store %x\n",start+j*4);
7120 break;
7121 }
7122 }
7123 }
7124 }
7125 }
7126 }
7127 }else{
7128 // Non branch or undetermined branch target
7129 for(hr=0;hr<HOST_REGS;hr++)
7130 {
7131 if(hr!=EXCLUDE_REG) {
7132 if(regs[i].regmap[hr]>=0) {
7133 if(f_regmap[hr]!=regs[i].regmap[hr]) {
7134 // dealloc old register
7135 int n;
7136 for(n=0;n<HOST_REGS;n++)
7137 {
7138 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
7139 }
7140 // and alloc new one
7141 f_regmap[hr]=regs[i].regmap[hr];
7142 }
7143 }
7144 }
7145 }
7146 // Try to restore cycle count at branch targets
7147 if(bt[i]) {
7148 for(j=i;j<slen-1;j++) {
7149 if(regs[j].regmap[HOST_CCREG]!=-1) break;
7150 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
7151 //printf("no free regs for store %x\n",start+j*4);
7152 break;
7153 }
7154 }
7155 if(regs[j].regmap[HOST_CCREG]==CCREG) {
7156 int k=i;
7157 //printf("Extend CC, %x -> %x\n",start+k*4,start+j*4);
7158 while(k<j) {
7159 regs[k].regmap_entry[HOST_CCREG]=CCREG;
7160 regs[k].regmap[HOST_CCREG]=CCREG;
7161 regmap_pre[k+1][HOST_CCREG]=CCREG;
7162 regs[k+1].wasdirty|=1<<HOST_CCREG;
7163 regs[k].dirty|=1<<HOST_CCREG;
7164 regs[k].wasdoingcp&=~(1<<HOST_CCREG);
7165 regs[k].isdoingcp&=~(1<<HOST_CCREG);
7166 k++;
7167 }
7168 regs[j].regmap_entry[HOST_CCREG]=CCREG;
7169 }
7170 // Work backwards from the branch target
7171 if(j>i&&f_regmap[HOST_CCREG]==CCREG)
7172 {
7173 //printf("Extend backwards\n");
7174 int k;
7175 k=i;
7176 while(regs[k-1].regmap[HOST_CCREG]==-1) {
7177 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
7178 //printf("no free regs for store %x\n",start+(k-1)*4);
7179 break;
7180 }
7181 k--;
7182 }
7183 if(regs[k-1].regmap[HOST_CCREG]==CCREG) {
7184 //printf("Extend CC, %x ->\n",start+k*4);
7185 while(k<=i) {
7186 regs[k].regmap_entry[HOST_CCREG]=CCREG;
7187 regs[k].regmap[HOST_CCREG]=CCREG;
7188 regmap_pre[k+1][HOST_CCREG]=CCREG;
7189 regs[k+1].wasdirty|=1<<HOST_CCREG;
7190 regs[k].dirty|=1<<HOST_CCREG;
7191 regs[k].wasdoingcp&=~(1<<HOST_CCREG);
7192 regs[k].isdoingcp&=~(1<<HOST_CCREG);
7193 k++;
7194 }
7195 }
7196 else {
7197 //printf("Fail Extend CC, %x ->\n",start+k*4);
7198 }
7199 }
7200 }
7201 // Don't try to add registers to complex instructions like MAC, division, etc.
7202 if(itype[i]!=STORE&&itype[i]!=RMW&&itype[i]!=PCREL&&
7203 itype[i]!=NOP&&itype[i]!=MOV&&itype[i]!=ALU&&itype[i]!=SHIFTIMM&&
7204 itype[i]!=IMM8&&itype[i]!=LOAD&&itype[i]!=EXT&&itype[i]!=FLAGS)
7205 {
7206 memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap));
7207 }
7208 }
7209 }
7210
7211 // Cache memory_map pointer if a register is available
7212 #ifndef HOST_IMM_ADDR32
7213 {
7214 int earliest_available[HOST_REGS];
7215 int loop_start[HOST_REGS];
7216 int score[HOST_REGS];
7217 int end[HOST_REGS];
7218 int reg=MMREG;
7219
7220 // Init
7221 for(hr=0;hr<HOST_REGS;hr++) {
7222 score[hr]=0;earliest_available[hr]=0;
7223 loop_start[hr]=MAXBLOCK;
7224 }
7225 for(i=0;i<slen-1;i++)
7226 {
7227 // Can't do anything if no registers are available
7228 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i]) {
7229 for(hr=0;hr<HOST_REGS;hr++) {
7230 score[hr]=0;earliest_available[hr]=i+1;
7231 loop_start[hr]=MAXBLOCK;
7232 }
7233 }
7234 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==SJUMP) {
7235 if(!ooo[i]) {
7236 if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) {
7237 for(hr=0;hr<HOST_REGS;hr++) {
7238 score[hr]=0;earliest_available[hr]=i+1;
7239 loop_start[hr]=MAXBLOCK;
7240 }
7241 }
7242 }else{
7243 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) {
7244 for(hr=0;hr<HOST_REGS;hr++) {
7245 score[hr]=0;earliest_available[hr]=i+1;
7246 loop_start[hr]=MAXBLOCK;
7247 }
7248 }
7249 }
7250 }
7251 // Mark unavailable registers
7252 for(hr=0;hr<HOST_REGS;hr++) {
7253 if(regs[i].regmap[hr]>=0) {
7254 score[hr]=0;earliest_available[hr]=i+1;
7255 loop_start[hr]=MAXBLOCK;
7256 }
7257 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==SJUMP) {
7258 if(branch_regs[i].regmap[hr]>=0) {
7259 score[hr]=0;earliest_available[hr]=i+2;
7260 loop_start[hr]=MAXBLOCK;
7261 }
7262 }
7263 }
7264 // No register allocations after unconditional jumps
7265 if(itype[i]==UJUMP||itype[i]==RJUMP)
7266 {
7267 for(hr=0;hr<HOST_REGS;hr++) {
7268 score[hr]=0;earliest_available[hr]=i+2;
7269 loop_start[hr]=MAXBLOCK;
7270 }
7271 i++; // Skip delay slot too
7272 //printf("skip delay slot: %x\n",start+i*4);
7273 }
7274 else
7275 // Possible match
7276 if(itype[i]==LOAD||itype[i]==STORE||itype[i]==RMW) {
7277 for(hr=0;hr<HOST_REGS;hr++) {
7278 if(hr!=EXCLUDE_REG) {
7279 end[hr]=i-1;
7280 for(j=i;j<slen-1;j++) {
7281 if(regs[j].regmap[hr]>=0) break;
7282 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==SJUMP) {
7283 if(branch_regs[j].regmap[hr]>=0) break;
7284 if(ooo[j]) {
7285 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break;
7286 }else{
7287 if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break;
7288 }
7289 }
7290 else if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) break;
7291 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP) {
7292 int t=(ba[j]-start)>>1;
7293 if(t<j&&t>=earliest_available[hr]) {
7294 if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=PR)) { // call/ret assumes no registers allocated
7295 // Score a point for hoisting loop invariant
7296 if(t<loop_start[hr]) loop_start[hr]=t;
7297 //printf("set loop_start: i=%x j=%x (%x)\n",start+i*2,start+j*2,start+t*2);
7298 score[hr]++;
7299 end[hr]=j;
7300 }
7301 }
7302 else if(t<j) {
7303 if(regs[t].regmap[hr]==reg) {
7304 // Score a point if the branch target matches this register
7305 score[hr]++;
7306 end[hr]=j;
7307 }
7308 }
7309 if(itype[j+1]==LOAD||itype[j+1]==STORE||itype[j+1]==RMW) {
7310 score[hr]++;
7311 end[hr]=j;
7312 }
7313 }
7314 if(itype[j]==UJUMP||itype[j]==RJUMP)
7315 {
7316 // Stop on unconditional branch
7317 break;
7318 }
7319 else
7320 if(itype[j]==LOAD||itype[j]==STORE||itype[j]==RMW) {
7321 score[hr]++;
7322 end[hr]=j;
7323 }
7324 }
7325 }
7326 }
7327 // Find highest score and allocate that register
7328 int maxscore=0;
7329 for(hr=0;hr<HOST_REGS;hr++) {
7330 if(hr!=EXCLUDE_REG) {
7331 if(score[hr]>score[maxscore]) {
7332 maxscore=hr;
7333 //printf("highest score: %d %d (%x->%x)\n",score[hr],hr,start+i*2,start+end[hr]*2);
7334 }
7335 }
7336 }
7337 if(score[maxscore]>1)
7338 {
7339 if(i<loop_start[maxscore]) loop_start[maxscore]=i;
7340 for(j=loop_start[maxscore];j<slen&&j<=end[maxscore];j++) {
7341 //if(regs[j].regmap[maxscore]>=0) {printf("oops: %x %x was %d=%d\n",loop_start[maxscore]*2+start,j*2+start,maxscore,regs[j].regmap[maxscore]);}
7342 assert(regs[j].regmap[maxscore]<0);
7343 if(j>loop_start[maxscore]) regs[j].regmap_entry[maxscore]=reg;
7344 regs[j].regmap[maxscore]=reg;
7345 regs[j].dirty&=~(1<<maxscore);
7346 regs[j].wasdoingcp&=~(1<<maxscore);
7347 regs[j].isdoingcp&=~(1<<maxscore);
7348 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP) {
7349 if(itype[j]!=CJUMP) {
7350 branch_regs[j].regmap[maxscore]=reg;
7351 branch_regs[j].wasdirty&=~(1<<maxscore);
7352 branch_regs[j].dirty&=~(1<<maxscore);
7353 branch_regs[j].wasdoingcp&=~(1<<maxscore);
7354 branch_regs[j].isdoingcp&=~(1<<maxscore);
7355 if(itype[j]==SJUMP) {
7356 regmap_pre[j+2][maxscore]=reg;
7357 regs[j+2].wasdirty&=~(1<<maxscore);
7358 }
7359 }
7360 else { // if(itype[j]==CJUMP)
7361 regmap_pre[j+1][maxscore]=reg;
7362 regs[j+1].wasdirty&=~(1<<maxscore);
7363 }
7364 // loop optimization (loop_preload)
7365 int t=(ba[j]-start)>>1;
7366 if(t==loop_start[maxscore]) {
7367 if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=PR)) // call/ret assumes no registers allocated
7368 regs[t].regmap_entry[maxscore]=reg;
7369 }
7370 }
7371 else
7372 {
7373 if(j<1||(itype[j-1]!=RJUMP&&itype[j-1]!=UJUMP&&itype[j-1]!=SJUMP)) {
7374 regmap_pre[j+1][maxscore]=reg;
7375 regs[j+1].wasdirty&=~(1<<maxscore);
7376 }
7377 }
7378 }
7379 i=j-1;
7380 if(itype[j-1]==RJUMP||itype[j-1]==UJUMP||itype[j-1]==SJUMP) i++; // skip delay slot
7381 for(hr=0;hr<HOST_REGS;hr++) {
7382 score[hr]=0;earliest_available[hr]=i+i;
7383 loop_start[hr]=MAXBLOCK;
7384 }
7385 }
7386 }
7387 }
7388 }
7389 #endif
7390
7391 // This allocates registers (if possible) one instruction prior
7392 // to use, which can avoid a load-use penalty on certain CPUs.
7393 for(i=0;i<slen-1;i++)
7394 {
7395 if(!i||(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP))
7396 {
7397 if(!bt[i+1])
7398 {
7399 if(itype[i]==LOAD||itype[i]==PCREL||itype[i]==MOV||itype[i]==ALU||itype[i]==SHIFTIMM||itype[i]==IMM8||itype[i]==EXT||itype[i]==FLAGS)
7400 {
7401 if(rs1[i+1]>=0) {
7402 if((hr=get_reg(regs[i+1].regmap,rs1[i+1]==TBIT?SR:rs1[i+1]))>=0)
7403 {
7404 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0
7405 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7406 {
7407 regs[i].regmap[hr]=regs[i+1].regmap[hr];
7408 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
7409 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
7410 regs[i].isdoingcp&=~(1<<hr);
7411 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7412 cpmap[i][hr]=cpmap[i+1][hr];
7413 regs[i+1].wasdirty&=~(1<<hr);
7414 regs[i].dirty&=~(1<<hr);
7415 }
7416 }
7417 }
7418 if(rs2[i+1]>=0) {
7419 if((hr=get_reg(regs[i+1].regmap,rs2[i+1]==TBIT?SR:rs2[i+1]))>=0)
7420 {
7421 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0
7422 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7423 {
7424 regs[i].regmap[hr]=regs[i+1].regmap[hr];
7425 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
7426 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
7427 regs[i].isdoingcp&=~(1<<hr);
7428 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7429 cpmap[i][hr]=cpmap[i+1][hr];
7430 regs[i+1].wasdirty&=~(1<<hr);
7431 regs[i].dirty&=~(1<<hr);
7432 }
7433 }
7434 }
7435 if(rs3[i+1]>=0) {
7436 if((hr=get_reg(regs[i+1].regmap,rs3[i+1]==TBIT?SR:rs3[i+1]))>=0)
7437 {
7438 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0
7439 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7440 {
7441 regs[i].regmap[hr]=regs[i+1].regmap[hr];
7442 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
7443 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
7444 regs[i].isdoingcp&=~(1<<hr);
7445 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7446 cpmap[i][hr]=cpmap[i+1][hr];
7447 regs[i+1].wasdirty&=~(1<<hr);
7448 regs[i].dirty&=~(1<<hr);
7449 }
7450 }
7451 }
7452 if(rt1[i+1]==TBIT||rt2[i+1]==TBIT) {
7453 if(rt1[i+1]!=SR&&rt2[i+1]!=SR)
7454 if((hr=get_reg(regs[i+1].regmap,SR))>=0)
7455 {
7456 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0
7457 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7458 {
7459 regs[i].regmap[hr]=regs[i+1].regmap[hr];
7460 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
7461 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
7462 regs[i].isdoingcp&=~(1<<hr);
7463 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7464 cpmap[i][hr]=cpmap[i+1][hr];
7465 regs[i+1].wasdirty&=~(1<<hr);
7466 regs[i].dirty&=~(1<<hr);
7467 }
7468 }
7469 }
7470 // Preload target address for load instruction (non-constant)
7471 if(itype[i+1]==LOAD&&rs1[i+1]>=0&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
7472 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
7473 {
7474 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0
7475 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7476 {
7477 regs[i].regmap[hr]=rs1[i+1];
7478 regmap_pre[i+1][hr]=rs1[i+1];
7479 regs[i+1].regmap_entry[hr]=rs1[i+1];
7480 regs[i].isdoingcp&=~(1<<hr);
7481 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7482 cpmap[i][hr]=cpmap[i+1][hr];
7483 regs[i+1].wasdirty&=~(1<<hr);
7484 regs[i].dirty&=~(1<<hr);
7485 }
7486 }
7487 }
7488 #if 0
7489 // Load source into target register (not implemented)
7490 if(lt1[i+1]>=0&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
7491 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
7492 {
7493 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0
7494 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7495 {
7496 regs[i].regmap[hr]=rs1[i+1];
7497 regmap_pre[i+1][hr]=rs1[i+1];
7498 regs[i+1].regmap_entry[hr]=rs1[i+1];
7499 regs[i].isdoingcp&=~(1<<hr);
7500 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7501 cpmap[i][hr]=cpmap[i+1][hr];
7502 regs[i+1].wasdirty&=~(1<<hr);
7503 regs[i].dirty&=~(1<<hr);
7504 }
7505 }
7506 }
7507 #endif
7508 #ifndef HOST_IMM_ADDR32
7509 // Preload map address
7510 if(itype[i+1]==LOAD||itype[i+1]==STORE) {
7511 hr=get_reg(regs[i+1].regmap,MOREG);
7512 if(hr>=0) {
7513 int sr=get_reg(regs[i+1].regmap,rs1[i+1]);
7514 if(sr>=0&&((regs[i+1].wasdoingcp>>sr)&1)
7515 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i]) {
7516 int nr;
7517 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0)
7518 {
7519 regs[i].regmap[hr]=MGEN1+((i+1)&1);
7520 regmap_pre[i+1][hr]=MGEN1+((i+1)&1);
7521 regs[i+1].regmap_entry[hr]=MGEN1+((i+1)&1);
7522 regs[i].isdoingcp&=~(1<<hr);
7523 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7524 cpmap[i][hr]=cpmap[i+1][hr];
7525 regs[i+1].wasdirty&=~(1<<hr);
7526 regs[i].dirty&=~(1<<hr);
7527 }
7528 else if((nr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1))>=0)
7529 {
7530 // move it to another register
7531 regs[i+1].regmap[hr]=-1;
7532 regmap_pre[i+2][hr]=-1;
7533 regs[i+1].regmap[nr]=MOREG;
7534 regmap_pre[i+2][nr]=MOREG;
7535 regs[i].regmap[nr]=MGEN1+((i+1)&1);
7536 regmap_pre[i+1][nr]=MGEN1+((i+1)&1);
7537 regs[i+1].regmap_entry[nr]=MGEN1+((i+1)&1);
7538 regs[i].isdoingcp&=~(1<<nr);
7539 regs[i+1].isdoingcp&=~(1<<nr);
7540 regs[i].dirty&=~(1<<nr);
7541 regs[i+1].wasdirty&=~(1<<nr);
7542 regs[i+1].dirty&=~(1<<nr);
7543 regs[i+2].wasdirty&=~(1<<nr);
7544 }
7545 }
7546 }
7547 }
7548 #endif
7549 // Address for store instruction (non-constant)
7550 if(itype[i+1]==STORE) {
7551 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {
7552 hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1);
7553 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
7554 else {regs[i+1].regmap[hr]=AGEN1+((i+1)&1);regs[i+1].isdoingcp&=~(1<<hr);}
7555 assert(hr>=0);
7556 if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0
7557 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7558 {
7559 regs[i].regmap[hr]=rs1[i+1];
7560 regmap_pre[i+1][hr]=rs1[i+1];
7561 regs[i+1].regmap_entry[hr]=rs1[i+1];
7562 regs[i].isdoingcp&=~(1<<hr);
7563 regs[i].isdoingcp|=regs[i+1].isdoingcp&(1<<hr);
7564 cpmap[i][hr]=cpmap[i+1][hr];
7565 regs[i+1].wasdirty&=~(1<<hr);
7566 regs[i].dirty&=~(1<<hr);
7567 }
7568 }
7569 }
7570 // Load/store address (constant)
7571 if(itype[i+1]==LOAD||itype[i+1]==STORE) {
7572 if(itype[i+1]==LOAD)
7573 hr=get_reg(regs[i+1].regmap,rt1[i+1]);
7574 if(itype[i+1]==STORE) {
7575 hr=get_reg(regs[i+1].regmap,AGEN1+((i+1)&1));
7576 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
7577 }
7578 if(hr>=0&®s[i].regmap[hr]<0
7579 &&count_free_regs(regs[i].regmap)>minimum_free_regs[i])
7580 {
7581 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
7582 if(rs>=0&&((regs[i+1].wasdoingcp>>rs)&1)) {
7583 regs[i].regmap[hr]=AGEN1+((i+1)&1);
7584 regmap_pre[i+1][hr]=AGEN1+((i+1)&1);
7585 regs[i+1].regmap_entry[hr]=AGEN1+((i+1)&1);
7586 regs[i].isdoingcp&=~(1<<hr);
7587 regs[i+1].wasdirty&=~(1<<hr);
7588 regs[i].dirty&=~(1<<hr);
7589 }
7590 }
7591 }
7592 }
7593 }
7594 }
7595 }
7596 }
7597
7598 /* Pass 6 - Optimize clean/dirty state */
7599 clean_registers(0,slen-1,1);
7600
7601 /* Pass 7 - Identify interrupt return locations */
7602
7603 for (i=slen-1;i>=0;i--)
7604 {
7605 if(itype[i]==CJUMP||itype[i]==SJUMP)
7606 {
7607 // Avoid unnecessary constant propagation
7608 int hr;
7609 u32 sregs;
7610 for(hr=0;hr<HOST_REGS;hr++) {
7611 if(hr!=EXCLUDE_REG) {
7612 if(regs[i].regmap_entry[hr]>=0) {
7613 if(itype[i]==SJUMP) {
7614 if(regs[i].regmap_entry[hr]==rs1[i+1]) continue;
7615 if(regs[i].regmap_entry[hr]==rs2[i+1]) continue;
7616 if(regs[i].regmap_entry[hr]==rs3[i+1]) continue;
7617 if(regs[i].regmap_entry[hr]==rt1[i+1]) continue;
7618 if(regs[i].regmap_entry[hr]==rt2[i+1]) continue;
7619 }
7620 if(i>0) {
7621 if(regs[i].regmap_entry[hr]==rs1[i-1]) continue;
7622 if(regs[i].regmap_entry[hr]==rs2[i-1]) continue;
7623 if(regs[i].regmap_entry[hr]==rs3[i-1]) continue;
7624 if(regs[i].regmap_entry[hr]==rt1[i-1]) continue;
7625 if(regs[i].regmap_entry[hr]==rt2[i-1]) continue;
7626 }
7627 //if(regs[i].wasdoingcp&(1<<hr)) printf("drop wcp: %x\n",start+i*2);
7628 //if(regs[i].isdoingcp&(1<<hr)) printf("drop icp: %x\n",start+i*2);
7629 regs[i].wasdoingcp&=~(1<<hr);
7630 regs[i].isdoingcp&=~(1<<hr);
7631 }
7632 }
7633 }
7634 sregs=0;
7635 if(itype[i]==SJUMP)
7636 {
7637 // Don't intervene if constant propagation is being performed
7638 // on a register used by an instruction in the delay slot
7639 if(itype[i+1]==LOAD) {
7640 if(rs1[i+1]>=0) sregs|=1<<rs1[i+1];
7641 if(rs2[i+1]>=0) sregs|=1<<rs2[i+1];
7642 }
7643 if(itype[i+1]==STORE) {
7644 if(rs2[i+1]>=0) sregs|=1<<rs2[i+1];
7645 if(rs3[i+1]>=0) sregs|=1<<rs3[i+1];
7646 }
7647 }
7648 // If no constant propagation is being done, mark this address as a
7649 // branch target since it may be called upon return from interrupt
7650 if(!regs[i].wasdoingcp&&!(regs[i].isconst&sregs))
7651 bt[i]=1;
7652 }
7653 }
7654
7655 /* Debug/disassembly */
7656 if((void*)assem_debug==(void*)printf)
7657 for(i=0;i<slen;i++)
7658 {
7659 int r;
7660 printf("U:");
7661 for(r=0;r<=CCREG;r++) {
7662 if((unneeded_reg[i]>>r)&1) {
7663 if(r==SR) printf(" SR(16)");
7664 else if(r==GBR) printf(" GBR(17)");
7665 else if(r==VBR) printf(" VBR(18)");
7666 else if(r==MACH) printf(" MACH(19)");
7667 else if(r==MACL) printf(" MACL(20)");
7668 else if(r==PR) printf(" PR(21)");
7669 else if(r==TBIT) printf(" T(22)");
7670 else printf(" r%d",r);
7671 }
7672 }
7673 printf("\n");
7674 #if defined(__i386__) || defined(__x86_64__)
7675 printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]);
7676 #endif
7677 #ifdef __arm__
7678 printf("pre: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][4],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7],regmap_pre[i][8],regmap_pre[i][9],regmap_pre[i][10],regmap_pre[i][12]);
7679 #endif
7680 printf("needs: ");
7681 if(needed_reg[i]&1) printf("eax ");
7682 if((needed_reg[i]>>1)&1) printf("ecx ");
7683 if((needed_reg[i]>>2)&1) printf("edx ");
7684 if((needed_reg[i]>>3)&1) printf("ebx ");
7685 if((needed_reg[i]>>5)&1) printf("ebp ");
7686 if((needed_reg[i]>>6)&1) printf("esi ");
7687 if((needed_reg[i]>>7)&1) printf("edi ");
7688 printf("\n");
7689 #if defined(__i386__) || defined(__x86_64__)
7690 printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]);
7691 printf("dirty: ");
7692 if(regs[i].wasdirty&1) printf("eax ");
7693 if((regs[i].wasdirty>>1)&1) printf("ecx ");
7694 if((regs[i].wasdirty>>2)&1) printf("edx ");
7695 if((regs[i].wasdirty>>3)&1) printf("ebx ");
7696 if((regs[i].wasdirty>>5)&1) printf("ebp ");
7697 if((regs[i].wasdirty>>6)&1) printf("esi ");
7698 if((regs[i].wasdirty>>7)&1) printf("edi ");
7699 #endif
7700 #ifdef __arm__
7701 printf("entry: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[4],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7],regs[i].regmap_entry[8],regs[i].regmap_entry[9],regs[i].regmap_entry[10],regs[i].regmap_entry[12]);
7702 printf("dirty: ");
7703 if(regs[i].wasdirty&1) printf("r0 ");
7704 if((regs[i].wasdirty>>1)&1) printf("r1 ");
7705 if((regs[i].wasdirty>>2)&1) printf("r2 ");
7706 if((regs[i].wasdirty>>3)&1) printf("r3 ");
7707 if((regs[i].wasdirty>>4)&1) printf("r4 ");
7708 if((regs[i].wasdirty>>5)&1) printf("r5 ");
7709 if((regs[i].wasdirty>>6)&1) printf("r6 ");
7710 if((regs[i].wasdirty>>7)&1) printf("r7 ");
7711 if((regs[i].wasdirty>>8)&1) printf("r8 ");
7712 if((regs[i].wasdirty>>9)&1) printf("r9 ");
7713 if((regs[i].wasdirty>>10)&1) printf("r10 ");
7714 if((regs[i].wasdirty>>12)&1) printf("r12 ");
7715 #endif
7716 printf("ccadj=%d",ccadj[i]);
7717 printf("\n");
7718 disassemble_inst(i);
7719 //printf ("ccadj[%d] = %d\n",i,ccadj[i]);
7720 #if defined(__i386__) || defined(__x86_64__)
7721 printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7]);
7722 if(regs[i].dirty&1) printf("eax ");
7723 if((regs[i].dirty>>1)&1) printf("ecx ");
7724 if((regs[i].dirty>>2)&1) printf("edx ");
7725 if((regs[i].dirty>>3)&1) printf("ebx ");
7726 if((regs[i].dirty>>5)&1) printf("ebp ");
7727 if((regs[i].dirty>>6)&1) printf("esi ");
7728 if((regs[i].dirty>>7)&1) printf("edi ");
7729 #endif
7730 #ifdef __arm__
7731 printf("r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[4],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7],regs[i].regmap[8],regs[i].regmap[9],regs[i].regmap[10],regs[i].regmap[12]);
7732 if(regs[i].dirty&1) printf("r0 ");
7733 if((regs[i].dirty>>1)&1) printf("r1 ");
7734 if((regs[i].dirty>>2)&1) printf("r2 ");
7735 if((regs[i].dirty>>3)&1) printf("r3 ");
7736 if((regs[i].dirty>>4)&1) printf("r4 ");
7737 if((regs[i].dirty>>5)&1) printf("r5 ");
7738 if((regs[i].dirty>>6)&1) printf("r6 ");
7739 if((regs[i].dirty>>7)&1) printf("r7 ");
7740 if((regs[i].dirty>>8)&1) printf("r8 ");
7741 if((regs[i].dirty>>9)&1) printf("r9 ");
7742 if((regs[i].dirty>>10)&1) printf("r10 ");
7743 if((regs[i].dirty>>12)&1) printf("r12 ");
7744 #endif
7745 printf("\n");
7746 if(regs[i].isdoingcp) {
7747 printf("constants: ");
7748 #if defined(__i386__) || defined(__x86_64__)
7749 if(regs[i].isdoingcp&1) printf("eax=%x ",(int)cpmap[i][0]);
7750 if((regs[i].isdoingcp>>1)&1) printf("ecx=%x ",(int)cpmap[i][1]);
7751 if((regs[i].isdoingcp>>2)&1) printf("edx=%x ",(int)cpmap[i][2]);
7752 if((regs[i].isdoingcp>>3)&1) printf("ebx=%x ",(int)cpmap[i][3]);
7753 if((regs[i].isdoingcp>>5)&1) printf("ebp=%x ",(int)cpmap[i][5]);
7754 if((regs[i].isdoingcp>>6)&1) printf("esi=%x ",(int)cpmap[i][6]);
7755 if((regs[i].isdoingcp>>7)&1) printf("edi=%x ",(int)cpmap[i][7]);
7756 #endif
7757 #ifdef __arm__
7758 if(regs[i].isdoingcp&1) printf("r0=%x ",(int)cpmap[i][0]);
7759 if((regs[i].isdoingcp>>1)&1) printf("r1=%x ",(int)cpmap[i][1]);
7760 if((regs[i].isdoingcp>>2)&1) printf("r2=%x ",(int)cpmap[i][2]);
7761 if((regs[i].isdoingcp>>3)&1) printf("r3=%x ",(int)cpmap[i][3]);
7762 if((regs[i].isdoingcp>>4)&1) printf("r4=%x ",(int)cpmap[i][4]);
7763 if((regs[i].isdoingcp>>5)&1) printf("r5=%x ",(int)cpmap[i][5]);
7764 if((regs[i].isdoingcp>>6)&1) printf("r6=%x ",(int)cpmap[i][6]);
7765 if((regs[i].isdoingcp>>7)&1) printf("r7=%x ",(int)cpmap[i][7]);
7766 if((regs[i].isdoingcp>>8)&1) printf("r8=%x ",(int)cpmap[i][8]);
7767 if((regs[i].isdoingcp>>9)&1) printf("r9=%x ",(int)cpmap[i][9]);
7768 if((regs[i].isdoingcp>>10)&1) printf("r10=%x ",(int)cpmap[i][10]);
7769 if((regs[i].isdoingcp>>12)&1) printf("r12=%x ",(int)cpmap[i][12]);
7770 #endif
7771 printf("\n");
7772 }
7773 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) {
7774 #if defined(__i386__) || defined(__x86_64__)
7775 printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
7776 if(branch_regs[i].dirty&1) printf("eax ");
7777 if((branch_regs[i].dirty>>1)&1) printf("ecx ");
7778 if((branch_regs[i].dirty>>2)&1) printf("edx ");
7779 if((branch_regs[i].dirty>>3)&1) printf("ebx ");
7780 if((branch_regs[i].dirty>>5)&1) printf("ebp ");
7781 if((branch_regs[i].dirty>>6)&1) printf("esi ");
7782 if((branch_regs[i].dirty>>7)&1) printf("edi ");
7783 #endif
7784 #ifdef __arm__
7785 printf("branch(%d): r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[4],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7],branch_regs[i].regmap[8],branch_regs[i].regmap[9],branch_regs[i].regmap[10],branch_regs[i].regmap[12]);
7786 if(branch_regs[i].dirty&1) printf("r0 ");
7787 if((branch_regs[i].dirty>>1)&1) printf("r1 ");
7788 if((branch_regs[i].dirty>>2)&1) printf("r2 ");
7789 if((branch_regs[i].dirty>>3)&1) printf("r3 ");
7790 if((branch_regs[i].dirty>>4)&1) printf("r4 ");
7791 if((branch_regs[i].dirty>>5)&1) printf("r5 ");
7792 if((branch_regs[i].dirty>>6)&1) printf("r6 ");
7793 if((branch_regs[i].dirty>>7)&1) printf("r7 ");
7794 if((branch_regs[i].dirty>>8)&1) printf("r8 ");
7795 if((branch_regs[i].dirty>>9)&1) printf("r9 ");
7796 if((branch_regs[i].dirty>>10)&1) printf("r10 ");
7797 if((branch_regs[i].dirty>>12)&1) printf("r12 ");
7798 #endif
7799 printf("\n");
7800 }
7801 }
7802
7803 /* Pass 8 - Assembly */
7804 {
7805 u32 dirty_pre=0;
7806 linkcount=0;stubcount=0;
7807 ds=0;is_delayslot=0;
7808 beginning=(pointer)out;
7809 for(i=0;i<slen;i++)
7810 {
7811 //if(ds) printf("ds: ");
7812 if((void*)assem_debug==(void*)printf) disassemble_inst(i);
7813 if(ds) {
7814 ds=0; // Skip delay slot
7815 if(bt[i]) assem_debug("OOPS - branch into delay slot\n");
7816 instr_addr[i]=0;
7817 } else {
7818 int srloaded;
7819
7820 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&itype[i-1]!=DATA))
7821 {
7822 wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,
7823 unneeded_reg[i]);
7824 }
7825 if(itype[i]==SJUMP) dirty_pre=branch_regs[i].dirty;
7826 else dirty_pre=regs[i].dirty;
7827 // write back
7828 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&itype[i-1]!=DATA))
7829 {
7830 wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,
7831 unneeded_reg[i]);
7832 loop_preload(regmap_pre[i],regs[i].regmap_entry);
7833 }
7834 // branch target entry point
7835 instr_addr[i]=(pointer)out;
7836 assem_debug("<->\n");
7837 // load regs
7838 if(regs[i].regmap_entry[HOST_CCREG]==CCREG&®s[i].regmap[HOST_CCREG]!=CCREG)
7839 wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty);
7840 load_regs(regs[i].regmap_entry,regs[i].regmap,rs1[i],rs2[i],rs3[i]);
7841 srloaded=(rs1[i]==TBIT||rs2[i]==TBIT||rs3[i]==TBIT||rs1[i]==SR||rs2[i]==SR||rs3[i]==SR);
7842 if(rt1[i]==TBIT||rt2[i]==TBIT)
7843 if(!srloaded&&rt1[i]!=SR&&rt2[i]!=SR)
7844 {srloaded=1;load_regs(regs[i].regmap_entry,regs[i].regmap,SR,SR,SR);}
7845 address_generation(i,®s[i],regs[i].regmap_entry);
7846 load_consts(regmap_pre[i],regs[i].regmap,i);
7847 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==SJUMP)
7848 {
7849 // Load the delay slot registers if necessary
7850 if(!srloaded&&rt1[i+1]!=SR&&rt2[i+1]!=SR&&(rt1[i+1]==TBIT||rt2[i+1]==TBIT))
7851 {srloaded=1;load_regs(regs[i].regmap_entry,regs[i].regmap,SR,SR,SR);}
7852
7853 if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i]&&rs1[i+1]!=rs3[i])
7854 if(!srloaded||(rs1[i+1]!=TBIT&&rs1[i+1]!=SR))
7855 load_regs(regs[i].regmap_entry,regs[i].regmap,rs1[i+1],rs1[i+1],rs1[i+1]);
7856
7857 if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i]&&rs2[i+1]!=rs3[i])
7858 if(!srloaded||(rs2[i+1]!=TBIT&&rs2[i+1]!=SR))
7859 load_regs(regs[i].regmap_entry,regs[i].regmap,rs2[i+1],rs2[i+1],rs2[i+1]);
7860
7861 if(rs3[i+1]!=rs1[i+1]&&rs3[i+1]!=rs2[i+1]&&rs3[i+1]!=rs1[i]&&rs3[i+1]!=rs2[i]&&rs3[i+1]!=rs3[i])
7862 if(!srloaded||(rs3[i+1]!=TBIT&&rs3[i+1]!=SR))
7863 load_regs(regs[i].regmap_entry,regs[i].regmap,rs3[i+1],rs3[i+1],rs3[i+1]);
7864 }
7865 else if(i+1<slen)
7866 {
7867 signed char preload1, preload2, preload3;
7868 // Preload registers for following instruction
7869 preload1=rs1[i+1];
7870 if(preload1==TBIT||preload1==SR) {
7871 if(!srloaded) {preload1=SR;srloaded=1;}
7872 else preload1=-1;
7873 }
7874 if(preload1!=rs1[i]&&preload1!=rs2[i]&&preload1!=rs3[i])
7875 if(preload1!=rt1[i]&&preload1!=rt2[i])
7876 load_regs(regs[i].regmap_entry,regs[i].regmap,preload1,preload1,preload1);
7877 preload2=rs2[i+1];
7878 if(preload2==TBIT||preload2==SR) {
7879 if(!srloaded) {preload2=SR;srloaded=1;}
7880 else preload2=-1;
7881 }
7882 if(preload2!=rs1[i+1]&&preload2!=rs1[i]&&preload2!=rs2[i]&&preload2!=rs3[i])
7883 if(preload2!=rt1[i]&&preload2!=rt2[i])
7884 load_regs(regs[i].regmap_entry,regs[i].regmap,preload2,preload2,preload2);
7885 preload3=rs3[i+1];
7886 if(preload3==TBIT||preload3==SR) {
7887 if(!srloaded) {preload3=SR;srloaded=1;}
7888 else preload3=-1;
7889 }
7890 if(preload3!=rs1[i+1]&&preload3!=rs2[i+1]&&preload3!=rs1[i]&&preload3!=rs2[i]&&preload3!=rs3[i])
7891 if(preload3!=rt1[i]&&preload3!=rt2[i])
7892 load_regs(regs[i].regmap_entry,regs[i].regmap,preload3,preload3,preload3);
7893 if(rt1[i+1]==TBIT||rt2[i+1]==TBIT)
7894 if(!srloaded&&rt1[i]!=SR&&rt2[i]!=SR&&rt1[i+1]!=SR&&rt2[i+1]!=SR)
7895 {srloaded=1;load_regs(regs[i].regmap_entry,regs[i].regmap,SR,SR,SR);}
7896 }
7897 // TODO: if(is_ooo(i)) address_generation(i+1);
7898 if(itype[i]==LOAD||itype[i]==STORE||itype[i]==RMW)
7899 load_regs(regs[i].regmap_entry,regs[i].regmap,MMREG,MMREG,MMREG);
7900 // assemble
7901 switch(itype[i]) {
7902 case ALU:
7903 alu_assemble(i,®s[i]);break;
7904 case IMM8:
7905 imm8_assemble(i,®s[i]);break;
7906 case SHIFTIMM:
7907 shiftimm_assemble(i,®s[i]);break;
7908 case LOAD:
7909 load_assemble(i,®s[i]);break;
7910 case STORE:
7911 store_assemble(i,®s[i]);break;
7912 case RMW:
7913 rmw_assemble(i,®s[i]);break;
7914 case PCREL:
7915 pcrel_assemble(i,®s[i]);break;
7916 case MULTDIV:
7917 multdiv_assemble(i,®s[i]);break;
7918 case MOV:
7919 mov_assemble(i,®s[i]);break;
7920 case EXT:
7921 ext_assemble(i,®s[i]);break;
7922 case FLAGS:
7923 flags_assemble(i,®s[i]);break;
7924 case COMPLEX:
7925 complex_assemble(i,®s[i]);break;
7926 case SYSTEM:
7927 system_assemble(i,®s[i]);break;
7928 case BIOS:
7929 bios_assemble(i,®s[i]);break;
7930 case UJUMP:
7931 ujump_assemble(i,®s[i]);ds=1;break;
7932 case RJUMP:
7933 rjump_assemble(i,®s[i]);ds=1;break;
7934 case CJUMP:
7935 cjump_assemble(i,®s[i]);break;
7936 case SJUMP:
7937 sjump_assemble(i,®s[i]);ds=1;break;
7938 }
7939 if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
7940 literal_pool(1024);
7941 else
7942 literal_pool_jumpover(256);
7943 }
7944 }
7945 // If the block did not end with an unconditional branch,
7946 // add a jump to the next instruction.
7947 if(i>1) {
7948 if(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&itype[i-1]!=DATA) {
7949 assert(i==slen);
7950 if(itype[i-2]!=SJUMP) {
7951 store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*2);
7952 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
7953 emit_loadreg(CCREG,HOST_CCREG);
7954 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG);
7955 }
7956 else
7957 {
7958 store_regs_bt(regs[i-2].regmap,regs[i-2].dirty,start+i*2);
7959 assert(regs[i-2].regmap[HOST_CCREG]==CCREG);
7960 }
7961 add_to_linker((int)out,start+i*2,0);
7962 emit_jmp(0);
7963 }
7964 }
7965 else
7966 {
7967 assert(i>0);
7968 store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*2);
7969 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
7970 emit_loadreg(CCREG,HOST_CCREG);
7971 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG);
7972 add_to_linker((int)out,start+i*2,0);
7973 emit_jmp(0);
7974 }
7975
7976 // Stubs
7977 for(i=0;i<stubcount;i++)
7978 {
7979 switch(stubs[i][0])
7980 {
7981 case LOADB_STUB:
7982 case LOADW_STUB:
7983 case LOADL_STUB:
7984 case LOADS_STUB:
7985 do_readstub(i);break;
7986 case STOREB_STUB:
7987 case STOREW_STUB:
7988 case STOREL_STUB:
7989 do_writestub(i);break;
7990 case RMWT_STUB:
7991 case RMWA_STUB:
7992 case RMWX_STUB:
7993 case RMWO_STUB:
7994 do_rmwstub(i);break;
7995 case CC_STUB:
7996 do_ccstub(i);break;
7997 }
7998 }
7999 }
8000
8001 /* Pass 9 - Linker */
8002 {
8003 u32 *ht_bin;
8004 int entry_point;
8005 u32 alignedlen;
8006 u32 alignedstart;
8007 u32 index;
8008 for(i=0;i<linkcount;i++)
8009 {
8010 assem_debug("%8x -> %8x\n",link_addr[i][0],link_addr[i][1]);
8011 literal_pool(64);
8012 if(!link_addr[i][2])
8013 {
8014 void *stub=out;
8015 void *addr=check_addr(link_addr[i][1]);
8016 emit_extjump(link_addr[i][0],link_addr[i][1]);
8017 if(addr) {
8018 set_jump_target(link_addr[i][0],(int)addr);
8019 add_link(link_addr[i][1],stub);
8020 }
8021 else set_jump_target(link_addr[i][0],(int)stub);
8022 }
8023 else
8024 {
8025 // Internal branch
8026 int target=(link_addr[i][1]-start)>>1;
8027 assert(target>=0&&target<slen);
8028 assert(instr_addr[target]);
8029 //#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
8030 //set_jump_target_fillslot(link_addr[i][0],instr_addr[target],link_addr[i][2]>>1);
8031 //#else
8032 set_jump_target(link_addr[i][0],instr_addr[target]);
8033 //#endif
8034 }
8035 }
8036 // External Branch Targets (jump_in)
8037 if(copy+slen*2+4>shadow+sizeof(shadow)) copy=shadow;
8038 for(i=0;i<slen;i++)
8039 {
8040 if(bt[i]||i==0)
8041 {
8042 if(itype[i]==CJUMP||itype[i]==SJUMP) assert(instr_addr[i]);
8043 if(instr_addr[i]) // TODO - delay slots (=null)
8044 {
8045 u32 vaddr=start+i*2+slave;
8046 u32 page=(vaddr&0xDFFFFFFF)>>12;
8047 if(page>1024) page=1024+(page&1023);
8048 literal_pool(256);
8049 assem_debug("%8x (%d) <- %8x\n",instr_addr[i],i,start+i*2);
8050 assem_debug("jump_in: %x\n",start+i*2);
8051 ll_add(jump_dirty+page,vaddr,(void *)out);
8052 entry_point=do_dirty_stub(i);
8053 ll_add_nodup(jump_in+page,vaddr,(void *)entry_point);
8054 if((itype[i]==CJUMP||itype[i]==SJUMP)&&ccstub_return[i]) set_jump_target(ccstub_return[i],entry_point);
8055
8056 // If there was an existing entry in the hash table,
8057 // replace it with the new address.
8058 // Don't add new entries. We'll insert the
8059 // ones that actually get used in check_addr().
8060 ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
8061 if(ht_bin[0]==vaddr) {
8062 ht_bin[1]=entry_point;
8063 }
8064 if(ht_bin[2]==vaddr) {
8065 ht_bin[3]=entry_point;
8066 }
8067 }
8068 }
8069 }
8070 // Write out the literal pool if necessary
8071 literal_pool(0);
8072 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
8073 // Align code
8074 if(((u32)out)&7) emit_addnop(13);
8075 #endif
8076 assert((pointer)out-beginning<MAX_OUTPUT_BLOCK_SIZE);
8077 //printf("shadow buffer: %x-%x\n",(int)copy,(int)copy+slen*4);
8078 alignedlen=((((u32)source)+slen*2+2)&~2)-(u32)alignedsource;
8079 memcpy(copy,alignedsource,alignedlen);
8080 copy+=alignedlen;
8081
8082 #ifdef __arm__
8083 __clear_cache((void *)beginning,out);
8084 #endif
8085
8086 // If we're within 256K of the end of the buffer,
8087 // start over from the beginning. (Is 256K enough?)
8088 if((int)out>BASE_ADDR+(1<<TARGET_SIZE_2)-MAX_OUTPUT_BLOCK_SIZE-JUMP_TABLE_SIZE) out=(u8 *)BASE_ADDR;
8089
8090 // Trap writes to any of the pages we compiled
8091 for(i=start>>12;i<=(start+slen*2)>>12;i++) {
8092 //invalid_code[i]=0;
8093 cached_code[i>>3]|=1<<(i&7);
8094 cached_code[(i^0x20000)>>3]|=1<<(i&7);
8095 #ifdef POINTERS_64BIT
8096 memory_map[i]|=0x4000000000000000LL;
8097 memory_map[i^0x20000]|=0x4000000000000000LL;
8098 #else
8099 memory_map[i]|=0x40000000;
8100 memory_map[i^0x20000]|=0x40000000;
8101 #endif
8102 }
8103 alignedstart=start&~3;
8104 index=alignedstart&0xDFFFFFFF;
8105 if(index>4194304) index=(addr|0x400000)&0x7fffff;
8106 for(i=0;i<alignedlen;i+=4) {
8107 cached_code_words[(index+i)>>5]|=1<<(((index+i)>>2)&7);
8108 }
8109 }
8110
8111 /* Pass 10 - Free memory by expiring oldest blocks */
8112
8113 {
8114 int end=((((int)out-BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535;
8115 while(expirep!=end)
8116 {
8117 int shift=TARGET_SIZE_2-3; // Divide into 8 blocks
8118 int base=BASE_ADDR+((expirep>>13)<<shift); // Base address of this block
8119 inv_debug("EXP: Phase %d\n",expirep);
8120 switch((expirep>>11)&3)
8121 {
8122 case 0:
8123 // Clear jump_in and jump_dirty
8124 ll_remove_matching_addrs(jump_in+(expirep&2047),base,shift);
8125 ll_remove_matching_addrs(jump_dirty+(expirep&2047),base,shift);
8126 break;
8127 case 1:
8128 // Clear pointers
8129 ll_kill_pointers(jump_out[expirep&2047],base,shift);
8130 break;
8131 case 2:
8132 // Clear hash table
8133 for(i=0;i<32;i++) {
8134 u32 *ht_bin=hash_table[((expirep&2047)<<5)+i];
8135 if((ht_bin[3]>>shift)==(base>>shift) ||
8136 ((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
8137 inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[2],ht_bin[3]);
8138 ht_bin[2]=ht_bin[3]=-1;
8139 }
8140 if((ht_bin[1]>>shift)==(base>>shift) ||
8141 ((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
8142 inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[0],ht_bin[1]);
8143 ht_bin[0]=ht_bin[2];
8144 ht_bin[1]=ht_bin[3];
8145 ht_bin[2]=ht_bin[3]=-1;
8146 }
8147 }
8148 break;
8149 case 3:
8150 // Clear jump_out
8151 if((expirep&2047)==0) {
8152 #ifdef __arm__
8153 do_clear_cache();
8154 #endif
8155 #ifdef USE_MINI_HT
8156 memset(mini_ht_master,-1,sizeof(mini_ht_master));
8157 memset(mini_ht_slave,-1,sizeof(mini_ht_slave));
8158 #endif
8159 }
8160 ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);
8161 break;
8162 }
8163 expirep=(expirep+1)&65535;
8164 }
8165 }
8166 return 0;
8167 }
8168
8169 #include "../sh2core.h"
8170
8171 extern int framecounter;
DynarecMasterHandleInterrupts()8172 void DynarecMasterHandleInterrupts()
8173 {
8174 if (MSH2->interrupts[MSH2->NumberOfInterrupts-1].level > ((master_reg[SR]>>4)&0xF))
8175 {
8176 master_reg[15] -= 4;
8177 MappedMemoryWriteLongNocache(MSH2, master_reg[15], master_reg[SR]);
8178 master_reg[15] -= 4;
8179 MappedMemoryWriteLongNocache(MSH2, master_reg[15], master_pc);
8180 master_reg[SR] &= 0xFFFFFF0F;
8181 master_reg[SR] |= (MSH2->interrupts[MSH2->NumberOfInterrupts-1].level)<<4;
8182 master_pc = MappedMemoryReadLongNocache(MSH2, master_reg[VBR] + (MSH2->interrupts[MSH2->NumberOfInterrupts-1].vector << 2));
8183 master_ip = get_addr_ht(master_pc);
8184 MSH2->NumberOfInterrupts--;
8185 MSH2->isIdle = 0;
8186 MSH2->isSleeping = 0;
8187 }
8188 //printf("DynarecMasterHandleInterrupts pc=%x ip=%x\n",master_pc,(int)master_ip);
8189 //printf("master_cc=%d slave_cc=%d\n",master_cc,slave_cc);
8190 //printf("frame=%d\n",framecounter);
8191 }
8192
DynarecSlaveHandleInterrupts()8193 void DynarecSlaveHandleInterrupts()
8194 {
8195 if (SSH2->interrupts[SSH2->NumberOfInterrupts-1].level > ((slave_reg[SR]>>4)&0xF))
8196 {
8197 slave_reg[15] -= 4;
8198 MappedMemoryWriteLongNocache(SSH2, slave_reg[15], slave_reg[SR]);
8199 slave_reg[15] -= 4;
8200 MappedMemoryWriteLongNocache(SSH2, slave_reg[15], slave_pc);
8201 slave_reg[SR] &= 0xFFFFFF0F;
8202 slave_reg[SR] |= (SSH2->interrupts[SSH2->NumberOfInterrupts-1].level)<<4;
8203 slave_pc = MappedMemoryReadLongNocache(SSH2, slave_reg[VBR] + (SSH2->interrupts[SSH2->NumberOfInterrupts-1].vector << 2));
8204 slave_ip = get_addr_ht(slave_pc|1);
8205 SSH2->NumberOfInterrupts--;
8206 SSH2->isIdle = 0;
8207 SSH2->isSleeping = 0;
8208 }
8209 //printf("DynarecSlaveHandleInterrupts pc=%x ip=%x\n",slave_pc,(int)slave_ip);
8210 //printf("master_cc=%d slave_cc=%d\n",master_cc,slave_cc);
8211 }
8212
/*
 * Prototypes for the interrupt-queue routines that are plugged into the
 * SH2Dynarec interface table further down in this file.  They are only
 * declared here; their definitions are not part of this file section.
 */
void SH2InterpreterSendInterrupt(SH2_struct *context, u8 level, u8 vector);
int SH2InterpreterGetInterrupts(SH2_struct *context,
interrupt_struct interrupts[MAX_INTERRUPTS]);
void SH2InterpreterSetInterrupts(SH2_struct *context, int num_interrupts,
const interrupt_struct interrupts[MAX_INTERRUPTS]);
8218
/*
 * Interface init hook.  Performs no work: all three arguments are ignored
 * and the function always returns 0.
 */
int SH2DynarecInit(enum SHMODELTYPE model, SH2_struct *msh, SH2_struct *ssh)
{
  (void)model;
  (void)msh;
  (void)ssh;
  return 0;
}
8220
/* Interface teardown hook: hands off to sh2_dynarec_cleanup(). */
void SH2DynarecDeInit()
{
  sh2_dynarec_cleanup();
}
8224
SH2DynarecExec(SH2_struct * context,u32 cycles)8225 void FASTCALL SH2DynarecExec(SH2_struct *context, u32 cycles) {
8226 printf("SH2DynarecExec called! oops\n");
8227 printf("master_ip=%x\n",(int)master_ip);
8228 exit(1);
8229 }
8230
SH2DynarecGetSR(SH2_struct * context)8231 u32 SH2DynarecGetSR(SH2_struct *context)
8232 {
8233 if(context==MSH2)
8234 return master_reg[SR];
8235 else
8236 return slave_reg[SR];
8237 }
SH2DynarecGetGBR(SH2_struct * context)8238 u32 SH2DynarecGetGBR(SH2_struct *context)
8239 {
8240 if(context==MSH2)
8241 return master_reg[GBR];
8242 else
8243 return slave_reg[GBR];
8244 }
SH2DynarecGetVBR(SH2_struct * context)8245 u32 SH2DynarecGetVBR(SH2_struct *context)
8246 {
8247 if(context==MSH2)
8248 return master_reg[VBR];
8249 else
8250 return slave_reg[VBR];
8251 }
SH2DynarecGetMACH(SH2_struct * context)8252 u32 SH2DynarecGetMACH(SH2_struct *context)
8253 {
8254 if(context==MSH2)
8255 return master_reg[MACH];
8256 else
8257 return slave_reg[MACH];
8258 }
SH2DynarecGetMACL(SH2_struct * context)8259 u32 SH2DynarecGetMACL(SH2_struct *context)
8260 {
8261 if(context==MSH2)
8262 return master_reg[MACL];
8263 else
8264 return slave_reg[MACL];
8265 }
SH2DynarecGetPR(SH2_struct * context)8266 u32 SH2DynarecGetPR(SH2_struct *context)
8267 {
8268 if(context==MSH2)
8269 return master_reg[PR];
8270 else
8271 return slave_reg[PR];
8272 }
SH2DynarecGetGPR(SH2_struct * context,int num)8273 u32 SH2DynarecGetGPR(SH2_struct *context, int num)
8274 {
8275 if(context==MSH2)
8276 return master_reg[num];
8277 else
8278 return slave_reg[num];
8279 }
8280
SH2DynarecGetPC(SH2_struct * context)8281 u32 SH2DynarecGetPC(SH2_struct *context)
8282 {
8283 if(context==MSH2)
8284 return master_pc;
8285 else
8286 return slave_pc;
8287 }
8288
/*
 * Store `value` into SR of the dynarec register file.
 * context == MSH2 writes master_reg; any other context writes slave_reg.
 */
void SH2DynarecSetSR(SH2_struct *context, u32 value)
{
  if(context!=MSH2) {
    slave_reg[SR]=value;
    return;
  }
  master_reg[SR]=value;
}
/*
 * Store `value` into GBR of the dynarec register file.
 * context == MSH2 writes master_reg; any other context writes slave_reg.
 */
void SH2DynarecSetGBR(SH2_struct *context, u32 value)
{
  if(context!=MSH2) {
    slave_reg[GBR]=value;
    return;
  }
  master_reg[GBR]=value;
}
/*
 * Store `value` into VBR of the dynarec register file.
 * context == MSH2 writes master_reg; any other context writes slave_reg.
 */
void SH2DynarecSetVBR(SH2_struct *context, u32 value)
{
  if(context!=MSH2) {
    slave_reg[VBR]=value;
    return;
  }
  master_reg[VBR]=value;
}
/*
 * Store `value` into MACH of the dynarec register file.
 * context == MSH2 writes master_reg; any other context writes slave_reg.
 */
void SH2DynarecSetMACH(SH2_struct *context, u32 value)
{
  if(context!=MSH2) {
    slave_reg[MACH]=value;
    return;
  }
  master_reg[MACH]=value;
}
/*
 * Store `value` into MACL of the dynarec register file.
 * context == MSH2 writes master_reg; any other context writes slave_reg.
 */
void SH2DynarecSetMACL(SH2_struct *context, u32 value)
{
  if(context!=MSH2) {
    slave_reg[MACL]=value;
    return;
  }
  master_reg[MACL]=value;
}
/*
 * Store `value` into PR of the dynarec register file.
 * context == MSH2 writes master_reg; any other context writes slave_reg.
 */
void SH2DynarecSetPR(SH2_struct *context, u32 value)
{
  if(context!=MSH2) {
    slave_reg[PR]=value;
    return;
  }
  master_reg[PR]=value;
}
/*
 * Store `value` into entry `num` of the dynarec register file.
 * context == MSH2 writes master_reg; any other context writes slave_reg.
 * `num` is used as the array index as-is; no range check is performed.
 */
void SH2DynarecSetGPR(SH2_struct *context, int num, u32 value)
{
  if(context!=MSH2) {
    slave_reg[num]=value;
    return;
  }
  master_reg[num]=value;
}
8331
/*
 * Set the program counter for one core and refresh its translated-code
 * pointer to match.
 *
 * context == MSH2 updates master_pc/master_ip using get_addr_ht(value);
 * any other context updates slave_pc/slave_ip using get_addr_ht(value+1).
 */
void SH2DynarecSetPC(SH2_struct *context, u32 value)
{
  if(context!=MSH2) {
    slave_pc=value;
    // Slave lookups are keyed on the address plus one
    slave_ip=get_addr_ht(value+1);
    return;
  }
  master_pc=value;
  master_ip=get_addr_ht(value);
}
8343
// Release the register-name identifiers from here on.  The functions below
// access sh2regs_struct members with these same names (regs->SR, regs->GBR,
// ...), which would not compile as intended while the names are still
// defined as macros (presumably register indices - confirm in the assem_*
// headers).
#undef SR
#undef GBR
#undef VBR
#undef MACH
#undef MACL
#undef PR
8350
/*
 * Fill *regs with a snapshot of one core's register state.
 *
 * The first 16 register-file entries are block-copied into regs->R
 * (master_reg when context == MSH2, slave_reg otherwise); SR, GBR, VBR,
 * MACH, MACL, PR and PC are then fetched through the per-register getters.
 */
void SH2DynarecGetRegisters(SH2_struct *context, sh2regs_struct *regs)
{
  const void *regfile=master_reg;
  if(context!=MSH2)
    regfile=slave_reg;
  memcpy(&(regs->R), regfile, 16*sizeof(int));

  regs->SR.all=SH2DynarecGetSR(context);
  regs->GBR=SH2DynarecGetGBR(context);
  regs->VBR=SH2DynarecGetVBR(context);
  regs->MACH=SH2DynarecGetMACH(context);
  regs->MACL=SH2DynarecGetMACL(context);
  regs->PR=SH2DynarecGetPR(context);
  regs->PC=SH2DynarecGetPC(context);
}
8365
/*
 * Load one core's register state from *regs.
 *
 * regs->R is block-copied over the first 16 register-file entries
 * (master_reg when context == MSH2, slave_reg otherwise); SR, GBR, VBR,
 * MACH, MACL, PR and PC are then applied through the per-register setters.
 * PC is set last, via SH2DynarecSetPC().
 */
void SH2DynarecSetRegisters(SH2_struct *context, const sh2regs_struct *regs)
{
  void *regfile=master_reg;
  if(context!=MSH2)
    regfile=slave_reg;
  memcpy(regfile, &(regs->R), 16*sizeof(int));

  SH2DynarecSetSR(context, regs->SR.all);
  SH2DynarecSetGBR(context, regs->GBR);
  SH2DynarecSetVBR(context, regs->VBR);
  SH2DynarecSetMACH(context, regs->MACH);
  SH2DynarecSetMACL(context, regs->MACL);
  SH2DynarecSetPR(context, regs->PR);
  SH2DynarecSetPC(context, regs->PC);
}
8380
/*
 * Notification that `length` bytes starting at `start` were written.
 *
 * Writes whose start address falls outside the two RAM windows tested below
 * are ignored.  Otherwise every 4 KiB page touched by the range is looked up
 * in the cached_code bitmap, and if at least one of them is marked,
 * invalidate_blocks() is called for the whole page range.
 */
void SH2DynarecWriteNotify(u32 start, u32 length)
{
  int page,hit=0;
  u32 first_page,last_page;

  // Ignore non-RAM regions
  if(!((start&0xDFF00000)==0x200000||(start&0xDE000000)==0x6000000))
    return;

  first_page=start>>12;
  last_page=(start+length-1)>>12;

  // Check if any pages contain compiled code
  for(page=first_page;page<=last_page;page++)
    hit|=((cached_code[page>>3]>>(page&7))&1);

  if(hit)
    invalidate_blocks(first_page,last_page);
}
8392
/*
 * Interface table for the dynamic recompiler core.  The initializer is
 * positional, so the order of the entries must match the member order of
 * SH2Interface_struct (declared elsewhere).
 */
SH2Interface_struct SH2Dynarec = {
// Core identifier and display name
SH2CORE_DYNAREC,
"SH2 Dynamic Recompiler",

// Lifecycle and execution entry points
SH2DynarecInit,
SH2DynarecDeInit,
SH2DynarecReset,
SH2DynarecExec,

// Register read accessors
SH2DynarecGetRegisters,
SH2DynarecGetGPR,
SH2DynarecGetSR,
SH2DynarecGetGBR,
SH2DynarecGetVBR,
SH2DynarecGetMACH,
SH2DynarecGetMACL,
SH2DynarecGetPR,
SH2DynarecGetPC,

// Register write accessors
SH2DynarecSetRegisters,
SH2DynarecSetGPR,
SH2DynarecSetSR,
SH2DynarecSetGBR,
SH2DynarecSetVBR,
SH2DynarecSetMACH,
SH2DynarecSetMACL,
SH2DynarecSetPR,
SH2DynarecSetPC,

// Interrupt queue handling uses the SH2Interpreter* routines
SH2InterpreterSendInterrupt,
SH2InterpreterGetInterrupts,
SH2InterpreterSetInterrupts,

// Memory write notification (code invalidation)
SH2DynarecWriteNotify
};
8428
// Addresses of individual yabsys fields and copies of the timing constants,
// published as plain globals.
// NOTE(review): presumably these exist so that code outside C (e.g. the
// assembly linkage) can reach them without knowing the struct layout - confirm.
u32 * decilinestop_p = &yabsys.DecilineStop;
u32 * decilineusec_p = &yabsys.DecilineUsec;
u32 * SH2CycleFrac_p = &yabsys.SH2CycleFrac;
u32 * UsecFrac_p = &yabsys.UsecFrac;
//u32 decilinecycles = yabsys.DecilineStop >> YABSYS_TIMING_BITS;
u32 yabsys_timing_bits = YABSYS_TIMING_BITS;
u32 yabsys_timing_mask = YABSYS_TIMING_MASK;
int * linecount_p = &yabsys.LineCount;
int * vblanklinecount_p = &yabsys.VBlankLineCount;
int * maxlinecount_p = &yabsys.MaxLineCount;

// Byte offset of NumberOfInterrupts within SH2_struct, obtained by taking the
// member's address relative to a null base pointer and kept as a
// pointer-sized value.
void * NumberOfInterruptsOffset = &((SH2_struct *)0)->NumberOfInterrupts;
8441