1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_x86.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21 #include "main/main.h"
22
// Global state for the x86 dynamic recompiler.
// NOTE(review): several of these are not referenced in the visible part of
// this file; where no use is visible, no purpose is stated.
int cycle_count;        // memory home of CCREG: emit_loadreg()/emit_storereg() use its address when r==CCREG
int last_count;
int pcaddr;
int pending_exception;
int branch_target;
uint64_t readmem_dword;
static precomp_instr fake_pc;
// One entry per 4KB page (indexed by address>>12).  As used below:
// values >= 0x80000000 make verify_dirty() fail, the value shifted left by 2
// is added to a source address, and bit 0x40000000 is set by the dynamic
// linker when it restores a dirty block.
u_int memory_map[1048576];
ALIGN(8, static u_int mini_ht[32][2]);
// Bit array: the dynamic linker sets bit (page&7) of byte [page>>3].
ALIGN(4, u_char restore_candidate[512]);
33
// Routines defined outside this file (no bodies are visible here).
// Declared with C linkage when compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif
void do_interrupt();
void jump_vaddr_eax();
void jump_vaddr_ecx();
void jump_vaddr_edx();
void jump_vaddr_ebx();
void jump_vaddr_ebp();
void jump_vaddr_edi();
#ifdef __cplusplus
}
#endif

// Addresses of the jump_vaddr_* routines, one slot per x86 register number
// (same order as regname[]: eax, ecx, edx, ebx, esp, ebp, esi, edi).
// Slots 4 (esp) and 6 (esi) are 0: no jump_vaddr routine is declared for them.
static const u_int jump_vaddr_reg[8] = {
  (int)jump_vaddr_eax,
  (int)jump_vaddr_ecx,
  (int)jump_vaddr_edx,
  (int)jump_vaddr_ebx,
  0,
  (int)jump_vaddr_ebp,
  0,
  (int)jump_vaddr_edi };
57
// Routines defined outside this file (no bodies are visible here).
// Declared with C linkage when compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif
void invalidate_block_eax();
void invalidate_block_ecx();
void invalidate_block_edx();
void invalidate_block_ebx();
void invalidate_block_ebp();
void invalidate_block_esi();
void invalidate_block_edi();
#ifdef __cplusplus
}
#endif

// Addresses of the invalidate_block_* routines, one slot per x86 register
// number (eax, ecx, edx, ebx, esp, ebp, esi, edi).
// Slot 4 (esp) is 0: no invalidate_block routine is declared for it.
static const u_int invalidate_block_reg[8] = {
  (int)invalidate_block_eax,
  (int)invalidate_block_ecx,
  (int)invalidate_block_edx,
  (int)invalidate_block_ebx,
  0,
  (int)invalidate_block_ebp,
  (int)invalidate_block_esi,
  (int)invalidate_block_edi };
81
// 16-bit constants, one per rounding mode, in the order
// round, trunc, ceil, floor.
// NOTE(review): presumably x87 FPU control words (the values differ only in
// bits 10-11) -- confirm against the code that loads them.
static const u_short rounding_modes[4] = {
  0x33F, // round
  0xF3F, // trunc
  0xB3F, // ceil
  0x73F};// floor
87
88 #include "../../fpu.h"
89
// We need these for cmovcc instructions on x86
// (constants 0 and 1 kept in memory; they are not referenced in the visible
// part of this file).
static const u_int const_zero=0;
static const u_int const_one=1;
93
94 /* Linker */
95
set_jump_target(int addr,int target)96 static void set_jump_target(int addr,int target)
97 {
98 u_char *ptr=(u_char *)addr;
99 if(*ptr==0x0f)
100 {
101 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
102 u_int *ptr2=(u_int *)(ptr+2);
103 *ptr2=target-(int)ptr2-4;
104 }
105 else if(*ptr==0xe8||*ptr==0xe9) {
106 u_int *ptr2=(u_int *)(ptr+1);
107 *ptr2=target-(int)ptr2-4;
108 }
109 else
110 {
111 assert(*ptr==0xc7); /* mov immediate (store address) */
112 u_int *ptr2=(u_int *)(ptr+6);
113 *ptr2=target;
114 }
115 }
116
/* Resolve the compiled code for vaddr and, when possible, link the branch
 * whose 32-bit relative displacement is stored at src directly to it.
 *
 * Lookup order:
 *   1. jump_in[page]: on a match (same vaddr, reg_sv_flags==0) the link is
 *      recorded with add_link(), the displacement at src is rewritten to
 *      point at the entry, and the entry address is returned.
 *   2. hash_table: either of the two slots of the bin may match.
 *   3. jump_dirty[vpage]: a matching block that is not about to expire and
 *      still passes verify_dirty() is restored (invalid_code cleared,
 *      memory_map and restore_candidate flagged, hash bin updated) and its
 *      clean entry point is returned.
 *   4. Otherwise the block is recompiled and the lookup is retried; if
 *      recompilation fails, TLB_refill_exception_new() supplies the result.
 *
 * vaddr must have bit 0 clear.
 */
void *dynamic_linker(void * src, u_int vaddr)
{
  assert((vaddr&1)==0);
  const u_int vpn=vaddr>>12;
  u_int page=(vaddr^0x80000000)>>12;
  u_int vpage=page;
  const int tlb_mapped=(page>262143&&tlb_LUT_r[vpn]);
  if(tlb_mapped) page=(tlb_LUT_r[vpn]^0x80000000)>>12;
  if(page>2048) page=2048+(page&2047);
  if(tlb_mapped) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
  if(vpage>2048) vpage=2048+(vpage&2047);

  struct ll_entry *entry;
  for(entry=jump_in[page];entry!=NULL;entry=entry->next) {
    if(entry->vaddr!=vaddr||entry->reg_sv_flags!=0) continue;
    int *rel=(int*)src;
    // The branch currently targets its stub: push, push, call
    int *stub=(int*)((u_int)rel + (u_int)*rel + 4);
    assert((*stub&0xFF)==0x68); //push
    assert((*(int*)((u_int)stub+5)&0xFF)==0x68); //push
    assert((*(int*)((u_int)stub+10)&0xFF)==0xE8); //call
    add_link(vaddr, stub);
    *rel=(u_int)entry->addr-(u_int)rel-4;
    return entry->addr;
  }

  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
  if(ht_bin[2]==vaddr) return (void *)ht_bin[3];

  for(entry=jump_dirty[vpage];entry!=NULL;entry=entry->next) {
    if(entry->vaddr!=vaddr||entry->reg_sv_flags!=0) continue;
    // Don't restore blocks which are about to expire from the cache
    const int keeps=(((u_int)entry->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2));
    if(!keeps) continue;
    if(!verify_dirty(entry->addr)) continue;

    invalid_code[vpn]=0;
    memory_map[vpn]|=0x40000000;
    if(vpage<2048) {
      const u_int mapped=tlb_LUT_r[vpn];
      if(mapped) {
        invalid_code[mapped>>12]=0;
        memory_map[mapped>>12]|=0x40000000;
      }
      restore_candidate[vpage>>3]|=1<<(vpage&7);
    }
    else restore_candidate[page>>3]|=1<<(page&7);

    if(ht_bin[0]==vaddr) {
      ht_bin[1]=(int)entry->addr; // Replace existing entry
    }
    else
    {
      // Demote the first slot to the second, then install the new pair
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(int)entry->addr;
      ht_bin[0]=vaddr;
    }
    return (void*)get_clean_addr((int)entry->addr);
  }

  if(new_recompile_block(vaddr)==0) return dynamic_linker(src, vaddr);
  // Execute in unmapped page, generate pagefault exception
  return TLB_refill_exception_new(vaddr,vaddr&~1,0);
}
189
/* Variant of dynamic_linker(): same lookup sequence (jump_in, hash_table,
 * jump_dirty), but vaddr is not asserted to have bit 0 clear and a miss
 * recompiles with the address (vaddr&0xFFFFFFF8)+1 before retrying.
 * NOTE(review): "ds" presumably stands for delay slot -- confirm.
 *
 * src points at the 32-bit relative displacement of the branch to be linked.
 */
void *dynamic_linker_ds(void * src, u_int vaddr)
{
  const u_int vpn=vaddr>>12;
  u_int page=(vaddr^0x80000000)>>12;
  u_int vpage=page;
  const int tlb_mapped=(page>262143&&tlb_LUT_r[vpn]);
  if(tlb_mapped) page=(tlb_LUT_r[vpn]^0x80000000)>>12;
  if(page>2048) page=2048+(page&2047);
  if(tlb_mapped) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
  if(vpage>2048) vpage=2048+(vpage&2047);

  struct ll_entry *entry;
  for(entry=jump_in[page];entry!=NULL;entry=entry->next) {
    if(entry->vaddr!=vaddr||entry->reg_sv_flags!=0) continue;
    int *rel=(int*)src;
    // The branch currently targets its stub: push, push, call
    int *stub=(int*)((u_int)rel + (u_int)*rel + 4);
    assert((*stub&0xFF)==0x68); //push
    assert((*(int*)((u_int)stub+5)&0xFF)==0x68); //push
    assert((*(int*)((u_int)stub+10)&0xFF)==0xE8); //call
    add_link(vaddr, stub);
    *rel=(u_int)entry->addr-(u_int)rel-4;
    return entry->addr;
  }

  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
  if(ht_bin[2]==vaddr) return (void *)ht_bin[3];

  for(entry=jump_dirty[vpage];entry!=NULL;entry=entry->next) {
    if(entry->vaddr!=vaddr||entry->reg_sv_flags!=0) continue;
    // Don't restore blocks which are about to expire from the cache
    const int keeps=(((u_int)entry->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2));
    if(!keeps) continue;
    if(!verify_dirty(entry->addr)) continue;

    invalid_code[vpn]=0;
    memory_map[vpn]|=0x40000000;
    if(vpage<2048) {
      const u_int mapped=tlb_LUT_r[vpn];
      if(mapped) {
        invalid_code[mapped>>12]=0;
        memory_map[mapped>>12]|=0x40000000;
      }
      restore_candidate[vpage>>3]|=1<<(vpage&7);
    }
    else restore_candidate[page>>3]|=1<<(page&7);

    if(ht_bin[0]==vaddr) {
      ht_bin[1]=(int)entry->addr; // Replace existing entry
    }
    else
    {
      // Demote the first slot to the second, then install the new pair
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(int)entry->addr;
      ht_bin[0]=vaddr;
    }
    return (void*)get_clean_addr((int)entry->addr);
  }

  if(new_recompile_block((vaddr&0xFFFFFFF8)+1)==0) return dynamic_linker_ds(src, vaddr);
  // Execute in unmapped page, generate pagefault exception
  return TLB_refill_exception_new(vaddr,vaddr&~1,0);
}
261
/* Undo a link: the stub holds, at byte offset 6, the address of a 32-bit
 * relative displacement.  That displacement is rewritten so it targets the
 * stub itself again.  Returns the address of the patched displacement.
 */
static void *kill_pointer(void *stub)
{
  int **slot=(int **)((int)stub+6);
  int *patch_site=*slot;
  *patch_site=(int)stub-(int)patch_site-4;
  return patch_site;
}
/* Return the address currently targeted by the 32-bit relative displacement
 * whose location is stored at byte offset 6 of the stub.
 */
static int get_pointer(void *stub)
{
  int **slot=(int **)((int)stub+6);
  int *patch_site=*slot;
  int displacement=*patch_site;
  return displacement+(int)patch_site+4;
}
273
274 // Find the "clean" entry point from a "dirty" entry point
275 // by skipping past the call to verify_code
get_clean_addr(int addr)276 static u_int get_clean_addr(int addr)
277 {
278 u_char *ptr=(u_char *)addr;
279 assert(ptr[20]==0xE8); // call instruction
280 assert(ptr[25]==0x83); // pop (add esp,4) instruction
281 if(ptr[28]==0xE9) return *(u_int *)(ptr+29)+addr+33; // follow jmp
282 else return(addr+28);
283 }
284
verify_dirty(void * addr)285 static int verify_dirty(void *addr)
286 {
287 u_char *ptr=(u_char *)addr;
288 assert(ptr[5]==0xB8);
289 u_int source=*(u_int *)(ptr+6);
290 u_int copy=*(u_int *)(ptr+11);
291 u_int len=*(u_int *)(ptr+16);
292 assert(ptr[20]==0xE8); // call instruction
293 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
294 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
295 unsigned int page=source>>12;
296 unsigned int map_value=memory_map[page];
297 if(map_value>=0x80000000) return 0;
298 while(page<((source+len-1)>>12)) {
299 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
300 }
301 source = source+(map_value<<2);
302 }
303 //DebugMessage(M64MSG_VERBOSE, "verify_dirty: %x %x %x",source,copy,len);
304 return !memcmp((void *)source,(void *)copy,len);
305 }
306
/* Extract the copy address (offset 11) and length (offset 16) stored in the
 * header of the dirty entry point addr.  The header must start its first
 * immediate load with 0xB8 at offset 5 and have a call (0xE8) at offset 20.
 */
static void get_copy_addr(void *addr, u_int *copy, u_int *length)
{
  u_char *hdr=(u_char *)addr;
  assert(hdr[5]==0xB8);
  u_int copy_addr=*(u_int *)(hdr+11);
  u_int copy_len=*(u_int *)(hdr+16);
  *copy=copy_addr;
  *length=copy_len;
  assert(hdr[20]==0xE8); // call instruction
}
315
316 // This doesn't necessarily find all clean entry points, just
317 // guarantees that it's not dirty
isclean(int addr)318 static int isclean(int addr)
319 {
320 u_char *ptr=(u_char *)addr;
321 if(ptr[5]!=0xB8) return 1; // mov imm,%eax
322 if(ptr[10]!=0xBB) return 1; // mov imm,%ebx
323 if(ptr[15]!=0xB9) return 1; // mov imm,%ecx
324 if(ptr[20]!=0xE8) return 1; // call instruction
325 if(ptr[25]!=0x83) return 1; // pop (add esp,4) instruction
326 return 0;
327 }
328
/* Report the source range covered by the dirty entry point addr.
 *
 * The source address (offset 6) and length (offset 16) come from the block
 * header.  When the verifier called at offset 20 is verify_code_vm or
 * verify_code_ds, the source is translated through memory_map: a map value
 * >= 0x80000000 yields source 0, otherwise the value shifted left by 2 is
 * added.  *start receives source and *end receives source+len; either
 * pointer may be NULL.
 */
static void get_bounds(int addr,u_int *start,u_int *end)
{
  u_char *hdr=(u_char *)addr;
  assert(hdr[5]==0xB8);
  u_int source=*(u_int *)(hdr+6);
  u_int len=*(u_int *)(hdr+16);
  assert(hdr[20]==0xE8); // call instruction
  u_int verifier=*(u_int *)(hdr+21)+(u_int)hdr+25;

  if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
    const u_int map_value=memory_map[source>>12];
    source=(map_value>=0x80000000)?0:source+(map_value<<2);
  }
  if(start) *start=source;
  if(end) *end=source+len;
}
345
346 /* Register allocation */
347
348 // Note: registers are allocated clean (unmodified state)
349 // if you intend to modify the register, you must call dirty_reg().
alloc_reg(struct regstat * cur,int i,signed char reg)350 static void alloc_reg(struct regstat *cur,int i,signed char reg)
351 {
352 int r,hr;
353 int preferred_reg = (reg&3)+(reg>28)*4-(reg==32)+2*(reg==36)-(reg==40);
354
355 // Don't allocate unused registers
356 if((cur->u>>reg)&1) return;
357
358 // see if it's already allocated
359 for(hr=0;hr<HOST_REGS;hr++)
360 {
361 if(cur->regmap[hr]==reg) return;
362 }
363
364 // Keep the same mapping if the register was already allocated in a loop
365 preferred_reg = loop_reg(i,reg,preferred_reg);
366
367 // Try to allocate the preferred register
368 if(cur->regmap[preferred_reg]==-1) {
369 cur->regmap[preferred_reg]=reg;
370 cur->dirty&=~(1<<preferred_reg);
371 cur->isconst&=~(1<<preferred_reg);
372 return;
373 }
374 r=cur->regmap[preferred_reg];
375 if(r<64&&((cur->u>>r)&1)) {
376 cur->regmap[preferred_reg]=reg;
377 cur->dirty&=~(1<<preferred_reg);
378 cur->isconst&=~(1<<preferred_reg);
379 return;
380 }
381 if(r>=64&&((cur->uu>>(r&63))&1)) {
382 cur->regmap[preferred_reg]=reg;
383 cur->dirty&=~(1<<preferred_reg);
384 cur->isconst&=~(1<<preferred_reg);
385 return;
386 }
387
388 // Try to allocate EAX, EBX, ECX, or EDX
389 // We prefer these because they can do byte and halfword loads
390 for(hr=0;hr<4;hr++) {
391 if(cur->regmap[hr]==-1) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398
399 // Clear any unneeded registers
400 // We try to keep the mapping consistent, if possible, because it
401 // makes branches easier (especially loops). So we try to allocate
402 // first (see above) before removing old mappings. If this is not
403 // possible then go ahead and clear out the registers that are no
404 // longer needed.
405 for(hr=0;hr<HOST_REGS;hr++)
406 {
407 r=cur->regmap[hr];
408 if(r>=0) {
409 if(r<64) {
410 if((cur->u>>r)&1)
411 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
412 }
413 else
414 {
415 if((cur->uu>>(r&63))&1)
416 if(i==0||(unneeded_reg_upper[i-1]>>(r&63))&1) {cur->regmap[hr]=-1;break;}
417 }
418 }
419 }
420 // Try to allocate any available register, but prefer
421 // registers that have not been used recently.
422 if(i>0) {
423 for(hr=0;hr<HOST_REGS;hr++) {
424 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
425 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
426 cur->regmap[hr]=reg;
427 cur->dirty&=~(1<<hr);
428 cur->isconst&=~(1<<hr);
429 return;
430 }
431 }
432 }
433 }
434 // Try to allocate any available register
435 for(hr=0;hr<HOST_REGS;hr++) {
436 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
437 cur->regmap[hr]=reg;
438 cur->dirty&=~(1<<hr);
439 cur->isconst&=~(1<<hr);
440 return;
441 }
442 }
443
444 // Ok, now we have to evict someone
445 // Pick a register we hopefully won't need soon
446 u_char hsn[MAXREG+1];
447 memset(hsn,10,sizeof(hsn));
448 int j;
449 lsn(hsn,i,&preferred_reg);
450 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
451 if(i>0) {
452 // Don't evict the cycle count at entry points, otherwise the entry
453 // stub will have to write it.
454 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
455 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
456 for(j=10;j>=3;j--)
457 {
458 // Alloc preferred register if available
459 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
460 for(hr=0;hr<HOST_REGS;hr++) {
461 // Evict both parts of a 64-bit register
462 if((cur->regmap[hr]&63)==r) {
463 cur->regmap[hr]=-1;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 }
467 }
468 cur->regmap[preferred_reg]=reg;
469 return;
470 }
471 for(r=1;r<=MAXREG;r++)
472 {
473 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
474 for(hr=0;hr<HOST_REGS;hr++) {
475 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
476 if(cur->regmap[hr]==r+64) {
477 cur->regmap[hr]=reg;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
480 return;
481 }
482 }
483 }
484 for(hr=0;hr<HOST_REGS;hr++) {
485 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
486 if(cur->regmap[hr]==r) {
487 cur->regmap[hr]=reg;
488 cur->dirty&=~(1<<hr);
489 cur->isconst&=~(1<<hr);
490 return;
491 }
492 }
493 }
494 }
495 }
496 }
497 }
498 for(j=10;j>=0;j--)
499 {
500 for(r=1;r<=MAXREG;r++)
501 {
502 if(hsn[r]==j) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(cur->regmap[hr]==r+64) {
505 cur->regmap[hr]=reg;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 for(hr=0;hr<HOST_REGS;hr++) {
512 if(cur->regmap[hr]==r) {
513 cur->regmap[hr]=reg;
514 cur->dirty&=~(1<<hr);
515 cur->isconst&=~(1<<hr);
516 return;
517 }
518 }
519 }
520 }
521 }
522 DebugMessage(M64MSG_ERROR, "This shouldn't happen (alloc_reg)");exit(1);
523 }
524
alloc_reg64(struct regstat * cur,int i,signed char reg)525 static void alloc_reg64(struct regstat *cur,int i,signed char reg)
526 {
527 int preferred_reg = 5+reg%3;
528 int r,hr;
529
530 // allocate the lower 32 bits
531 alloc_reg(cur,i,reg);
532
533 // Don't allocate unused registers
534 if((cur->uu>>reg)&1) return;
535
536 // see if the upper half is already allocated
537 for(hr=0;hr<HOST_REGS;hr++)
538 {
539 if(cur->regmap[hr]==reg+64) return;
540 }
541
542 // Keep the same mapping if the register was already allocated in a loop
543 preferred_reg = loop_reg(i,reg,preferred_reg);
544
545 // Try to allocate the preferred register
546 if(cur->regmap[preferred_reg]==-1) {
547 cur->regmap[preferred_reg]=reg|64;
548 cur->dirty&=~(1<<preferred_reg);
549 cur->isconst&=~(1<<preferred_reg);
550 return;
551 }
552 r=cur->regmap[preferred_reg];
553 if(r<64&&((cur->u>>r)&1)) {
554 cur->regmap[preferred_reg]=reg|64;
555 cur->dirty&=~(1<<preferred_reg);
556 cur->isconst&=~(1<<preferred_reg);
557 return;
558 }
559 if(r>=64&&((cur->uu>>(r&63))&1)) {
560 cur->regmap[preferred_reg]=reg|64;
561 cur->dirty&=~(1<<preferred_reg);
562 cur->isconst&=~(1<<preferred_reg);
563 return;
564 }
565
566 // Try to allocate EBP, ESI or EDI
567 for(hr=5;hr<8;hr++) {
568 if(cur->regmap[hr]==-1) {
569 cur->regmap[hr]=reg|64;
570 cur->dirty&=~(1<<hr);
571 cur->isconst&=~(1<<hr);
572 return;
573 }
574 }
575
576 // Clear any unneeded registers
577 // We try to keep the mapping consistent, if possible, because it
578 // makes branches easier (especially loops). So we try to allocate
579 // first (see above) before removing old mappings. If this is not
580 // possible then go ahead and clear out the registers that are no
581 // longer needed.
582 for(hr=HOST_REGS-1;hr>=0;hr--)
583 {
584 r=cur->regmap[hr];
585 if(r>=0) {
586 if(r<64) {
587 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
588 }
589 else
590 {
591 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
592 }
593 }
594 }
595 // Try to allocate any available register, but prefer
596 // registers that have not been used recently.
597 if(i>0) {
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
600 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 // Try to allocate any available register
610 for(hr=0;hr<HOST_REGS;hr++) {
611 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
612 cur->regmap[hr]=reg|64;
613 cur->dirty&=~(1<<hr);
614 cur->isconst&=~(1<<hr);
615 return;
616 }
617 }
618
619 // Ok, now we have to evict someone
620 // Pick a register we hopefully won't need soon
621 u_char hsn[MAXREG+1];
622 memset(hsn,10,sizeof(hsn));
623 int j;
624 lsn(hsn,i,&preferred_reg);
625 //DebugMessage(M64MSG_VERBOSE, "eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
626 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
627 if(i>0) {
628 // Don't evict the cycle count at entry points, otherwise the entry
629 // stub will have to write it.
630 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
631 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
632 for(j=10;j>=3;j--)
633 {
634 // Alloc preferred register if available
635 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
636 for(hr=0;hr<HOST_REGS;hr++) {
637 // Evict both parts of a 64-bit register
638 if((cur->regmap[hr]&63)==r) {
639 cur->regmap[hr]=-1;
640 cur->dirty&=~(1<<hr);
641 cur->isconst&=~(1<<hr);
642 }
643 }
644 cur->regmap[preferred_reg]=reg|64;
645 return;
646 }
647 for(r=1;r<=MAXREG;r++)
648 {
649 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
650 for(hr=0;hr<HOST_REGS;hr++) {
651 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
652 if(cur->regmap[hr]==r+64) {
653 cur->regmap[hr]=reg|64;
654 cur->dirty&=~(1<<hr);
655 cur->isconst&=~(1<<hr);
656 return;
657 }
658 }
659 }
660 for(hr=0;hr<HOST_REGS;hr++) {
661 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
662 if(cur->regmap[hr]==r) {
663 cur->regmap[hr]=reg|64;
664 cur->dirty&=~(1<<hr);
665 cur->isconst&=~(1<<hr);
666 return;
667 }
668 }
669 }
670 }
671 }
672 }
673 }
674 for(j=10;j>=0;j--)
675 {
676 for(r=1;r<=MAXREG;r++)
677 {
678 if(hsn[r]==j) {
679 for(hr=0;hr<HOST_REGS;hr++) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg|64;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 for(hr=0;hr<HOST_REGS;hr++) {
688 if(cur->regmap[hr]==r) {
689 cur->regmap[hr]=reg|64;
690 cur->dirty&=~(1<<hr);
691 cur->isconst&=~(1<<hr);
692 return;
693 }
694 }
695 }
696 }
697 }
698 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
699 }
700
701 // Allocate a temporary register. This is done without regard to
702 // dirty status or whether the register we request is on the unneeded list
703 // Note: This will only allocate one register, even if called multiple times
alloc_reg_temp(struct regstat * cur,int i,signed char reg)704 static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
705 {
706 int r,hr;
707 int preferred_reg = -1;
708
709 // see if it's already allocated
710 for(hr=0;hr<HOST_REGS;hr++)
711 {
712 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
713 }
714
715 // Try to allocate any available register, starting with EDI, ESI, EBP...
716 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
717 for(hr=HOST_REGS-1;hr>=0;hr--) {
718 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
719 cur->regmap[hr]=reg;
720 cur->dirty&=~(1<<hr);
721 cur->isconst&=~(1<<hr);
722 return;
723 }
724 }
725
726 // Find an unneeded register
727 for(hr=HOST_REGS-1;hr>=0;hr--)
728 {
729 r=cur->regmap[hr];
730 if(r>=0) {
731 if(r<64) {
732 if((cur->u>>r)&1) {
733 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 else
742 {
743 if((cur->uu>>(r&63))&1) {
744 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
745 cur->regmap[hr]=reg;
746 cur->dirty&=~(1<<hr);
747 cur->isconst&=~(1<<hr);
748 return;
749 }
750 }
751 }
752 }
753 }
754
755 // Ok, now we have to evict someone
756 // Pick a register we hopefully won't need soon
757 // TODO: we might want to follow unconditional jumps here
758 // TODO: get rid of dupe code and make this into a function
759 u_char hsn[MAXREG+1];
760 memset(hsn,10,sizeof(hsn));
761 int j;
762 lsn(hsn,i,&preferred_reg);
763 //DebugMessage(M64MSG_VERBOSE, "hsn: %d %d %d %d %d %d %d",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
764 if(i>0) {
765 // Don't evict the cycle count at entry points, otherwise the entry
766 // stub will have to write it.
767 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
768 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
769 for(j=10;j>=3;j--)
770 {
771 for(r=1;r<=MAXREG;r++)
772 {
773 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
774 for(hr=0;hr<HOST_REGS;hr++) {
775 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
776 if(cur->regmap[hr]==r+64) {
777 cur->regmap[hr]=reg;
778 cur->dirty&=~(1<<hr);
779 cur->isconst&=~(1<<hr);
780 return;
781 }
782 }
783 }
784 for(hr=0;hr<HOST_REGS;hr++) {
785 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
786 if(cur->regmap[hr]==r) {
787 cur->regmap[hr]=reg;
788 cur->dirty&=~(1<<hr);
789 cur->isconst&=~(1<<hr);
790 return;
791 }
792 }
793 }
794 }
795 }
796 }
797 }
798 for(j=10;j>=0;j--)
799 {
800 for(r=1;r<=MAXREG;r++)
801 {
802 if(hsn[r]==j) {
803 for(hr=0;hr<HOST_REGS;hr++) {
804 if(cur->regmap[hr]==r+64) {
805 cur->regmap[hr]=reg;
806 cur->dirty&=~(1<<hr);
807 cur->isconst&=~(1<<hr);
808 return;
809 }
810 }
811 for(hr=0;hr<HOST_REGS;hr++) {
812 if(cur->regmap[hr]==r) {
813 cur->regmap[hr]=reg;
814 cur->dirty&=~(1<<hr);
815 cur->isconst&=~(1<<hr);
816 return;
817 }
818 }
819 }
820 }
821 }
822 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
823 }
824 // Allocate a specific x86 register.
alloc_x86_reg(struct regstat * cur,int i,signed char reg,int hr)825 static void alloc_x86_reg(struct regstat *cur,int i,signed char reg,int hr)
826 {
827 int n;
828 int dirty=0;
829
830 // see if it's already allocated (and dealloc it)
831 for(n=0;n<HOST_REGS;n++)
832 {
833 if(n!=ESP&&cur->regmap[n]==reg) {
834 dirty=(cur->dirty>>n)&1;
835 cur->regmap[n]=-1;
836 }
837 }
838
839 cur->regmap[hr]=reg;
840 cur->dirty&=~(1<<hr);
841 cur->dirty|=dirty<<hr;
842 cur->isconst&=~(1<<hr);
843 }
844
845 // Alloc cycle count into dedicated register
alloc_cc(struct regstat * cur,int i)846 static void alloc_cc(struct regstat *cur,int i)
847 {
848 alloc_x86_reg(cur,i,CCREG,ESI);
849 }
850
851 /* Special alloc */
852
multdiv_alloc_x86(struct regstat * current,int i)853 static void multdiv_alloc_x86(struct regstat *current,int i)
854 {
855 // case 0x18: MULT
856 // case 0x19: MULTU
857 // case 0x1A: DIV
858 // case 0x1B: DIVU
859 // case 0x1C: DMULT
860 // case 0x1D: DMULTU
861 // case 0x1E: DDIV
862 // case 0x1F: DDIVU
863 clear_const(current,rs1[i]);
864 clear_const(current,rs2[i]);
865 if(rs1[i]&&rs2[i])
866 {
867 if((opcode2[i]&4)==0) // 32-bit
868 {
869 current->u&=~(1LL<<HIREG);
870 current->u&=~(1LL<<LOREG);
871 alloc_x86_reg(current,i,HIREG,EDX);
872 alloc_x86_reg(current,i,LOREG,EAX);
873 alloc_reg(current,i,rs1[i]);
874 alloc_reg(current,i,rs2[i]);
875 current->is32|=1LL<<HIREG;
876 current->is32|=1LL<<LOREG;
877 dirty_reg(current,HIREG);
878 dirty_reg(current,LOREG);
879 }
880 else // 64-bit
881 {
882 current->u&=~(1LL<<HIREG);
883 current->u&=~(1LL<<LOREG);
884 current->uu&=~(1LL<<HIREG);
885 current->uu&=~(1LL<<LOREG);
886 alloc_x86_reg(current,i,HIREG|64,EDX);
887 alloc_x86_reg(current,i,HIREG,EAX);
888 alloc_reg64(current,i,rs1[i]);
889 alloc_reg64(current,i,rs2[i]);
890 alloc_all(current,i);
891 current->is32&=~(1LL<<HIREG);
892 current->is32&=~(1LL<<LOREG);
893 dirty_reg(current,HIREG);
894 dirty_reg(current,LOREG);
895 minimum_free_regs[i]=HOST_REGS;
896 }
897 }
898 else
899 {
900 // Multiply by zero is zero.
901 // MIPS does not have a divide by zero exception.
902 // The result is undefined, we return zero.
903 alloc_reg(current,i,HIREG);
904 alloc_reg(current,i,LOREG);
905 current->is32|=1LL<<HIREG;
906 current->is32|=1LL<<LOREG;
907 dirty_reg(current,HIREG);
908 dirty_reg(current,LOREG);
909 }
910 }
911 #define multdiv_alloc multdiv_alloc_x86
912
913 /* Assembler */
914
// Names of the x86 registers, indexed by host register number.
// Used by the emit_* functions when formatting assem_debug() output.
static const char regname[8][4] = {
  "eax",
  "ecx",
  "edx",
  "ebx",
  "esp",
  "ebp",
  "esi",
  "edi"};
924
// Append one byte at the output pointer `out` and advance it.
static void output_byte(u_char byte)
{
  out[0]=byte;
  out++;
}
// Append a ModRM byte: mod in bits 7-6, ext (the reg/opcode field) in
// bits 5-3 and rm in bits 2-0.  Each field is asserted to be in range.
static void output_modrm(u_char mod,u_char rm,u_char ext)
{
  assert(mod<4);
  assert(rm<8);
  assert(ext<8);
  output_byte((u_char)((mod<<6)|(ext<<3)|rm));
}
// Append a SIB byte: scale in bits 7-6, index in bits 5-3 and base in
// bits 2-0.  Each field is asserted to be in range.
static void output_sib(u_char scale,u_char index,u_char base)
{
  assert(scale<4);
  assert(index<8);
  assert(base<8);
  output_byte((u_char)((scale<<6)|(index<<3)|base));
}
// Append a 32-bit word at the output pointer `out` and advance it by 4.
static void output_w32(u_int word)
{
  u_int *slot=(u_int *)out;
  *slot=word;
  out=out+4;
}
950
emit_mov(int rs,int rt)951 static void emit_mov(int rs,int rt)
952 {
953 assem_debug("mov %%%s,%%%s",regname[rs],regname[rt]);
954 output_byte(0x89);
955 output_modrm(3,rt,rs);
956 }
957
emit_add(int rs1,int rs2,int rt)958 static void emit_add(int rs1,int rs2,int rt)
959 {
960 if(rs1==rt) {
961 assem_debug("add %%%s,%%%s",regname[rs2],regname[rs1]);
962 output_byte(0x01);
963 output_modrm(3,rs1,rs2);
964 }else if(rs2==rt) {
965 assem_debug("add %%%s,%%%s",regname[rs1],regname[rs2]);
966 output_byte(0x01);
967 output_modrm(3,rs2,rs1);
968 }else {
969 assem_debug("mov %%%s,%%%s",regname[rs1],regname[rt]);
970 output_byte(0x89);
971 output_modrm(3,rt,rs1);
972 assem_debug("add %%%s,%%%s",regname[rs2],regname[rt]);
973 output_byte(0x01);
974 output_modrm(3,rt,rs2);
975 }
976 }
977
emit_adc(int rs1,int rs2,int rt)978 static void emit_adc(int rs1,int rs2,int rt)
979 {
980 if(rs1==rt) {
981 assem_debug("adc %%%s,%%%s",regname[rs2],regname[rs1]);
982 output_byte(0x11);
983 output_modrm(3,rs1,rs2);
984 }else if(rs2==rt) {
985 assem_debug("adc %%%s,%%%s",regname[rs1],regname[rs2]);
986 output_byte(0x11);
987 output_modrm(3,rs2,rs1);
988 }else {
989 assem_debug("mov %%%s,%%%s",regname[rs1],regname[rt]);
990 output_byte(0x89);
991 output_modrm(3,rt,rs1);
992 assem_debug("adc %%%s,%%%s",regname[rs2],regname[rt]);
993 output_byte(0x11);
994 output_modrm(3,rt,rs2);
995 }
996 }
997
// Same as emit_add(); this wrapper simply forwards to it.
static void emit_adds(int rs1,int rs2,int rt)
{
  emit_add(rs1,rs2,rt);
  return;
}
1002
emit_lea8(int rs1,int rt)1003 static void emit_lea8(int rs1,int rt)
1004 {
1005 assem_debug("lea 0(%%%s,8),%%%s",regname[rs1],regname[rt]);
1006 output_byte(0x8D);
1007 output_modrm(0,4,rt);
1008 output_sib(3,rs1,5);
1009 output_w32(0);
1010 }
emit_leairrx1(int imm,int rs1,int rs2,int rt)1011 static void emit_leairrx1(int imm,int rs1,int rs2,int rt)
1012 {
1013 assem_debug("lea %x(%%%s,%%%s,1),%%%s",imm,regname[rs1],regname[rs2],regname[rt]);
1014 output_byte(0x8D);
1015 if(imm!=0||rs1==EBP) {
1016 output_modrm(2,4,rt);
1017 output_sib(0,rs2,rs1);
1018 output_w32(imm);
1019 }else{
1020 output_modrm(0,4,rt);
1021 output_sib(0,rs2,rs1);
1022 }
1023 }
emit_leairrx4(int imm,int rs1,int rs2,int rt)1024 static void emit_leairrx4(int imm,int rs1,int rs2,int rt)
1025 {
1026 assem_debug("lea %x(%%%s,%%%s,4),%%%s",imm,regname[rs1],regname[rs2],regname[rt]);
1027 output_byte(0x8D);
1028 if(imm!=0||rs1==EBP) {
1029 output_modrm(2,4,rt);
1030 output_sib(2,rs2,rs1);
1031 output_w32(imm);
1032 }else{
1033 output_modrm(0,4,rt);
1034 output_sib(2,rs2,rs1);
1035 }
1036 }
1037
emit_neg(int rs,int rt)1038 static void emit_neg(int rs, int rt)
1039 {
1040 if(rs!=rt) emit_mov(rs,rt);
1041 assem_debug("neg %%%s",regname[rt]);
1042 output_byte(0xF7);
1043 output_modrm(3,rt,3);
1044 }
1045
// Same as emit_neg(); this wrapper simply forwards to it.
static void emit_negs(int rs, int rt)
{
  emit_neg(rs,rt);
  return;
}
1050
emit_sub(int rs1,int rs2,int rt)1051 static void emit_sub(int rs1,int rs2,int rt)
1052 {
1053 if(rs1==rt) {
1054 assem_debug("sub %%%s,%%%s",regname[rs2],regname[rs1]);
1055 output_byte(0x29);
1056 output_modrm(3,rs1,rs2);
1057 } else if(rs2==rt) {
1058 emit_neg(rs2,rs2);
1059 emit_add(rs2,rs1,rs2);
1060 } else {
1061 emit_mov(rs1,rt);
1062 emit_sub(rt,rs2,rt);
1063 }
1064 }
1065
// Same as emit_sub(); this wrapper simply forwards to it.
static void emit_subs(int rs1,int rs2,int rt)
{
  emit_sub(rs1,rs2,rt);
  return;
}
1070
emit_zeroreg(int rt)1071 static void emit_zeroreg(int rt)
1072 {
1073 output_byte(0x31);
1074 output_modrm(3,rt,rt);
1075 assem_debug("xor %%%s,%%%s",regname[rt],regname[rt]);
1076 }
1077
emit_loadreg(int r,int hr)1078 static void emit_loadreg(int r, int hr)
1079 {
1080 if((r&63)==0)
1081 emit_zeroreg(hr);
1082 else {
1083 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
1084 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1085 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1086 if(r==CCREG) addr=(int)&cycle_count;
1087 if(r==CSREG) addr=(int)&g_cp0_regs[CP0_STATUS_REG];
1088 if(r==FSREG) addr=(int)&FCR31;
1089 assem_debug("mov %x+%d,%%%s",addr,r,regname[hr]);
1090 output_byte(0x8B);
1091 output_modrm(0,5,hr);
1092 output_w32(addr);
1093 }
1094 }
emit_storereg(int r,int hr)1095 static void emit_storereg(int r, int hr)
1096 {
1097 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
1098 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1099 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1100 if(r==CCREG) addr=(int)&cycle_count;
1101 if(r==FSREG) addr=(int)&FCR31;
1102 assem_debug("mov %%%s,%x+%d",regname[hr],addr,r);
1103 output_byte(0x89);
1104 output_modrm(0,5,hr);
1105 output_w32(addr);
1106 }
1107
emit_test(int rs,int rt)1108 static void emit_test(int rs, int rt)
1109 {
1110 assem_debug("test %%%s,%%%s",regname[rs],regname[rt]);
1111 output_byte(0x85);
1112 output_modrm(3,rs,rt);
1113 }
1114
emit_testimm(int rs,int imm)1115 static void emit_testimm(int rs,int imm)
1116 {
1117 assem_debug("test $0x%x,%%%s",imm,regname[rs]);
1118 if(imm<128&&imm>=-128&&rs<4) {
1119 output_byte(0xF6);
1120 output_modrm(3,rs,0);
1121 output_byte(imm);
1122 }
1123 else
1124 {
1125 output_byte(0xF7);
1126 output_modrm(3,rs,0);
1127 output_w32(imm);
1128 }
1129 }
1130
emit_not(int rs,int rt)1131 static void emit_not(int rs,int rt)
1132 {
1133 if(rs!=rt) emit_mov(rs,rt);
1134 assem_debug("not %%%s",regname[rt]);
1135 output_byte(0xF7);
1136 output_modrm(3,rt,2);
1137 }
1138
// Emit code computing rt = rs1 & rs2.
// If rt already holds one operand the other is and-ed into it directly;
// otherwise rs1 is copied into rt first.
static void emit_and(u_int rs1, u_int rs2, u_int rt)
{
  u_int other;

  assert(rs1<8);
  assert(rs2<8);
  assert(rt<8);

  if (rs1 == rt) {
    other = rs2;
  } else if (rs2 == rt) {
    other = rs1;
  } else {
    emit_mov(rs1, rt);
    other = rs2;
  }
  assem_debug("and %%%s,%%%s",regname[other],regname[rt]);
  output_byte(0x21);
  output_modrm(3, rt, other);
}
1160
// Emit code computing rt = rs1 | rs2.
// If rt already holds one operand the other is or-ed into it directly;
// otherwise rs1 is copied into rt first.
static void emit_or(u_int rs1, u_int rs2, u_int rt)
{
  u_int other;

  assert(rs1<8);
  assert(rs2<8);
  assert(rt<8);

  if (rs1 == rt) {
    other = rs2;
  } else if (rs2 == rt) {
    other = rs1;
  } else {
    emit_mov(rs1, rt);
    other = rs2;
  }
  assem_debug("or %%%s,%%%s",regname[other],regname[rt]);
  output_byte(0x09);
  output_modrm(3, rt, other);
}
// Flag-setting OR: on this backend it just forwards to emit_or().
static void emit_or_and_set_flags(int rs1, int rs2, int rt)
{
  emit_or(rs1, rs2, rt);
}
1186
// Emit code computing rt = rs1 ^ rs2.
// If rt already holds one operand the other is xor-ed into it directly;
// otherwise rs1 is copied into rt first.
static void emit_xor(u_int rs1, u_int rs2, u_int rt)
{
  u_int other;

  assert(rs1<8);
  assert(rs2<8);
  assert(rt<8);

  if (rs1 == rt) {
    other = rs2;
  } else if (rs2 == rt) {
    other = rs1;
  } else {
    emit_mov(rs1, rt);
    other = rs2;
  }
  assem_debug("xor %%%s,%%%s",regname[other],regname[rt]);
  output_byte(0x31);
  output_modrm(3, rt, other);
}
1208
// Emit "mov $imm,rt" using the one-byte opcode form B8+rt followed by the
// 32-bit immediate.
// rt must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_movimm(int imm, u_int rt)
{
  assert(rt<8);
  assem_debug("mov $%d,%%%s",imm,regname[rt]);
  output_byte(0xB8+rt);
  output_w32(imm);
}
1216
emit_addimm(int rs,int imm,int rt)1217 static void emit_addimm(int rs,int imm,int rt)
1218 {
1219 if(rs==rt) {
1220 if(imm!=0) {
1221 assem_debug("add $%d,%%%s",imm,regname[rt]);
1222 if(imm<128&&imm>=-128) {
1223 output_byte(0x83);
1224 output_modrm(3,rt,0);
1225 output_byte(imm);
1226 }
1227 else
1228 {
1229 output_byte(0x81);
1230 output_modrm(3,rt,0);
1231 output_w32(imm);
1232 }
1233 }
1234 }
1235 else {
1236 if(imm!=0) {
1237 assem_debug("lea %d(%%%s),%%%s",imm,regname[rs],regname[rt]);
1238 output_byte(0x8D);
1239 if(imm<128&&imm>=-128) {
1240 output_modrm(1,rs,rt);
1241 output_byte(imm);
1242 }else{
1243 output_modrm(2,rs,rt);
1244 output_w32(imm);
1245 }
1246 }else{
1247 emit_mov(rs,rt);
1248 }
1249 }
1250 }
1251
emit_addimm_and_set_flags(int imm,int rt)1252 static void emit_addimm_and_set_flags(int imm,int rt)
1253 {
1254 assem_debug("add $%d,%%%s",imm,regname[rt]);
1255 if(imm<128&&imm>=-128) {
1256 output_byte(0x83);
1257 output_modrm(3,rt,0);
1258 output_byte(imm);
1259 }
1260 else
1261 {
1262 output_byte(0x81);
1263 output_modrm(3,rt,0);
1264 output_w32(imm);
1265 }
1266 }
emit_addimm_no_flags(int imm,int rt)1267 static void emit_addimm_no_flags(int imm,int rt)
1268 {
1269 if(imm!=0) {
1270 assem_debug("lea %d(%%%s),%%%s",imm,regname[rt],regname[rt]);
1271 output_byte(0x8D);
1272 if(imm<128&&imm>=-128) {
1273 output_modrm(1,rt,rt);
1274 output_byte(imm);
1275 }else{
1276 output_modrm(2,rt,rt);
1277 output_w32(imm);
1278 }
1279 }
1280 }
1281
// Emit "adc $imm,rt" (add with carry), using the imm8 encoding when imm
// fits in a signed byte.
// rt must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_adcimm(int imm, u_int rt)
{
  const int fits8 = (imm >= -128 && imm < 128);

  assert(rt<8);
  assem_debug("adc $%d,%%%s",imm,regname[rt]);
  output_byte(fits8 ? 0x83 : 0x81);
  output_modrm(3, rt, 2);
  if (fits8) {
    output_byte(imm);
  } else {
    output_w32(imm);
  }
}
// Emit "sbb $imm,rt" (subtract with borrow), using the imm8 encoding when
// imm fits in a signed byte.
// rt must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_sbbimm(int imm, u_int rt)
{
  const int fits8 = (imm >= -128 && imm < 128);

  assert(rt<8);
  assem_debug("sbb $%d,%%%s",imm,regname[rt]);
  output_byte(fits8 ? 0x83 : 0x81);
  output_modrm(3, rt, 3);
  if (fits8) {
    output_byte(imm);
  } else {
    output_w32(imm);
  }
}
1314
emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)1315 static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1316 {
1317 if(rsh==rth&&rsl==rtl) {
1318 assem_debug("add $%d,%%%s",imm,regname[rtl]);
1319 if(imm<128&&imm>=-128) {
1320 output_byte(0x83);
1321 output_modrm(3,rtl,0);
1322 output_byte(imm);
1323 }
1324 else
1325 {
1326 output_byte(0x81);
1327 output_modrm(3,rtl,0);
1328 output_w32(imm);
1329 }
1330 assem_debug("adc $%d,%%%s",imm>>31,regname[rth]);
1331 output_byte(0x83);
1332 output_modrm(3,rth,2);
1333 output_byte(imm>>31);
1334 }
1335 else {
1336 emit_mov(rsh,rth);
1337 emit_mov(rsl,rtl);
1338 emit_addimm64_32(rth,rtl,imm,rth,rtl);
1339 }
1340 }
1341
emit_sub64_32(int rs1l,int rs1h,int rs2l,int rs2h,int rtl,int rth)1342 static void emit_sub64_32(int rs1l,int rs1h,int rs2l,int rs2h,int rtl,int rth)
1343 {
1344 if((rs1l==rtl)&&(rs1h==rth)) {
1345 assem_debug("sub %%%s,%%%s",regname[rs2l],regname[rs1l]);
1346 output_byte(0x29);
1347 output_modrm(3,rs1l,rs2l);
1348 assem_debug("sbb %%%s,%%%s",regname[rs2h],regname[rs1h]);
1349 output_byte(0x19);
1350 output_modrm(3,rs1h,rs2h);
1351 } else if((rs2l==rtl)&&(rs2h==rth)) {
1352 emit_neg(rs2l,rs2l);
1353 emit_adcimm(-1,rs2h);
1354 assem_debug("add %%%s,%%%s",regname[rs1l],regname[rs2l]);
1355 output_byte(0x01);
1356 output_modrm(3,rs2l,rs1l);
1357 emit_not(rs2h,rs2h);
1358 assem_debug("adc %%%s,%%%s",regname[rs1h],regname[rs2h]);
1359 output_byte(0x11);
1360 output_modrm(3,rs2h,rs1h);
1361 } else {
1362 emit_mov(rs1l,rtl);
1363 assem_debug("sub %%%s,%%%s",regname[rs2l],regname[rtl]);
1364 output_byte(0x29);
1365 output_modrm(3,rtl,rs2l);
1366 emit_mov(rs1h,rth);
1367 assem_debug("sbb %%%s,%%%s",regname[rs2h],regname[rth]);
1368 output_byte(0x19);
1369 output_modrm(3,rth,rs2h);
1370 }
1371 }
1372
emit_sbb(int rs1,int rs2)1373 static void emit_sbb(int rs1,int rs2)
1374 {
1375 assem_debug("sbb %%%s,%%%s",regname[rs1],regname[rs2]);
1376 output_byte(0x19);
1377 output_modrm(3,rs2,rs1);
1378 }
1379
emit_andimm(int rs,int imm,int rt)1380 static void emit_andimm(int rs,int imm,int rt)
1381 {
1382 if(imm==0) {
1383 emit_zeroreg(rt);
1384 }
1385 else if(rs==rt) {
1386 assem_debug("and $%d,%%%s",imm,regname[rt]);
1387 if(imm<128&&imm>=-128) {
1388 output_byte(0x83);
1389 output_modrm(3,rt,4);
1390 output_byte(imm);
1391 }
1392 else
1393 {
1394 output_byte(0x81);
1395 output_modrm(3,rt,4);
1396 output_w32(imm);
1397 }
1398 }
1399 else {
1400 emit_mov(rs,rt);
1401 emit_andimm(rt,imm,rt);
1402 }
1403 }
1404
emit_orimm(int rs,int imm,int rt)1405 static void emit_orimm(int rs,int imm,int rt)
1406 {
1407 if(rs==rt) {
1408 if(imm!=0) {
1409 assem_debug("or $%d,%%%s",imm,regname[rt]);
1410 if(imm<128&&imm>=-128) {
1411 output_byte(0x83);
1412 output_modrm(3,rt,1);
1413 output_byte(imm);
1414 }
1415 else
1416 {
1417 output_byte(0x81);
1418 output_modrm(3,rt,1);
1419 output_w32(imm);
1420 }
1421 }
1422 }
1423 else {
1424 emit_mov(rs,rt);
1425 emit_orimm(rt,imm,rt);
1426 }
1427 }
1428
emit_xorimm(int rs,int imm,int rt)1429 static void emit_xorimm(int rs,int imm,int rt)
1430 {
1431 if(rs==rt) {
1432 if(imm!=0) {
1433 assem_debug("xor $%d,%%%s",imm,regname[rt]);
1434 if(imm<128&&imm>=-128) {
1435 output_byte(0x83);
1436 output_modrm(3,rt,6);
1437 output_byte(imm);
1438 }
1439 else
1440 {
1441 output_byte(0x81);
1442 output_modrm(3,rt,6);
1443 output_w32(imm);
1444 }
1445 }
1446 }
1447 else {
1448 emit_mov(rs,rt);
1449 emit_xorimm(rt,imm,rt);
1450 }
1451 }
1452
// Emit rt = rs << imm (logical left shift by a constant, imm > 0).
// A shift by one uses the short D1 form; larger counts use C1 plus a count
// byte.
static void emit_shlimm(int rs, u_int imm, int rt)
{
  if (rs != rt) {
    emit_mov(rs, rt);
  }
  assem_debug("shl %%%s,%d",regname[rt],imm);
  assert(imm>0);
  output_byte(imm == 1 ? 0xD1 : 0xC1);
  output_modrm(3, rt, 4);
  if (imm > 1) {
    output_byte(imm);
  }
}
1468
// Emit rt = rs >> imm (logical right shift by a constant, imm > 0).
// A shift by one uses the short D1 form; larger counts use C1 plus a count
// byte.
static void emit_shrimm(int rs, u_int imm, int rt)
{
  if (rs != rt) {
    emit_mov(rs, rt);
  }
  assem_debug("shr %%%s,%d",regname[rt],imm);
  assert(imm>0);
  output_byte(imm == 1 ? 0xD1 : 0xC1);
  output_modrm(3, rt, 5);
  if (imm > 1) {
    output_byte(imm);
  }
}
1484
// Emit rt = rs >> imm (arithmetic right shift by a constant, imm > 0).
// A shift by one uses the short D1 form; larger counts use C1 plus a count
// byte.
static void emit_sarimm(int rs, u_int imm, int rt)
{
  if (rs != rt) {
    emit_mov(rs, rt);
  }
  assem_debug("sar %%%s,%d",regname[rt],imm);
  assert(imm>0);
  output_byte(imm == 1 ? 0xD1 : 0xC1);
  output_modrm(3, rt, 7);
  if (imm > 1) {
    output_byte(imm);
  }
}
1500
// Emit rt = rs rotated right by the constant imm (imm > 0).
// A rotate by one uses the short D1 form; larger counts use C1 plus a count
// byte.
static void emit_rorimm(int rs, u_int imm, int rt)
{
  if (rs != rt) {
    emit_mov(rs, rt);
  }
  assem_debug("ror %%%s,%d",regname[rt],imm);
  assert(imm>0);
  output_byte(imm == 1 ? 0xD1 : 0xC1);
  output_modrm(3, rt, 1);
  if (imm > 1) {
    output_byte(imm);
  }
}
1516
// Emit a double-precision left shift "shld $imm,rs2,rt" (0F A4), copying rs
// into rt first when they differ. imm must be > 0.
static void emit_shldimm(int rs, int rs2, u_int imm, int rt)
{
  if (rs != rt) {
    emit_mov(rs, rt);
  }
  assem_debug("shld %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
  assert(imm>0);
  output_byte(0x0F);
  output_byte(0xA4);
  output_modrm(3, rt, rs2);
  output_byte(imm);
}
1532
// Emit a double-precision right shift "shrd $imm,rs2,rt" (0F AC), copying
// rs into rt first when they differ. imm must be > 0.
static void emit_shrdimm(int rs, int rs2, u_int imm, int rt)
{
  if (rs != rt) {
    emit_mov(rs, rt);
  }
  assem_debug("shrd %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
  assert(imm>0);
  output_byte(0x0F);
  output_byte(0xAC);
  output_modrm(3, rt, rs2);
  output_byte(imm);
}
1548
emit_shlcl(int r)1549 static void emit_shlcl(int r)
1550 {
1551 assem_debug("shl %%%s,%%cl",regname[r]);
1552 output_byte(0xD3);
1553 output_modrm(3,r,4);
1554 }
emit_shrcl(int r)1555 static void emit_shrcl(int r)
1556 {
1557 assem_debug("shr %%%s,%%cl",regname[r]);
1558 output_byte(0xD3);
1559 output_modrm(3,r,5);
1560 }
emit_sarcl(int r)1561 static void emit_sarcl(int r)
1562 {
1563 assem_debug("sar %%%s,%%cl",regname[r]);
1564 output_byte(0xD3);
1565 output_modrm(3,r,7);
1566 }
1567
emit_shldcl(int r1,int r2)1568 static void emit_shldcl(int r1,int r2)
1569 {
1570 assem_debug("shld %%%s,%%%s,%%cl",regname[r1],regname[r2]);
1571 output_byte(0x0F);
1572 output_byte(0xA5);
1573 output_modrm(3,r1,r2);
1574 }
emit_shrdcl(int r1,int r2)1575 static void emit_shrdcl(int r1,int r2)
1576 {
1577 assem_debug("shrd %%%s,%%%s,%%cl",regname[r1],regname[r2]);
1578 output_byte(0x0F);
1579 output_byte(0xAD);
1580 output_modrm(3,r1,r2);
1581 }
1582
emit_cmpimm(int rs,int imm)1583 static void emit_cmpimm(int rs,int imm)
1584 {
1585 assem_debug("cmp $%d,%%%s",imm,regname[rs]);
1586 if(imm<128&&imm>=-128) {
1587 output_byte(0x83);
1588 output_modrm(3,rs,7);
1589 output_byte(imm);
1590 }
1591 else
1592 {
1593 output_byte(0x81);
1594 output_modrm(3,rs,7);
1595 output_w32(imm);
1596 }
1597 }
1598
emit_cmovne(const u_int * addr,int rt)1599 static void emit_cmovne(const u_int *addr,int rt)
1600 {
1601 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1602 if(addr==&const_zero) assem_debug(" [zero]");
1603 else if(addr==&const_one) assem_debug(" [one]");
1604 else assem_debug("");
1605 output_byte(0x0F);
1606 output_byte(0x45);
1607 output_modrm(0,5,rt);
1608 output_w32((int)addr);
1609 }
emit_cmovl(const u_int * addr,int rt)1610 static void emit_cmovl(const u_int *addr,int rt)
1611 {
1612 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1613 if(addr==&const_zero) assem_debug(" [zero]");
1614 else if(addr==&const_one) assem_debug(" [one]");
1615 else assem_debug("");
1616 output_byte(0x0F);
1617 output_byte(0x4C);
1618 output_modrm(0,5,rt);
1619 output_w32((int)addr);
1620 }
emit_cmovs(const u_int * addr,int rt)1621 static void emit_cmovs(const u_int *addr,int rt)
1622 {
1623 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1624 if(addr==&const_zero) assem_debug(" [zero]");
1625 else if(addr==&const_one) assem_debug(" [one]");
1626 else assem_debug("");
1627 output_byte(0x0F);
1628 output_byte(0x48);
1629 output_modrm(0,5,rt);
1630 output_w32((int)addr);
1631 }
emit_cmovne_reg(int rs,int rt)1632 static void emit_cmovne_reg(int rs,int rt)
1633 {
1634 assem_debug("cmovne %%%s,%%%s",regname[rs],regname[rt]);
1635 output_byte(0x0F);
1636 output_byte(0x45);
1637 output_modrm(3,rs,rt);
1638 }
emit_cmovl_reg(int rs,int rt)1639 static void emit_cmovl_reg(int rs,int rt)
1640 {
1641 assem_debug("cmovl %%%s,%%%s",regname[rs],regname[rt]);
1642 output_byte(0x0F);
1643 output_byte(0x4C);
1644 output_modrm(3,rs,rt);
1645 }
emit_cmovs_reg(int rs,int rt)1646 static void emit_cmovs_reg(int rs,int rt)
1647 {
1648 assem_debug("cmovs %%%s,%%%s",regname[rs],regname[rt]);
1649 output_byte(0x0F);
1650 output_byte(0x48);
1651 output_modrm(3,rs,rt);
1652 }
emit_cmovnc_reg(int rs,int rt)1653 static void emit_cmovnc_reg(int rs,int rt)
1654 {
1655 assem_debug("cmovae %%%s,%%%s",regname[rs],regname[rt]);
1656 output_byte(0x0F);
1657 output_byte(0x43);
1658 output_modrm(3,rs,rt);
1659 }
emit_cmova_reg(int rs,int rt)1660 static void emit_cmova_reg(int rs,int rt)
1661 {
1662 assem_debug("cmova %%%s,%%%s",regname[rs],regname[rt]);
1663 output_byte(0x0F);
1664 output_byte(0x47);
1665 output_modrm(3,rs,rt);
1666 }
emit_cmovp_reg(int rs,int rt)1667 static void emit_cmovp_reg(int rs,int rt)
1668 {
1669 assem_debug("cmovp %%%s,%%%s",regname[rs],regname[rt]);
1670 output_byte(0x0F);
1671 output_byte(0x4A);
1672 output_modrm(3,rs,rt);
1673 }
emit_cmovnp_reg(int rs,int rt)1674 static void emit_cmovnp_reg(int rs,int rt)
1675 {
1676 assem_debug("cmovnp %%%s,%%%s",regname[rs],regname[rt]);
1677 output_byte(0x0F);
1678 output_byte(0x4B);
1679 output_modrm(3,rs,rt);
1680 }
emit_setl(int rt)1681 static void emit_setl(int rt)
1682 {
1683 assem_debug("setl %%%s",regname[rt]);
1684 output_byte(0x0F);
1685 output_byte(0x9C);
1686 output_modrm(3,rt,2);
1687 }
emit_movzbl_reg(int rs,int rt)1688 static void emit_movzbl_reg(int rs, int rt)
1689 {
1690 assem_debug("movzbl %%%s,%%%s",regname[rs]+1,regname[rt]);
1691 output_byte(0x0F);
1692 output_byte(0xB6);
1693 output_modrm(3,rs,rt);
1694 }
1695
emit_slti32(int rs,int imm,int rt)1696 static void emit_slti32(int rs,int imm,int rt)
1697 {
1698 if(rs!=rt) emit_zeroreg(rt);
1699 emit_cmpimm(rs,imm);
1700 if(rt<4) {
1701 emit_setl(rt);
1702 if(rs==rt) emit_movzbl_reg(rt,rt);
1703 }
1704 else
1705 {
1706 if(rs==rt) emit_movimm(0,rt);
1707 emit_cmovl(&const_one,rt);
1708 }
1709 }
// Emit rt = (rs < imm) as an unsigned 32-bit comparison, result 0 or 1.
// The compare leaves the borrow in the carry flag, which "adc $0" then adds
// onto a zeroed rt. rt is zeroed before the compare with xor when it is a
// separate register, or after it with mov (flags untouched) when it
// aliases rs.
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);

  if (!in_place) {
    emit_zeroreg(rt);
  }
  emit_cmpimm(rs, imm);
  if (in_place) {
    emit_movimm(0, rt);
  }
  emit_adcimm(0, rt);
}
emit_slti64_32(int rsh,int rsl,int imm,int rt)1717 static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1718 {
1719 assert(rsh!=rt);
1720 emit_slti32(rsl,imm,rt);
1721 if(imm>=0)
1722 {
1723 emit_test(rsh,rsh);
1724 emit_cmovne(&const_zero,rt);
1725 emit_cmovs(&const_one,rt);
1726 }
1727 else
1728 {
1729 emit_cmpimm(rsh,-1);
1730 emit_cmovne(&const_zero,rt);
1731 emit_cmovl(&const_one,rt);
1732 }
1733 }
emit_sltiu64_32(int rsh,int rsl,int imm,int rt)1734 static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1735 {
1736 assert(rsh!=rt);
1737 emit_sltiu32(rsl,imm,rt);
1738 if(imm>=0)
1739 {
1740 emit_test(rsh,rsh);
1741 emit_cmovne(&const_zero,rt);
1742 }
1743 else
1744 {
1745 emit_cmpimm(rsh,-1);
1746 emit_cmovne(&const_one,rt);
1747 }
1748 }
1749
emit_cmp(int rs,int rt)1750 static void emit_cmp(int rs,int rt)
1751 {
1752 assem_debug("cmp %%%s,%%%s",regname[rt],regname[rs]);
1753 output_byte(0x39);
1754 output_modrm(3,rs,rt);
1755 }
emit_set_gz32(int rs,int rt)1756 static void emit_set_gz32(int rs, int rt)
1757 {
1758 //assem_debug("set_gz32");
1759 emit_cmpimm(rs,1);
1760 emit_movimm(1,rt);
1761 emit_cmovl(&const_zero,rt);
1762 }
// Emit rt = (rs != 0), result 0 or 1.
// "cmp $1,rs" borrows (sets carry) only when rs is zero; rt is preloaded
// with 1 and the borrow is subtracted with "sbb $0".
static void emit_set_nz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_sbbimm(0, rt);
}
emit_set_gz64_32(int rsh,int rsl,int rt)1770 static void emit_set_gz64_32(int rsh, int rsl, int rt)
1771 {
1772 //assem_debug("set_gz64");
1773 emit_set_gz32(rsl,rt);
1774 emit_test(rsh,rsh);
1775 emit_cmovne(&const_one,rt);
1776 emit_cmovs(&const_zero,rt);
1777 }
emit_set_nz64_32(int rsh,int rsl,int rt)1778 static void emit_set_nz64_32(int rsh, int rsl, int rt)
1779 {
1780 //assem_debug("set_nz64");
1781 emit_or_and_set_flags(rsh,rsl,rt);
1782 emit_cmovne(&const_one,rt);
1783 }
emit_set_if_less32(int rs1,int rs2,int rt)1784 static void emit_set_if_less32(int rs1, int rs2, int rt)
1785 {
1786 //assem_debug("set if less (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
1787 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1788 emit_cmp(rs1,rs2);
1789 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1790 emit_cmovl(&const_one,rt);
1791 }
// Emit rt = carry of (rs1 - rs2), i.e. the unsigned rs1 < rs2, as 0 or 1.
// rt is cleared with xor before the compare when it is independent of the
// operands, or with mov (flags untouched) after it when it aliases one;
// "adc $0" then deposits the carry.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliases_operand = (rs1 == rt) || (rs2 == rt);

  if (!aliases_operand) {
    emit_zeroreg(rt);
  }
  emit_cmp(rs1, rs2);
  if (aliases_operand) {
    emit_movimm(0, rt);
  }
  emit_adcimm(0, rt);
}
emit_set_if_less64_32(int u1,int l1,int u2,int l2,int rt)1800 static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1801 {
1802 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1803 assert(u1!=rt);
1804 assert(u2!=rt);
1805 emit_cmp(l1,l2);
1806 emit_mov(u1,rt);
1807 emit_sbb(u2,rt);
1808 emit_movimm(0,rt);
1809 emit_cmovl(&const_one,rt);
1810 }
// Emit rt = borrow of (u1:l1) - (u2:l2), i.e. the unsigned 64-bit
// "less than", as 0 or 1. rt is used as scratch for the high-word
// subtraction and must not alias either high word.
static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  assert(u1!=rt);
  assert(u2!=rt);

  // Low-word compare produces the borrow for the high-word sbb.
  emit_cmp(l1, l2);
  emit_mov(u1, rt);
  emit_sbb(u2, rt);

  // mov keeps the carry from the sbb; adc deposits it into the zeroed rt.
  emit_movimm(0, rt);
  emit_adcimm(0, rt);
}
1822
emit_call(int a)1823 static void emit_call(int a)
1824 {
1825 assem_debug("call %x (%x+%x)",a,(int)out+5,a-(int)out-5);
1826 output_byte(0xe8);
1827 output_w32(a-(int)out-4);
1828 }
emit_jmp(int a)1829 static void emit_jmp(int a)
1830 {
1831 assem_debug("jmp %x (%x+%x)",a,(int)out+5,a-(int)out-5);
1832 output_byte(0xe9);
1833 output_w32(a-(int)out-4);
1834 }
emit_jne(int a)1835 static void emit_jne(int a)
1836 {
1837 assem_debug("jne %x",a);
1838 output_byte(0x0f);
1839 output_byte(0x85);
1840 output_w32(a-(int)out-4);
1841 }
emit_jeq(int a)1842 static void emit_jeq(int a)
1843 {
1844 assem_debug("jeq %x",a);
1845 output_byte(0x0f);
1846 output_byte(0x84);
1847 output_w32(a-(int)out-4);
1848 }
emit_js(int a)1849 static void emit_js(int a)
1850 {
1851 assem_debug("js %x",a);
1852 output_byte(0x0f);
1853 output_byte(0x88);
1854 output_w32(a-(int)out-4);
1855 }
emit_jns(int a)1856 static void emit_jns(int a)
1857 {
1858 assem_debug("jns %x",a);
1859 output_byte(0x0f);
1860 output_byte(0x89);
1861 output_w32(a-(int)out-4);
1862 }
emit_jl(int a)1863 static void emit_jl(int a)
1864 {
1865 assem_debug("jl %x",a);
1866 output_byte(0x0f);
1867 output_byte(0x8c);
1868 output_w32(a-(int)out-4);
1869 }
emit_jge(int a)1870 static void emit_jge(int a)
1871 {
1872 assem_debug("jge %x",a);
1873 output_byte(0x0f);
1874 output_byte(0x8d);
1875 output_w32(a-(int)out-4);
1876 }
emit_jno(int a)1877 static void emit_jno(int a)
1878 {
1879 assem_debug("jno %x",a);
1880 output_byte(0x0f);
1881 output_byte(0x81);
1882 output_w32(a-(int)out-4);
1883 }
emit_jc(int a)1884 static void emit_jc(int a)
1885 {
1886 assem_debug("jc %x",a);
1887 output_byte(0x0f);
1888 output_byte(0x82);
1889 output_w32(a-(int)out-4);
1890 }
emit_jae(int a)1891 static void emit_jae(int a)
1892 {
1893 assem_debug("jae %x",a);
1894 output_byte(0x0f);
1895 output_byte(0x83);
1896 output_w32(a-(int)out-4);
1897 }
emit_jb(int a)1898 static void emit_jb(int a)
1899 {
1900 assem_debug("jb %x",a);
1901 output_byte(0x0f);
1902 output_byte(0x82);
1903 output_w32(a-(int)out-4);
1904 }
1905
// Emit "push $imm" with a 32-bit immediate (opcode 0x68).
static void emit_pushimm(int imm)
{
  assem_debug("push $%x",imm);
  output_byte(0x68);
  output_w32(imm);
}
// Emit a push of the 32-bit word stored at absolute address addr (FF /6).
static void emit_pushmem(int addr)
{
  assem_debug("push *%x",addr);
  output_byte(0xFF);
  output_modrm(0, 5, 6);
  output_w32(addr);
}
// Emit "pusha" (opcode 0x60): push all general-purpose registers.
static void emit_pusha()
{
  assem_debug("pusha");
  output_byte(0x60);
}
// Emit "popa" (opcode 0x61): pop all general-purpose registers.
static void emit_popa()
{
  assem_debug("popa");
  output_byte(0x61);
}
// Emit "push r" using the one-byte opcode 0x50+r.
// r must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_pushreg(u_int r)
{
  assert(r<8);
  assem_debug("push %%%s",regname[r]);
  output_byte(0x50+r);
}
// Emit "pop r" using the one-byte opcode 0x58+r.
// r must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_popreg(u_int r)
{
  assert(r<8);
  assem_debug("pop %%%s",regname[r]);
  output_byte(0x58+r);
}
// Emit an indirect "call *r" through a register (FF /2).
// r must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_callreg(u_int r)
{
  assert(r<8);
  assem_debug("call *%%%s",regname[r]);
  output_byte(0xFF);
  output_modrm(3,r,2);
}
// Emit an indirect "jmp *r" through a register (FF /4).
// r must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_jmpreg(u_int r)
{
  assert(r<8);
  assem_debug("jmp *%%%s",regname[r]);
  output_byte(0xFF);
  output_modrm(3,r,4);
}
// Emit an indirect "jmp *addr(r)" through the word at r + addr (FF /4 with
// a 32-bit displacement).
// r must be a valid host register index (< 8); it is validated before
// regname[] is indexed for the debug trace.
static void emit_jmpmem_indexed(u_int addr,u_int r)
{
  assert(r<8);
  assem_debug("jmp *%x(%%%s)",addr,regname[r]);
  output_byte(0xFF);
  output_modrm(2,r,4);
  output_w32(addr);
}
1963
emit_readword(int addr,int rt)1964 static void emit_readword(int addr, int rt)
1965 {
1966 assem_debug("mov %x,%%%s",addr,regname[rt]);
1967 output_byte(0x8B);
1968 output_modrm(0,5,rt);
1969 output_w32(addr);
1970 }
emit_readword_indexed(int addr,int rs,int rt)1971 static void emit_readword_indexed(int addr, int rs, int rt)
1972 {
1973 assem_debug("mov %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1974 output_byte(0x8B);
1975 if(addr<128&&addr>=-128) {
1976 output_modrm(1,rs,rt);
1977 if(rs==ESP) output_sib(0,4,4);
1978 output_byte(addr);
1979 }
1980 else
1981 {
1982 output_modrm(2,rs,rt);
1983 if(rs==ESP) output_sib(0,4,4);
1984 output_w32(addr);
1985 }
1986 }
emit_readword_tlb(int addr,int map,int rt)1987 static void emit_readword_tlb(int addr, int map, int rt)
1988 {
1989 if(map<0) emit_readword(addr+(int)g_dev.ri.rdram.dram-0x80000000, rt);
1990 else
1991 {
1992 assem_debug("mov (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1993 output_byte(0x8B);
1994 output_modrm(0,4,rt);
1995 output_sib(2,map,5);
1996 output_w32(addr);
1997 }
1998 }
emit_readword_indexed_tlb(int addr,int rs,int map,int rt)1999 static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
2000 {
2001 if(map<0) emit_readword_indexed(addr+(int)g_dev.ri.rdram.dram-0x80000000, rs, rt);
2002 else {
2003 assem_debug("mov %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
2004 assert(rs!=ESP);
2005 output_byte(0x8B);
2006 if(addr==0&&rs!=EBP) {
2007 output_modrm(0,4,rt);
2008 output_sib(2,map,rs);
2009 }
2010 else if(addr<128&&addr>=-128) {
2011 output_modrm(1,4,rt);
2012 output_sib(2,map,rs);
2013 output_byte(addr);
2014 }
2015 else
2016 {
2017 output_modrm(2,4,rt);
2018 output_sib(2,map,rs);
2019 output_w32(addr);
2020 }
2021 }
2022 }
emit_movmem_indexedx4(int addr,int rs,int rt)2023 static void emit_movmem_indexedx4(int addr, int rs, int rt)
2024 {
2025 assem_debug("mov (%x,%%%s,4),%%%s",addr,regname[rs],regname[rt]);
2026 output_byte(0x8B);
2027 output_modrm(0,4,rt);
2028 output_sib(2,rs,5);
2029 output_w32(addr);
2030 }
emit_readdword_tlb(int addr,int map,int rh,int rl)2031 static void emit_readdword_tlb(int addr, int map, int rh, int rl)
2032 {
2033 if(map<0) {
2034 if(rh>=0) emit_readword(addr+(int)g_dev.ri.rdram.dram-0x80000000, rh);
2035 emit_readword(addr+(int)g_dev.ri.rdram.dram-0x7FFFFFFC, rl);
2036 }
2037 else {
2038 if(rh>=0) emit_movmem_indexedx4(addr, map, rh);
2039 emit_movmem_indexedx4(addr+4, map, rl);
2040 }
2041 }
// Emit a 64-bit load from addr + rs: the word at addr goes to rh (skipped
// when rh is negative) and the word at addr+4 goes to rl.
// rh is written first, so it must not be the address register rs.
// NOTE(review): rh presumably must not equal map either, since map is
// still needed for the second load - confirm against callers.
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  assert(rh!=rs);
  if (rh >= 0) {
    emit_readword_indexed_tlb(addr, rs, map, rh);
  }
  emit_readword_indexed_tlb(addr+4, rs, map, rl);
}
emit_movsbl(int addr,int rt)2048 static void emit_movsbl(int addr, int rt)
2049 {
2050 assem_debug("movsbl %x,%%%s",addr,regname[rt]);
2051 output_byte(0x0F);
2052 output_byte(0xBE);
2053 output_modrm(0,5,rt);
2054 output_w32(addr);
2055 }
emit_movsbl_indexed(int addr,int rs,int rt)2056 static void emit_movsbl_indexed(int addr, int rs, int rt)
2057 {
2058 assem_debug("movsbl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
2059 output_byte(0x0F);
2060 output_byte(0xBE);
2061 output_modrm(2,rs,rt);
2062 output_w32(addr);
2063 }
emit_movsbl_tlb(int addr,int map,int rt)2064 static void emit_movsbl_tlb(int addr, int map, int rt)
2065 {
2066 if(map<0) emit_movsbl(addr+(int)g_dev.ri.rdram.dram-0x80000000, rt);
2067 else
2068 {
2069 assem_debug("movsbl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
2070 output_byte(0x0F);
2071 output_byte(0xBE);
2072 output_modrm(0,4,rt);
2073 output_sib(2,map,5);
2074 output_w32(addr);
2075 }
2076 }
emit_movsbl_indexed_tlb(int addr,int rs,int map,int rt)2077 static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
2078 {
2079 if(map<0) emit_movsbl_indexed(addr+(int)g_dev.ri.rdram.dram-0x80000000, rs, rt);
2080 else {
2081 assem_debug("movsbl %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
2082 assert(rs!=ESP);
2083 output_byte(0x0F);
2084 output_byte(0xBE);
2085 if(addr==0&&rs!=EBP) {
2086 output_modrm(0,4,rt);
2087 output_sib(2,map,rs);
2088 }
2089 else if(addr<128&&addr>=-128) {
2090 output_modrm(1,4,rt);
2091 output_sib(2,map,rs);
2092 output_byte(addr);
2093 }
2094 else
2095 {
2096 output_modrm(2,4,rt);
2097 output_sib(2,map,rs);
2098 output_w32(addr);
2099 }
2100 }
2101 }
emit_movswl(int addr,int rt)2102 static void emit_movswl(int addr, int rt)
2103 {
2104 assem_debug("movswl %x,%%%s",addr,regname[rt]);
2105 output_byte(0x0F);
2106 output_byte(0xBF);
2107 output_modrm(0,5,rt);
2108 output_w32(addr);
2109 }
emit_movswl_indexed(int addr,int rs,int rt)2110 static void emit_movswl_indexed(int addr, int rs, int rt)
2111 {
2112 assem_debug("movswl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
2113 output_byte(0x0F);
2114 output_byte(0xBF);
2115 output_modrm(2,rs,rt);
2116 output_w32(addr);
2117 }
emit_movswl_tlb(int addr,int map,int rt)2118 static void emit_movswl_tlb(int addr, int map, int rt)
2119 {
2120 if(map<0) emit_movswl(addr+(int)g_dev.ri.rdram.dram-0x80000000, rt);
2121 else
2122 {
2123 assem_debug("movswl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
2124 output_byte(0x0F);
2125 output_byte(0xBF);
2126 output_modrm(0,4,rt);
2127 output_sib(2,map,5);
2128 output_w32(addr);
2129 }
2130 }
emit_movzbl(int addr,int rt)2131 static void emit_movzbl(int addr, int rt)
2132 {
2133 assem_debug("movzbl %x,%%%s",addr,regname[rt]);
2134 output_byte(0x0F);
2135 output_byte(0xB6);
2136 output_modrm(0,5,rt);
2137 output_w32(addr);
2138 }
emit_movzbl_indexed(int addr,int rs,int rt)2139 static void emit_movzbl_indexed(int addr, int rs, int rt)
2140 {
2141 assem_debug("movzbl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
2142 output_byte(0x0F);
2143 output_byte(0xB6);
2144 output_modrm(2,rs,rt);
2145 output_w32(addr);
2146 }
emit_movzbl_tlb(int addr,int map,int rt)2147 static void emit_movzbl_tlb(int addr, int map, int rt)
2148 {
2149 if(map<0) emit_movzbl(addr+(int)g_dev.ri.rdram.dram-0x80000000, rt);
2150 else
2151 {
2152 assem_debug("movzbl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
2153 output_byte(0x0F);
2154 output_byte(0xB6);
2155 output_modrm(0,4,rt);
2156 output_sib(2,map,5);
2157 output_w32(addr);
2158 }
2159 }
emit_movzbl_indexed_tlb(int addr,int rs,int map,int rt)2160 static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
2161 {
2162 if(map<0) emit_movzbl_indexed(addr+(int)g_dev.ri.rdram.dram-0x80000000, rs, rt);
2163 else {
2164 assem_debug("movzbl %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
2165 assert(rs!=ESP);
2166 output_byte(0x0F);
2167 output_byte(0xB6);
2168 if(addr==0&&rs!=EBP) {
2169 output_modrm(0,4,rt);
2170 output_sib(2,map,rs);
2171 }
2172 else if(addr<128&&addr>=-128) {
2173 output_modrm(1,4,rt);
2174 output_sib(2,map,rs);
2175 output_byte(addr);
2176 }
2177 else
2178 {
2179 output_modrm(2,4,rt);
2180 output_sib(2,map,rs);
2181 output_w32(addr);
2182 }
2183 }
2184 }
emit_movzwl(int addr,int rt)2185 static void emit_movzwl(int addr, int rt)
2186 {
2187 assem_debug("movzwl %x,%%%s",addr,regname[rt]);
2188 output_byte(0x0F);
2189 output_byte(0xB7);
2190 output_modrm(0,5,rt);
2191 output_w32(addr);
2192 }
emit_movzwl_indexed(int addr,int rs,int rt)2193 static void emit_movzwl_indexed(int addr, int rs, int rt)
2194 {
2195 assem_debug("movzwl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
2196 output_byte(0x0F);
2197 output_byte(0xB7);
2198 output_modrm(2,rs,rt);
2199 output_w32(addr);
2200 }
emit_movzwl_tlb(int addr,int map,int rt)2201 static void emit_movzwl_tlb(int addr, int map, int rt)
2202 {
2203 if(map<0) emit_movzwl(addr+(int)g_dev.ri.rdram.dram-0x80000000, rt);
2204 else
2205 {
2206 assem_debug("movzwl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
2207 output_byte(0x0F);
2208 output_byte(0xB7);
2209 output_modrm(0,4,rt);
2210 output_sib(2,map,5);
2211 output_w32(addr);
2212 }
2213 }
/*
static void emit_movzwl_reg(int rs, int rt)
{
  assem_debug("movzwl %%%s,%%%s",regname[rs]+1,regname[rt]);
  output_byte(0x0F);
  output_byte(0xB7);
  output_modrm(3,rs,rt);
}*/
2222
emit_xchg(int rs,int rt)2223 static void emit_xchg(int rs, int rt)
2224 {
2225 assem_debug("xchg %%%s,%%%s",regname[rs],regname[rt]);
2226 if(rs==EAX) {
2227 output_byte(0x90+rt);
2228 }
2229 else
2230 {
2231 output_byte(0x87);
2232 output_modrm(3,rs,rt);
2233 }
2234 }
emit_writeword(int rt,int addr)2235 static void emit_writeword(int rt, int addr)
2236 {
2237 assem_debug("movl %%%s,%x",regname[rt],addr);
2238 output_byte(0x89);
2239 output_modrm(0,5,rt);
2240 output_w32(addr);
2241 }
emit_writeword_indexed(int rt,int addr,int rs)2242 static void emit_writeword_indexed(int rt, int addr, int rs)
2243 {
2244 assem_debug("mov %%%s,%x+%%%s",regname[rt],addr,regname[rs]);
2245 output_byte(0x89);
2246 if(addr<128&&addr>=-128) {
2247 output_modrm(1,rs,rt);
2248 if(rs==ESP) output_sib(0,4,4);
2249 output_byte(addr);
2250 }
2251 else
2252 {
2253 output_modrm(2,rs,rt);
2254 if(rs==ESP) output_sib(0,4,4);
2255 output_w32(addr);
2256 }
2257 }
emit_writeword_indexed_tlb(int rt,int addr,int rs,int map,int temp)2258 static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2259 {
2260 if(map<0) emit_writeword_indexed(rt, addr+(int)g_dev.ri.rdram.dram-0x80000000, rs);
2261 else {
2262 assem_debug("mov %%%s,%x(%%%s,%%%s,1)",regname[rt],addr,regname[rs],regname[map]);
2263 assert(rs!=ESP);
2264 output_byte(0x89);
2265 if(addr==0&&rs!=EBP) {
2266 output_modrm(0,4,rt);
2267 output_sib(0,map,rs);
2268 }
2269 else if(addr<128&&addr>=-128) {
2270 output_modrm(1,4,rt);
2271 output_sib(0,map,rs);
2272 output_byte(addr);
2273 }
2274 else
2275 {
2276 output_modrm(2,4,rt);
2277 output_sib(0,map,rs);
2278 output_w32(addr);
2279 }
2280 }
2281 }
/* Store a 64-bit value as two 32-bit words: rh at addr and rl at addr+4,
   both through emit_writeword_indexed_tlb(). */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  assert(rh>=0);
  const int low_addr=addr+4;
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  emit_writeword_indexed_tlb(rl, low_addr, rs, map, temp);
}
emit_writehword(int rt,int addr)2288 static void emit_writehword(int rt, int addr)
2289 {
2290 assem_debug("movw %%%s,%x",regname[rt]+1,addr);
2291 output_byte(0x66);
2292 output_byte(0x89);
2293 output_modrm(0,5,rt);
2294 output_w32(addr);
2295 }
emit_writehword_indexed(int rt,int addr,int rs)2296 static void emit_writehword_indexed(int rt, int addr, int rs)
2297 {
2298 assem_debug("movw %%%s,%x+%%%s",regname[rt]+1,addr,regname[rs]);
2299 output_byte(0x66);
2300 output_byte(0x89);
2301 if(addr<128&&addr>=-128) {
2302 output_modrm(1,rs,rt);
2303 output_byte(addr);
2304 }
2305 else
2306 {
2307 output_modrm(2,rs,rt);
2308 output_w32(addr);
2309 }
2310 }
emit_writebyte(int rt,int addr)2311 static void emit_writebyte(int rt, int addr)
2312 {
2313 if(rt<4) {
2314 assem_debug("movb %%%cl,%x",regname[rt][1],addr);
2315 output_byte(0x88);
2316 output_modrm(0,5,rt);
2317 output_w32(addr);
2318 }
2319 else
2320 {
2321 emit_xchg(EAX,rt);
2322 emit_writebyte(EAX,addr);
2323 emit_xchg(EAX,rt);
2324 }
2325 }
emit_writebyte_indexed(int rt,int addr,int rs)2326 static void emit_writebyte_indexed(int rt, int addr, int rs)
2327 {
2328 if(rt<4) {
2329 assem_debug("movb %%%cl,%x+%%%s",regname[rt][1],addr,regname[rs]);
2330 output_byte(0x88);
2331 if(addr<128&&addr>=-128) {
2332 output_modrm(1,rs,rt);
2333 output_byte(addr);
2334 }
2335 else
2336 {
2337 output_modrm(2,rs,rt);
2338 output_w32(addr);
2339 }
2340 }
2341 else
2342 {
2343 emit_xchg(EAX,rt);
2344 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
2345 emit_xchg(EAX,rt);
2346 }
2347 }
emit_writebyte_indexed_tlb(int rt,int addr,int rs,int map,int temp)2348 static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2349 {
2350 if(map<0) emit_writebyte_indexed(rt, addr+(int)g_dev.ri.rdram.dram-0x80000000, rs);
2351 else
2352 if(rt<4) {
2353 assem_debug("movb %%%cl,%x(%%%s,%%%s,1)",regname[rt][1],addr,regname[rs],regname[map]);
2354 assert(rs!=ESP);
2355 output_byte(0x88);
2356 if(addr==0&&rs!=EBP) {
2357 output_modrm(0,4,rt);
2358 output_sib(0,map,rs);
2359 }
2360 else if(addr<128&&addr>=-128) {
2361 output_modrm(1,4,rt);
2362 output_sib(0,map,rs);
2363 output_byte(addr);
2364 }
2365 else
2366 {
2367 output_modrm(2,4,rt);
2368 output_sib(0,map,rs);
2369 output_w32(addr);
2370 }
2371 }
2372 else
2373 {
2374 emit_xchg(EAX,rt);
2375 emit_writebyte_indexed_tlb(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
2376 emit_xchg(EAX,rt);
2377 }
2378 }
/* Emit MOV m32, imm32 (C7 /0): store the constant imm at the absolute
   address addr. */
static void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x",imm,addr);
  output_byte(0xC7);
  output_modrm(0, 5, 0);
  output_w32(addr);   /* destination address first ... */
  output_w32(imm);    /* ... then the immediate operand */
}
/* Emit MOV addr(%esp), imm32 (C7 /0 with SIB and an 8-bit displacement).
   addr must fit in a signed byte. */
static void emit_writeword_imm_esp(int imm, int addr)
{
  assem_debug("mov $%x,%x(%%esp)",imm,addr);
  assert(addr>=-128&&addr<128);
  output_byte(0xC7);
  output_modrm(1, 4, 0);
  output_sib(0, 4, 4);   /* base=ESP, no index */
  output_byte(addr);
  output_w32(imm);
}
/* Emit MOV m8, imm8 (C6 /0): store the byte constant imm at the absolute
   address addr.  imm must fit in a signed byte. */
static void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x",imm,addr);
  assert(imm>=-128&&imm<128);
  output_byte(0xC6);
  output_modrm(0, 5, 0);
  output_w32(addr);
  output_byte(imm);
}
2406
emit_mul(int rs)2407 static void emit_mul(int rs)
2408 {
2409 assem_debug("mul %%%s",regname[rs]);
2410 output_byte(0xF7);
2411 output_modrm(3,rs,4);
2412 }
emit_imul(int rs)2413 static void emit_imul(int rs)
2414 {
2415 assem_debug("imul %%%s",regname[rs]);
2416 output_byte(0xF7);
2417 output_modrm(3,rs,5);
2418 }
emit_div(int rs)2419 static void emit_div(int rs)
2420 {
2421 assem_debug("div %%%s",regname[rs]);
2422 output_byte(0xF7);
2423 output_modrm(3,rs,6);
2424 }
emit_idiv(int rs)2425 static void emit_idiv(int rs)
2426 {
2427 assem_debug("idiv %%%s",regname[rs]);
2428 output_byte(0xF7);
2429 output_modrm(3,rs,7);
2430 }
/* Emit CDQ (opcode 99). */
static void emit_cdq()
{
  assem_debug("cdq");
  output_byte(0x99);
}
2436
2437 // Load 2 immediates optimizing for small code size
/*
 * Load imm1 into rt1 and imm2 into rt2, preferring small code: when the
 * two constants are within a signed byte of each other, rt2 is derived
 * from rt1 with emit_addimm() instead of a second immediate load.
 *
 * The difference is computed with unsigned (wrap-around) arithmetic so
 * that widely separated constants cannot trigger signed-overflow undefined
 * behaviour.  A wrapped difference is still valid here because the emitted
 * 32-bit add wraps in the same way.
 */
static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
{
  const int delta=(int)((u_int)imm2-(u_int)imm1);
  emit_movimm(imm1,rt1);
  if(delta<128&&delta>=-128) emit_addimm(rt1,delta,rt2);
  else emit_movimm(imm2,rt2);
}
2444
2445 // special case for checking pending_exception
/*
 * Emit CMP m8, imm8 (80 /7): compare the byte at the absolute address addr
 * with imm.
 *
 * imm must fit in a signed byte.  The full imm8 range -128..127 is
 * accepted, matching emit_writebyte_imm(); the previous lower bound of
 * -127 rejected a value that encodes correctly.
 */
static void emit_cmpmem_imm_byte(int addr,int imm)
{
  assert(imm<128&&imm>=-128);
  assem_debug("cmpb $%d,%x",imm,addr);
  output_byte(0x80);
  output_modrm(0,5,7);
  output_w32(addr);
  output_byte(imm);
}
2455
2456 // special case for checking invalid_code
emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)2457 static void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2458 {
2459 assert(imm<128&&imm>=-127);
2460 assert(r>=0&&r<8);
2461 emit_shrimm(r,12,r);
2462 assem_debug("cmp $%d,%x+%%%s",imm,addr,regname[r]);
2463 output_byte(0x80);
2464 output_modrm(2,r,7);
2465 output_w32(addr);
2466 output_byte(imm);
2467 }
2468
2469 // special case for checking hash_table
emit_cmpmem_indexed(int addr,int rs,int rt)2470 static void emit_cmpmem_indexed(int addr,int rs,int rt)
2471 {
2472 assert(rs>=0&&rs<8);
2473 assert(rt>=0&&rt<8);
2474 assem_debug("cmp %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
2475 output_byte(0x39);
2476 output_modrm(2,rs,rt);
2477 output_w32(addr);
2478 }
2479
2480 // Used to preload hash table entries
2481 #ifdef IMM_PREFETCH
/* Emit a prefetch hint (0F 18 /1) for the absolute address addr. */
static void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32(target);
}
2490 #endif
2491
2492 /*void emit_submem(int r,int addr)
2493 {
2494 assert(r>=0&&r<8);
2495 assem_debug("sub %x,%%%s",addr,regname[r]);
2496 output_byte(0x2B);
2497 output_modrm(0,5,r);
2498 output_w32((int)addr);
2499 }
2500 static void emit_subfrommem(int addr,int r)
2501 {
2502 assert(r>=0&&r<8);
2503 assem_debug("sub %%%s,%x",regname[r],addr);
2504 output_byte(0x29);
2505 output_modrm(0,5,r);
2506 output_w32((int)addr);
2507 }*/
2508
emit_flds(int r)2509 static void emit_flds(int r)
2510 {
2511 assem_debug("flds (%%%s)",regname[r]);
2512 output_byte(0xd9);
2513 if(r!=EBP) output_modrm(0,r,0);
2514 else {output_modrm(1,EBP,0);output_byte(0);}
2515 }
emit_fldl(int r)2516 static void emit_fldl(int r)
2517 {
2518 assem_debug("fldl (%%%s)",regname[r]);
2519 output_byte(0xdd);
2520 if(r!=EBP) output_modrm(0,r,0);
2521 else {output_modrm(1,EBP,0);output_byte(0);}
2522 }
/* Emit FUCOMIP st(0), st(r) (DF E8+r).  r must be below 8. */
static void emit_fucomip(u_int r)
{
  assem_debug("fucomip %d",r);
  assert(r<8);
  output_byte(0xdf);
  output_byte(0xe8+r);
}
/* Emit FCHS (D9 E0). */
static void emit_fchs()
{
  assem_debug("fchs");
  output_byte(0xd9);
  output_byte(0xe0);
}
/* Emit FABS (D9 E1). */
static void emit_fabs()
{
  assem_debug("fabs");
  output_byte(0xd9);
  output_byte(0xe1);
}
/* Emit FSQRT (D9 FA). */
static void emit_fsqrt()
{
  assem_debug("fsqrt");
  output_byte(0xd9);
  output_byte(0xfa);
}
emit_fadds(int r)2548 static void emit_fadds(int r)
2549 {
2550 assem_debug("fadds (%%%s)",regname[r]);
2551 output_byte(0xd8);
2552 if(r!=EBP) output_modrm(0,r,0);
2553 else {output_modrm(1,EBP,0);output_byte(0);}
2554 }
emit_faddl(int r)2555 static void emit_faddl(int r)
2556 {
2557 assem_debug("faddl (%%%s)",regname[r]);
2558 output_byte(0xdc);
2559 if(r!=EBP) output_modrm(0,r,0);
2560 else {output_modrm(1,EBP,0);output_byte(0);}
2561 }
/* Emit FADD with x87 stack register st(r) as the source (D8 C0+r). */
static void emit_fadd(int r)
{
  assem_debug("fadd st%d",r);
  output_byte(0xd8);
  output_byte(0xc0+r);
}
emit_fsubs(int r)2568 static void emit_fsubs(int r)
2569 {
2570 assem_debug("fsubs (%%%s)",regname[r]);
2571 output_byte(0xd8);
2572 if(r!=EBP) output_modrm(0,r,4);
2573 else {output_modrm(1,EBP,4);output_byte(0);}
2574 }
emit_fsubl(int r)2575 static void emit_fsubl(int r)
2576 {
2577 assem_debug("fsubl (%%%s)",regname[r]);
2578 output_byte(0xdc);
2579 if(r!=EBP) output_modrm(0,r,4);
2580 else {output_modrm(1,EBP,4);output_byte(0);}
2581 }
/* Emit FSUB with x87 stack register st(r) as the source (D8 E0+r). */
static void emit_fsub(int r)
{
  assem_debug("fsub st%d",r);
  output_byte(0xd8);
  output_byte(0xe0+r);
}
emit_fmuls(int r)2588 static void emit_fmuls(int r)
2589 {
2590 assem_debug("fmuls (%%%s)",regname[r]);
2591 output_byte(0xd8);
2592 if(r!=EBP) output_modrm(0,r,1);
2593 else {output_modrm(1,EBP,1);output_byte(0);}
2594 }
emit_fmull(int r)2595 static void emit_fmull(int r)
2596 {
2597 assem_debug("fmull (%%%s)",regname[r]);
2598 output_byte(0xdc);
2599 if(r!=EBP) output_modrm(0,r,1);
2600 else {output_modrm(1,EBP,1);output_byte(0);}
2601 }
/* Emit FMUL with x87 stack register st(r) as the source (D8 C8+r). */
static void emit_fmul(int r)
{
  assem_debug("fmul st%d",r);
  output_byte(0xd8);
  output_byte(0xc8+r);
}
emit_fdivs(int r)2608 static void emit_fdivs(int r)
2609 {
2610 assem_debug("fdivs (%%%s)",regname[r]);
2611 output_byte(0xd8);
2612 if(r!=EBP) output_modrm(0,r,6);
2613 else {output_modrm(1,EBP,6);output_byte(0);}
2614 }
emit_fdivl(int r)2615 static void emit_fdivl(int r)
2616 {
2617 assem_debug("fdivl (%%%s)",regname[r]);
2618 output_byte(0xdc);
2619 if(r!=EBP) output_modrm(0,r,6);
2620 else {output_modrm(1,EBP,6);output_byte(0);}
2621 }
/* Emit FDIV with x87 stack register st(r) as the source (D8 F0+r). */
static void emit_fdiv(int r)
{
  assem_debug("fdiv st%d",r);
  output_byte(0xd8);
  output_byte(0xf0+r);
}
/* Pop the x87 stack by emitting "fstp st(0)" (DD D8). */
static void emit_fpop()
{
  assem_debug("fpop");
  output_byte(0xdd);
  output_byte(0xd8);
}
emit_fildl(int r)2635 static void emit_fildl(int r)
2636 {
2637 assem_debug("fildl (%%%s)",regname[r]);
2638 output_byte(0xdb);
2639 if(r!=EBP) output_modrm(0,r,0);
2640 else {output_modrm(1,EBP,0);output_byte(0);}
2641 }
emit_fildll(int r)2642 static void emit_fildll(int r)
2643 {
2644 assem_debug("fildll (%%%s)",regname[r]);
2645 output_byte(0xdf);
2646 if(r!=EBP) output_modrm(0,r,5);
2647 else {output_modrm(1,EBP,5);output_byte(0);}
2648 }
emit_fistpl(int r)2649 static void emit_fistpl(int r)
2650 {
2651 assem_debug("fistpl (%%%s)",regname[r]);
2652 output_byte(0xdb);
2653 if(r!=EBP) output_modrm(0,r,3);
2654 else {output_modrm(1,EBP,3);output_byte(0);}
2655 }
emit_fistpll(int r)2656 static void emit_fistpll(int r)
2657 {
2658 assem_debug("fistpll (%%%s)",regname[r]);
2659 output_byte(0xdf);
2660 if(r!=EBP) output_modrm(0,r,7);
2661 else {output_modrm(1,EBP,7);output_byte(0);}
2662 }
emit_fstps(int r)2663 static void emit_fstps(int r)
2664 {
2665 assem_debug("fstps (%%%s)",regname[r]);
2666 output_byte(0xd9);
2667 if(r!=EBP) output_modrm(0,r,3);
2668 else {output_modrm(1,EBP,3);output_byte(0);}
2669 }
emit_fstpl(int r)2670 static void emit_fstpl(int r)
2671 {
2672 assem_debug("fstpl (%%%s)",regname[r]);
2673 output_byte(0xdd);
2674 if(r!=EBP) output_modrm(0,r,3);
2675 else {output_modrm(1,EBP,3);output_byte(0);}
2676 }
/* Emit FNSTCW (%esp) (D9 /7): store the x87 control word at the top of
   the stack. */
static void emit_fnstcw_stack()
{
  assem_debug("fnstcw (%%esp)");
  output_byte(0xd9);
  output_modrm(0, 4, 7);
  output_sib(0, 4, 4);   /* base=ESP, no index */
}
/* Emit FLDCW (%esp) (D9 /5): load the x87 control word from the top of
   the stack. */
static void emit_fldcw_stack()
{
  assem_debug("fldcw (%%esp)");
  output_byte(0xd9);
  output_modrm(0, 4, 5);
  output_sib(0, 4, 4);   /* base=ESP, no index */
}
emit_fldcw_indexed(int addr,int r)2691 static void emit_fldcw_indexed(int addr,int r)
2692 {
2693 assem_debug("fldcw %x(%%%s)",addr,regname[r]);
2694 output_byte(0xd9);
2695 output_modrm(0,4,5);
2696 output_sib(1,r,5);
2697 output_w32(addr);
2698 }
/* Emit FLDCW m16 (D9 /5): load the x87 control word from the absolute
   address addr. */
static void emit_fldcw(int addr)
{
  assem_debug("fldcw %x",addr);
  output_byte(0xd9);
  output_modrm(0, 5, 5);
  output_w32(addr);
}
2706 #ifdef __SSE__
/* Emit MOVSS xmm, m32 (F3 0F 10): load the scalar single at (addr) into
   SSE register ssereg.  Here addr is a host register number. */
static void emit_movss_load(u_int addr,u_int ssereg)
{
  assem_debug("movss (%%%s),xmm%d",regname[addr],ssereg);
  assert(ssereg<8);
  output_byte(0xf3);
  output_byte(0x0f);
  output_byte(0x10);
  if(addr==EBP) {
    /* EBP cannot be a base with mod=0, so use a zero 8-bit displacement */
    output_modrm(1,EBP,ssereg);
    output_byte(0);
  }
  else {
    output_modrm(0,addr,ssereg);
  }
}
/* Emit MOVSD xmm, m64 (F2 0F 10): load the scalar double at (addr) into
   SSE register ssereg.  Here addr is a host register number. */
static void emit_movsd_load(u_int addr,u_int ssereg)
{
  assem_debug("movsd (%%%s),xmm%d",regname[addr],ssereg);
  assert(ssereg<8);
  output_byte(0xf2);
  output_byte(0x0f);
  output_byte(0x10);
  if(addr==EBP) {
    /* EBP cannot be a base with mod=0, so use a zero 8-bit displacement */
    output_modrm(1,EBP,ssereg);
    output_byte(0);
  }
  else {
    output_modrm(0,addr,ssereg);
  }
}
/* Emit MOVD m32, xmm (66 0F 7E): store the low 32 bits of SSE register
   ssereg at (addr).  Here addr is a host register number. */
static void emit_movd_store(u_int ssereg,u_int addr)
{
  assem_debug("movd xmm%d,(%%%s)",ssereg,regname[addr]);
  assert(ssereg<8);
  output_byte(0x66);
  output_byte(0x0f);
  output_byte(0x7e);
  if(addr==EBP) {
    /* EBP cannot be a base with mod=0, so use a zero 8-bit displacement */
    output_modrm(1,EBP,ssereg);
    output_byte(0);
  }
  else {
    output_modrm(0,addr,ssereg);
  }
}
/* Emit CVTTPS2DQ (F3 0F 5B) in register-to-register form: ssereg1 goes in
   the ModRM r/m field and ssereg2 in the reg field. */
static void emit_cvttps2dq(u_int ssereg1,u_int ssereg2)
{
  assem_debug("cvttps2dq xmm%d,xmm%d",ssereg1,ssereg2);
  assert(ssereg1<8);
  assert(ssereg2<8);
  output_byte(0xf3);
  output_byte(0x0f);
  output_byte(0x5b);
  output_modrm(3, ssereg1, ssereg2);
}
/* Emit CVTTPD2DQ (66 0F E6) in register-to-register form: ssereg1 goes in
   the ModRM r/m field and ssereg2 in the reg field. */
static void emit_cvttpd2dq(u_int ssereg1,u_int ssereg2)
{
  assem_debug("cvttpd2dq xmm%d,xmm%d",ssereg1,ssereg2);
  assert(ssereg1<8);
  assert(ssereg2<8);
  output_byte(0x66);
  output_byte(0x0f);
  output_byte(0xe6);
  output_modrm(3, ssereg1, ssereg2);
}
2757 #endif
2758
2759 /* Stubs/epilogue */
2760
emit_extjump2(int addr,int target,int linker)2761 static void emit_extjump2(int addr, int target, int linker)
2762 {
2763 u_char *ptr=(u_char *)addr;
2764 if(*ptr==0x0f)
2765 {
2766 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
2767 addr+=2;
2768 }
2769 else
2770 {
2771 assert(*ptr==0xe8||*ptr==0xe9);
2772 addr++;
2773 }
2774 emit_pushimm(target);
2775 emit_pushimm(addr);
2776 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2777 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2778 //DEBUG >
2779 #ifdef DEBUG_CYCLE_COUNT
2780 emit_readword((int)&last_count,ECX);
2781 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2782 emit_readword((int)&next_interrupt,ECX);
2783 emit_writeword(HOST_CCREG,(int)&g_cp0_regs[CP0_COUNT_REG]);
2784 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2785 emit_writeword(ECX,(int)&last_count);
2786 #endif
2787 //DEBUG <
2788 emit_call(linker);
2789 emit_addimm(ESP,8,ESP);
2790 emit_jmpreg(EAX);
2791 }
2792
emit_extjump(int addr,int target)2793 static void emit_extjump(int addr, int target)
2794 {
2795 emit_extjump2(addr, target, (int)dynamic_linker);
2796 }
emit_extjump_ds(int addr,int target)2797 static void emit_extjump_ds(int addr, int target)
2798 {
2799 emit_extjump2(addr, target, (int)dynamic_linker_ds);
2800 }
2801
do_readstub(int n)2802 static void do_readstub(int n)
2803 {
2804 assem_debug("do_readstub %x",start+stubs[n][3]*4);
2805 set_jump_target(stubs[n][1],(int)out);
2806 int type=stubs[n][0];
2807 int i=stubs[n][3];
2808 int rs=stubs[n][4];
2809 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2810 signed char *i_regmap=i_regs->regmap;
2811 int addr=get_reg(i_regmap,AGEN1+(i&1));
2812 int rth,rt;
2813 int ds;
2814 if(itype[i]==C1LS||itype[i]==LOADLR) {
2815 rth=get_reg(i_regmap,FTEMP|64);
2816 rt=get_reg(i_regmap,FTEMP);
2817 }else{
2818 rth=get_reg(i_regmap,rt1[i]|64);
2819 rt=get_reg(i_regmap,rt1[i]);
2820 }
2821 assert(rs>=0);
2822 if(addr<0) addr=rt;
2823 if(addr<0&&itype[i]!=C1LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
2824 assert(addr>=0);
2825 int ftable=0;
2826 if(type==LOADB_STUB||type==LOADBU_STUB)
2827 ftable=(int)readmemb;
2828 if(type==LOADH_STUB||type==LOADHU_STUB)
2829 ftable=(int)readmemh;
2830 if(type==LOADW_STUB)
2831 ftable=(int)readmem;
2832 if(type==LOADD_STUB)
2833 ftable=(int)readmemd;
2834 emit_writeword(rs,(int)&address);
2835 emit_shrimm(rs,16,addr);
2836 emit_movmem_indexedx4(ftable,addr,addr);
2837 emit_pusha();
2838 ds=i_regs!=®s[i];
2839 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2840 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2841 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2842
2843 int temp;
2844 int cc=get_reg(i_regmap,CCREG);
2845 if(cc<0) {
2846 if(addr==HOST_CCREG)
2847 {
2848 cc=0;temp=1;
2849 assert(cc!=HOST_CCREG);
2850 assert(temp!=HOST_CCREG);
2851 emit_loadreg(CCREG,cc);
2852 }
2853 else
2854 {
2855 cc=HOST_CCREG;
2856 emit_loadreg(CCREG,cc);
2857 temp=!addr;
2858 }
2859 }
2860 else
2861 {
2862 temp=!addr;
2863 }
2864 emit_readword((int)&last_count,temp);
2865 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2866 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2867 emit_add(cc,temp,cc);
2868 emit_writeword(cc,(int)&g_cp0_regs[CP0_COUNT_REG]);
2869 emit_callreg(addr);
2870 // We really shouldn't need to update the count here,
2871 // but not doing so causes random crashes...
2872 emit_readword((int)&g_cp0_regs[CP0_COUNT_REG],HOST_CCREG);
2873 emit_readword((int)&next_interrupt,ECX);
2874 emit_addimm(HOST_CCREG,-(int)CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2875 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2876 emit_writeword(ECX,(int)&last_count);
2877 emit_storereg(CCREG,HOST_CCREG);
2878 emit_popa();
2879 if((cc=get_reg(i_regmap,CCREG))>=0) {
2880 emit_loadreg(CCREG,cc);
2881 }
2882 if(rt>=0) {
2883 if(type==LOADB_STUB)
2884 emit_movsbl((int)&readmem_dword,rt);
2885 if(type==LOADBU_STUB)
2886 emit_movzbl((int)&readmem_dword,rt);
2887 if(type==LOADH_STUB)
2888 emit_movswl((int)&readmem_dword,rt);
2889 if(type==LOADHU_STUB)
2890 emit_movzwl((int)&readmem_dword,rt);
2891 if(type==LOADW_STUB)
2892 emit_readword((int)&readmem_dword,rt);
2893 if(type==LOADD_STUB) {
2894 emit_readword((int)&readmem_dword,rt);
2895 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2896 }
2897 }
2898 emit_jmp(stubs[n][2]); // return address
2899 }
2900
/*
 * Emit an inline slow-path load for a compile-time-known address.
 *
 * type    - stub type selecting the handler table and result extension
 * i       - instruction index
 * addr    - constant guest address; the handler is taken from the table
 *           entry for addr>>16 at code-generation time
 * regmap  - host register map in effect
 * target  - guest register receiving the result
 * adj     - cycle adjustment
 * reglist - not used by this implementation
 */
static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  assem_debug("inline_readstub");
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);

  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
  if(type==LOADD_STUB)
    ftable=(int)readmemd;

#ifdef HOST_IMM_ADDR32
  emit_writeword_imm(addr,(int)&address);
#else
  emit_writeword(rs,(int)&address);
#endif
  emit_pusha();

  // Addresses at or above 0xC0000000 (as a signed comparison) may fault
  const int may_fault=((signed int)addr>=(signed int)0xC0000000);
  // Nonzero when regmap is not the register map of instruction i
  const int ds=(regmap!=regs[i].regmap);
  if(may_fault) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled.  This is
    // a very rare case and likely represents a bug.
    if(ds) {
      wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
    }
    else {
      load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
      wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    }
  }

  // Pick registers for the cycle count and a temporary, avoiding rs
  int cc=get_reg(regmap,CCREG);
  int temp=!rs;
  if(cc<0) {
    if(rs==HOST_CCREG) {
      cc=0;
      temp=1;
      assert(cc!=HOST_CCREG);
      assert(temp!=HOST_CCREG);
    }
    else {
      cc=HOST_CCREG;
    }
    emit_loadreg(CCREG,cc);
  }
  emit_readword((int)&last_count,temp);
  emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
  emit_add(cc,temp,cc);
  emit_writeword(cc,(int)&g_cp0_regs[CP0_COUNT_REG]);
  if(may_fault) {
    // Pagefault address
    emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
  }
  emit_call(((u_int *)ftable)[addr>>16]);
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&g_cp0_regs[CP0_COUNT_REG],HOST_CCREG);
  emit_readword((int)&next_interrupt,ECX);
  emit_addimm(HOST_CCREG,-(int)CLOCK_DIVIDER*(adj+1),HOST_CCREG);
  emit_sub(HOST_CCREG,ECX,HOST_CCREG);
  emit_writeword(ECX,(int)&last_count);
  emit_storereg(CCREG,HOST_CCREG);
  emit_popa();
  cc=get_reg(regmap,CCREG);
  if(cc>=0) {
    emit_loadreg(CCREG,cc);
  }

  if(rt<0) return;
  // Fetch the result with the extension appropriate to the access type
  if(type==LOADB_STUB)
    emit_movsbl((int)&readmem_dword,rt);
  if(type==LOADBU_STUB)
    emit_movzbl((int)&readmem_dword,rt);
  if(type==LOADH_STUB)
    emit_movswl((int)&readmem_dword,rt);
  if(type==LOADHU_STUB)
    emit_movzwl((int)&readmem_dword,rt);
  if(type==LOADW_STUB)
    emit_readword((int)&readmem_dword,rt);
  if(type==LOADD_STUB) {
    emit_readword((int)&readmem_dword,rt);
    if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
  }
}
2994
do_writestub(int n)2995 static void do_writestub(int n)
2996 {
2997 assem_debug("do_writestub %x",start+stubs[n][3]*4);
2998 set_jump_target(stubs[n][1],(int)out);
2999 int type=stubs[n][0];
3000 int i=stubs[n][3];
3001 int rs=stubs[n][4];
3002 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3003 signed char *i_regmap=i_regs->regmap;
3004 int addr=get_reg(i_regmap,AGEN1+(i&1));
3005 int rth,rt,r;
3006 int ds;
3007 if(itype[i]==C1LS) {
3008 rth=get_reg(i_regmap,FTEMP|64);
3009 rt=get_reg(i_regmap,r=FTEMP);
3010 }else{
3011 rth=get_reg(i_regmap,rs2[i]|64);
3012 rt=get_reg(i_regmap,r=rs2[i]);
3013 }
3014 assert(rs>=0);
3015 assert(rt>=0);
3016 if(addr<0) addr=get_reg(i_regmap,-1);
3017 assert(addr>=0);
3018 int ftable=0;
3019 if(type==STOREB_STUB)
3020 ftable=(int)writememb;
3021 if(type==STOREH_STUB)
3022 ftable=(int)writememh;
3023 if(type==STOREW_STUB)
3024 ftable=(int)writemem;
3025 if(type==STORED_STUB)
3026 ftable=(int)writememd;
3027 emit_writeword(rs,(int)&address);
3028 emit_shrimm(rs,16,addr);
3029 emit_movmem_indexedx4(ftable,addr,addr);
3030 if(type==STOREB_STUB)
3031 emit_writebyte(rt,(int)&cpu_byte);
3032 if(type==STOREH_STUB)
3033 emit_writehword(rt,(int)&cpu_hword);
3034 if(type==STOREW_STUB)
3035 emit_writeword(rt,(int)&cpu_word);
3036 if(type==STORED_STUB) {
3037 emit_writeword(rt,(int)&cpu_dword);
3038 emit_writeword(r?rth:rt,(int)&cpu_dword+4);
3039 }
3040 emit_pusha();
3041 ds=i_regs!=®s[i];
3042 int real_rs=get_reg(i_regmap,rs1[i]);
3043 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
3044 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3045
3046 int temp;
3047 int cc=get_reg(i_regmap,CCREG);
3048 if(cc<0) {
3049 if(addr==HOST_CCREG)
3050 {
3051 cc=0;temp=1;
3052 assert(cc!=HOST_CCREG);
3053 assert(temp!=HOST_CCREG);
3054 emit_loadreg(CCREG,cc);
3055 }
3056 else
3057 {
3058 cc=HOST_CCREG;
3059 emit_loadreg(CCREG,cc);
3060 temp=!addr;
3061 }
3062 }
3063 else
3064 {
3065 temp=!addr;
3066 }
3067 emit_readword((int)&last_count,temp);
3068 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
3069 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
3070 emit_add(cc,temp,cc);
3071 emit_writeword(cc,(int)&g_cp0_regs[CP0_COUNT_REG]);
3072 emit_callreg(addr);
3073 emit_readword((int)&g_cp0_regs[CP0_COUNT_REG],HOST_CCREG);
3074 emit_readword((int)&next_interrupt,ECX);
3075 emit_addimm(HOST_CCREG,-(int)CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
3076 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3077 emit_writeword(ECX,(int)&last_count);
3078 emit_storereg(CCREG,HOST_CCREG);
3079 emit_popa();
3080 if((cc=get_reg(i_regmap,CCREG))>=0) {
3081 emit_loadreg(CCREG,cc);
3082 }
3083 emit_jmp(stubs[n][2]); // return address
3084 }
3085
/*
 * Emit an inline slow-path store for a compile-time-known address.
 *
 * type    - stub type selecting the handler table and value slot
 * i       - instruction index
 * addr    - constant guest address; the handler is taken from the table
 *           entry for addr>>16 at code-generation time
 * regmap  - host register map in effect
 * target  - guest register holding the value to store
 * adj     - cycle adjustment
 * reglist - not used by this implementation
 */
static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  assem_debug("inline_writestub");
  int rs=get_reg(regmap,-1);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  assert(rs>=0);
  assert(rt>=0);

  int ftable=0;
  if(type==STOREB_STUB)
    ftable=(int)writememb;
  if(type==STOREH_STUB)
    ftable=(int)writememh;
  if(type==STOREW_STUB)
    ftable=(int)writemem;
  if(type==STORED_STUB)
    ftable=(int)writememd;

  emit_writeword(rs,(int)&address);
  if(type==STOREB_STUB)
    emit_writebyte(rt,(int)&cpu_byte);
  if(type==STOREH_STUB)
    emit_writehword(rt,(int)&cpu_hword);
  if(type==STOREW_STUB)
    emit_writeword(rt,(int)&cpu_word);
  if(type==STORED_STUB) {
    emit_writeword(rt,(int)&cpu_dword);
    // Guest register 0 has no separate upper-half register: reuse rt
    emit_writeword(target?rth:rt,(int)&cpu_dword+4);
  }
  emit_pusha();

  // Register state is written back for addresses at or above 0xC0000000
  // (signed comparison) and for the 0x8430xxxx / 0xA430xxxx pages
  const u_int page=addr>>16;
  const int need_state=((signed int)addr>=(signed int)0xC0000000)||(page==0xa430)||(page==0x8430);
  // Nonzero when regmap is not the register map of instruction i
  const int ds=(regmap!=regs[i].regmap);
  if(need_state) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled.  This is
    // a very rare case and likely represents a bug.
    if(ds) {
      wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
    }
    else {
      load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
      wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    }
  }

  // Pick registers for the cycle count and a temporary, avoiding rs
  int cc=get_reg(regmap,CCREG);
  int temp=!rs;
  if(cc<0) {
    if(rs==HOST_CCREG) {
      cc=0;
      temp=1;
      assert(cc!=HOST_CCREG);
      assert(temp!=HOST_CCREG);
    }
    else {
      cc=HOST_CCREG;
    }
    emit_loadreg(CCREG,cc);
  }
  emit_readword((int)&last_count,temp);
  emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
  emit_add(cc,temp,cc);
  emit_writeword(cc,(int)&g_cp0_regs[CP0_COUNT_REG]);
  if(need_state) {
    // Pagefault address
    emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
  }
  emit_call(((u_int *)ftable)[page]);
  emit_readword((int)&g_cp0_regs[CP0_COUNT_REG],HOST_CCREG);
  emit_readword((int)&next_interrupt,ECX);
  emit_addimm(HOST_CCREG,-(int)CLOCK_DIVIDER*(adj+1),HOST_CCREG);
  emit_sub(HOST_CCREG,ECX,HOST_CCREG);
  emit_writeword(ECX,(int)&last_count);
  emit_storereg(CCREG,HOST_CCREG);
  emit_popa();
  cc=get_reg(regmap,CCREG);
  if(cc>=0) {
    emit_loadreg(CCREG,cc);
  }
}
3167
do_unalignedwritestub(int n)3168 static void do_unalignedwritestub(int n)
3169 {
3170 set_jump_target(stubs[n][1],(int)out);
3171 output_byte(0xCC);
3172 emit_jmp(stubs[n][2]); // return address
3173 }
3174
do_invstub(int n)3175 static void do_invstub(int n)
3176 {
3177 set_jump_target(stubs[n][1],(int)out);
3178 emit_call(invalidate_block_reg[stubs[n][4]]);
3179 emit_jmp(stubs[n][2]); // return address
3180 }
3181
do_dirty_stub(int i)3182 static int do_dirty_stub(int i)
3183 {
3184 assem_debug("do_dirty_stub %x",start+i*4);
3185 emit_pushimm(start+i*4);
3186 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
3187 emit_movimm((int)copy,EBX);
3188 emit_movimm(slen*4,ECX);
3189 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3190 emit_addimm(ESP,4,ESP);
3191 int entry=(int)out;
3192 load_regs_entry(i);
3193 if(entry==(int)out) entry=instr_addr[i];
3194 emit_jmp(instr_addr[i]);
3195 return entry;
3196 }
3197
do_dirty_stub_ds()3198 static void do_dirty_stub_ds()
3199 {
3200 emit_pushimm(start+1);
3201 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
3202 emit_movimm((int)copy,EBX);
3203 emit_movimm(slen*4,ECX);
3204 emit_call((int)&verify_code_ds);
3205 emit_addimm(ESP,4,ESP);
3206 }
3207
do_cop1stub(int n)3208 static void do_cop1stub(int n)
3209 {
3210 assem_debug("do_cop1stub %x",start+stubs[n][3]*4);
3211 set_jump_target(stubs[n][1],(int)out);
3212 int i=stubs[n][3];
3213 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3214 int ds=stubs[n][6];
3215 if(!ds) {
3216 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3217 //if(i_regs!=®s[i]) DebugMessage(M64MSG_VERBOSE, "oops: regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs);
3218 }
3219 //else {DebugMessage(M64MSG_VERBOSE, "fp exception in delay slot");}
3220 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3221 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3222 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3223 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3224 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3225 }
3226
3227 /* TLB */
3228
/*
 * Emit the memory_map lookup for a load.
 *
 * s     - host register holding the guest address
 * ar    - register receiving the masked address when a mask is given
 * map   - register receiving the memory_map entry
 * a     - address mask; -1 means no masking
 * shift - when >= 0, passed to emit_lea8() together with s
 * c     - nonzero when the address is the compile-time constant addr
 * cache, x - not used by this implementation
 *
 * Returns map, or -1 when the constant address needs no mapping.
 */
static int do_tlb_r(int s,int ar,int map,int cache,int x,int a,int shift,int c,u_int addr)
{
  if(!c) {
    if(s!=map) emit_mov(s,map);
    emit_shrimm(map,12,map);
    // Schedule this while we wait on the load
    if(shift>=0) emit_lea8(s,shift);
    if(a!=-1) emit_andimm(s,a,ar);
    emit_movmem_indexedx4((int)memory_map,map,map);
    return map;
  }
  if((signed int)addr<(signed int)0xC0000000) {
    return -1; // No mapping
  }
  emit_readword((int)(memory_map+(addr>>12)),map);
  return map;
}
// Emit the "mapping invalid" check that follows do_tlb_r.
// Unless the address is a constant below 0xC0000000 (signed compare), this
// tests the map register against itself and emits a js with a zero
// placeholder target; the address of that branch is stored in *jaddr.
// Always returns map.
static int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
{
  if(c&&(signed int)addr<(signed int)0xC0000000) {
    return map; // nothing to check
  }
  emit_test(map,map);
  *jaddr=(int)out;
  emit_js(0);
  return map;
}
3258
// Combine the address register ar with the mapping register map for a read
// (emit_leairrx4 with zero displacement; result goes back into ar).
// Nothing is emitted when map is negative, i.e. no mapping register is used.
static void gen_tlb_addr_r(int ar, int map) {
  if(map<0) return;
  emit_leairrx4(0,ar,map,ar);
}
3264
// Emit the memory_map lookup for a write access.
//   s    host register holding the address
//   map  host register receiving the memory_map entry, shifted left by 2
//   c    nonzero when the address is a known constant, given in addr
//   ar, cache, x   unused here
// Returns map, or -1 when the address is constant and lies in
// [0x80800000, 0xC0000000), in which case nothing is emitted.
// NOTE(review): the trailing shift presumably moves a flag bit of the entry
// into the carry flag for do_tlb_w_branch's jc -- confirm against callers.
static int do_tlb_w(int s,int ar,int map,int cache,int x,int c,u_int addr)
{
  if(c) {
    if(addr>=0x80800000&&addr<0xC0000000) {
      return -1; // No mapping
    }
    emit_readword((int)(memory_map+(addr>>12)),map);
  }
  else {
    if(s!=map) {
      emit_mov(s,map);
    }
    emit_shrimm(map,12,map);
    // Schedule this while we wait on the load
    emit_movmem_indexedx4((int)memory_map,map,map);
  }
  emit_shlimm(map,2,map);
  return map;
}
// Emit the check that follows do_tlb_w.
// Unless the address is a constant inside [0x80800000, 0xC0000000), a jc
// with a zero placeholder target is emitted and the address of that branch
// is stored in *jaddr.  map is not used.
static void do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
{
  if(c&&addr>=0x80800000&&addr<0xC0000000) {
    return; // nothing to check
  }
  *jaddr=(int)out;
  emit_jc(0);
}
3292
// Combine the address register ar with the mapping register map for a write
// (emit_leairrx1 with zero displacement; result goes back into ar).
// Nothing is emitted when map is negative, i.e. no mapping register is used.
static void gen_tlb_addr_w(int ar, int map) {
  if(map<0) return;
  emit_leairrx1(0,ar,map,ar);
}
3298
3299 // We don't need this for x86
generate_map_const(u_int addr,int reg)3300 static void generate_map_const(u_int addr,int reg) {
3301 // void *mapaddr=memory_map+(addr>>12);
3302 }
3303
3304 /* Special assem */
3305
shift_assemble_x86(int i,struct regstat * i_regs)3306 static void shift_assemble_x86(int i,struct regstat *i_regs)
3307 {
3308 if(rt1[i]) {
3309 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3310 {
3311 char s,t,shift;
3312 t=get_reg(i_regs->regmap,rt1[i]);
3313 s=get_reg(i_regs->regmap,rs1[i]);
3314 shift=get_reg(i_regs->regmap,rs2[i]);
3315 if(t>=0){
3316 if(rs1[i]==0)
3317 {
3318 emit_zeroreg(t);
3319 }
3320 else if(rs2[i]==0)
3321 {
3322 assert(s>=0);
3323 if(s!=t) emit_mov(s,t);
3324 }
3325 else
3326 {
3327 char temp=get_reg(i_regs->regmap,-1);
3328 assert(s>=0);
3329 if(t==ECX&&s!=ECX) {
3330 if(shift!=ECX) emit_mov(shift,ECX);
3331 if(rt1[i]==rs2[i]) {shift=temp;}
3332 if(s!=shift) emit_mov(s,shift);
3333 }
3334 else
3335 {
3336 if(rt1[i]==rs2[i]) {emit_mov(shift,temp);shift=temp;}
3337 if(s!=t) emit_mov(s,t);
3338 if(shift!=ECX) {
3339 if(i_regs->regmap[ECX]<0)
3340 emit_mov(shift,ECX);
3341 else
3342 emit_xchg(shift,ECX);
3343 }
3344 }
3345 if(opcode2[i]==4) // SLLV
3346 {
3347 emit_shlcl(t==ECX?shift:t);
3348 }
3349 if(opcode2[i]==6) // SRLV
3350 {
3351 emit_shrcl(t==ECX?shift:t);
3352 }
3353 if(opcode2[i]==7) // SRAV
3354 {
3355 emit_sarcl(t==ECX?shift:t);
3356 }
3357 if(shift!=ECX&&i_regs->regmap[ECX]>=0) emit_xchg(shift,ECX);
3358 }
3359 }
3360 } else { // DSLLV/DSRLV/DSRAV
3361 char sh,sl,th,tl,shift;
3362 th=get_reg(i_regs->regmap,rt1[i]|64);
3363 tl=get_reg(i_regs->regmap,rt1[i]);
3364 sh=get_reg(i_regs->regmap,rs1[i]|64);
3365 sl=get_reg(i_regs->regmap,rs1[i]);
3366 shift=get_reg(i_regs->regmap,rs2[i]);
3367 if(tl>=0){
3368 if(rs1[i]==0)
3369 {
3370 emit_zeroreg(tl);
3371 if(th>=0) emit_zeroreg(th);
3372 }
3373 else if(rs2[i]==0)
3374 {
3375 assert(sl>=0);
3376 if(sl!=tl) emit_mov(sl,tl);
3377 if(th>=0&&sh!=th) emit_mov(sh,th);
3378 }
3379 else
3380 {
3381 // FIXME: What if shift==tl ?
3382 assert(shift!=tl);
3383 int temp=get_reg(i_regs->regmap,-1);
3384 int real_th=th;
3385 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3386 assert(sl>=0);
3387 assert(sh>=0);
3388 if(tl==ECX&&sl!=ECX) {
3389 if(shift!=ECX) emit_mov(shift,ECX);
3390 if(sl!=shift) emit_mov(sl,shift);
3391 if(th>=0 && sh!=th) emit_mov(sh,th);
3392 }
3393 else if(th==ECX&&sh!=ECX) {
3394 if(shift!=ECX) emit_mov(shift,ECX);
3395 if(sh!=shift) emit_mov(sh,shift);
3396 if(sl!=tl) emit_mov(sl,tl);
3397 }
3398 else
3399 {
3400 if(sl!=tl) emit_mov(sl,tl);
3401 if(th>=0 && sh!=th) emit_mov(sh,th);
3402 if(shift!=ECX) {
3403 if(i_regs->regmap[ECX]<0)
3404 emit_mov(shift,ECX);
3405 else
3406 emit_xchg(shift,ECX);
3407 }
3408 }
3409 if(opcode2[i]==0x14) // DSLLV
3410 {
3411 if(th>=0) emit_shldcl(th==ECX?shift:th,tl==ECX?shift:tl);
3412 emit_shlcl(tl==ECX?shift:tl);
3413 emit_testimm(ECX,32);
3414 if(th>=0) emit_cmovne_reg(tl==ECX?shift:tl,th==ECX?shift:th);
3415 emit_cmovne(&const_zero,tl==ECX?shift:tl);
3416 }
3417 if(opcode2[i]==0x16) // DSRLV
3418 {
3419 assert(th>=0);
3420 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3421 emit_shrcl(th==ECX?shift:th);
3422 emit_testimm(ECX,32);
3423 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3424 if(real_th>=0) emit_cmovne(&const_zero,th==ECX?shift:th);
3425 }
3426 if(opcode2[i]==0x17) // DSRAV
3427 {
3428 assert(th>=0);
3429 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3430 if(real_th>=0) {
3431 assert(temp>=0);
3432 emit_mov(th==ECX?shift:th,temp==ECX?shift:temp);
3433 }
3434 emit_sarcl(th==ECX?shift:th);
3435 if(real_th>=0) emit_sarimm(temp==ECX?shift:temp,31,temp==ECX?shift:temp);
3436 emit_testimm(ECX,32);
3437 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3438 if(real_th>=0) emit_cmovne_reg(temp==ECX?shift:temp,th==ECX?shift:th);
3439 }
3440 if(shift!=ECX&&(i_regs->regmap[ECX]>=0||temp==ECX)) emit_xchg(shift,ECX);
3441 }
3442 }
3443 }
3444 }
3445 }
3446 #define shift_assemble shift_assemble_x86
3447
loadlr_assemble_x86(int i,struct regstat * i_regs)3448 static void loadlr_assemble_x86(int i,struct regstat *i_regs)
3449 {
3450 int s,th,tl,temp,temp2,addr,map=-1;
3451 int offset;
3452 int jaddr=0;
3453 int memtarget,c=0;
3454 u_int hr,reglist=0;
3455 th=get_reg(i_regs->regmap,rt1[i]|64);
3456 tl=get_reg(i_regs->regmap,rt1[i]);
3457 s=get_reg(i_regs->regmap,rs1[i]);
3458 temp=get_reg(i_regs->regmap,-1);
3459 temp2=get_reg(i_regs->regmap,FTEMP);
3460 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3461 assert(addr<0);
3462 offset=imm[i];
3463 for(hr=0;hr<HOST_REGS;hr++) {
3464 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3465 }
3466 reglist|=1<<temp;
3467 if(offset||s<0||c) addr=temp2;
3468 else addr=s;
3469 if(s>=0) {
3470 c=(i_regs->wasconst>>s)&1;
3471 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3472 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3473 }
3474 if(!using_tlb) {
3475 if(!c) {
3476 emit_lea8(addr,temp);
3477 if (opcode[i]==0x22||opcode[i]==0x26) {
3478 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3479 }else{
3480 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3481 }
3482 emit_cmpimm(addr,0x800000);
3483 jaddr=(int)out;
3484 emit_jno(0);
3485 }
3486 else {
3487 if (opcode[i]==0x22||opcode[i]==0x26) {
3488 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3489 }else{
3490 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3491 }
3492 }
3493 }else{ // using tlb
3494 int a;
3495 if(c) {
3496 a=-1;
3497 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3498 a=0xFFFFFFFC; // LWL/LWR
3499 }else{
3500 a=0xFFFFFFF8; // LDL/LDR
3501 }
3502 map=get_reg(i_regs->regmap,TLREG);
3503 assert(map>=0);
3504 reglist&=~(1<<map);
3505 map=do_tlb_r(addr,temp2,map,-1,0,a,c?-1:temp,c,constmap[i][s]+offset);
3506 if(c) {
3507 if (opcode[i]==0x22||opcode[i]==0x26) {
3508 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3509 }else{
3510 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3511 }
3512 }
3513 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3514 }
3515 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3516 if(!c||memtarget) {
3517 //emit_readword_indexed((int)g_dev.ri.rdram.dram-0x80000000,temp2,temp2);
3518 emit_readword_indexed_tlb(0,temp2,map,temp2);
3519 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3520 }
3521 else
3522 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3523 if(rt1[i]) {
3524 assert(tl>=0);
3525 emit_andimm(temp,24,temp);
3526 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
3527 if(temp==ECX)
3528 {
3529 int temp3=EDX;
3530 if(temp3==temp2) temp3++;
3531 emit_pushreg(temp3);
3532 emit_movimm(-1,temp3);
3533 if (opcode[i]==0x26) {
3534 emit_shrcl(temp3);
3535 emit_shrcl(temp2);
3536 }else{
3537 emit_shlcl(temp3);
3538 emit_shlcl(temp2);
3539 }
3540 emit_mov(temp3,ECX);
3541 emit_not(ECX,ECX);
3542 emit_popreg(temp3);
3543 }
3544 else
3545 {
3546 int temp3=EBP;
3547 if(temp3==temp) temp3++;
3548 if(temp3==temp2) temp3++;
3549 if(temp3==temp) temp3++;
3550 emit_xchg(ECX,temp);
3551 emit_pushreg(temp3);
3552 emit_movimm(-1,temp3);
3553 if (opcode[i]==0x26) {
3554 emit_shrcl(temp3);
3555 emit_shrcl(temp2==ECX?temp:temp2);
3556 }else{
3557 emit_shlcl(temp3);
3558 emit_shlcl(temp2==ECX?temp:temp2);
3559 }
3560 emit_not(temp3,temp3);
3561 emit_mov(temp,ECX);
3562 emit_mov(temp3,temp);
3563 emit_popreg(temp3);
3564 }
3565 emit_and(temp,tl,tl);
3566 emit_or(temp2,tl,tl);
3567 //emit_storereg(rt1[i],tl); // DEBUG
3568 /*emit_pusha();
3569 //save_regs(0x100f);
3570 emit_readword((int)&last_count,ECX);
3571 if(get_reg(i_regs->regmap,CCREG)<0)
3572 emit_loadreg(CCREG,HOST_CCREG);
3573 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3574 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3575 emit_writeword(HOST_CCREG,(int)&g_cp0_regs[CP0_COUNT_REG]);
3576 emit_call((int)memdebug);
3577 emit_popa();
3578 //restore_regs(0x100f);*/
3579 }
3580 }
3581 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3582 if(s>=0)
3583 if((i_regs->wasdirty>>s)&1)
3584 emit_storereg(rs1[i],s);
3585 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3586 if((i_regs->wasdirty>>get_reg(i_regs->regmap,rs1[i]|64))&1)
3587 emit_storereg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3588 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3589 if(!c||memtarget) {
3590 //if(th>=0) emit_readword_indexed((int)g_dev.ri.rdram.dram-0x80000000,temp2,temp2h);
3591 //emit_readword_indexed((int)g_dev.ri.rdram.dram-0x7FFFFFFC,temp2,temp2);
3592 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3593 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3594 }
3595 else
3596 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3597 if(rt1[i]) {
3598 assert(th>=0);
3599 assert(tl>=0);
3600 emit_andimm(temp,56,temp);
3601 emit_pushreg(temp);
3602 emit_pushreg(temp2h);
3603 emit_pushreg(temp2);
3604 emit_pushreg(th);
3605 emit_pushreg(tl);
3606 if(opcode[i]==0x1A) emit_call((int)ldl_merge);
3607 if(opcode[i]==0x1B) emit_call((int)ldr_merge);
3608 emit_addimm(ESP,20,ESP);
3609 if(tl!=EDX) {
3610 if(tl!=EAX) emit_mov(EAX,tl);
3611 if(th!=EDX) emit_mov(EDX,th);
3612 } else
3613 if(th!=EAX) {
3614 if(th!=EDX) emit_mov(EDX,th);
3615 if(tl!=EAX) emit_mov(EAX,tl);
3616 } else {
3617 emit_xchg(EAX,EDX);
3618 }
3619 if(s>=0) emit_loadreg(rs1[i],s);
3620 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3621 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3622 }
3623 }
3624 }
3625 #define loadlr_assemble loadlr_assemble_x86
3626
cop0_assemble(int i,struct regstat * i_regs)3627 static void cop0_assemble(int i,struct regstat *i_regs)
3628 {
3629 if(opcode2[i]==0) // MFC0
3630 {
3631 if(rt1[i]) {
3632 signed char t=get_reg(i_regs->regmap,rt1[i]);
3633 char copr=(source[i]>>11)&0x1f;
3634 if(t>=0) {
3635 emit_writeword_imm((int)&fake_pc,(int)&PC);
3636 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
3637 if(copr==9) {
3638 emit_readword((int)&last_count,ECX);
3639 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3640 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3641 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3642 emit_writeword(HOST_CCREG,(int)&g_cp0_regs[CP0_COUNT_REG]);
3643 }
3644 emit_call((int)cached_interpreter_table.MFC0);
3645 emit_readword((int)&readmem_dword,t);
3646 }
3647 }
3648 }
3649 else if(opcode2[i]==4) // MTC0
3650 {
3651 signed char s=get_reg(i_regs->regmap,rs1[i]);
3652 char copr=(source[i]>>11)&0x1f;
3653 assert(s>=0);
3654 emit_writeword(s,(int)&readmem_dword);
3655 emit_pusha();
3656 emit_writeword_imm((int)&fake_pc,(int)&PC);
3657 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
3658 if(copr==9||copr==11||copr==12) {
3659 if((copr==12||copr==9)&&!is_delayslot) {
3660 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3661 }
3662 emit_readword((int)&last_count,ECX);
3663 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3664 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3665 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3666 emit_writeword(HOST_CCREG,(int)&g_cp0_regs[CP0_COUNT_REG]);
3667 }
3668 // What a mess. The status register (12) can enable interrupts,
3669 // so needs a special case to handle a pending interrupt.
3670 // The interrupt must be taken immediately, because a subsequent
3671 // instruction might disable interrupts again.
3672 if((copr==12||copr==9)&&!is_delayslot) {
3673 emit_writeword_imm(start+i*4+(copr==12)*4,(int)&pcaddr);
3674 emit_writebyte_imm(0,(int)&pending_exception);
3675 }
3676 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3677 //else
3678 emit_call((int)cached_interpreter_table.MTC0);
3679 if(copr==9||copr==11||copr==12) {
3680 emit_readword((int)&g_cp0_regs[CP0_COUNT_REG],HOST_CCREG);
3681 emit_readword((int)&next_interrupt,ECX);
3682 emit_addimm(HOST_CCREG,-(int)CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3683 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3684 emit_writeword(ECX,(int)&last_count);
3685 emit_storereg(CCREG,HOST_CCREG);
3686 }
3687 emit_popa();
3688 if(copr==12||copr==9) {
3689 assert(!is_delayslot);
3690 //if(is_delayslot) output_byte(0xcc);
3691 emit_cmpmem_imm_byte((int)&pending_exception,0);
3692 emit_jne((int)&do_interrupt);
3693 }
3694 cop1_usable=0;
3695 }
3696 else
3697 {
3698 assert(opcode2[i]==0x10);
3699 if((source[i]&0x3f)==0x01) // TLBR
3700 emit_call((int)cached_interpreter_table.TLBR);
3701 if((source[i]&0x3f)==0x02) // TLBWI
3702 emit_call((int)TLBWI_new);
3703 if((source[i]&0x3f)==0x06) { // TLBWR
3704 // The TLB entry written by TLBWR is dependent on the count,
3705 // so update the cycle count
3706 emit_readword((int)&last_count,ECX);
3707 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3708 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3709 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3710 emit_writeword(HOST_CCREG,(int)&g_cp0_regs[CP0_COUNT_REG]);
3711 emit_call((int)TLBWR_new);
3712 }
3713 if((source[i]&0x3f)==0x08) // TLBP
3714 emit_call((int)cached_interpreter_table.TLBP);
3715 if((source[i]&0x3f)==0x18) // ERET
3716 {
3717 assert(!is_delayslot);
3718 int count=ccadj[i];
3719 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3720 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3721 emit_jmp((int)jump_eret);
3722 }
3723 }
3724 }
3725
cop1_assemble(int i,struct regstat * i_regs)3726 static void cop1_assemble(int i,struct regstat *i_regs)
3727 {
3728 // Check cop1 unusable
3729 if(!cop1_usable) {
3730 signed char rs=get_reg(i_regs->regmap,CSREG);
3731 assert(rs>=0);
3732 emit_testimm(rs,0x20000000);
3733 int jaddr=(int)out;
3734 emit_jeq(0);
3735 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3736 cop1_usable=1;
3737 }
3738 if (opcode2[i]==0) { // MFC1
3739 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3740 if(tl>=0) {
3741 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],tl);
3742 emit_readword_indexed(0,tl,tl);
3743 }
3744 }
3745 else if (opcode2[i]==1) { // DMFC1
3746 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3747 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3748 if(tl>=0) {
3749 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],tl);
3750 if(th>=0) emit_readword_indexed(4,tl,th);
3751 emit_readword_indexed(0,tl,tl);
3752 }
3753 }
3754 else if (opcode2[i]==4) { // MTC1
3755 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3756 signed char temp=get_reg(i_regs->regmap,-1);
3757 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3758 emit_writeword_indexed(sl,0,temp);
3759 }
3760 else if (opcode2[i]==5) { // DMTC1
3761 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3762 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3763 signed char temp=get_reg(i_regs->regmap,-1);
3764 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3765 emit_writeword_indexed(sh,4,temp);
3766 emit_writeword_indexed(sl,0,temp);
3767 }
3768 else if (opcode2[i]==2) // CFC1
3769 {
3770 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3771 if(tl>=0) {
3772 u_int copr=(source[i]>>11)&0x1f;
3773 if(copr==0) emit_readword((int)&FCR0,tl);
3774 if(copr==31) emit_readword((int)&FCR31,tl);
3775 }
3776 }
3777 else if (opcode2[i]==6) // CTC1
3778 {
3779 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3780 u_int copr=(source[i]>>11)&0x1f;
3781 assert(sl>=0);
3782 if(copr==31)
3783 {
3784 emit_writeword(sl,(int)&FCR31);
3785 // Set the rounding mode
3786 char temp=get_reg(i_regs->regmap,-1);
3787 emit_movimm(3,temp);
3788 emit_and(sl,temp,temp);
3789 emit_fldcw_indexed((int)&rounding_modes,temp);
3790 }
3791 }
3792 }
3793
fconv_assemble_x86(int i,struct regstat * i_regs)3794 static void fconv_assemble_x86(int i,struct regstat *i_regs)
3795 {
3796 signed char temp=get_reg(i_regs->regmap,-1);
3797 assert(temp>=0);
3798 // Check cop1 unusable
3799 if(!cop1_usable) {
3800 signed char rs=get_reg(i_regs->regmap,CSREG);
3801 assert(rs>=0);
3802 emit_testimm(rs,0x20000000);
3803 int jaddr=(int)out;
3804 emit_jeq(0);
3805 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3806 cop1_usable=1;
3807 }
3808 #ifdef __SSE__
3809 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3810 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3811 emit_movss_load(temp,0);
3812 emit_cvttps2dq(0,0); // float->int, truncate
3813 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3814 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3815 emit_movd_store(0,temp);
3816 return;
3817 }
3818 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3819 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3820 emit_movsd_load(temp,0);
3821 emit_cvttpd2dq(0,0); // double->int, truncate
3822 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3823 emit_movd_store(0,temp);
3824 return;
3825 }
3826 #endif
3827
3828 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3829 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3830 emit_fildl(temp);
3831 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3832 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3833 emit_fstps(temp);
3834 return;
3835 }
3836 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3837 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3838 emit_fildl(temp);
3839 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
3840 emit_fstpl(temp);
3841 return;
3842 }
3843 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { // cvt_s_l
3844 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3845 emit_fildll(temp);
3846 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3847 emit_fstps(temp);
3848 return;
3849 }
3850 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { // cvt_d_l
3851 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3852 emit_fildll(temp);
3853 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3854 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
3855 emit_fstpl(temp);
3856 return;
3857 }
3858
3859 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3860 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3861 emit_flds(temp);
3862 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
3863 emit_fstpl(temp);
3864 return;
3865 }
3866 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3867 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3868 emit_fldl(temp);
3869 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3870 emit_fstps(temp);
3871 return;
3872 }
3873
3874 if(opcode2[i]==0x10) { // cvt_*_s
3875 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3876 emit_flds(temp);
3877 }
3878 if(opcode2[i]==0x11) { // cvt_*_d
3879 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3880 emit_fldl(temp);
3881 }
3882 if((source[i]&0x3f)<0x10) {
3883 emit_fnstcw_stack();
3884 if((source[i]&3)==0) emit_fldcw((int)&rounding_modes[0]); //DebugMessage(M64MSG_VERBOSE, "round");
3885 if((source[i]&3)==1) emit_fldcw((int)&rounding_modes[1]); //DebugMessage(M64MSG_VERBOSE, "trunc");
3886 if((source[i]&3)==2) emit_fldcw((int)&rounding_modes[2]); //DebugMessage(M64MSG_VERBOSE, "ceil");
3887 if((source[i]&3)==3) emit_fldcw((int)&rounding_modes[3]); //DebugMessage(M64MSG_VERBOSE, "floor");
3888 }
3889 if((source[i]&0x3f)==0x24||(source[i]&0x3c)==0x0c) { // cvt_w_*
3890 if(opcode2[i]!=0x10||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3891 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3892 emit_fistpl(temp);
3893 }
3894 if((source[i]&0x3f)==0x25||(source[i]&0x3c)==0x08) { // cvt_l_*
3895 if(opcode2[i]!=0x11||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3896 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
3897 emit_fistpll(temp);
3898 }
3899 if((source[i]&0x3f)<0x10) {
3900 emit_fldcw_stack();
3901 }
3902 return;
3903
3904 // C emulation code for debugging
3905
3906 emit_pusha();
3907
3908 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3909 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3910 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3911 emit_call((int)cvt_s_w);
3912 }
3913 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3914 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3915 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3916 emit_call((int)cvt_d_w);
3917 }
3918 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3919 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3920 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3921 emit_call((int)cvt_s_l);
3922 }
3923 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3924 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3925 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3926 emit_call((int)cvt_d_l);
3927 }
3928
3929 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3930 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3931 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3932 emit_call((int)cvt_d_s);
3933 }
3934 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3935 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3936 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3937 emit_call((int)cvt_w_s);
3938 }
3939 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3940 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3941 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3942 emit_call((int)cvt_l_s);
3943 }
3944
3945 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3946 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3947 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3948 emit_call((int)cvt_s_d);
3949 }
3950 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3951 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3952 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3953 emit_call((int)cvt_w_d);
3954 }
3955 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3956 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3957 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3958 emit_call((int)cvt_l_d);
3959 }
3960
3961 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3962 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3963 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3964 emit_call((int)round_l_s);
3965 }
3966 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3967 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3968 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3969 emit_call((int)trunc_l_s);
3970 }
3971 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3972 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3973 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3974 emit_call((int)ceil_l_s);
3975 }
3976 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3977 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3978 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3979 emit_call((int)floor_l_s);
3980 }
3981 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3982 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3983 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3984 emit_call((int)round_w_s);
3985 }
3986 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3987 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3988 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3989 emit_call((int)trunc_w_s);
3990 }
3991 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3992 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3993 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3994 emit_call((int)ceil_w_s);
3995 }
3996 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3997 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3998 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3999 emit_call((int)floor_w_s);
4000 }
4001
4002 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4003 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
4004 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4005 emit_call((int)round_l_d);
4006 }
4007 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4008 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
4009 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4010 emit_call((int)trunc_l_d);
4011 }
4012 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4013 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
4014 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4015 emit_call((int)ceil_l_d);
4016 }
4017 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4018 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
4019 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4020 emit_call((int)floor_l_d);
4021 }
4022 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4023 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
4024 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4025 emit_call((int)round_w_d);
4026 }
4027 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4028 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
4029 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4030 emit_call((int)trunc_w_d);
4031 }
4032 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4033 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
4034 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4035 emit_call((int)ceil_w_d);
4036 }
4037 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4038 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
4039 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4040 emit_call((int)floor_w_d);
4041 }
4042
4043 emit_addimm(ESP,8,ESP);
4044 emit_popa();
4045 //emit_loadreg(CSREG,rs);
4046 return;
4047 }
4048 #define fconv_assemble fconv_assemble_x86
4049
fcomp_assemble(int i,struct regstat * i_regs)4050 static void fcomp_assemble(int i,struct regstat *i_regs)
4051 {
4052 signed char fs=get_reg(i_regs->regmap,FSREG);
4053 signed char temp=get_reg(i_regs->regmap,-1);
4054 assert(temp>=0);
4055 // Check cop1 unusable
4056 if(!cop1_usable) {
4057 signed char cs=get_reg(i_regs->regmap,CSREG);
4058 assert(cs>=0);
4059 emit_testimm(cs,0x20000000);
4060 int jaddr=(int)out;
4061 emit_jeq(0);
4062 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4063 cop1_usable=1;
4064 }
4065
4066 if((source[i]&0x3f)==0x30) {
4067 emit_andimm(fs,~0x800000,fs);
4068 return;
4069 }
4070
4071 if((source[i]&0x3e)==0x38) {
4072 // sf/ngle - these should throw exceptions for NaNs
4073 emit_andimm(fs,~0x800000,fs);
4074 return;
4075 }
4076
4077 if(opcode2[i]==0x10) {
4078 emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],temp);
4079 emit_flds(temp);
4080 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
4081 emit_flds(temp);
4082 emit_movimm(0x800000,temp);
4083 emit_or(fs,temp,fs);
4084 emit_xor(temp,fs,temp);
4085 emit_fucomip(1);
4086 emit_fpop();
4087 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_s
4088 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_s
4089 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_s
4090 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_s
4091 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_s
4092 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_s
4093 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_s
4094 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_s
4095 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_s
4096 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_s
4097 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_s
4098 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_s
4099 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_s
4100 return;
4101 }
4102 if(opcode2[i]==0x11) {
4103 emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],temp);
4104 emit_fldl(temp);
4105 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
4106 emit_fldl(temp);
4107 emit_movimm(0x800000,temp);
4108 emit_or(fs,temp,fs);
4109 emit_xor(temp,fs,temp);
4110 emit_fucomip(1);
4111 emit_fpop();
4112 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_d
4113 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_d
4114 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_d
4115 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_d
4116 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_d
4117 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_d
4118 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_d
4119 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_d
4120 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_d
4121 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_d
4122 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_d
4123 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_d
4124 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_d
4125 return;
4126 }
4127
4128 emit_pusha();
4129 if(opcode2[i]==0x10) {
4130 emit_pushmem((int)®_cop1_simple[(source[i]>>16)&0x1f]);
4131 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
4132 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4133 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4134 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4135 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4136 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4137 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4138 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4139 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4140 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4141 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4142 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4143 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4144 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4145 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4146 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4147 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4148 }
4149 if(opcode2[i]==0x11) {
4150 emit_pushmem((int)®_cop1_double[(source[i]>>16)&0x1f]);
4151 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4152 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4153 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4154 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4155 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4156 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4157 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4158 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4159 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4160 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4161 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4162 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4163 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4164 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4165 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4166 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4167 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4168 }
4169 emit_addimm(ESP,8,ESP);
4170 emit_popa();
4171 emit_loadreg(FSREG,fs);
4172 return;
4173 }
4174
float_assemble(int i,struct regstat * i_regs)4175 static void float_assemble(int i,struct regstat *i_regs)
4176 {
4177 signed char temp=get_reg(i_regs->regmap,-1);
4178 assert(temp>=0);
4179 // Check cop1 unusable
4180 if(!cop1_usable) {
4181 signed char cs=get_reg(i_regs->regmap,CSREG);
4182 assert(cs>=0);
4183 emit_testimm(cs,0x20000000);
4184 int jaddr=(int)out;
4185 emit_jeq(0);
4186 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4187 cop1_usable=1;
4188 }
4189
4190 if((source[i]&0x3f)==6) // mov
4191 {
4192 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4193 if(opcode2[i]==0x10) {
4194 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
4195 emit_flds(temp);
4196 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
4197 emit_fstps(temp);
4198 }
4199 if(opcode2[i]==0x11) {
4200 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
4201 emit_fldl(temp);
4202 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
4203 emit_fstpl(temp);
4204 }
4205 }
4206 return;
4207 }
4208
4209 if((source[i]&0x3f)>3)
4210 {
4211 if(opcode2[i]==0x10) {
4212 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
4213 emit_flds(temp);
4214 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4215 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
4216 }
4217 }
4218 if(opcode2[i]==0x11) {
4219 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
4220 emit_fldl(temp);
4221 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4222 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
4223 }
4224 }
4225 if((source[i]&0x3f)==4) // sqrt
4226 emit_fsqrt();
4227 if((source[i]&0x3f)==5) // abs
4228 emit_fabs();
4229 if((source[i]&0x3f)==7) // neg
4230 emit_fchs();
4231 if(opcode2[i]==0x10) {
4232 emit_fstps(temp);
4233 }
4234 if(opcode2[i]==0x11) {
4235 emit_fstpl(temp);
4236 }
4237 return;
4238 }
4239 if((source[i]&0x3f)<4)
4240 {
4241 if(opcode2[i]==0x10) {
4242 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
4243 emit_flds(temp);
4244 }
4245 if(opcode2[i]==0x11) {
4246 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
4247 emit_fldl(temp);
4248 }
4249 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4250 if(opcode2[i]==0x10) {
4251 emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],temp);
4252 if((source[i]&0x3f)==0) emit_fadds(temp);
4253 if((source[i]&0x3f)==1) emit_fsubs(temp);
4254 if((source[i]&0x3f)==2) emit_fmuls(temp);
4255 if((source[i]&0x3f)==3) emit_fdivs(temp);
4256 }
4257 else if(opcode2[i]==0x11) {
4258 emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],temp);
4259 if((source[i]&0x3f)==0) emit_faddl(temp);
4260 if((source[i]&0x3f)==1) emit_fsubl(temp);
4261 if((source[i]&0x3f)==2) emit_fmull(temp);
4262 if((source[i]&0x3f)==3) emit_fdivl(temp);
4263 }
4264 }
4265 else {
4266 if((source[i]&0x3f)==0) emit_fadd(0);
4267 if((source[i]&0x3f)==1) emit_fsub(0);
4268 if((source[i]&0x3f)==2) emit_fmul(0);
4269 if((source[i]&0x3f)==3) emit_fdiv(0);
4270 }
4271 if(opcode2[i]==0x10) {
4272 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4273 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
4274 }
4275 emit_fstps(temp);
4276 }
4277 if(opcode2[i]==0x11) {
4278 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4279 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
4280 }
4281 emit_fstpl(temp);
4282 }
4283 return;
4284 }
4285
4286 if(opcode2[i]==0x10) { // Single precision
4287 emit_pusha();
4288 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
4289 if((source[i]&0x3f)<4)
4290 emit_pushmem((int)®_cop1_simple[(source[i]>>16)&0x1f]);
4291 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
4292 switch(source[i]&0x3f)
4293 {
4294 case 0x00: emit_call((int)add_s);break;
4295 case 0x01: emit_call((int)sub_s);break;
4296 case 0x02: emit_call((int)mul_s);break;
4297 case 0x03: emit_call((int)div_s);break;
4298 case 0x04: emit_call((int)sqrt_s);break;
4299 case 0x05: emit_call((int)abs_s);break;
4300 case 0x06: emit_call((int)mov_s);break;
4301 case 0x07: emit_call((int)neg_s);break;
4302 }
4303 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4304 emit_popa();
4305 }
4306 if(opcode2[i]==0x11) { // Double precision
4307 emit_pusha();
4308 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
4309 if((source[i]&0x3f)<4)
4310 emit_pushmem((int)®_cop1_double[(source[i]>>16)&0x1f]);
4311 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4312 switch(source[i]&0x3f)
4313 {
4314 case 0x00: emit_call((int)add_d);break;
4315 case 0x01: emit_call((int)sub_d);break;
4316 case 0x02: emit_call((int)mul_d);break;
4317 case 0x03: emit_call((int)div_d);break;
4318 case 0x04: emit_call((int)sqrt_d);break;
4319 case 0x05: emit_call((int)abs_d);break;
4320 case 0x06: emit_call((int)mov_d);break;
4321 case 0x07: emit_call((int)neg_d);break;
4322 }
4323 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4324 emit_popa();
4325 }
4326 }
4327
multdiv_assemble_x86(int i,struct regstat * i_regs)4328 static void multdiv_assemble_x86(int i,struct regstat *i_regs)
4329 {
4330 // case 0x18: MULT
4331 // case 0x19: MULTU
4332 // case 0x1A: DIV
4333 // case 0x1B: DIVU
4334 // case 0x1C: DMULT
4335 // case 0x1D: DMULTU
4336 // case 0x1E: DDIV
4337 // case 0x1F: DDIVU
4338 if(rs1[i]&&rs2[i])
4339 {
4340 if((opcode2[i]&4)==0) // 32-bit
4341 {
4342 if(opcode2[i]==0x18) // MULT
4343 {
4344 char m1=get_reg(i_regs->regmap,rs1[i]);
4345 char m2=get_reg(i_regs->regmap,rs2[i]);
4346 assert(m1>=0);
4347 assert(m2>=0);
4348 emit_mov(m1,EAX);
4349 emit_imul(m2);
4350 }
4351 if(opcode2[i]==0x19) // MULTU
4352 {
4353 char m1=get_reg(i_regs->regmap,rs1[i]);
4354 char m2=get_reg(i_regs->regmap,rs2[i]);
4355 assert(m1>=0);
4356 assert(m2>=0);
4357 emit_mov(m1,EAX);
4358 emit_mul(m2);
4359 }
4360 if(opcode2[i]==0x1A) // DIV
4361 {
4362 char d1=get_reg(i_regs->regmap,rs1[i]);
4363 char d2=get_reg(i_regs->regmap,rs2[i]);
4364 assert(d1>=0);
4365 assert(d2>=0);
4366 emit_mov(d1,EAX);
4367 emit_cdq();
4368 emit_test(d2,d2);
4369 emit_jeq((int)out+8);
4370 emit_idiv(d2);
4371 }
4372 if(opcode2[i]==0x1B) // DIVU
4373 {
4374 char d1=get_reg(i_regs->regmap,rs1[i]);
4375 char d2=get_reg(i_regs->regmap,rs2[i]);
4376 assert(d1>=0);
4377 assert(d2>=0);
4378 emit_mov(d1,EAX);
4379 emit_zeroreg(EDX);
4380 emit_test(d2,d2);
4381 emit_jeq((int)out+8);
4382 emit_div(d2);
4383 }
4384 }
4385 else // 64-bit
4386 {
4387 if(opcode2[i]==0x1C) // DMULT
4388 {
4389 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4390 char m1l=get_reg(i_regs->regmap,rs1[i]);
4391 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4392 char m2l=get_reg(i_regs->regmap,rs2[i]);
4393 char temp=get_reg(i_regs->regmap,-1);
4394
4395 assert((m1h>=0)&&(m1h!=EAX)&&(m1h!=EDX));
4396 assert((m2h>=0)&&(m2h!=EAX)&&(m2h!=EDX));
4397 assert((m1l>=0)&&(m1l!=EAX)&&(m1l!=EDX));
4398 assert((m2l>=0)&&(m2l!=EAX)&&(m2l!=EDX));
4399 assert((temp>=0)&&(temp!=EAX)&&(temp!=EDX));
4400
4401 // Multiply m2l*m1l
4402 emit_mov(m1l,EAX);
4403 emit_mul(m2l);
4404 emit_storereg(LOREG,EAX);
4405 emit_mov(EDX,temp);
4406
4407 // Multiply m2l*m1h
4408 emit_mov(m1h,EAX);
4409 emit_mul(m2l);
4410 emit_add(EAX,temp,temp);
4411 emit_adcimm(0,EDX);
4412 emit_storereg(HIREG,EDX);
4413
4414 // Multiply m2h*m1l
4415 emit_mov(m1l,EAX);
4416 emit_mul(m2h);
4417 emit_add(EAX,temp,temp);
4418 emit_adcimm(0,EDX);
4419 emit_storereg(LOREG|64,temp);
4420 emit_mov(EDX,temp);
4421
4422 // Multiply m2h*m1h
4423 emit_mov(m1h,EAX);
4424 emit_mul(m2h);
4425 emit_add(EAX,temp,EAX);
4426 emit_adcimm(0,EDX);
4427 emit_loadreg(HIREG,temp);
4428 emit_add(EAX,temp,EAX);
4429 emit_adcimm(0,EDX);
4430
4431 // If m1<0 subtract m2 from the high 64bit part
4432 emit_testimm(m1h,0x80000000);
4433 emit_jeq((int)out+10);
4434 emit_sub(EAX,m2l,EAX);
4435 emit_sbb(m2h,EDX);
4436
4437 // If m2<0 subtract m1 from the high 64bit part
4438 emit_testimm(m2h,0x80000000);
4439 emit_jeq((int)out+10);
4440 emit_sub(EAX,m1l,EAX);
4441 emit_sbb(m1h,EDX);
4442 }
4443 if(opcode2[i]==0x1D) // DMULTU
4444 {
4445 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4446 char m1l=get_reg(i_regs->regmap,rs1[i]);
4447 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4448 char m2l=get_reg(i_regs->regmap,rs2[i]);
4449 char temp=get_reg(i_regs->regmap,-1);
4450
4451 assert((m1h>=0)&&(m1h!=EAX)&&(m1h!=EDX));
4452 assert((m2h>=0)&&(m2h!=EAX)&&(m2h!=EDX));
4453 assert((m1l>=0)&&(m1l!=EAX)&&(m1l!=EDX));
4454 assert((m2l>=0)&&(m2l!=EAX)&&(m2l!=EDX));
4455 assert((temp>=0)&&(temp!=EAX)&&(temp!=EDX));
4456
4457 // Multiply m2l*m1l
4458 emit_mov(m1l,EAX);
4459 emit_mul(m2l);
4460 emit_storereg(LOREG,EAX);
4461 emit_mov(EDX,temp);
4462
4463 // Multiply m2l*m1h
4464 emit_mov(m1h,EAX);
4465 emit_mul(m2l);
4466 emit_add(EAX,temp,temp);
4467 emit_adcimm(0,EDX);
4468 emit_storereg(HIREG,EDX);
4469
4470 // Multiply m2h*m1l
4471 emit_mov(m1l, EAX);
4472 emit_mul(m2h);
4473 emit_add(EAX,temp,temp);
4474 emit_adcimm(0,EDX);
4475 emit_storereg(LOREG|64,temp);
4476 emit_mov(EDX,temp);
4477
4478 // Multiply m2h*m1h
4479 emit_mov(m1h,EAX);
4480 emit_mul(m2h);
4481 emit_add(EAX,temp,EAX);
4482 emit_adcimm(0, EDX);
4483 emit_loadreg(HIREG,temp);
4484 emit_add(EAX,temp,EAX);
4485 emit_adcimm(0,EDX);
4486 }
4487 if(opcode2[i]==0x1E) // DDIV
4488 {
4489 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4490 char d1l=get_reg(i_regs->regmap,rs1[i]);
4491 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4492 char d2l=get_reg(i_regs->regmap,rs2[i]);
4493 assert(d1h>=0);
4494 assert(d2h>=0);
4495 assert(d1l>=0);
4496 assert(d2l>=0);
4497 //emit_pushreg(d2h);
4498 //emit_pushreg(d2l);
4499 //emit_pushreg(d1h);
4500 //emit_pushreg(d1l);
4501 emit_addimm(ESP,-16,ESP);
4502 emit_writeword_indexed(d2h,12,ESP);
4503 emit_writeword_indexed(d2l,8,ESP);
4504 emit_writeword_indexed(d1h,4,ESP);
4505 emit_writeword_indexed(d1l,0,ESP);
4506 emit_call((int)&div64);
4507 //emit_popreg(d1l);
4508 //emit_popreg(d1h);
4509 //emit_popreg(d2l);
4510 //emit_popreg(d2h);
4511 emit_readword_indexed(0,ESP,d1l);
4512 emit_readword_indexed(4,ESP,d1h);
4513 emit_readword_indexed(8,ESP,d2l);
4514 emit_readword_indexed(12,ESP,d2h);
4515 emit_addimm(ESP,16,ESP);
4516 char hih=get_reg(i_regs->regmap,HIREG|64);
4517 char hil=get_reg(i_regs->regmap,HIREG);
4518 char loh=get_reg(i_regs->regmap,LOREG|64);
4519 char lol=get_reg(i_regs->regmap,LOREG);
4520 if(hih>=0) emit_loadreg(HIREG|64,hih);
4521 if(hil>=0) emit_loadreg(HIREG,hil);
4522 if(loh>=0) emit_loadreg(LOREG|64,loh);
4523 if(lol>=0) emit_loadreg(LOREG,lol);
4524 }
4525 if(opcode2[i]==0x1F) // DDIVU
4526 {
4527 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4528 char d1l=get_reg(i_regs->regmap,rs1[i]);
4529 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4530 char d2l=get_reg(i_regs->regmap,rs2[i]);
4531 assert(d1h>=0);
4532 assert(d2h>=0);
4533 assert(d1l>=0);
4534 assert(d2l>=0);
4535 //emit_pushreg(d2h);
4536 //emit_pushreg(d2l);
4537 //emit_pushreg(d1h);
4538 //emit_pushreg(d1l);
4539 emit_addimm(ESP,-16,ESP);
4540 emit_writeword_indexed(d2h,12,ESP);
4541 emit_writeword_indexed(d2l,8,ESP);
4542 emit_writeword_indexed(d1h,4,ESP);
4543 emit_writeword_indexed(d1l,0,ESP);
4544 emit_call((int)&divu64);
4545 //emit_popreg(d1l);
4546 //emit_popreg(d1h);
4547 //emit_popreg(d2l);
4548 //emit_popreg(d2h);
4549 emit_readword_indexed(0,ESP,d1l);
4550 emit_readword_indexed(4,ESP,d1h);
4551 emit_readword_indexed(8,ESP,d2l);
4552 emit_readword_indexed(12,ESP,d2h);
4553 emit_addimm(ESP,16,ESP);
4554 char hih=get_reg(i_regs->regmap,HIREG|64);
4555 char hil=get_reg(i_regs->regmap,HIREG);
4556 char loh=get_reg(i_regs->regmap,LOREG|64);
4557 char lol=get_reg(i_regs->regmap,LOREG);
4558 if(hih>=0) emit_loadreg(HIREG|64,hih);
4559 if(hil>=0) emit_loadreg(HIREG,hil);
4560 if(loh>=0) emit_loadreg(LOREG|64,loh);
4561 if(lol>=0) emit_loadreg(LOREG,lol);
4562 }
4563 }
4564 }
4565 else
4566 {
4567 // Multiply by zero is zero.
4568 // MIPS does not have a divide by zero exception.
4569 // The result is undefined, we return zero.
4570 char hr=get_reg(i_regs->regmap,HIREG);
4571 char lr=get_reg(i_regs->regmap,LOREG);
4572 if(hr>=0) emit_zeroreg(hr);
4573 if(lr>=0) emit_zeroreg(lr);
4574 }
4575 }
4576 #define multdiv_assemble multdiv_assemble_x86
4577
/*
 * Emit a load of the mini hash table mask into host register r.
 * 0xf8 keeps bits 3-7, the same bits do_miniht_insert uses to pick a
 * mini_ht row; do_rhash later ANDs an address into this register.
 */
static void do_preload_rhash(int r)
{
  const int hash_mask=0xf8;
  emit_movimm(hash_mask,r);
}
4581
/*
 * Hash table base preload hook.
 * Don't need this for x86, so nothing is emitted and r is ignored.
 */
static void do_preload_rhtbl(int r)
{
  (void)r;
}
4585
/*
 * Emit the hash step: AND host register rs into host register rh,
 * leaving the result in rh (rh is expected to hold the mask loaded by
 * do_preload_rhash).
 */
static void do_rhash(int rs,int rh)
{
  const int hash_reg=rh;
  emit_and(rs,hash_reg,hash_reg);
}
4589
/*
 * Mini hash table load hook.
 * Don't need this for x86: the load and compare can be combined into a
 * single instruction (emitted by do_miniht_jump), so both arguments are
 * ignored and nothing is emitted.
 */
static void do_miniht_load(int ht,int rh)
{
  (void)ht;
  (void)rh;
}
4594
do_miniht_jump(int rs,int rh,int ht)4595 static void do_miniht_jump(int rs,int rh,int ht) {
4596 emit_cmpmem_indexed((int)mini_ht,rh,rs);
4597 emit_jne(jump_vaddr_reg[rs]);
4598 emit_jmpmem_indexed((int)mini_ht+4,rh);
4599 }
4600
do_miniht_insert(int return_address,int rt,int temp)4601 static void do_miniht_insert(int return_address,int rt,int temp) {
4602 emit_movimm(return_address,rt); // PC into link register
4603 //emit_writeword_imm(return_address,(int)&mini_ht[(return_address&0xFF)>>8][0]);
4604 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4605 add_to_linker((int)out,return_address,1);
4606 emit_writeword_imm(0,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4607 }
4608
4609 // We don't need this for x86
/* Literal pool hook: nothing to emit on x86, n is ignored. */
static void literal_pool(int n)
{
  (void)n;
}
/* Literal pool jump-over hook: nothing to emit on x86, n is ignored. */
static void literal_pool_jumpover(int n)
{
  (void)n;
}
4612
4613 // CPU-architecture-specific initialization, not needed for x86
/*
 * Architecture-specific initialization hook; x86 needs none.
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
static void arch_init(void)
{
}
4615