1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Yabause - assem_x86.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21 u32 memory_map[1048576];
22 ALIGNED(8) u32 mini_ht_master[32][2];
23 ALIGNED(8) u32 mini_ht_slave[32][2];
24 ALIGNED(4) u8 restore_candidate[512];
25 int rccount;
26 int master_reg[22];
27 int master_cc; // Cycle count
28 int master_pc; // Virtual PC
29 void * master_ip; // Translated PC
30 int slave_reg[22];
31 int slave_cc; // Cycle count
32 int slave_pc; // Virtual PC
33 void * slave_ip; // Translated PC
34
35 void FASTCALL WriteInvalidateLong(u32 addr, u32 val);
36 void FASTCALL WriteInvalidateWord(u32 addr, u32 val);
37 void FASTCALL WriteInvalidateByte(u32 addr, u32 val);
38 void FASTCALL WriteInvalidateByteSwapped(u32 addr, u32 val);
39
40 u32 rmw_temp; // Temporary storage for TAS.B instruction
41
42 void jump_vaddr_eax_master();
43 void jump_vaddr_ecx_master();
44 void jump_vaddr_edx_master();
45 void jump_vaddr_ebx_master();
46 void jump_vaddr_ebp_master();
47 void jump_vaddr_edi_master();
48 void jump_vaddr_eax_slave();
49 void jump_vaddr_ecx_slave();
50 void jump_vaddr_edx_slave();
51 void jump_vaddr_ebx_slave();
52 void jump_vaddr_ebp_slave();
53 void jump_vaddr_edi_slave();
54
55 const pointer jump_vaddr_reg[2][8] = {
56 {
57 (pointer)jump_vaddr_eax_master,
58 (pointer)jump_vaddr_ecx_master,
59 (pointer)jump_vaddr_edx_master,
60 (pointer)jump_vaddr_ebx_master,
61 0,
62 (pointer)jump_vaddr_ebp_master,
63 0,
64 (pointer)jump_vaddr_edi_master
65 },{
66 (pointer)jump_vaddr_eax_slave,
67 (pointer)jump_vaddr_ecx_slave,
68 (pointer)jump_vaddr_edx_slave,
69 (pointer)jump_vaddr_ebx_slave,
70 0,
71 (pointer)jump_vaddr_ebp_slave,
72 0,
73 (pointer)jump_vaddr_edi_slave
74 }
75 };
76
77 // We need these for cmovcc instructions on x86
78 u32 const_zero=0;
79 u32 const_one=1;
80
81 /* Linker */
82
// Patch the instruction at addr so that it refers to target.
//   0x0f 0x80..0x8f : 32-bit field at offset 2, written relative to
//                     the end of the field
//   0xe8 / 0xe9     : 32-bit field at offset 1, written relative to
//                     the end of the field
//   0xc7            : mov immediate (store address); target is
//                     written as an absolute value at offset 6
// Any other leading opcode trips the assertion.
void set_jump_target(pointer addr,pointer target)
{
  u8 *insn = (u8 *)addr;
  u32 *field;

  switch (insn[0]) {
  case 0x0f:
    assert(insn[1] >= 0x80 && insn[1] <= 0x8f);
    field = (u32 *)(insn + 2);
    *field = target - (u32)field - 4;
    break;
  case 0xe8:
  case 0xe9:
    field = (u32 *)(insn + 1);
    *field = target - (u32)field - 4;
    break;
  default:
    assert(insn[0] == 0xc7); /* mov immediate (store address) */
    field = (u32 *)(insn + 6);
    *field = target;
    break;
  }
}
103
kill_pointer(void * stub)104 void *kill_pointer(void *stub)
105 {
106 u32 *i_ptr=*((u32 **)(stub+6));
107 *i_ptr=(u32)stub-(u32)i_ptr-4;
108 return i_ptr;
109 }
get_pointer(void * stub)110 pointer get_pointer(void *stub)
111 {
112 s32 *i_ptr=*((u32 **)(stub+6));
113 return *i_ptr+(pointer)i_ptr+4;
114 }
115
116 // Find the "clean" entry point from a "dirty" entry point
117 // by skipping past the call to verify_code
get_clean_addr(pointer addr)118 pointer get_clean_addr(pointer addr)
119 {
120 u8 *ptr=(u8 *)addr;
121 assert(ptr[20]==0xE8); // call instruction
122 assert(ptr[25]==0x83); // pop (add esp,4) instruction
123 if(ptr[28]==0xE9) return *(s32 *)(ptr+29)+addr+33; // follow jmp
124 else return(addr+28);
125 }
126
// Check whether the source of a dirty block is unchanged.
// The entry stub embeds three 32-bit immediates: the source address
// (offset 6), the address of the saved copy (offset 11) and the
// length in bytes (offset 16).
// Returns non-zero when source and copy are byte-identical.
int verify_dirty(pointer addr)
{
  const u8 *stub = (const u8 *)addr;

  assert(stub[5] == 0xB8);
  u32 src_addr  = *(const u32 *)(stub + 6);
  u32 copy_addr = *(const u32 *)(stub + 11);
  u32 nbytes    = *(const u32 *)(stub + 16);
  assert(stub[20] == 0xE8); // call instruction

  return memcmp((void *)src_addr, (void *)copy_addr, nbytes) == 0;
}
137
138 // This doesn't necessarily find all clean entry points, just
139 // guarantees that it's not dirty
isclean(pointer addr)140 int isclean(pointer addr)
141 {
142 u8 *ptr=(u8 *)addr;
143 if(ptr[5]!=0xB8) return 1; // mov imm,%eax
144 if(ptr[10]!=0xBB) return 1; // mov imm,%ebx
145 if(ptr[15]!=0xB9) return 1; // mov imm,%ecx
146 if(ptr[20]!=0xE8) return 1; // call instruction
147 if(ptr[25]!=0x83) return 1; // pop (add esp,4) instruction
148 return 0;
149 }
150
// Report the source range covered by the dirty stub at addr.
// *start receives the source address (immediate at offset 6) and
// *end receives source + length (length immediate at offset 16).
// Either output pointer may be NULL to skip that value.
void get_bounds(pointer addr,u32 *start,u32 *end)
{
  const u8 *stub = (const u8 *)addr;

  assert(stub[5] == 0xB8);
  u32 first  = *(const u32 *)(stub + 6);
  u32 nbytes = *(const u32 *)(stub + 16);
  assert(stub[20] == 0xE8); // call instruction

  if (start != NULL) *start = first;
  if (end != NULL)   *end = first + nbytes;
}
162
163 /* Register allocation */
164
165 // Note: registers are allocated clean (unmodified state)
166 // if you intend to modify the register, you must call dirty_reg().
alloc_reg(struct regstat * cur,int i,signed char reg)167 void alloc_reg(struct regstat *cur,int i,signed char reg)
168 {
169 int r,hr;
170 int preferred_reg = (reg&3)+(reg>21)*4+(reg==24)+(reg==28)+(reg==32);
171 if(reg==CCREG) preferred_reg=HOST_CCREG;
172
173 // Don't allocate unused registers
174 if((cur->u>>reg)&1) return;
175
176 // see if it's already allocated
177 for(hr=0;hr<HOST_REGS;hr++)
178 {
179 if(cur->regmap[hr]==reg) return;
180 }
181
182 // Keep the same mapping if the register was already allocated in a loop
183 preferred_reg = loop_reg(i,reg,preferred_reg);
184
185 // Try to allocate the preferred register
186 if(cur->regmap[preferred_reg]==-1) {
187 cur->regmap[preferred_reg]=reg;
188 cur->dirty&=~(1<<preferred_reg);
189 cur->isdoingcp&=~(1<<preferred_reg);
190 return;
191 }
192 r=cur->regmap[preferred_reg];
193 if(r<64&&((cur->u>>r)&1)) {
194 cur->regmap[preferred_reg]=reg;
195 cur->dirty&=~(1<<preferred_reg);
196 cur->isdoingcp&=~(1<<preferred_reg);
197 return;
198 }
199
200 // Try to allocate EAX, EBX, ECX, or EDX
201 // We prefer these because they can do byte and halfword loads
202 for(hr=0;hr<4;hr++) {
203 if(cur->regmap[hr]==-1) {
204 cur->regmap[hr]=reg;
205 cur->dirty&=~(1<<hr);
206 cur->isdoingcp&=~(1<<hr);
207 return;
208 }
209 }
210
211 // Clear any unneeded registers
212 // We try to keep the mapping consistent, if possible, because it
213 // makes branches easier (especially loops). So we try to allocate
214 // first (see above) before removing old mappings. If this is not
215 // possible then go ahead and clear out the registers that are no
216 // longer needed.
217 for(hr=0;hr<HOST_REGS;hr++)
218 {
219 r=cur->regmap[hr];
220 if(r>=0) {
221 if((cur->u>>r)&1)
222 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
223 }
224 }
225 // Try to allocate any available register, but prefer
226 // registers that have not been used recently.
227 if(i>0) {
228 for(hr=0;hr<HOST_REGS;hr++) {
229 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
230 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
231 cur->regmap[hr]=reg;
232 cur->dirty&=~(1<<hr);
233 cur->isdoingcp&=~(1<<hr);
234 return;
235 }
236 }
237 }
238 }
239 // Try to allocate any available register
240 for(hr=0;hr<HOST_REGS;hr++) {
241 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
242 cur->regmap[hr]=reg;
243 cur->dirty&=~(1<<hr);
244 cur->isdoingcp&=~(1<<hr);
245 return;
246 }
247 }
248
249 // Ok, now we have to evict someone
250 // Pick a register we hopefully won't need soon
251 unsigned char hsn[MAXREG+1];
252 memset(hsn,10,sizeof(hsn));
253 int j;
254 lsn(hsn,i,&preferred_reg);
255 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
256 if(i>0) {
257 // Don't evict the cycle count at entry points, otherwise the entry
258 // stub will have to write it.
259 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
260 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
261 for(j=10;j>=3;j--)
262 {
263 // Alloc preferred register if available
264 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
265 for(hr=0;hr<HOST_REGS;hr++) {
266 // Evict both parts of a 64-bit register
267 if((cur->regmap[hr]&63)==r) {
268 cur->regmap[hr]=-1;
269 cur->dirty&=~(1<<hr);
270 cur->isdoingcp&=~(1<<hr);
271 }
272 }
273 cur->regmap[preferred_reg]=reg;
274 return;
275 }
276 for(r=0;r<=MAXREG;r++)
277 {
278 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
279 for(hr=0;hr<HOST_REGS;hr++) {
280 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
281 if(cur->regmap[hr]==r+64) {
282 cur->regmap[hr]=reg;
283 cur->dirty&=~(1<<hr);
284 cur->isdoingcp&=~(1<<hr);
285 return;
286 }
287 }
288 }
289 for(hr=0;hr<HOST_REGS;hr++) {
290 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
291 if(cur->regmap[hr]==r) {
292 cur->regmap[hr]=reg;
293 cur->dirty&=~(1<<hr);
294 cur->isdoingcp&=~(1<<hr);
295 return;
296 }
297 }
298 }
299 }
300 }
301 }
302 }
303 for(j=10;j>=0;j--)
304 {
305 for(r=0;r<=MAXREG;r++)
306 {
307 if(hsn[r]==j) {
308 for(hr=0;hr<HOST_REGS;hr++) {
309 if(cur->regmap[hr]==r+64) {
310 cur->regmap[hr]=reg;
311 cur->dirty&=~(1<<hr);
312 cur->isdoingcp&=~(1<<hr);
313 return;
314 }
315 }
316 for(hr=0;hr<HOST_REGS;hr++) {
317 if(cur->regmap[hr]==r) {
318 cur->regmap[hr]=reg;
319 cur->dirty&=~(1<<hr);
320 cur->isdoingcp&=~(1<<hr);
321 return;
322 }
323 }
324 }
325 }
326 }
327 printf("This shouldn't happen (alloc_reg)");exit(1);
328 }
329
330 // Allocate a temporary register. This is done without regard to
331 // dirty status or whether the register we request is on the unneeded list
332 // Note: This will only allocate one register, even if called multiple times
alloc_reg_temp(struct regstat * cur,int i,signed char reg)333 void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
334 {
335 int r,hr;
336 int preferred_reg = -1;
337
338 // see if it's already allocated
339 for(hr=0;hr<HOST_REGS;hr++)
340 {
341 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
342 }
343
344 // Try to allocate any available register, starting with EDI, ESI, EBP...
345 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
346 for(hr=HOST_REGS-1;hr>=0;hr--) {
347 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
348 cur->regmap[hr]=reg;
349 cur->dirty&=~(1<<hr);
350 cur->isdoingcp&=~(1<<hr);
351 return;
352 }
353 }
354
355 // Find an unneeded register
356 for(hr=HOST_REGS-1;hr>=0;hr--)
357 {
358 r=cur->regmap[hr];
359 if(r>=0) {
360 if((cur->u>>r)&1) {
361 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
362 cur->regmap[hr]=reg;
363 cur->dirty&=~(1<<hr);
364 cur->isdoingcp&=~(1<<hr);
365 return;
366 }
367 }
368 }
369 }
370
371 // Ok, now we have to evict someone
372 // Pick a register we hopefully won't need soon
373 // TODO: we might want to follow unconditional jumps here
374 // TODO: get rid of dupe code and make this into a function
375 unsigned char hsn[MAXREG+1];
376 memset(hsn,10,sizeof(hsn));
377 int j;
378 lsn(hsn,i,&preferred_reg);
379 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
380 if(i>0) {
381 // Don't evict the cycle count at entry points, otherwise the entry
382 // stub will have to write it.
383 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
384 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
385 for(j=10;j>=3;j--)
386 {
387 for(r=0;r<=MAXREG;r++)
388 {
389 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
390 for(hr=0;hr<HOST_REGS;hr++) {
391 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
392 if(cur->regmap[hr]==r+64) {
393 cur->regmap[hr]=reg;
394 cur->dirty&=~(1<<hr);
395 cur->isdoingcp&=~(1<<hr);
396 return;
397 }
398 }
399 }
400 for(hr=0;hr<HOST_REGS;hr++) {
401 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
402 if(cur->regmap[hr]==r) {
403 cur->regmap[hr]=reg;
404 cur->dirty&=~(1<<hr);
405 cur->isdoingcp&=~(1<<hr);
406 return;
407 }
408 }
409 }
410 }
411 }
412 }
413 }
414 for(j=10;j>=0;j--)
415 {
416 for(r=0;r<=MAXREG;r++)
417 {
418 if(hsn[r]==j) {
419 for(hr=0;hr<HOST_REGS;hr++) {
420 if(cur->regmap[hr]==r+64) {
421 cur->regmap[hr]=reg;
422 cur->dirty&=~(1<<hr);
423 cur->isdoingcp&=~(1<<hr);
424 return;
425 }
426 }
427 for(hr=0;hr<HOST_REGS;hr++) {
428 if(cur->regmap[hr]==r) {
429 cur->regmap[hr]=reg;
430 cur->dirty&=~(1<<hr);
431 cur->isdoingcp&=~(1<<hr);
432 return;
433 }
434 }
435 }
436 }
437 }
438 printf("This shouldn't happen");exit(1);
439 }
440 // Allocate a specific x86 register.
alloc_x86_reg(struct regstat * cur,int i,signed char reg,char hr)441 void alloc_x86_reg(struct regstat *cur,int i,signed char reg,char hr)
442 {
443 int n;
444 u32 dirty=0;
445
446 // see if it's already allocated (and dealloc it)
447 for(n=0;n<HOST_REGS;n++)
448 {
449 if(n!=ESP&&cur->regmap[n]==reg) {
450 dirty=(cur->dirty>>n)&1;
451 cur->regmap[n]=-1;
452 }
453 }
454
455 cur->regmap[hr]=reg;
456 cur->dirty&=~(1<<hr);
457 cur->dirty|=dirty<<hr;
458 cur->isdoingcp&=~(1<<hr);
459 }
460
461 // Alloc cycle count into dedicated register
alloc_cc(struct regstat * cur,int i)462 void alloc_cc(struct regstat *cur,int i)
463 {
464 alloc_x86_reg(cur,i,CCREG,ESI);
465 }
466
467 /* Assembler */
468
// Host register names, indexed by x86 register number (0..7).
// Used for the assem_debug disassembly output.
char regname[8][4] = {
  "eax", "ecx", "edx", "ebx",
  "esp", "ebp", "esi", "edi"
};
478
// Append one byte at the output cursor and advance the cursor.
void output_byte(u8 byte)
{
  *out = byte;
  out++;
}
// Append a ModRM byte: mod in bits 7-6, ext (reg/opcode extension)
// in bits 5-3, rm in bits 2-0.  Each field is range-checked.
void output_modrm(u8 mod,u8 rm,u8 ext)
{
  assert(mod < 4);
  assert(rm < 8);
  assert(ext < 8);
  *out++ = (u8)((mod << 6) | (ext << 3) | rm);
}
// Append a SIB byte: scale in bits 7-6, index in bits 5-3, base in
// bits 2-0.  Each field is range-checked.
void output_sib(u8 scale,u8 index,u8 base)
{
  assert(scale < 4);
  assert(index < 8);
  assert(base < 8);
  *out++ = (u8)((scale << 6) | (index << 3) | base);
}
// Append a 32-bit word at the output cursor (host byte order) and
// advance the cursor by four bytes.
void output_w32(u32 word)
{
  u32 *slot = (u32 *)out;
  *slot = word;
  out += 4;
}
504
emit_mov(int rs,int rt)505 void emit_mov(int rs,int rt)
506 {
507 assem_debug("mov %%%s,%%%s\n",regname[rs],regname[rt]);
508 output_byte(0x89);
509 output_modrm(3,rt,rs);
510 }
511
emit_add(int rs1,int rs2,int rt)512 void emit_add(int rs1,int rs2,int rt)
513 {
514 if(rs1==rt) {
515 assem_debug("add %%%s,%%%s\n",regname[rs2],regname[rs1]);
516 output_byte(0x01);
517 output_modrm(3,rs1,rs2);
518 }else if(rs2==rt) {
519 assem_debug("add %%%s,%%%s\n",regname[rs1],regname[rs2]);
520 output_byte(0x01);
521 output_modrm(3,rs2,rs1);
522 }else {
523 assem_debug("lea (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
524 output_byte(0x8D);
525 if(rs1!=EBP) {
526 output_modrm(0,4,rt);
527 output_sib(0,rs2,rs1);
528 }else if(rs2!=EBP) {
529 output_modrm(0,4,rt);
530 output_sib(0,rs1,rs2);
531 }else /* lea 0(,%ebp,2) */{
532 output_modrm(0,4,rt);
533 output_sib(1,EBP,5);
534 output_w32(0);
535 }
536 }
537 }
538
// Alias for emit_add(): emits exactly the same code.
void emit_adds(int rs1,int rs2,int rt)
{
  emit_add(rs1, rs2, rt);
}
543
emit_lea8(int rs1,int rt)544 void emit_lea8(int rs1,int rt)
545 {
546 assem_debug("lea 0(%%%s,8),%%%s\n",regname[rs1],regname[rt]);
547 output_byte(0x8D);
548 output_modrm(0,4,rt);
549 output_sib(3,rs1,5);
550 output_w32(0);
551 }
emit_leairrx1(int imm,int rs1,int rs2,int rt)552 void emit_leairrx1(int imm,int rs1,int rs2,int rt)
553 {
554 assem_debug("lea %x(%%%s,%%%s,1),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
555 output_byte(0x8D);
556 if(imm!=0||rs1==EBP) {
557 output_modrm(2,4,rt);
558 output_sib(0,rs2,rs1);
559 output_w32(imm);
560 }else{
561 output_modrm(0,4,rt);
562 output_sib(0,rs2,rs1);
563 }
564 }
emit_leairrx4(int imm,int rs1,int rs2,int rt)565 void emit_leairrx4(int imm,int rs1,int rs2,int rt)
566 {
567 assem_debug("lea %x(%%%s,%%%s,4),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
568 output_byte(0x8D);
569 if(imm!=0||rs1==EBP) {
570 output_modrm(2,4,rt);
571 output_sib(2,rs2,rs1);
572 output_w32(imm);
573 }else{
574 output_modrm(0,4,rt);
575 output_sib(2,rs2,rs1);
576 }
577 }
578
emit_neg(int rs,int rt)579 void emit_neg(int rs, int rt)
580 {
581 if(rs!=rt) emit_mov(rs,rt);
582 assem_debug("neg %%%s\n",regname[rt]);
583 output_byte(0xF7);
584 output_modrm(3,rt,3);
585 }
586
// Alias for emit_neg(): emits exactly the same code.
void emit_negs(int rs, int rt)
{
  emit_neg(rs, rt);
}
591
emit_sub(int rs1,int rs2,int rt)592 void emit_sub(int rs1,int rs2,int rt)
593 {
594 if(rs1==rt) {
595 assem_debug("sub %%%s,%%%s\n",regname[rs2],regname[rs1]);
596 output_byte(0x29);
597 output_modrm(3,rs1,rs2);
598 } else if(rs2==rt) {
599 emit_neg(rs2,rs2);
600 emit_add(rs2,rs1,rs2);
601 } else {
602 emit_mov(rs1,rt);
603 emit_sub(rt,rs2,rt);
604 }
605 }
606
// Alias for emit_sub(): emits exactly the same code.
void emit_subs(int rs1,int rs2,int rt)
{
  emit_sub(rs1, rs2, rt);
}
611
emit_zeroreg(int rt)612 void emit_zeroreg(int rt)
613 {
614 output_byte(0x31);
615 output_modrm(3,rt,rt);
616 assem_debug("xor %%%s,%%%s\n",regname[rt],regname[rt]);
617 }
618
emit_loadreg(int r,int hr)619 void emit_loadreg(int r, int hr)
620 {
621 int addr=(slave?(int)slave_reg:(int)master_reg)+(r<<2);
622 if(r==CCREG) addr=slave?(int)&slave_cc:(int)&master_cc;
623 assem_debug("mov %x+%d,%%%s\n",addr,r,regname[hr]);
624 output_byte(0x8B);
625 output_modrm(0,5,hr);
626 output_w32(addr);
627 }
emit_storereg(int r,int hr)628 void emit_storereg(int r, int hr)
629 {
630 int addr=(slave?(int)slave_reg:(int)master_reg)+(r<<2);
631 if(r==CCREG) addr=slave?(int)&slave_cc:(int)&master_cc;
632 assem_debug("mov %%%s,%x+%d\n",regname[hr],addr,r);
633 output_byte(0x89);
634 output_modrm(0,5,hr);
635 output_w32(addr);
636 }
637
emit_test(int rs,int rt)638 void emit_test(int rs, int rt)
639 {
640 assem_debug("test %%%s,%%%s\n",regname[rs],regname[rt]);
641 output_byte(0x85);
642 output_modrm(3,rs,rt);
643 }
644
emit_testimm(int rs,int imm)645 void emit_testimm(int rs,int imm)
646 {
647 assem_debug("test $0x%x,%%%s\n",imm,regname[rs]);
648 if(imm<128&&imm>=-128&&rs<4) {
649 output_byte(0xF6);
650 output_modrm(3,rs,0);
651 output_byte(imm);
652 }
653 else
654 {
655 output_byte(0xF7);
656 output_modrm(3,rs,0);
657 output_w32(imm);
658 }
659 }
660
emit_not(int rs,int rt)661 void emit_not(int rs,int rt)
662 {
663 if(rs!=rt) emit_mov(rs,rt);
664 assem_debug("not %%%s\n",regname[rt]);
665 output_byte(0xF7);
666 output_modrm(3,rt,2);
667 }
668
emit_and(unsigned int rs1,unsigned int rs2,unsigned int rt)669 void emit_and(unsigned int rs1,unsigned int rs2,unsigned int rt)
670 {
671 assert(rs1<8);
672 assert(rs2<8);
673 assert(rt<8);
674 if(rs1==rt) {
675 assem_debug("and %%%s,%%%s\n",regname[rs2],regname[rt]);
676 output_byte(0x21);
677 output_modrm(3,rs1,rs2);
678 }
679 else
680 if(rs2==rt) {
681 assem_debug("and %%%s,%%%s\n",regname[rs1],regname[rt]);
682 output_byte(0x21);
683 output_modrm(3,rs2,rs1);
684 }
685 else {
686 emit_mov(rs1,rt);
687 emit_and(rt,rs2,rt);
688 }
689 }
690
emit_or(unsigned int rs1,unsigned int rs2,unsigned int rt)691 void emit_or(unsigned int rs1,unsigned int rs2,unsigned int rt)
692 {
693 assert(rs1<8);
694 assert(rs2<8);
695 assert(rt<8);
696 if(rs1==rt) {
697 assem_debug("or %%%s,%%%s\n",regname[rs2],regname[rt]);
698 output_byte(0x09);
699 output_modrm(3,rs1,rs2);
700 }
701 else
702 if(rs2==rt) {
703 assem_debug("or %%%s,%%%s\n",regname[rs1],regname[rt]);
704 output_byte(0x09);
705 output_modrm(3,rs2,rs1);
706 }
707 else {
708 emit_mov(rs1,rt);
709 emit_or(rt,rs2,rt);
710 }
711 }
// Alias for emit_or(): emits exactly the same code.
void emit_or_and_set_flags(int rs1,int rs2,int rt)
{
  emit_or(rs1, rs2, rt);
}
716
emit_xor(unsigned int rs1,unsigned int rs2,unsigned int rt)717 void emit_xor(unsigned int rs1,unsigned int rs2,unsigned int rt)
718 {
719 assert(rs1<8);
720 assert(rs2<8);
721 assert(rt<8);
722 if(rs1==rt) {
723 assem_debug("xor %%%s,%%%s\n",regname[rs2],regname[rt]);
724 output_byte(0x31);
725 output_modrm(3,rs1,rs2);
726 }
727 else
728 if(rs2==rt) {
729 assem_debug("xor %%%s,%%%s\n",regname[rs1],regname[rt]);
730 output_byte(0x31);
731 output_modrm(3,rs2,rs1);
732 }
733 else {
734 emit_mov(rs1,rt);
735 emit_xor(rt,rs2,rt);
736 }
737 }
738
emit_movimm(int imm,unsigned int rt)739 void emit_movimm(int imm,unsigned int rt)
740 {
741 assem_debug("mov $%d,%%%s\n",imm,regname[rt]);
742 assert(rt<8);
743 output_byte(0xB8+rt);
744 output_w32(imm);
745 }
746
emit_addimm(int rs,int imm,int rt)747 void emit_addimm(int rs,int imm,int rt)
748 {
749 if(rs==rt) {
750 if(imm!=0) {
751 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
752 if(imm<128&&imm>=-128) {
753 output_byte(0x83);
754 output_modrm(3,rt,0);
755 output_byte(imm);
756 }
757 else
758 {
759 output_byte(0x81);
760 output_modrm(3,rt,0);
761 output_w32(imm);
762 }
763 }
764 }
765 else {
766 if(imm!=0) {
767 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rs],regname[rt]);
768 output_byte(0x8D);
769 if(imm<128&&imm>=-128) {
770 output_modrm(1,rs,rt);
771 output_byte(imm);
772 }else{
773 output_modrm(2,rs,rt);
774 output_w32(imm);
775 }
776 }else{
777 emit_mov(rs,rt);
778 }
779 }
780 }
781
emit_addimm_and_set_flags(int imm,int rt)782 void emit_addimm_and_set_flags(int imm,int rt)
783 {
784 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
785 if(imm<128&&imm>=-128) {
786 output_byte(0x83);
787 output_modrm(3,rt,0);
788 output_byte(imm);
789 }
790 else
791 {
792 output_byte(0x81);
793 output_modrm(3,rt,0);
794 output_w32(imm);
795 }
796 }
emit_addimm_no_flags(int imm,int rt)797 void emit_addimm_no_flags(int imm,int rt)
798 {
799 if(imm!=0) {
800 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rt],regname[rt]);
801 output_byte(0x8D);
802 if(imm<128&&imm>=-128) {
803 output_modrm(1,rt,rt);
804 output_byte(imm);
805 }else{
806 output_modrm(2,rt,rt);
807 output_w32(imm);
808 }
809 }
810 }
811
emit_adcimm(int imm,unsigned int rt)812 void emit_adcimm(int imm,unsigned int rt)
813 {
814 assem_debug("adc $%d,%%%s\n",imm,regname[rt]);
815 assert(rt<8);
816 if(imm<128&&imm>=-128) {
817 output_byte(0x83);
818 output_modrm(3,rt,2);
819 output_byte(imm);
820 }
821 else
822 {
823 output_byte(0x81);
824 output_modrm(3,rt,2);
825 output_w32(imm);
826 }
827 }
emit_sbbimm(int imm,unsigned int rt)828 void emit_sbbimm(int imm,unsigned int rt)
829 {
830 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
831 assert(rt<8);
832 if(imm<128&&imm>=-128) {
833 output_byte(0x83);
834 output_modrm(3,rt,3);
835 output_byte(imm);
836 }
837 else
838 {
839 output_byte(0x81);
840 output_modrm(3,rt,3);
841 output_w32(imm);
842 }
843 }
844
emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)845 void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
846 {
847 if(rsh==rth&&rsl==rtl) {
848 assem_debug("add $%d,%%%s\n",imm,regname[rtl]);
849 if(imm<128&&imm>=-128) {
850 output_byte(0x83);
851 output_modrm(3,rtl,0);
852 output_byte(imm);
853 }
854 else
855 {
856 output_byte(0x81);
857 output_modrm(3,rtl,0);
858 output_w32(imm);
859 }
860 assem_debug("adc $%d,%%%s\n",imm>>31,regname[rth]);
861 output_byte(0x83);
862 output_modrm(3,rth,2);
863 output_byte(imm>>31);
864 }
865 else {
866 emit_mov(rsh,rth);
867 emit_mov(rsl,rtl);
868 emit_addimm64_32(rth,rtl,imm,rth,rtl);
869 }
870 }
871
emit_sbb(int rs1,int rs2)872 void emit_sbb(int rs1,int rs2)
873 {
874 assem_debug("sbb %%%s,%%%s\n",regname[rs1],regname[rs2]);
875 output_byte(0x19);
876 output_modrm(3,rs2,rs1);
877 }
878
emit_andimm(int rs,int imm,int rt)879 void emit_andimm(int rs,int imm,int rt)
880 {
881 if(imm==0) {
882 emit_zeroreg(rt);
883 }
884 else if(rs==rt) {
885 assem_debug("and $%d,%%%s\n",imm,regname[rt]);
886 if(imm<128&&imm>=-128) {
887 output_byte(0x83);
888 output_modrm(3,rt,4);
889 output_byte(imm);
890 }
891 else
892 {
893 output_byte(0x81);
894 output_modrm(3,rt,4);
895 output_w32(imm);
896 }
897 }
898 else {
899 emit_mov(rs,rt);
900 emit_andimm(rt,imm,rt);
901 }
902 }
903
emit_orimm(int rs,int imm,int rt)904 void emit_orimm(int rs,int imm,int rt)
905 {
906 if(rs==rt) {
907 if(imm!=0) {
908 assem_debug("or $%d,%%%s\n",imm,regname[rt]);
909 if(imm<128&&imm>=-128) {
910 output_byte(0x83);
911 output_modrm(3,rt,1);
912 output_byte(imm);
913 }
914 else
915 {
916 output_byte(0x81);
917 output_modrm(3,rt,1);
918 output_w32(imm);
919 }
920 }
921 }
922 else {
923 emit_mov(rs,rt);
924 emit_orimm(rt,imm,rt);
925 }
926 }
927
emit_xorimm(int rs,int imm,int rt)928 void emit_xorimm(int rs,int imm,int rt)
929 {
930 if(rs==rt) {
931 if(imm!=0) {
932 assem_debug("xor $%d,%%%s\n",imm,regname[rt]);
933 if(imm<128&&imm>=-128) {
934 output_byte(0x83);
935 output_modrm(3,rt,6);
936 output_byte(imm);
937 }
938 else
939 {
940 output_byte(0x81);
941 output_modrm(3,rt,6);
942 output_w32(imm);
943 }
944 }
945 }
946 else {
947 emit_mov(rs,rt);
948 emit_xorimm(rt,imm,rt);
949 }
950 }
951
emit_shlimm(int rs,unsigned int imm,int rt)952 void emit_shlimm(int rs,unsigned int imm,int rt)
953 {
954 if(rs==rt) {
955 assem_debug("shl %%%s,%d\n",regname[rt],imm);
956 assert(imm>0);
957 if(imm==1) output_byte(0xD1);
958 else output_byte(0xC1);
959 output_modrm(3,rt,4);
960 if(imm>1) output_byte(imm);
961 }
962 else {
963 emit_mov(rs,rt);
964 emit_shlimm(rt,imm,rt);
965 }
966 }
967
emit_shrimm(int rs,unsigned int imm,int rt)968 void emit_shrimm(int rs,unsigned int imm,int rt)
969 {
970 if(rs==rt) {
971 assem_debug("shr %%%s,%d\n",regname[rt],imm);
972 assert(imm>0);
973 if(imm==1) output_byte(0xD1);
974 else output_byte(0xC1);
975 output_modrm(3,rt,5);
976 if(imm>1) output_byte(imm);
977 }
978 else {
979 emit_mov(rs,rt);
980 emit_shrimm(rt,imm,rt);
981 }
982 }
983
emit_sarimm(int rs,unsigned int imm,int rt)984 void emit_sarimm(int rs,unsigned int imm,int rt)
985 {
986 if(rs==rt) {
987 assem_debug("sar %%%s,%d\n",regname[rt],imm);
988 assert(imm>0);
989 if(imm==1) output_byte(0xD1);
990 else output_byte(0xC1);
991 output_modrm(3,rt,7);
992 if(imm>1) output_byte(imm);
993 }
994 else {
995 emit_mov(rs,rt);
996 emit_sarimm(rt,imm,rt);
997 }
998 }
999
emit_rorimm(int rs,unsigned int imm,int rt)1000 void emit_rorimm(int rs,unsigned int imm,int rt)
1001 {
1002 if(rs==rt) {
1003 assem_debug("ror %%%s,%d\n",regname[rt],imm);
1004 assert(imm>0);
1005 if(imm==1) output_byte(0xD1);
1006 else output_byte(0xC1);
1007 output_modrm(3,rt,1);
1008 if(imm>1) output_byte(imm);
1009 }
1010 else {
1011 emit_mov(rs,rt);
1012 emit_rorimm(rt,imm,rt);
1013 }
1014 }
1015
emit_swapb(int rs,int rt)1016 void emit_swapb(int rs,int rt)
1017 {
1018 if(rs==rt) {
1019 assem_debug("ror %%%s,8\n",regname[rt]+1);
1020 output_byte(0x66);
1021 output_byte(0xC1);
1022 output_modrm(3,rt,1);
1023 output_byte(8);
1024 }
1025 else {
1026 emit_mov(rs,rt);
1027 emit_swapb(rt,rt);
1028 }
1029 }
1030
emit_shldimm(int rs,int rs2,unsigned int imm,int rt)1031 void emit_shldimm(int rs,int rs2,unsigned int imm,int rt)
1032 {
1033 if(rs==rt) {
1034 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1035 assert(imm>0);
1036 output_byte(0x0F);
1037 output_byte(0xA4);
1038 output_modrm(3,rt,rs2);
1039 output_byte(imm);
1040 }
1041 else {
1042 emit_mov(rs,rt);
1043 emit_shldimm(rt,rs2,imm,rt);
1044 }
1045 }
1046
emit_shrdimm(int rs,int rs2,unsigned int imm,int rt)1047 void emit_shrdimm(int rs,int rs2,unsigned int imm,int rt)
1048 {
1049 if(rs==rt) {
1050 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1051 assert(imm>0);
1052 output_byte(0x0F);
1053 output_byte(0xAC);
1054 output_modrm(3,rt,rs2);
1055 output_byte(imm);
1056 }
1057 else {
1058 emit_mov(rs,rt);
1059 emit_shrdimm(rt,rs2,imm,rt);
1060 }
1061 }
1062
emit_shlcl(int r)1063 void emit_shlcl(int r)
1064 {
1065 assem_debug("shl %%%s,%%cl\n",regname[r]);
1066 output_byte(0xD3);
1067 output_modrm(3,r,4);
1068 }
emit_shrcl(int r)1069 void emit_shrcl(int r)
1070 {
1071 assem_debug("shr %%%s,%%cl\n",regname[r]);
1072 output_byte(0xD3);
1073 output_modrm(3,r,5);
1074 }
emit_sarcl(int r)1075 void emit_sarcl(int r)
1076 {
1077 assem_debug("sar %%%s,%%cl\n",regname[r]);
1078 output_byte(0xD3);
1079 output_modrm(3,r,7);
1080 }
1081
emit_shldcl(int r1,int r2)1082 void emit_shldcl(int r1,int r2)
1083 {
1084 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1085 output_byte(0x0F);
1086 output_byte(0xA5);
1087 output_modrm(3,r1,r2);
1088 }
emit_shrdcl(int r1,int r2)1089 void emit_shrdcl(int r1,int r2)
1090 {
1091 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1092 output_byte(0x0F);
1093 output_byte(0xAD);
1094 output_modrm(3,r1,r2);
1095 }
1096
emit_cmpimm(int rs,int imm)1097 void emit_cmpimm(int rs,int imm)
1098 {
1099 assem_debug("cmp $%d,%%%s\n",imm,regname[rs]);
1100 if(imm<128&&imm>=-128) {
1101 output_byte(0x83);
1102 output_modrm(3,rs,7);
1103 output_byte(imm);
1104 }
1105 else
1106 {
1107 output_byte(0x81);
1108 output_modrm(3,rs,7);
1109 output_w32(imm);
1110 }
1111 }
1112
// Emit `cmovne` loading rt from the 32-bit value at absolute address
// addr.  The debug trace tags the well-known operands const_zero and
// const_one.
void emit_cmovne(u32 *addr,int rt)
{
  assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
  if (addr == &const_zero) {
    assem_debug(" [zero]\n");
  } else if (addr == &const_one) {
    assem_debug(" [one]\n");
  } else {
    assem_debug("\n");
  }
  output_byte(0x0F);
  output_byte(0x45);
  output_modrm(0, 5, rt); // mod=0, rm=5: absolute 32-bit address follows
  output_w32((int)addr);
}
// Emit `cmovl` loading rt from the 32-bit value at absolute address
// addr.  The debug trace tags the well-known operands const_zero and
// const_one.
void emit_cmovl(u32 *addr,int rt)
{
  assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
  if (addr == &const_zero) {
    assem_debug(" [zero]\n");
  } else if (addr == &const_one) {
    assem_debug(" [one]\n");
  } else {
    assem_debug("\n");
  }
  output_byte(0x0F);
  output_byte(0x4C);
  output_modrm(0, 5, rt); // mod=0, rm=5: absolute 32-bit address follows
  output_w32((int)addr);
}
// Emit `cmovs` loading rt from the 32-bit value at absolute address
// addr.  The debug trace tags the well-known operands const_zero and
// const_one.
void emit_cmovs(u32 *addr,int rt)
{
  assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
  if (addr == &const_zero) {
    assem_debug(" [zero]\n");
  } else if (addr == &const_one) {
    assem_debug(" [one]\n");
  } else {
    assem_debug("\n");
  }
  output_byte(0x0F);
  output_byte(0x48);
  output_modrm(0, 5, rt); // mod=0, rm=5: absolute 32-bit address follows
  output_w32((int)addr);
}
emit_cmovne_reg(int rs,int rt)1146 void emit_cmovne_reg(int rs,int rt)
1147 {
1148 assem_debug("cmovne %%%s,%%%s\n",regname[rs],regname[rt]);
1149 output_byte(0x0F);
1150 output_byte(0x45);
1151 output_modrm(3,rs,rt);
1152 }
emit_cmovl_reg(int rs,int rt)1153 void emit_cmovl_reg(int rs,int rt)
1154 {
1155 assem_debug("cmovl %%%s,%%%s\n",regname[rs],regname[rt]);
1156 output_byte(0x0F);
1157 output_byte(0x4C);
1158 output_modrm(3,rs,rt);
1159 }
emit_cmovle_reg(int rs,int rt)1160 void emit_cmovle_reg(int rs,int rt)
1161 {
1162 assem_debug("cmovle %%%s,%%%s\n",regname[rs],regname[rt]);
1163 output_byte(0x0F);
1164 output_byte(0x4E);
1165 output_modrm(3,rs,rt);
1166 }
emit_cmovs_reg(int rs,int rt)1167 void emit_cmovs_reg(int rs,int rt)
1168 {
1169 assem_debug("cmovs %%%s,%%%s\n",regname[rs],regname[rt]);
1170 output_byte(0x0F);
1171 output_byte(0x48);
1172 output_modrm(3,rs,rt);
1173 }
emit_cmovnc_reg(int rs,int rt)1174 void emit_cmovnc_reg(int rs,int rt)
1175 {
1176 assem_debug("cmovae %%%s,%%%s\n",regname[rs],regname[rt]);
1177 output_byte(0x0F);
1178 output_byte(0x43);
1179 output_modrm(3,rs,rt);
1180 }
emit_cmova_reg(int rs,int rt)1181 void emit_cmova_reg(int rs,int rt)
1182 {
1183 assem_debug("cmova %%%s,%%%s\n",regname[rs],regname[rt]);
1184 output_byte(0x0F);
1185 output_byte(0x47);
1186 output_modrm(3,rs,rt);
1187 }
emit_cmovp_reg(int rs,int rt)1188 void emit_cmovp_reg(int rs,int rt)
1189 {
1190 assem_debug("cmovp %%%s,%%%s\n",regname[rs],regname[rt]);
1191 output_byte(0x0F);
1192 output_byte(0x4A);
1193 output_modrm(3,rs,rt);
1194 }
emit_cmovnp_reg(int rs,int rt)1195 void emit_cmovnp_reg(int rs,int rt)
1196 {
1197 assem_debug("cmovnp %%%s,%%%s\n",regname[rs],regname[rt]);
1198 output_byte(0x0F);
1199 output_byte(0x4B);
1200 output_modrm(3,rs,rt);
1201 }
emit_setl(int rt)1202 void emit_setl(int rt)
1203 {
1204 assem_debug("setl %%%s\n",regname[rt]);
1205 output_byte(0x0F);
1206 output_byte(0x9C);
1207 output_modrm(3,rt,2);
1208 }
emit_movzbl_reg(int rs,int rt)1209 void emit_movzbl_reg(int rs, int rt)
1210 {
1211 if(rs<4) {
1212 assem_debug("movzbl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1213 output_byte(0x0F);
1214 output_byte(0xB6);
1215 output_modrm(3,rs,rt);
1216 }
1217 else if(rt<4) {
1218 emit_mov(rs,rt);
1219 emit_movzbl_reg(rt,rt);
1220 }
1221 else {
1222 emit_andimm(rs,0xFF,rt);
1223 }
1224 }
emit_movzwl_reg(int rs,int rt)1225 void emit_movzwl_reg(int rs, int rt)
1226 {
1227 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1228 output_byte(0x0F);
1229 output_byte(0xB7);
1230 output_modrm(3,rs,rt);
1231 }
emit_movsbl_reg(int rs,int rt)1232 void emit_movsbl_reg(int rs, int rt)
1233 {
1234 if(rs<4) {
1235 assem_debug("movsbl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1236 output_byte(0x0F);
1237 output_byte(0xBE);
1238 output_modrm(3,rs,rt);
1239 }
1240 else if(rt<4) {
1241 emit_mov(rs,rt);
1242 emit_movsbl_reg(rt,rt);
1243 }
1244 else {
1245 emit_shlimm(rs,24,rt);
1246 emit_sarimm(rt,24,rt);
1247 }
1248 }
emit_movswl_reg(int rs,int rt)1249 void emit_movswl_reg(int rs, int rt)
1250 {
1251 assem_debug("movswl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1252 output_byte(0x0F);
1253 output_byte(0xBF);
1254 output_modrm(3,rs,rt);
1255 }
1256
emit_slti32(int rs,int imm,int rt)1257 void emit_slti32(int rs,int imm,int rt)
1258 {
1259 if(rs!=rt) emit_zeroreg(rt);
1260 emit_cmpimm(rs,imm);
1261 if(rt<4) {
1262 emit_setl(rt);
1263 if(rs==rt) emit_movzbl_reg(rt,rt);
1264 }
1265 else
1266 {
1267 if(rs==rt) emit_movimm(0,rt);
1268 emit_cmovl(&const_one,rt);
1269 }
1270 }
/* Set rt to 1 if rs < imm (unsigned compare), else 0.
 * The result is produced by adding the carry from the compare to zero. */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);

  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  /* In-place case: clear rt only after the compare has read it. */
  if(in_place) emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
emit_slti64_32(int rsh,int rsl,int imm,int rt)1278 void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1279 {
1280 assert(rsh!=rt);
1281 emit_slti32(rsl,imm,rt);
1282 if(imm>=0)
1283 {
1284 emit_test(rsh,rsh);
1285 emit_cmovne(&const_zero,rt);
1286 emit_cmovs(&const_one,rt);
1287 }
1288 else
1289 {
1290 emit_cmpimm(rsh,-1);
1291 emit_cmovne(&const_zero,rt);
1292 emit_cmovl(&const_one,rt);
1293 }
1294 }
emit_sltiu64_32(int rsh,int rsl,int imm,int rt)1295 void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1296 {
1297 assert(rsh!=rt);
1298 emit_sltiu32(rsl,imm,rt);
1299 if(imm>=0)
1300 {
1301 emit_test(rsh,rsh);
1302 emit_cmovne(&const_zero,rt);
1303 }
1304 else
1305 {
1306 emit_cmpimm(rsh,-1);
1307 emit_cmovne(&const_one,rt);
1308 }
1309 }
1310
emit_cmp(int rs,int rt)1311 void emit_cmp(int rs,int rt)
1312 {
1313 assem_debug("cmp %%%s,%%%s\n",regname[rt],regname[rs]);
1314 output_byte(0x39);
1315 output_modrm(3,rs,rt);
1316 }
emit_set_gz32(int rs,int rt)1317 void emit_set_gz32(int rs, int rt)
1318 {
1319 //assem_debug("set_gz32\n");
1320 emit_cmpimm(rs,1);
1321 emit_movimm(1,rt);
1322 emit_cmovl(&const_zero,rt);
1323 }
/* Set rt to 1 if rs is non-zero, else 0.
 * Compares rs with 1, loads 1, then subtracts the borrow from that compare. */
void emit_set_nz32(int rs, int rt)
{
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_sbbimm(0,rt);
}
emit_set_gz64_32(int rsh,int rsl,int rt)1331 void emit_set_gz64_32(int rsh, int rsl, int rt)
1332 {
1333 //assem_debug("set_gz64\n");
1334 emit_set_gz32(rsl,rt);
1335 emit_test(rsh,rsh);
1336 emit_cmovne(&const_one,rt);
1337 emit_cmovs(&const_zero,rt);
1338 }
emit_set_nz64_32(int rsh,int rsl,int rt)1339 void emit_set_nz64_32(int rsh, int rsl, int rt)
1340 {
1341 //assem_debug("set_nz64\n");
1342 emit_or_and_set_flags(rsh,rsl,rt);
1343 emit_cmovne(&const_one,rt);
1344 }
emit_set_if_less32(int rs1,int rs2,int rt)1345 void emit_set_if_less32(int rs1, int rs2, int rt)
1346 {
1347 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1348 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1349 emit_cmp(rs1,rs2);
1350 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1351 emit_cmovl(&const_one,rt);
1352 }
/* Set rt to the carry produced by emit_cmp(rs1,rs2), i.e. 1 or 0.
 * If rt aliases an input it is cleared only after the compare. */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);

  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
emit_set_if_less64_32(int u1,int l1,int u2,int l2,int rt)1361 void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1362 {
1363 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1364 assert(u1!=rt);
1365 assert(u2!=rt);
1366 emit_cmp(l1,l2);
1367 emit_mov(u1,rt);
1368 emit_sbb(u2,rt);
1369 emit_movimm(0,rt);
1370 emit_cmovl(&const_one,rt);
1371 }
/* 64-bit unsigned "set if carry": compares u1:l1 against u2:l2 and leaves the
 * final carry (1 or 0) in rt. rt is used as scratch for the high-word
 * subtract-with-borrow, so it must not alias either high word. */
void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  assert(u1!=rt);
  assert(u2!=rt);

  /* Low words first; the borrow feeds the high-word subtraction. */
  emit_cmp(l1,l2);
  emit_mov(u1,rt);
  emit_sbb(u2,rt);

  /* Materialise the carry as a 0/1 value. */
  emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
emit_adc(int rs,int rt)1383 void emit_adc(int rs,int rt)
1384 {
1385 assem_debug("adc %%%s,%%%s\n",regname[rs],regname[rt]);
1386 output_byte(0x11);
1387 output_modrm(3,rt,rs);
1388 }
/* TST of two registers: bit 0 of sr ends up set when (s1 & s2) == 0 and
 * cleared otherwise. temp receives the "bit cleared" copy of sr. */
void emit_sh2tst(int s1, int s2, int sr, int temp)
{
  assert(temp>=0);

  emit_orimm(sr,1,sr);      /* sr with bit 0 set */
  emit_addimm(sr,-1,temp);  /* temp = sr with bit 0 cleared */
  emit_test(s1,s2);
  emit_cmovne_reg(temp,sr); /* non-zero result: take the cleared copy */
}
emit_sh2tstimm(int s,int imm,int sr,int temp)1397 void emit_sh2tstimm(int s, int imm, int sr, int temp)
1398 {
1399 assert(temp>=0);
1400 emit_orimm(sr,1,sr);
1401 emit_testimm(s,imm);
1402 //emit_addimm(sr,-1,temp);
1403 assem_debug("lea -1(%%%s),%%%s\n",regname[sr],regname[temp]);
1404 output_byte(0x8D);
1405 output_modrm(1,sr,temp);
1406 output_byte(0xFF);
1407 emit_cmovne_reg(temp,sr);
1408 }
/* Register equality compare: bit 0 of sr ends up set when s1 == s2 and
 * cleared otherwise. temp receives the "bit cleared" copy of sr. */
void emit_cmpeq(int s1, int s2, int sr, int temp)
{
  assert(temp>=0);

  emit_orimm(sr,1,sr);      /* sr with bit 0 set */
  emit_addimm(sr,-1,temp);  /* temp = sr with bit 0 cleared */
  emit_cmp(s1,s2);
  emit_cmovne_reg(temp,sr); /* not equal: take the cleared copy */
}
/* Immediate equality compare: bit 0 of sr ends up set when s == imm and
 * cleared otherwise. temp receives the "bit cleared" copy of sr. */
void emit_cmpeqimm(int s, int imm, int sr, int temp)
{
  assert(temp>=0);

  emit_orimm(sr,1,sr);      /* sr with bit 0 set */
  emit_addimm(sr,-1,temp);  /* temp = sr with bit 0 cleared */
  emit_cmpimm(s,imm);
  emit_cmovne_reg(temp,sr); /* not equal: take the cleared copy */
}
/* Signed compare helper: sets bit 0 of sr, then replaces sr with the
 * bit-cleared copy in temp when emit_cmp(s2,s1) yields "less". */
void emit_cmpge(int s1, int s2, int sr, int temp)
{
  assert(temp>=0);

  emit_orimm(sr,1,sr);     /* sr with bit 0 set */
  emit_addimm(sr,-1,temp); /* temp = sr with bit 0 cleared */
  emit_cmp(s2,s1);
  emit_cmovl_reg(temp,sr);
}
/* Signed compare helper: sets bit 0 of sr, then replaces sr with the
 * bit-cleared copy in temp when emit_cmp(s2,s1) yields "less or equal". */
void emit_cmpgt(int s1, int s2, int sr, int temp)
{
  assert(temp>=0);

  emit_orimm(sr,1,sr);     /* sr with bit 0 set */
  emit_addimm(sr,-1,temp); /* temp = sr with bit 0 cleared */
  emit_cmp(s2,s1);
  emit_cmovle_reg(temp,sr);
}
/* Unsigned compare helper: clears bit 0 of sr, then adds in the carry
 * produced by emit_cmp(s1,s2). The temp argument is not used. */
void emit_cmphi(int s1, int s2, int sr, int temp)
{
  (void)temp;

  emit_andimm(sr,~1,sr);
  emit_cmp(s1,s2);
  emit_adcimm(0,sr);
}
/* Unsigned compare helper: sets bit 0 of sr, then subtracts the borrow
 * produced by emit_cmp(s2,s1). The temp argument is not used. */
void emit_cmphs(int s1, int s2, int sr, int temp)
{
  (void)temp;

  emit_orimm(sr,1,sr);
  emit_cmp(s2,s1);
  emit_sbbimm(0,sr);
}
/* Decrement-and-test: t ends up as t-1, done as -2 then +1, and the carry of
 * the final +1 is rotated into bit 0 of sr.
 * The statement order is significant: the shift of sr happens between the
 * two adds, and the adc must follow the second add directly. */
void emit_dt(int t, int sr)
{
  emit_addimm(t,-2,t);
  emit_shrimm(sr,1,sr); /* drop the old bit 0 */
  emit_addimm(t,1,t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
/* Drops bit 0 of sr, compares s against 0x80000000, and rotates the
 * resulting carry back into bit 0 of sr. */
void emit_cmppz(int s, int sr)
{
  emit_shrimm(sr,1,sr);
  emit_cmpimm(s,0x80000000);
  emit_adc(sr,sr); /* sr = sr*2 + carry */
}
/* Sets bit 0 of sr, then replaces sr with the bit-cleared copy in temp
 * when "test s,s" yields "less or equal" (s <= 0). */
void emit_cmppl(int s, int sr, int temp)
{
  assert(temp>=0);

  emit_orimm(sr,1,sr);     /* sr with bit 0 set */
  emit_addimm(sr,-1,temp); /* temp = sr with bit 0 cleared */
  emit_test(s,s);
  emit_cmovle_reg(temp,sr);
}
/* Add with carry through sr: bit 0 of sr is shifted out into the carry flag,
 * "adc s,t" is emitted, and the new carry is rotated back into sr. */
void emit_addc(int s, int t, int sr)
{
  emit_shrimm(sr,1,sr); /* carry <- old bit 0 */
  emit_adc(s,t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
/* Subtract with borrow through sr: bit 0 of sr is shifted out into the carry
 * flag, emit_sbb(s,t) is emitted, and the new carry is rotated back into sr. */
void emit_subc(int s, int t, int sr)
{
  emit_shrimm(sr,1,sr); /* carry <- old bit 0 */
  emit_sbb(s,t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
/* Logical shift right of t by one; the carry left by that shift replaces
 * bit 0 of sr. */
void emit_shrsr(int t, int sr)
{
  emit_shrimm(sr,1,sr); /* drop the old bit 0 */
  emit_shrimm(t,1,t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
/* Arithmetic shift right of t by one; the carry left by that shift replaces
 * bit 0 of sr. */
void emit_sarsr(int t, int sr)
{
  emit_shrimm(sr,1,sr); /* drop the old bit 0 */
  emit_sarimm(t,1,t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
/* Shift left of t by one; the carry left by that shift replaces bit 0 of sr. */
void emit_shlsr(int t, int sr)
{
  emit_shrimm(sr,1,sr); /* drop the old bit 0 */
  emit_shlimm(t,1,t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
emit_rotl(int t)1504 void emit_rotl(int t)
1505 {
1506 assem_debug("rol %%%s\n",regname[t]);
1507 output_byte(0xD1);
1508 output_modrm(3,t,0);
1509 }
/* Rotate t left by one; the carry left by the rotate replaces bit 0 of sr. */
void emit_rotlsr(int t, int sr)
{
  emit_shrimm(sr,1,sr); /* drop the old bit 0 */
  emit_rotl(t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
emit_rotr(int t)1516 void emit_rotr(int t)
1517 {
1518 assem_debug("ror %%%s\n",regname[t]);
1519 output_byte(0xD1);
1520 output_modrm(3,t,1);
1521 }
/* Rotate t right by one; the carry left by the rotate replaces bit 0 of sr. */
void emit_rotrsr(int t, int sr)
{
  emit_shrimm(sr,1,sr); /* drop the old bit 0 */
  emit_rotr(t);
  emit_adc(sr,sr);      /* sr = sr*2 + carry */
}
emit_rotclsr(int t,int sr)1528 void emit_rotclsr(int t, int sr)
1529 {
1530 emit_shrimm(sr,1,sr);
1531 assem_debug("rcl %%%s\n",regname[t]);
1532 output_byte(0xD1);
1533 output_modrm(3,t,2);
1534 emit_adc(sr,sr);
1535 }
emit_rotcrsr(int t,int sr)1536 void emit_rotcrsr(int t, int sr)
1537 {
1538 emit_shrimm(sr,1,sr);
1539 assem_debug("rcr %%%s\n",regname[t]);
1540 output_byte(0xD1);
1541 output_modrm(3,t,3);
1542 emit_adc(sr,sr);
1543 }
1544
emit_call(int a)1545 void emit_call(int a)
1546 {
1547 assem_debug("call %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1548 output_byte(0xe8);
1549 output_w32(a-(int)out-4);
1550 }
emit_jmp(int a)1551 void emit_jmp(int a)
1552 {
1553 assem_debug("jmp %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1554 output_byte(0xe9);
1555 output_w32(a-(int)out-4);
1556 }
emit_jne(int a)1557 void emit_jne(int a)
1558 {
1559 assem_debug("jne %x\n",a);
1560 output_byte(0x0f);
1561 output_byte(0x85);
1562 output_w32(a-(int)out-4);
1563 }
emit_jeq(int a)1564 void emit_jeq(int a)
1565 {
1566 assem_debug("jeq %x\n",a);
1567 output_byte(0x0f);
1568 output_byte(0x84);
1569 output_w32(a-(int)out-4);
1570 }
emit_js(int a)1571 void emit_js(int a)
1572 {
1573 assem_debug("js %x\n",a);
1574 output_byte(0x0f);
1575 output_byte(0x88);
1576 output_w32(a-(int)out-4);
1577 }
emit_jns(int a)1578 void emit_jns(int a)
1579 {
1580 assem_debug("jns %x\n",a);
1581 output_byte(0x0f);
1582 output_byte(0x89);
1583 output_w32(a-(int)out-4);
1584 }
emit_jl(int a)1585 void emit_jl(int a)
1586 {
1587 assem_debug("jl %x\n",a);
1588 output_byte(0x0f);
1589 output_byte(0x8c);
1590 output_w32(a-(int)out-4);
1591 }
emit_jge(int a)1592 void emit_jge(int a)
1593 {
1594 assem_debug("jge %x\n",a);
1595 output_byte(0x0f);
1596 output_byte(0x8d);
1597 output_w32(a-(int)out-4);
1598 }
emit_jno(int a)1599 void emit_jno(int a)
1600 {
1601 assem_debug("jno %x\n",a);
1602 output_byte(0x0f);
1603 output_byte(0x81);
1604 output_w32(a-(int)out-4);
1605 }
emit_jc(int a)1606 void emit_jc(int a)
1607 {
1608 assem_debug("jc %x\n",a);
1609 output_byte(0x0f);
1610 output_byte(0x82);
1611 output_w32(a-(int)out-4);
1612 }
1613
/* Emit "push $imm" (68 imm32). */
void emit_pushimm(int imm)
{
  const int push_imm32_opcode=0x68;

  assem_debug("push $%x\n",imm);
  output_byte(push_imm32_opcode);
  output_w32(imm);
}
/* Emit a push of the 32-bit value stored at absolute address addr
 * (FF /6 with a disp32-only ModRM). */
void emit_pushmem(int addr)
{
  const int push_ext=6; /* opcode extension in the ModRM reg field */

  assem_debug("push *%x\n",addr);
  output_byte(0xFF);
  output_modrm(0,5,push_ext);
  output_w32(addr);
}
/* Emit "pusha" (opcode 60). */
void emit_pusha()
{
  const int pusha_opcode=0x60;

  assem_debug("pusha\n");
  output_byte(pusha_opcode);
}
/* Emit "popa" (opcode 61). */
void emit_popa()
{
  const int popa_opcode=0x61;

  assem_debug("popa\n");
  output_byte(popa_opcode);
}
emit_pushreg(unsigned int r)1637 void emit_pushreg(unsigned int r)
1638 {
1639 assem_debug("push %%%s\n",regname[r]);
1640 assert(r<8);
1641 output_byte(0x50+r);
1642 }
emit_popreg(unsigned int r)1643 void emit_popreg(unsigned int r)
1644 {
1645 assem_debug("pop %%%s\n",regname[r]);
1646 assert(r<8);
1647 output_byte(0x58+r);
1648 }
emit_callreg(unsigned int r)1649 void emit_callreg(unsigned int r)
1650 {
1651 assem_debug("call *%%%s\n",regname[r]);
1652 assert(r<8);
1653 output_byte(0xFF);
1654 output_modrm(3,r,2);
1655 }
emit_jmpreg(unsigned int r)1656 void emit_jmpreg(unsigned int r)
1657 {
1658 assem_debug("jmp *%%%s\n",regname[r]);
1659 assert(r<8);
1660 output_byte(0xFF);
1661 output_modrm(3,r,4);
1662 }
/* Emit an indirect jump through the pointer stored at addr+r
 * (FF /4 with a 32-bit displacement).
 * r must be a valid host register number (0..7). */
void emit_jmpmem_indexed(u32 addr,unsigned int r)
{
  /* Validate before r is used to index regname[] for the debug trace. */
  assert(r<8);
  assem_debug("jmp *%x(%%%s)\n",addr,regname[r]);
  output_byte(0xFF);
  output_modrm(2,r,4);
  output_w32(addr);
}
/* Byte-wise string compare of s1 and s2: bit 0 of sr is set if any of the
 * four bytes are equal.
 * The two inputs are XORed, so an equal byte becomes zero. Subtracting 1
 * from every byte then shows a borrow wherever a byte was zero.
 * s2 is used as scratch and is saved and restored on the stack. */
void emit_cmpstr(int s1, int s2, int sr, int temp)
{
  assert(temp>=0);

  emit_pushreg(s2);
  emit_xor(s1,s2,s2);                          /* s2 = s1 ^ s2 */
  emit_shrimm(sr,1,sr);                        /* drop the old bit 0 of sr */
  emit_mov(s2,temp);
  emit_addimm_and_set_flags(-0x01010101,temp); /* subtract 1 from each byte */
  emit_adcimm(-1,temp);
  emit_not(s2,s2);
  emit_xor(temp,s2,temp);
  emit_andimm(temp,0x01010101,temp);           /* keep the per-byte borrow bits */
  emit_addimm_and_set_flags(-1,temp);          /* carry set if any bit remains */
  emit_adc(sr,sr);                             /* sr = sr*2 + carry */
  emit_popreg(s2);
}
emit_negc(int rs,int rt,int sr)1691 void emit_negc(int rs, int rt, int sr)
1692 {
1693 assert(rs>=0&&rs<8);
1694 if(rt<0) {
1695 emit_shrimm(sr,1,sr); // Get C flag
1696 emit_jc((pointer)out+10); // 6
1697 emit_neg(rs,rs); // 2
1698 emit_neg(rs,rs); // 2
1699 emit_adc(sr,sr); // Save C flag
1700 }else{
1701 if(rs!=rt) emit_mov(rs,rt);
1702 emit_shrimm(sr,1,sr); // Get C flag
1703 emit_jc((pointer)out+9); // 6
1704 emit_addimm(rt,-1,rt); // 3
1705 emit_adc(sr,sr); // Save C flag
1706 emit_not(rt,rt);
1707 }
1708 }
1709
emit_readword(int addr,int rt)1710 void emit_readword(int addr, int rt)
1711 {
1712 assem_debug("mov %x,%%%s\n",addr,regname[rt]);
1713 output_byte(0x8B);
1714 output_modrm(0,5,rt);
1715 output_w32(addr);
1716 }
emit_readword_indexed(int addr,int rs,int rt)1717 void emit_readword_indexed(int addr, int rs, int rt)
1718 {
1719 assem_debug("mov %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1720 output_byte(0x8B);
1721 if(addr<128&&addr>=-128) {
1722 output_modrm(1,rs,rt);
1723 if(rs==ESP) output_sib(0,4,4);
1724 output_byte(addr);
1725 }
1726 else
1727 {
1728 output_modrm(2,rs,rt);
1729 if(rs==ESP) output_sib(0,4,4);
1730 output_w32(addr);
1731 }
1732 }
emit_readword_map(int addr,int map,int rt)1733 void emit_readword_map(int addr, int map, int rt)
1734 {
1735 if(map<0) emit_readword(addr, rt);
1736 else
1737 {
1738 assem_debug("mov (%x,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1739 output_byte(0x8B);
1740 output_modrm(0,4,rt);
1741 output_sib(2,map,5);
1742 output_w32(addr);
1743 }
1744 }
emit_readword_indexed_map(int addr,int rs,int map,int rt)1745 void emit_readword_indexed_map(int addr, int rs, int map, int rt)
1746 {
1747 assert(map>=0);
1748 if(map<0) emit_readword_indexed(addr, rs, rt);
1749 else {
1750 assem_debug("mov %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1751 assert(rs!=ESP);
1752 output_byte(0x8B);
1753 if(addr==0&&rs!=EBP) {
1754 output_modrm(0,4,rt);
1755 output_sib(2,map,rs);
1756 }
1757 else if(addr<128&&addr>=-128) {
1758 output_modrm(1,4,rt);
1759 output_sib(2,map,rs);
1760 output_byte(addr);
1761 }
1762 else
1763 {
1764 output_modrm(2,4,rt);
1765 output_sib(2,map,rs);
1766 output_w32(addr);
1767 }
1768 }
1769 }
emit_movmem_indexedx4(int addr,int rs,int rt)1770 void emit_movmem_indexedx4(int addr, int rs, int rt)
1771 {
1772 assem_debug("mov (%x,%%%s,4),%%%s\n",addr,regname[rs],regname[rt]);
1773 output_byte(0x8B);
1774 output_modrm(0,4,rt);
1775 output_sib(2,rs,5);
1776 output_w32(addr);
1777 }
emit_movsbl(int addr,int rt)1778 void emit_movsbl(int addr, int rt)
1779 {
1780 assem_debug("movsbl %x,%%%s\n",addr,regname[rt]);
1781 output_byte(0x0F);
1782 output_byte(0xBE);
1783 output_modrm(0,5,rt);
1784 output_w32(addr);
1785 }
emit_movsbl_indexed(int addr,int rs,int rt)1786 void emit_movsbl_indexed(int addr, int rs, int rt)
1787 {
1788 assem_debug("movsbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1789 output_byte(0x0F);
1790 output_byte(0xBE);
1791 output_modrm(2,rs,rt);
1792 output_w32(addr);
1793 }
emit_movsbl_map(int addr,int map,int rt)1794 void emit_movsbl_map(int addr, int map, int rt)
1795 {
1796 if(map<0) emit_movsbl(addr, rt);
1797 else
1798 {
1799 assem_debug("movsbl (%x,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1800 output_byte(0x0F);
1801 output_byte(0xBE);
1802 output_modrm(0,4,rt);
1803 output_sib(2,map,5);
1804 output_w32(addr);
1805 }
1806 }
emit_movsbl_indexed_map(int addr,int rs,int map,int rt)1807 void emit_movsbl_indexed_map(int addr, int rs, int map, int rt)
1808 {
1809 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1810 else {
1811 assem_debug("movsbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1812 assert(rs!=ESP);
1813 output_byte(0x0F);
1814 output_byte(0xBE);
1815 if(addr==0&&rs!=EBP) {
1816 output_modrm(0,4,rt);
1817 output_sib(2,map,rs);
1818 }
1819 else if(addr<128&&addr>=-128) {
1820 output_modrm(1,4,rt);
1821 output_sib(2,map,rs);
1822 output_byte(addr);
1823 }
1824 else
1825 {
1826 output_modrm(2,4,rt);
1827 output_sib(2,map,rs);
1828 output_w32(addr);
1829 }
1830 }
1831 }
emit_movswl(int addr,int rt)1832 void emit_movswl(int addr, int rt)
1833 {
1834 assem_debug("movswl %x,%%%s\n",addr,regname[rt]);
1835 output_byte(0x0F);
1836 output_byte(0xBF);
1837 output_modrm(0,5,rt);
1838 output_w32(addr);
1839 }
emit_movswl_indexed(int addr,int rs,int rt)1840 void emit_movswl_indexed(int addr, int rs, int rt)
1841 {
1842 assem_debug("movswl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1843 output_byte(0x0F);
1844 output_byte(0xBF);
1845 output_modrm(2,rs,rt);
1846 output_w32(addr);
1847 }
emit_movswl_map(int addr,int map,int rt)1848 void emit_movswl_map(int addr, int map, int rt)
1849 {
1850 if(map<0) emit_movswl(addr, rt);
1851 else
1852 {
1853 assem_debug("movswl (%x,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1854 output_byte(0x0F);
1855 output_byte(0xBF);
1856 output_modrm(0,4,rt);
1857 output_sib(2,map,5);
1858 output_w32(addr);
1859 }
1860 }
emit_movswl_indexed_map(int addr,int rs,int map,int rt)1861 void emit_movswl_indexed_map(int addr, int rs, int map, int rt)
1862 {
1863 assert(map>=0);
1864 if(map<0) emit_movswl_indexed(addr, rs, rt);
1865 else {
1866 assem_debug("movswl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1867 assert(rs!=ESP);
1868 output_byte(0x0F);
1869 output_byte(0xBF);
1870 if(addr==0&&rs!=EBP) {
1871 output_modrm(0,4,rt);
1872 output_sib(2,map,rs);
1873 }
1874 else if(addr<128&&addr>=-128) {
1875 output_modrm(1,4,rt);
1876 output_sib(2,map,rs);
1877 output_byte(addr);
1878 }
1879 else
1880 {
1881 output_modrm(2,4,rt);
1882 output_sib(2,map,rs);
1883 output_w32(addr);
1884 }
1885 }
1886 }
emit_movzbl(int addr,int rt)1887 void emit_movzbl(int addr, int rt)
1888 {
1889 assem_debug("movzbl %x,%%%s\n",addr,regname[rt]);
1890 output_byte(0x0F);
1891 output_byte(0xB6);
1892 output_modrm(0,5,rt);
1893 output_w32(addr);
1894 }
emit_movzbl_indexed(int addr,int rs,int rt)1895 void emit_movzbl_indexed(int addr, int rs, int rt)
1896 {
1897 assem_debug("movzbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1898 output_byte(0x0F);
1899 output_byte(0xB6);
1900 output_modrm(2,rs,rt);
1901 output_w32(addr);
1902 }
emit_movzbl_map(int addr,int map,int rt)1903 void emit_movzbl_map(int addr, int map, int rt)
1904 {
1905 if(map<0) emit_movzbl(addr, rt);
1906 else
1907 {
1908 assem_debug("movzbl (%x,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1909 output_byte(0x0F);
1910 output_byte(0xB6);
1911 output_modrm(0,4,rt);
1912 output_sib(2,map,5);
1913 output_w32(addr);
1914 }
1915 }
emit_movzbl_indexed_map(int addr,int rs,int map,int rt)1916 void emit_movzbl_indexed_map(int addr, int rs, int map, int rt)
1917 {
1918 if(map<0) emit_movzbl_indexed(addr, rs, rt);
1919 else {
1920 assem_debug("movzbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1921 assert(rs!=ESP);
1922 output_byte(0x0F);
1923 output_byte(0xB6);
1924 if(addr==0&&rs!=EBP) {
1925 output_modrm(0,4,rt);
1926 output_sib(2,map,rs);
1927 }
1928 else if(addr<128&&addr>=-128) {
1929 output_modrm(1,4,rt);
1930 output_sib(2,map,rs);
1931 output_byte(addr);
1932 }
1933 else
1934 {
1935 output_modrm(2,4,rt);
1936 output_sib(2,map,rs);
1937 output_w32(addr);
1938 }
1939 }
1940 }
emit_movzwl(int addr,int rt)1941 void emit_movzwl(int addr, int rt)
1942 {
1943 assem_debug("movzwl %x,%%%s\n",addr,regname[rt]);
1944 output_byte(0x0F);
1945 output_byte(0xB7);
1946 output_modrm(0,5,rt);
1947 output_w32(addr);
1948 }
emit_movzwl_indexed(int addr,int rs,int rt)1949 void emit_movzwl_indexed(int addr, int rs, int rt)
1950 {
1951 assem_debug("movzwl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1952 output_byte(0x0F);
1953 output_byte(0xB7);
1954 output_modrm(2,rs,rt);
1955 output_w32(addr);
1956 }
emit_movzwl_map(int addr,int map,int rt)1957 void emit_movzwl_map(int addr, int map, int rt)
1958 {
1959 if(map<0) emit_movzwl(addr, rt);
1960 else
1961 {
1962 assem_debug("movzwl (%x,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1963 output_byte(0x0F);
1964 output_byte(0xB7);
1965 output_modrm(0,4,rt);
1966 output_sib(2,map,5);
1967 output_w32(addr);
1968 }
1969 }
1970
emit_xchg(int rs,int rt)1971 void emit_xchg(int rs, int rt)
1972 {
1973 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1974 if(rs==EAX) {
1975 output_byte(0x90+rt);
1976 }
1977 else
1978 {
1979 output_byte(0x87);
1980 output_modrm(3,rs,rt);
1981 }
1982 }
emit_writeword(int rt,int addr)1983 void emit_writeword(int rt, int addr)
1984 {
1985 assem_debug("movl %%%s,%x\n",regname[rt],addr);
1986 output_byte(0x89);
1987 output_modrm(0,5,rt);
1988 output_w32(addr);
1989 }
emit_writeword_indexed(int rt,int addr,int rs)1990 void emit_writeword_indexed(int rt, int addr, int rs)
1991 {
1992 assem_debug("mov %%%s,%x+%%%s\n",regname[rt],addr,regname[rs]);
1993 output_byte(0x89);
1994 if(addr<128&&addr>=-128) {
1995 output_modrm(1,rs,rt);
1996 if(rs==ESP) output_sib(0,4,4);
1997 output_byte(addr);
1998 }
1999 else
2000 {
2001 output_modrm(2,rs,rt);
2002 if(rs==ESP) output_sib(0,4,4);
2003 output_w32(addr);
2004 }
2005 }
#if 0
/* Compiled out. Word store relative to rdram, optionally indexed by map. */
void emit_writeword_map(int rt, int addr, int map)
{
  if(map>=0) {
    emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, map);
    return;
  }
  emit_writeword(rt, addr+(int)rdram-0x80000000);
}
#endif
emit_writeword_indexed_map(int rt,int addr,int rs,int map,int temp)2016 void emit_writeword_indexed_map(int rt, int addr, int rs, int map, int temp)
2017 {
2018 if(map<0) emit_writeword_indexed(rt, addr, rs);
2019 else {
2020 assem_debug("mov %%%s,%x(%%%s,%%%s,1)\n",regname[rt],addr,regname[rs],regname[map]);
2021 assert(rs!=ESP);
2022 output_byte(0x89);
2023 if(addr==0&&rs!=EBP) {
2024 output_modrm(0,4,rt);
2025 output_sib(0,map,rs);
2026 }
2027 else if(addr<128&&addr>=-128) {
2028 output_modrm(1,4,rt);
2029 output_sib(0,map,rs);
2030 output_byte(addr);
2031 }
2032 else
2033 {
2034 output_modrm(2,4,rt);
2035 output_sib(0,map,rs);
2036 output_w32(addr);
2037 }
2038 }
2039 }
emit_writehword(int rt,int addr)2040 void emit_writehword(int rt, int addr)
2041 {
2042 assem_debug("movw %%%s,%x\n",regname[rt]+1,addr);
2043 output_byte(0x66);
2044 output_byte(0x89);
2045 output_modrm(0,5,rt);
2046 output_w32(addr);
2047 }
emit_writehword_indexed(int rt,int addr,int rs)2048 void emit_writehword_indexed(int rt, int addr, int rs)
2049 {
2050 assem_debug("movw %%%s,%x+%%%s\n",regname[rt]+1,addr,regname[rs]);
2051 output_byte(0x66);
2052 output_byte(0x89);
2053 if(addr<128&&addr>=-128) {
2054 output_modrm(1,rs,rt);
2055 output_byte(addr);
2056 }
2057 else
2058 {
2059 output_modrm(2,rs,rt);
2060 output_w32(addr);
2061 }
2062 }
#if 0
/* Compiled out. Halfword store relative to rdram, optionally indexed by map. */
void emit_writehword_map(int rt, int addr, int map)
{
  if(map>=0) {
    emit_writehword_indexed(rt, addr+(int)rdram-0x80000000, map);
    return;
  }
  emit_writehword(rt, addr+(int)rdram-0x80000000);
}
#endif
emit_writehword_indexed_map(int rt,int addr,int rs,int map,int temp)2073 void emit_writehword_indexed_map(int rt, int addr, int rs, int map, int temp)
2074 {
2075 if(map<0) emit_writeword_indexed(rt, addr, rs);
2076 else {
2077 assem_debug("movw %%%s,%x(%%%s,%%%s,1)\n",regname[rt]+1,addr,regname[rs],regname[map]);
2078 assert(rs!=ESP);
2079 output_byte(0x66);
2080 output_byte(0x89);
2081 if(addr==0&&rs!=EBP) {
2082 output_modrm(0,4,rt);
2083 output_sib(0,map,rs);
2084 }
2085 else if(addr<128&&addr>=-128) {
2086 output_modrm(1,4,rt);
2087 output_sib(0,map,rs);
2088 output_byte(addr);
2089 }
2090 else
2091 {
2092 output_modrm(2,4,rt);
2093 output_sib(0,map,rs);
2094 output_w32(addr);
2095 }
2096 }
2097 }
emit_writebyte(int rt,int addr)2098 void emit_writebyte(int rt, int addr)
2099 {
2100 if(rt<4) {
2101 assem_debug("movb %%%cl,%x\n",regname[rt][1],addr);
2102 output_byte(0x88);
2103 output_modrm(0,5,rt);
2104 output_w32(addr);
2105 }
2106 else
2107 {
2108 emit_xchg(EAX,rt);
2109 emit_writebyte(EAX,addr);
2110 emit_xchg(EAX,rt);
2111 }
2112 }
emit_writebyte_indexed(int rt,int addr,int rs)2113 void emit_writebyte_indexed(int rt, int addr, int rs)
2114 {
2115 if(rt<4) {
2116 assem_debug("movb %%%cl,%x+%%%s\n",regname[rt][1],addr,regname[rs]);
2117 output_byte(0x88);
2118 if(addr<128&&addr>=-128) {
2119 output_modrm(1,rs,rt);
2120 output_byte(addr);
2121 }
2122 else
2123 {
2124 output_modrm(2,rs,rt);
2125 output_w32(addr);
2126 }
2127 }
2128 else
2129 {
2130 emit_xchg(EAX,rt);
2131 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
2132 emit_xchg(EAX,rt);
2133 }
2134 }
#if 0
/* Compiled out. Byte store relative to rdram, optionally indexed by map. */
void emit_writebyte_map(int rt, int addr, int map)
{
  if(map>=0) {
    emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, map);
    return;
  }
  emit_writebyte(rt, addr+(int)rdram-0x80000000);
}
#endif
emit_writebyte_indexed_map(int rt,int addr,int rs,int map,int temp)2145 void emit_writebyte_indexed_map(int rt, int addr, int rs, int map, int temp)
2146 {
2147 if(map<0) emit_writebyte_indexed(rt, addr, rs);
2148 else
2149 if(rt<4) {
2150 assem_debug("movb %%%cl,%x(%%%s,%%%s,1)\n",regname[rt][1],addr,regname[rs],regname[map]);
2151 assert(rs!=ESP);
2152 output_byte(0x88);
2153 if(addr==0&&rs!=EBP) {
2154 output_modrm(0,4,rt);
2155 output_sib(0,map,rs);
2156 }
2157 else if(addr<128&&addr>=-128) {
2158 output_modrm(1,4,rt);
2159 output_sib(0,map,rs);
2160 output_byte(addr);
2161 }
2162 else
2163 {
2164 output_modrm(2,4,rt);
2165 output_sib(0,map,rs);
2166 output_w32(addr);
2167 }
2168 }
2169 else
2170 {
2171 emit_xchg(EAX,rt);
2172 emit_writebyte_indexed_map(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
2173 emit_xchg(EAX,rt);
2174 }
2175 }
/* Emit a store of the 32-bit immediate imm to absolute address addr
 * (C7 /0, disp32, imm32). */
void emit_writeword_imm(int imm, int addr)
{
  const int mov_imm32_opcode=0xC7;

  assem_debug("movl $%x,%x\n",imm,addr);
  output_byte(mov_imm32_opcode);
  output_modrm(0,5,0);
  output_w32(addr);
  output_w32(imm);
}
/* Emit a store of the 32-bit immediate imm to addr(%esp).
 * addr must fit in a signed byte because only the disp8 form is emitted. */
void emit_writeword_imm_esp(int imm, int addr)
{
  const int mov_imm32_opcode=0xC7;

  assem_debug("mov $%x,%x(%%esp)\n",imm,addr);
  assert(addr>=-128&&addr<128);
  output_byte(mov_imm32_opcode);
  output_modrm(1,4,0);
  output_sib(0,4,4); /* base=ESP, no index */
  output_byte(addr);
  output_w32(imm);
}
/* Emit a store of the 8-bit immediate imm to absolute address addr
 * (C6 /0, disp32, imm8). imm must fit in a signed byte. */
void emit_writebyte_imm(int imm, int addr)
{
  const int mov_imm8_opcode=0xC6;

  assem_debug("movb $%x,%x\n",imm,addr);
  assert(imm>=-128&&imm<128);
  output_byte(mov_imm8_opcode);
  output_modrm(0,5,0);
  output_w32(addr);
  output_byte(imm);
}
/* Emit a store of the 8-bit immediate imm to addr(%esp).
 * addr must fit in a signed byte because only the disp8 form is emitted. */
void emit_writebyte_imm_esp(int imm, int addr)
{
  const int mov_imm8_opcode=0xC6;

  assem_debug("movb $%x,%x(%%esp)\n",imm,addr);
  assert(addr>=-128&&addr<128);
  output_byte(mov_imm8_opcode);
  output_modrm(1,4,0);
  output_sib(0,4,4); /* base=ESP, no index */
  output_byte(addr);
  output_byte(imm);
}
2213
emit_rmw_andimm(int addr,int map,int imm)2214 void emit_rmw_andimm(int addr, int map, int imm)
2215 {
2216 if(map<0) {
2217 assem_debug("andb $0x%x,(%%%s)\n",imm,regname[addr]);
2218 assert(addr!=ESP);
2219 output_byte(0x80);
2220 output_modrm(0,addr,4);
2221 }
2222 else
2223 {
2224 assem_debug("andb $0x%x,(%%%s,%%%s,1)\n",imm,regname[addr],regname[map]);
2225 assert(addr!=ESP);
2226 output_byte(0x80);
2227 output_modrm(0,4,4);
2228 if(addr!=EBP) {
2229 output_sib(0,map,addr);
2230 }
2231 else {
2232 assert(addr!=map);
2233 output_sib(0,addr,map);
2234 }
2235 }
2236 output_byte(imm);
2237 }
emit_rmw_xorimm(int addr,int map,int imm)2238 void emit_rmw_xorimm(int addr, int map, int imm)
2239 {
2240 if(map<0) {
2241 assem_debug("xorb $0x%x,(%%%s)\n",imm,regname[addr]);
2242 assert(addr!=ESP);
2243 output_byte(0x80);
2244 output_modrm(0,addr,6);
2245 }
2246 else
2247 {
2248 assem_debug("xorb $0x%x,(%%%s,%%%s,1)\n",imm,regname[addr],regname[map]);
2249 assert(addr!=ESP);
2250 output_byte(0x80);
2251 output_modrm(0,4,6);
2252 if(addr!=EBP) {
2253 output_sib(0,map,addr);
2254 }
2255 else {
2256 assert(addr!=map);
2257 output_sib(0,addr,map);
2258 }
2259 }
2260 output_byte(imm);
2261 }
emit_rmw_orimm(int addr,int map,int imm)2262 void emit_rmw_orimm(int addr, int map, int imm)
2263 {
2264 if(map<0) {
2265 assem_debug("orb $0x%x,(%%%s)\n",imm,regname[addr]);
2266 assert(addr!=ESP);
2267 output_byte(0x80);
2268 output_modrm(0,addr,1);
2269 }
2270 else
2271 {
2272 assem_debug("orb $0x%x,(%%%s,%%%s,1)\n",imm,regname[addr],regname[map]);
2273 assert(addr!=ESP);
2274 output_byte(0x80);
2275 output_modrm(0,4,1);
2276 if(addr!=EBP) {
2277 output_sib(0,map,addr);
2278 }
2279 else {
2280 assert(addr!=map);
2281 output_sib(0,addr,map);
2282 }
2283 }
2284 output_byte(imm);
2285 }
emit_sh2tas(int addr,int map,int sr)2286 void emit_sh2tas(int addr, int map, int sr)
2287 {
2288 emit_shrimm(sr,1,sr);
2289 if(map<0) {
2290 assem_debug("cmpb $1,(%%%s)\n",regname[addr]);
2291 assert(addr!=ESP);
2292 output_byte(0x80);
2293 output_modrm(0,addr,7);
2294 }
2295 else
2296 {
2297 assem_debug("cmpb $1,(%%%s,%%%s,1)\n",regname[addr],regname[map]);
2298 assert(addr!=ESP);
2299 output_byte(0x80);
2300 output_modrm(0,4,7);
2301 if(addr!=EBP) {
2302 output_sib(0,map,addr);
2303 }
2304 else {
2305 assert(addr!=map);
2306 output_sib(0,addr,map);
2307 }
2308 }
2309 output_byte(1);
2310 emit_adc(sr,sr);
2311 emit_rmw_orimm(addr,map,0x80);
2312 }
2313
emit_mul(int rs)2314 void emit_mul(int rs)
2315 {
2316 assem_debug("mul %%%s\n",regname[rs]);
2317 output_byte(0xF7);
2318 output_modrm(3,rs,4);
2319 }
emit_imul(int rs)2320 void emit_imul(int rs)
2321 {
2322 assem_debug("imul %%%s\n",regname[rs]);
2323 output_byte(0xF7);
2324 output_modrm(3,rs,5);
2325 }
emit_multiply(int rs1,int rs2,int rt)2326 void emit_multiply(int rs1,int rs2,int rt)
2327 {
2328 if(rs1==rt) {
2329 assem_debug("imul %%%s,%%%s\n",regname[rs2],regname[rt]);
2330 output_byte(0x0F);
2331 output_byte(0xAF);
2332 output_modrm(3,rs2,rt);
2333 }
2334 else
2335 {
2336 emit_mov(rs1,rt);
2337 emit_multiply(rt,rs2,rt);
2338 }
2339 }
emit_div(int rs)2340 void emit_div(int rs)
2341 {
2342 assem_debug("div %%%s\n",regname[rs]);
2343 output_byte(0xF7);
2344 output_modrm(3,rs,6);
2345 }
emit_idiv(int rs)2346 void emit_idiv(int rs)
2347 {
2348 assem_debug("idiv %%%s\n",regname[rs]);
2349 output_byte(0xF7);
2350 output_modrm(3,rs,7);
2351 }
/* Emit "cdq" (opcode 99). */
void emit_cdq()
{
  const int cdq_opcode=0x99;

  assem_debug("cdq\n");
  output_byte(cdq_opcode);
}
// Emit a sequence that rebuilds the low bits of sr from the sign bits of
// s1 and s2 (the name suggests the SH-2 DIV0S instruction -- TODO confirm
// against the caller).
// Assuming emit_shlimm/emit_mov/emit_xor/emit_adc emit the plain x86
// instructions their names suggest, the net effect on sr is:
//   bit 0      = sign(s1) XOR sign(s2)
//   bits 1-7   = unchanged
//   bit 8      = sign bit of s2
//   bit 9      = sign bit of s1
//   bits 10-31 = cleared
// temp is clobbered (it ends up holding s1^s2).
// The order of the instructions matters: each `bt` loads the carry flag
// that the following rcr/adc consumes.
void emit_div0s(int s1, int s2, int sr, int temp) {
  // Move sr bits 0-7 to the top of the register, discarding everything above
  emit_shlimm(sr,24,sr);
  emit_mov(s2,temp);
  // carry = sign bit of s2 (0F BA /4 ib = bt r/m32,imm8)
  assem_debug("bt %%%s,31\n",regname[s2]);
  output_byte(0x0f);
  output_byte(0xba);
  output_modrm(3,s2,4);
  output_byte(0x1f);
  // rotate the carry into bit 31 of sr (D1 /3 = rcr r/m32,1)
  assem_debug("rcr %%%s\n",regname[sr]);
  output_byte(0xD1);
  output_modrm(3,sr,3);
  emit_xor(temp,s1,temp);
  // carry = sign bit of s1
  assem_debug("bt %%%s,31\n",regname[s1]);
  output_byte(0x0f);
  output_byte(0xba);
  output_modrm(3,s1,4);
  output_byte(0x1f);
  // 33-bit rotate right by 24 through carry (C1 /3 ib = rcr r/m32,imm8)
  assem_debug("rcr %%%s,24\n",regname[sr]);
  output_byte(0xc1);
  output_modrm(3,sr,3);
  output_byte(24);
  // carry = sign bit of s1^s2
  assem_debug("bt %%%s,31\n",regname[temp]);
  output_byte(0x0f);
  output_byte(0xba);
  output_modrm(3,temp,4);
  output_byte(0x1f);
  // shift sr left by one and insert the carry as the new bit 0
  emit_adc(sr,sr);
}
2385
2386 // Load return address
emit_load_return_address(unsigned int rt)2387 void emit_load_return_address(unsigned int rt)
2388 {
2389 // (assumes this instruction will be followed by a 5-byte jmp instruction)
2390 emit_movimm((pointer)out+10,rt);
2391 }
2392
// Load two immediates, optimizing for small code size:
// rt1 = imm1, then rt2 = imm2. When imm2 lies within a signed 8-bit
// distance of imm1, rt2 is derived from rt1 with emit_addimm instead of a
// second full immediate load.
void emit_mov2imm_compact(int imm1,unsigned int rt1,int imm2,unsigned int rt2)
{
  // Subtract in unsigned arithmetic: `imm2-imm1` on ints is undefined
  // behaviour when the true difference does not fit in an int. The wrapped
  // 32-bit difference is exactly what a 32-bit add needs to turn imm1
  // into imm2.
  int delta=(int)((unsigned int)imm2-(unsigned int)imm1);
  emit_movimm(imm1,rt1);
  if(delta<128&&delta>=-128) emit_addimm(rt1,delta,rt2);
  else emit_movimm(imm2,rt2);
}
2400
2401 // compare byte in memory
emit_cmpmem_imm_byte(pointer addr,int imm)2402 void emit_cmpmem_imm_byte(pointer addr,int imm)
2403 {
2404 assert(imm<128&&imm>=-127);
2405 assem_debug("cmpb $%d,%x\n",imm,addr);
2406 output_byte(0x80);
2407 output_modrm(0,5,7);
2408 output_w32(addr);
2409 output_byte(imm);
2410 }
2411
2412 // special case for checking invalid_code
emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)2413 void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2414 {
2415 assert(imm<128&&imm>=-127);
2416 assert(r>=0&&r<8);
2417 emit_shrimm(r,12,r);
2418 assem_debug("cmp $%d,%x+%%%s\n",imm,addr,regname[r]);
2419 output_byte(0x80);
2420 output_modrm(2,r,7);
2421 output_w32(addr);
2422 output_byte(imm);
2423 }
2424
2425 // special case for checking hash_table
emit_cmpmem_indexed(int addr,int rs,int rt)2426 void emit_cmpmem_indexed(int addr,int rs,int rt)
2427 {
2428 assert(rs>=0&&rs<8);
2429 assert(rt>=0&&rt<8);
2430 assem_debug("cmp %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
2431 output_byte(0x39);
2432 output_modrm(2,rs,rt);
2433 output_w32(addr);
2434 }
2435
2436 // special case for checking memory_map in verify_mapping
emit_cmpmem(int addr,int rt)2437 void emit_cmpmem(int addr,int rt)
2438 {
2439 assert(rt>=0&&rt<8);
2440 assem_debug("cmp %x,%%%s\n",addr,regname[rt]);
2441 output_byte(0x39);
2442 output_modrm(0,5,rt);
2443 output_w32(addr);
2444 }
2445
// Used to preload hash table entries: emits a prefetch (0F 18 /1) of the
// absolute address addr.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1); // mod=0,rm=5: 32-bit absolute address follows
  output_w32((int)addr);
}
2455
2456 /*void emit_submem(int r,int addr)
2457 {
2458 assert(r>=0&&r<8);
2459 assem_debug("sub %x,%%%s\n",addr,regname[r]);
2460 output_byte(0x2B);
2461 output_modrm(0,5,r);
2462 output_w32((int)addr);
2463 }*/
emit_subfrommem(int addr,int r)2464 void emit_subfrommem(int addr,int r)
2465 {
2466 assert(r>=0&&r<8);
2467 assem_debug("sub %%%s,%x\n",regname[r],addr);
2468 output_byte(0x29);
2469 output_modrm(0,5,r);
2470 output_w32((int)addr);
2471 }
2472
emit_flds(int r)2473 void emit_flds(int r)
2474 {
2475 assem_debug("flds (%%%s)\n",regname[r]);
2476 output_byte(0xd9);
2477 if(r!=EBP) output_modrm(0,r,0);
2478 else {output_modrm(1,EBP,0);output_byte(0);}
2479 }
emit_fldl(int r)2480 void emit_fldl(int r)
2481 {
2482 assem_debug("fldl (%%%s)\n",regname[r]);
2483 output_byte(0xdd);
2484 if(r!=EBP) output_modrm(0,r,0);
2485 else {output_modrm(1,EBP,0);output_byte(0);}
2486 }
// Emit `fucomip` against x87 stack register r (DF E8+r); r must be 0-7.
void emit_fucomip(unsigned int r)
{
  assem_debug("fucomip %d\n",r);
  assert(r<8);
  output_byte(0xdf);
  output_byte(0xe8+r); // stack register index is folded into the second byte
}
// Emit `fchs` (D9 E0).
void emit_fchs()
{
  const int second_byte=0xe0;
  assem_debug("fchs\n");
  output_byte(0xd9);
  output_byte(second_byte);
}
// Emit `fabs` (D9 E1).
void emit_fabs()
{
  const int second_byte=0xe1;
  assem_debug("fabs\n");
  output_byte(0xd9);
  output_byte(second_byte);
}
// Emit `fsqrt` (D9 FA).
void emit_fsqrt()
{
  const int second_byte=0xfa;
  assem_debug("fsqrt\n");
  output_byte(0xd9);
  output_byte(second_byte);
}
emit_fadds(int r)2512 void emit_fadds(int r)
2513 {
2514 assem_debug("fadds (%%%s)\n",regname[r]);
2515 output_byte(0xd8);
2516 if(r!=EBP) output_modrm(0,r,0);
2517 else {output_modrm(1,EBP,0);output_byte(0);}
2518 }
emit_faddl(int r)2519 void emit_faddl(int r)
2520 {
2521 assem_debug("faddl (%%%s)\n",regname[r]);
2522 output_byte(0xdc);
2523 if(r!=EBP) output_modrm(0,r,0);
2524 else {output_modrm(1,EBP,0);output_byte(0);}
2525 }
// Emit `fadd` with x87 stack register r as the operand (D8 C0+r).
void emit_fadd(int r)
{
  const int second_byte=0xc0+r; // stack register index is folded in
  assem_debug("fadd st%d\n",r);
  output_byte(0xd8);
  output_byte(second_byte);
}
emit_fsubs(int r)2532 void emit_fsubs(int r)
2533 {
2534 assem_debug("fsubs (%%%s)\n",regname[r]);
2535 output_byte(0xd8);
2536 if(r!=EBP) output_modrm(0,r,4);
2537 else {output_modrm(1,EBP,4);output_byte(0);}
2538 }
emit_fsubl(int r)2539 void emit_fsubl(int r)
2540 {
2541 assem_debug("fsubl (%%%s)\n",regname[r]);
2542 output_byte(0xdc);
2543 if(r!=EBP) output_modrm(0,r,4);
2544 else {output_modrm(1,EBP,4);output_byte(0);}
2545 }
// Emit `fsub` with x87 stack register r as the operand (D8 E0+r).
void emit_fsub(int r)
{
  const int second_byte=0xe0+r; // stack register index is folded in
  assem_debug("fsub st%d\n",r);
  output_byte(0xd8);
  output_byte(second_byte);
}
emit_fmuls(int r)2552 void emit_fmuls(int r)
2553 {
2554 assem_debug("fmuls (%%%s)\n",regname[r]);
2555 output_byte(0xd8);
2556 if(r!=EBP) output_modrm(0,r,1);
2557 else {output_modrm(1,EBP,1);output_byte(0);}
2558 }
emit_fmull(int r)2559 void emit_fmull(int r)
2560 {
2561 assem_debug("fmull (%%%s)\n",regname[r]);
2562 output_byte(0xdc);
2563 if(r!=EBP) output_modrm(0,r,1);
2564 else {output_modrm(1,EBP,1);output_byte(0);}
2565 }
// Emit `fmul` with x87 stack register r as the operand (D8 C8+r).
void emit_fmul(int r)
{
  const int second_byte=0xc8+r; // stack register index is folded in
  assem_debug("fmul st%d\n",r);
  output_byte(0xd8);
  output_byte(second_byte);
}
emit_fdivs(int r)2572 void emit_fdivs(int r)
2573 {
2574 assem_debug("fdivs (%%%s)\n",regname[r]);
2575 output_byte(0xd8);
2576 if(r!=EBP) output_modrm(0,r,6);
2577 else {output_modrm(1,EBP,6);output_byte(0);}
2578 }
emit_fdivl(int r)2579 void emit_fdivl(int r)
2580 {
2581 assem_debug("fdivl (%%%s)\n",regname[r]);
2582 output_byte(0xdc);
2583 if(r!=EBP) output_modrm(0,r,6);
2584 else {output_modrm(1,EBP,6);output_byte(0);}
2585 }
// Emit `fdiv` with x87 stack register r as the operand (D8 F0+r).
void emit_fdiv(int r)
{
  const int second_byte=0xf0+r; // stack register index is folded in
  assem_debug("fdiv st%d\n",r);
  output_byte(0xd8);
  output_byte(second_byte);
}
// Pop the x87 stack by emitting `fstp st(0)` (DD D8).
void emit_fpop()
{
  const int second_byte=0xd8; // fstp with st(0) as its own destination
  assem_debug("fpop\n");
  output_byte(0xdd);
  output_byte(second_byte);
}
emit_fildl(int r)2599 void emit_fildl(int r)
2600 {
2601 assem_debug("fildl (%%%s)\n",regname[r]);
2602 output_byte(0xdb);
2603 if(r!=EBP) output_modrm(0,r,0);
2604 else {output_modrm(1,EBP,0);output_byte(0);}
2605 }
emit_fildll(int r)2606 void emit_fildll(int r)
2607 {
2608 assem_debug("fildll (%%%s)\n",regname[r]);
2609 output_byte(0xdf);
2610 if(r!=EBP) output_modrm(0,r,5);
2611 else {output_modrm(1,EBP,5);output_byte(0);}
2612 }
emit_fistpl(int r)2613 void emit_fistpl(int r)
2614 {
2615 assem_debug("fistpl (%%%s)\n",regname[r]);
2616 output_byte(0xdb);
2617 if(r!=EBP) output_modrm(0,r,3);
2618 else {output_modrm(1,EBP,3);output_byte(0);}
2619 }
emit_fistpll(int r)2620 void emit_fistpll(int r)
2621 {
2622 assem_debug("fistpll (%%%s)\n",regname[r]);
2623 output_byte(0xdf);
2624 if(r!=EBP) output_modrm(0,r,7);
2625 else {output_modrm(1,EBP,7);output_byte(0);}
2626 }
emit_fstps(int r)2627 void emit_fstps(int r)
2628 {
2629 assem_debug("fstps (%%%s)\n",regname[r]);
2630 output_byte(0xd9);
2631 if(r!=EBP) output_modrm(0,r,3);
2632 else {output_modrm(1,EBP,3);output_byte(0);}
2633 }
emit_fstpl(int r)2634 void emit_fstpl(int r)
2635 {
2636 assem_debug("fstpl (%%%s)\n",regname[r]);
2637 output_byte(0xdd);
2638 if(r!=EBP) output_modrm(0,r,3);
2639 else {output_modrm(1,EBP,3);output_byte(0);}
2640 }
// Emit `fnstcw (%esp)` (D9 /7): store the x87 control word at the top of
// the stack.
void emit_fnstcw_stack()
{
  const int sib_escape=4; // rm=4 means a SIB byte follows
  assem_debug("fnstcw (%%esp)\n");
  output_byte(0xd9);
  output_modrm(0,sib_escape,7);
  output_sib(0,4,4); // both register fields 4: plain (%esp) operand
}
// Emit `fldcw (%esp)` (D9 /5): load the x87 control word from the top of
// the stack.
void emit_fldcw_stack()
{
  const int sib_escape=4; // rm=4 means a SIB byte follows
  assem_debug("fldcw (%%esp)\n");
  output_byte(0xd9);
  output_modrm(0,sib_escape,5);
  output_sib(0,4,4); // both register fields 4: plain (%esp) operand
}
emit_fldcw_indexed(int addr,int r)2655 void emit_fldcw_indexed(int addr,int r)
2656 {
2657 assem_debug("fldcw %x(%%%s)\n",addr,regname[r]);
2658 output_byte(0xd9);
2659 output_modrm(0,4,5);
2660 output_sib(1,r,5);
2661 output_w32(addr);
2662 }
// Emit `fldcw` (D9 /5) loading the x87 control word from the absolute
// address addr.
void emit_fldcw(int addr)
{
  assem_debug("fldcw %x\n",addr);
  output_byte(0xd9);
  output_modrm(0,5,5); // mod=0,rm=5: 32-bit absolute address follows
  output_w32(addr);
}
emit_movss_load(unsigned int addr,unsigned int ssereg)2670 void emit_movss_load(unsigned int addr,unsigned int ssereg)
2671 {
2672 assem_debug("movss (%%%s),xmm%d\n",regname[addr],ssereg);
2673 assert(ssereg<8);
2674 output_byte(0xf3);
2675 output_byte(0x0f);
2676 output_byte(0x10);
2677 if(addr!=EBP) output_modrm(0,addr,ssereg);
2678 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2679 }
emit_movsd_load(unsigned int addr,unsigned int ssereg)2680 void emit_movsd_load(unsigned int addr,unsigned int ssereg)
2681 {
2682 assem_debug("movsd (%%%s),xmm%d\n",regname[addr],ssereg);
2683 assert(ssereg<8);
2684 output_byte(0xf2);
2685 output_byte(0x0f);
2686 output_byte(0x10);
2687 if(addr!=EBP) output_modrm(0,addr,ssereg);
2688 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2689 }
emit_movd_store(unsigned int ssereg,unsigned int addr)2690 void emit_movd_store(unsigned int ssereg,unsigned int addr)
2691 {
2692 assem_debug("movd xmm%d,(%%%s)\n",ssereg,regname[addr]);
2693 assert(ssereg<8);
2694 output_byte(0x66);
2695 output_byte(0x0f);
2696 output_byte(0x7e);
2697 if(addr!=EBP) output_modrm(0,addr,ssereg);
2698 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2699 }
// Emit the register-to-register `cvttps2dq xmmA,xmmB` (F3 0F 5B /r).
// ssereg1 goes in the r/m field and ssereg2 in the reg field; both 0-7.
void emit_cvttps2dq(unsigned int ssereg1,unsigned int ssereg2)
{
  assem_debug("cvttps2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
  assert(ssereg1<8);
  assert(ssereg2<8);
  output_byte(0xf3); // prefix
  output_byte(0x0f);
  output_byte(0x5b);
  output_modrm(3,ssereg1,ssereg2);
}
// Emit the register-to-register `cvttpd2dq xmmA,xmmB` (66 0F E6 /r).
// ssereg1 goes in the r/m field and ssereg2 in the reg field; both 0-7.
void emit_cvttpd2dq(unsigned int ssereg1,unsigned int ssereg2)
{
  assem_debug("cvttpd2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
  assert(ssereg1<8);
  assert(ssereg2<8);
  output_byte(0x66); // prefix
  output_byte(0x0f);
  output_byte(0xe6);
  output_modrm(3,ssereg1,ssereg2);
}
2720
// Return the number of set bits in reglist.
unsigned int count_bits(u32 reglist)
{
  int set_bits=0;
  for(;reglist!=0;reglist>>=1) {
    if(reglist&1) set_bits++;
  }
  return set_bits;
}
2731
2732 // Save registers before function call
2733 // This code is executed infrequently so we try to minimize code size
2734 // by pushing registers onto the stack instead of writing them to their
2735 // usual locations
save_regs(u32 reglist)2736 void save_regs(u32 reglist)
2737 {
2738 reglist&=0x7; // only save the caller-save registers, %eax, %ecx, %edx
2739 int hr;
2740 int count=count_bits(reglist);
2741 if(count) {
2742 for(hr=0;hr<HOST_REGS;hr++) {
2743 if(hr!=EXCLUDE_REG) {
2744 if((reglist>>hr)&1) {
2745 emit_pushreg(hr);
2746 }
2747 }
2748 }
2749 }
2750 if(slave) emit_addimm(ESP,-(4-count)*4,ESP); // slave has master's return address on stack
2751 else emit_addimm(ESP,-(5-count)*4,ESP);
2752 }
2753 // Restore registers after function call
restore_regs(u32 reglist)2754 void restore_regs(u32 reglist)
2755 {
2756 int hr;
2757 reglist&=0x7; // only save the caller-save registers, %eax, %ecx, %edx
2758 int count=count_bits(reglist);
2759 if(slave) emit_addimm(ESP,(4-count)*4,ESP);
2760 else emit_addimm(ESP,(5-count)*4,ESP);
2761 if(count) {
2762 for(hr=HOST_REGS-1;hr>=0;hr--) {
2763 if(hr!=EXCLUDE_REG) {
2764 if((reglist>>hr)&1) {
2765 emit_popreg(hr);
2766 }
2767 }
2768 }
2769 }
2770 }
2771
2772 /* Stubs/epilogue */
2773
// Emit the stub for a jump that leaves the current block.
//   addr   - address of the already-emitted branch instruction
//   target - value loaded into EAX before entering dyna_linker
// EBX receives the address just past the branch opcode, i.e. where its
// displacement field starts: 2 bytes in for a 0F 8x branch, 1 byte in for
// E8/E9.
void emit_extjump(pointer addr, int target)
{
  const u8 *insn=(const u8 *)addr;
  if(insn[0]!=0x0f)
  {
    assert(insn[0]==0xe8||insn[0]==0xe9);
    addr+=1;
  }
  else
  {
    assert(insn[1]>=0x80&&insn[1]<=0x8f);
    addr+=2;
  }
  emit_movimm(target,EAX);
  emit_movimm(addr,EBX);
#ifdef DEBUG_CYCLE_COUNT
  emit_readword((int)&last_count,ECX);
  emit_add(HOST_CCREG,ECX,HOST_CCREG);
  emit_readword((int)&next_interupt,ECX);
  emit_writeword(HOST_CCREG,(int)&Count);
  emit_sub(HOST_CCREG,ECX,HOST_CCREG);
  emit_writeword(ECX,(int)&last_count);
#endif
  emit_jmp((pointer)dyna_linker);
}
2803
// Emit the out-of-line slow path for a memory read.
// stubs[n] fields read here: [0] stub type, [1] branch to patch so it lands
// on this stub, [2] address to jump back to, [3] instruction index,
// [4] host register holding the address, [5] struct regstat pointer,
// [7] register list passed to save_regs/restore_regs.
// The address is moved to EAX, the matching MappedMemoryRead*Nocache
// routine is called, and the result in EAX is moved (sign-extended for
// byte/word loads) into the destination host register.
void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*2);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u32 reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rt;

  // Destination register; a TBIT target uses the register mapped to -1
  rt=get_reg(i_regmap,rt1[i]==TBIT?-1:rt1[i]);
  assert(rs>=0);
  // addr is only used for the assertion below
  if(addr<0) addr=rt;
  if(addr<0) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  save_regs(reglist);
  if(rs!=EAX) emit_mov(rs,EAX);
  // Byte reads flip bit 0 of the address before the call
  if(type==LOADB_STUB) emit_xorimm(EAX,1,EAX);

  //if(i_regmap[HOST_CCREG]==CCREG) emit_storereg(CCREG,HOST_CCREG);//DEBUG
  /*if(i_regmap[HOST_CCREG]==CCREG) {
    emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(stubs[n][6]),HOST_CCREG);
    output_byte(0x03);
    output_modrm(1,4,HOST_CCREG);
    output_sib(0,4,4);
    output_byte(12+16);
    //emit_writeword(HOST_CCREG,(int)&MSH2->cycles);
    emit_writeword(HOST_CCREG,slave?(int)&SSH2->cycles:(int)&MSH2->cycles);
    output_byte(0x2B);
    output_modrm(1,4,HOST_CCREG);
    output_sib(0,4,4);
    output_byte(12+16);
    emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]),HOST_CCREG);
  }
  if(i_regmap[HOST_CCREG]!=CCREG) {
    emit_loadreg(CCREG,ECX);
    emit_addimm(ECX,CLOCK_DIVIDER*(stubs[n][6]),ECX);
    output_byte(0x03);
    output_modrm(1,4,ECX);
    output_sib(0,4,4);
    output_byte(12+16);
    //emit_writeword(ECX,(int)&MSH2->cycles);
    emit_writeword(ECX,slave?(int)&SSH2->cycles:(int)&MSH2->cycles);
  }
  /*
  int temp;
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    if(addr==HOST_CCREG)
    {
      cc=0;temp=1;
      assert(cc!=HOST_CCREG);
      assert(temp!=HOST_CCREG);
      emit_loadreg(CCREG,cc);
    }
    else
    {
      cc=HOST_CCREG;
      emit_loadreg(CCREG,cc);
      temp=!addr;
    }
  }
  else
  {
    temp=!addr;
  }*/
  if(type==LOADB_STUB)
    emit_call((int)MappedMemoryReadByteNocache);
  if(type==LOADW_STUB)
    emit_call((int)MappedMemoryReadWordNocache);
  if(type==LOADL_STUB)
    emit_call((int)MappedMemoryReadLongNocache);
  if(type==LOADS_STUB)
  {
    // RTE instruction, pop PC and SR from stack
    // Two longword reads: the first result goes to the RTEMP register (pc),
    // the second (from rs+4) goes to rt. The word at 0(%esp) is used as
    // scratch for values that the calls would otherwise clobber.
    int pc=get_reg(i_regmap,RTEMP);
    assert(pc>=0);
    // Keep a copy of the address if it lives in a caller-saved register
    if(rs==EAX||rs==ECX||rs==EDX)
      emit_writeword_indexed(rs,0,ESP);
    emit_call((int)MappedMemoryReadLongNocache);
    if(rs==ECX||rs==EDX)
      emit_readword_indexed(0,ESP,rs);
    if(pc==EAX) {
      // Park the first result; it is reloaded into pc after the second call
      // NOTE(review): nothing visible here loads EAX with rs+4 before the
      // second call on this path -- confirm pc==EAX cannot occur or that
      // this is intended.
      emit_writeword_indexed(EAX,0,ESP);
    }
    else
    {
      if(pc==ECX||pc==EDX)
        emit_writeword_indexed(EAX,0,ESP);
      else
        emit_mov(EAX,pc);
      if(rs==EAX) {
        // NOTE(review): if pc is ECX/EDX the store just above has already
        // overwritten the address saved at 0(%esp) -- confirm that
        // combination cannot occur.
        emit_readword_indexed(0,ESP,EAX);
        emit_addimm(EAX,4,EAX);
      }else
        emit_addimm(rs,4,EAX);
    }
    emit_call((int)MappedMemoryReadLongNocache);
    assert(rt>=0);
    if(rt!=EAX) emit_mov(EAX,rt);
    // Fetch the parked first result into pc if it could not stay in a register
    if(pc==EAX||pc==ECX||pc==EDX)
      emit_readword_indexed(0,ESP,pc);
  }
  else if(type==LOADB_STUB)
  {
    // sign-extend the byte result into rt
    if(rt>=0) emit_movsbl_reg(EAX,rt);
  }
  else if(type==LOADW_STUB)
  {
    // sign-extend the word result into rt
    if(rt>=0) emit_movswl_reg(EAX,rt);
  }
  else
  {
    if(rt!=EAX&&rt>=0) emit_mov(EAX,rt);
  }
  restore_regs(reglist);
  // Advance the address register past the two longwords that were read
  if(type==LOADS_STUB) emit_addimm(rs,8,rs);
  emit_jmp(stubs[n][2]); // return address
}
2926
// Emit an inline (not out-of-line) memory read from the constant address
// addr. The result lands in the host register mapped to target, or the one
// mapped to -1 when target has no register. Byte and word results are
// sign-extended. LOADS_STUB is not supported here. i and adj are unused.
void inline_readstub(int type, int i, u32 addr, signed char regmap[], int target, int adj, u32 reglist)
{
  int rt;
  assem_debug("inline_readstub\n");
  rt=get_reg(regmap,target);
  if(rt<0) rt=get_reg(regmap,-1);
  assert(rt>=0);
  save_regs(reglist);
  emit_movimm(addr,EAX);
  if(type==LOADB_STUB)
  {
    emit_call((int)MappedMemoryReadByteNocache);
    if(rt>=0) emit_movsbl_reg(EAX,rt);
  }
  else if(type==LOADW_STUB)
  {
    emit_call((int)MappedMemoryReadWordNocache);
    if(rt>=0) emit_movswl_reg(EAX,rt);
  }
  else
  {
    if(type==LOADL_STUB)
      emit_call((int)MappedMemoryReadLongNocache);
    assert(type!=LOADS_STUB);
    if(rt!=EAX&&rt>=0) emit_mov(EAX,rt);
  }
  restore_regs(reglist);
}
2962
do_writestub(int n)2963 void do_writestub(int n)
2964 {
2965 assem_debug("do_writestub %x\n",start+stubs[n][3]*2);
2966 set_jump_target(stubs[n][1],(int)out);
2967 int type=stubs[n][0];
2968 int i=stubs[n][3];
2969 int rs=stubs[n][4];
2970 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2971 u32 reglist=stubs[n][7];
2972 signed char *i_regmap=i_regs->regmap;
2973 int addr=get_reg(i_regmap,AGEN1+(i&1));
2974 int rt=get_reg(i_regmap,rs1[i]);
2975 assert(rs>=0);
2976 assert(rt>=0);
2977 if(addr<0) addr=get_reg(i_regmap,-1);
2978 assert(addr>=0);
2979 save_regs(reglist);
2980 // "FASTCALL" api: address in eax, data in edx
2981 if(rs!=EAX) {
2982 if(rt==EAX) {
2983 if(rs==EDX) emit_xchg(EAX,EDX);
2984 else {
2985 emit_mov(rt,EDX);
2986 emit_mov(rs,EAX);
2987 }
2988 }
2989 else {
2990 emit_mov(rs,EAX);
2991 if(rt!=EDX) emit_mov(rt,EDX);
2992 }
2993 }
2994 else if(rt!=EDX) emit_mov(rt,EDX);
2995 //if(type==STOREB_STUB) emit_xorimm(EAX,1,EAX); // WriteInvalidateByteSwapped does this
2996
2997 //if(i_regmap[HOST_CCREG]==CCREG) emit_storereg(CCREG,HOST_CCREG);//DEBUG
2998 /*if(i_regmap[HOST_CCREG]==CCREG) {
2999 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(stubs[n][6]),HOST_CCREG);
3000 output_byte(0x03);
3001 output_modrm(1,4,HOST_CCREG);
3002 output_sib(0,4,4);
3003 output_byte(12+16);
3004 //emit_writeword(HOST_CCREG,(int)&MSH2->cycles);
3005 emit_writeword(HOST_CCREG,slave?(int)&SSH2->cycles:(int)&MSH2->cycles);
3006 output_byte(0x2B);
3007 output_modrm(1,4,HOST_CCREG);
3008 output_sib(0,4,4);
3009 output_byte(12+16);
3010 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]),HOST_CCREG);
3011 }
3012 if(i_regmap[HOST_CCREG]!=CCREG) {
3013 emit_loadreg(CCREG,ECX);
3014 emit_addimm(ECX,CLOCK_DIVIDER*(stubs[n][6]),ECX);
3015 output_byte(0x03);
3016 output_modrm(1,4,ECX);
3017 output_sib(0,4,4);
3018 output_byte(12+16);
3019 //emit_writeword(ECX,(int)&MSH2->cycles);
3020 emit_writeword(ECX,slave?(int)&SSH2->cycles:(int)&MSH2->cycles);
3021 }
3022 //ds=i_regs!=®s[i];
3023 //int real_rs=get_reg(i_regmap,rs2[i]);
3024 //if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
3025 //wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3026
3027 /*int temp;
3028 int cc=get_reg(i_regmap,CCREG);
3029 if(cc<0) {
3030 if(addr==HOST_CCREG)
3031 {
3032 cc=0;temp=1;
3033 assert(cc!=HOST_CCREG);
3034 assert(temp!=HOST_CCREG);
3035 emit_loadreg(CCREG,cc);
3036 }
3037 else
3038 {
3039 cc=HOST_CCREG;
3040 emit_loadreg(CCREG,cc);
3041 temp=!addr;
3042 }
3043 }
3044 else
3045 {
3046 temp=!addr;
3047 }*/
3048 if(type==STOREB_STUB)
3049 emit_call((int)WriteInvalidateByteSwapped);
3050 if(type==STOREW_STUB)
3051 emit_call((int)WriteInvalidateWord);
3052 if(type==STOREL_STUB)
3053 emit_call((int)WriteInvalidateLong);
3054
3055 restore_regs(reglist);
3056 emit_jmp(stubs[n][2]); // return address
3057 }
3058
// Emit an inline memory write to the constant address addr. The data comes
// from the host register mapped to target. i and adj are unused.
void inline_writestub(int type, int i, u32 addr, signed char regmap[], int target, int adj, u32 reglist)
{
  int rt;
  assem_debug("inline_writestub\n");
  rt=get_reg(regmap,target);
  assert(rt>=0);
  save_regs(reglist);
  // "FASTCALL" api: address in eax, data in edx
  if(rt!=EDX) emit_mov(rt,EDX);
  emit_movimm(addr,EAX); // FIXME - should be able to move the existing value
  if(type==STOREB_STUB)
    emit_call((int)WriteInvalidateByte);
  else if(type==STOREW_STUB)
    emit_call((int)WriteInvalidateWord);
  else if(type==STOREL_STUB)
    emit_call((int)WriteInvalidateLong);
  restore_regs(reglist);
}
3078
do_rmwstub(int n)3079 void do_rmwstub(int n)
3080 {
3081 assem_debug("do_rmwstub %x\n",start+stubs[n][3]*2);
3082 set_jump_target(stubs[n][1],(int)out);
3083 int type=stubs[n][0];
3084 int i=stubs[n][3];
3085 int rs=stubs[n][4];
3086 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3087 u32 reglist=stubs[n][7];
3088 signed char *i_regmap=i_regs->regmap;
3089 int addr=get_reg(i_regmap,AGEN1+(i&1));
3090 //int rt=get_reg(i_regmap,rs1[i]);
3091 assert(rs>=0);
3092 //assert(rt>=0);
3093 if(addr<0) addr=get_reg(i_regmap,-1);
3094 assert(addr>=0);
3095 save_regs(reglist);
3096 // "FASTCALL" api: address in eax, data in edx
3097 emit_xorimm(rs,1,rs);
3098 if(rs!=EAX) emit_mov(rs,EAX);
3099 if(rs==EAX||rs==ECX||rs==EDX)
3100 emit_writeword_indexed(rs,0,ESP);
3101
3102 //if(i_regmap[HOST_CCREG]==CCREG) emit_storereg(CCREG,HOST_CCREG);//DEBUG
3103 /*if(i_regmap[HOST_CCREG]==CCREG) {
3104 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(stubs[n][6]),HOST_CCREG);
3105 output_byte(0x03);
3106 output_modrm(1,4,HOST_CCREG);
3107 output_sib(0,4,4);
3108 output_byte(12+16);
3109 emit_writeword(HOST_CCREG,(int)&MSH2->cycles);
3110 output_byte(0x2B);
3111 output_modrm(1,4,HOST_CCREG);
3112 output_sib(0,4,4);
3113 output_byte(12+16);
3114 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]),HOST_CCREG);
3115 }
3116 if(i_regmap[HOST_CCREG]!=CCREG) {
3117 emit_loadreg(CCREG,ECX);
3118 emit_addimm(ECX,CLOCK_DIVIDER*(stubs[n][6]),ECX);
3119 output_byte(0x03);
3120 output_modrm(1,4,ECX);
3121 output_sib(0,4,4);
3122 output_byte(12+16);
3123 emit_writeword(ECX,(int)&MSH2->cycles);
3124 }*/
3125 emit_call((int)MappedMemoryReadByteNocache);
3126 emit_mov(EAX,EDX);
3127 if(rs==EAX||rs==ECX||rs==EDX)
3128 emit_readword_indexed(0,ESP,EAX);
3129 else
3130 emit_mov(rs,EAX);
3131 if(type==RMWA_STUB)
3132 emit_andimm(EDX,imm[i],EDX);
3133 if(type==RMWX_STUB)
3134 emit_xorimm(EDX,imm[i],EDX);
3135 if(type==RMWO_STUB)
3136 emit_orimm(EDX,imm[i],EDX);
3137 if(type==RMWT_STUB) { // TAS.B
3138 //emit_writeword_indexed(EDX,0,ESP);
3139 emit_writeword(EDX,(pointer)&rmw_temp);
3140 emit_orimm(EDX,0x80,EDX);
3141 }
3142 //emit_call((int)MappedMemoryWriteByte);
3143 emit_call((int)WriteInvalidateByte);
3144
3145 restore_regs(reglist);
3146
3147 if(opcode2[i]==11) { // TAS.B
3148 signed char sr;
3149 sr=get_reg(i_regs->regmap,SR);
3150 assert(sr>=0); // Liveness analysis?
3151 emit_andimm(sr,~1,sr);
3152 //assem_debug("cmp $%d,%d+%%%s\n",1,-16,regname[ESP]);
3153 //output_byte(0x80);
3154 //output_modrm(1,4,7);
3155 //output_sib(0,4,4);
3156 //output_byte(-16);
3157 //output_byte(1);
3158 emit_cmpmem_imm_byte((pointer)&rmw_temp,1);
3159 emit_adcimm(0,sr);
3160 }
3161 emit_jmp(stubs[n][2]); // return address
3162 }
3163
do_unalignedwritestub(int n)3164 void do_unalignedwritestub(int n)
3165 {
3166 set_jump_target(stubs[n][1],(int)out);
3167 output_byte(0xCC);
3168 emit_jmp(stubs[n][2]); // return address
3169 }
3170
// Debug helper: prints seven of the register values passed as arguments
// (esp is accepted but not printed) plus, in parentheses, the word stored
// just below the first argument in memory.
// NOTE(review): the parameter order looks like it matches a `pusha` frame,
// and (&edi)[-1] presumably fetches the return address sitting below the
// arguments on the stack -- confirm; indexing outside the parameter is
// undefined behaviour in standard C and depends on the calling convention.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
}
3175
do_dirty_stub(int i)3176 int do_dirty_stub(int i)
3177 {
3178 assem_debug("do_dirty_stub %x\n",start+i*2);
3179 u32 alignedlen=((((u32)source)+slen*2+2)&~2)-(u32)alignedsource;
3180 emit_pushimm(start+i*2+slave);
3181 emit_movimm(((u32)source)&~3,EAX); //alignedsource
3182 emit_movimm((u32)copy,EBX);
3183 emit_movimm((((u32)source+slen*2+2)&~3)-((u32)source&~3),ECX);
3184 emit_call((int)&verify_code);
3185 emit_addimm(ESP,4,ESP);
3186 int entry=(int)out;
3187 load_regs_entry(i);
3188 if(entry==(int)out) entry=instr_addr[i];
3189 emit_jmp(instr_addr[i]);
3190 return entry;
3191 }
3192
3193 /* Memory Map */
3194
// Emit the memory_map lookup for a read.
// Constant addresses (c non-zero) get no mapping: returns -1 and emits
// nothing. Otherwise emits map = memory_map[s>>12] and, when x is
// non-zero, ar = s^x; returns map. cache, a, shift and addr are unused.
int do_map_r(int s,int ar,int map,int cache,int x,int a,int shift,int c,u32 addr)
{
  if(c) return -1; // No mapping
  if(s!=map) emit_mov(s,map);
  emit_shrimm(map,12,map);
  // Schedule this while we wait on the load
  if(x) emit_xorimm(s,x,ar);
  emit_movmem_indexedx4((int)memory_map,map,map);
  return map;
}
// For non-constant addresses, emit `test map,map` followed by a `js` with a
// placeholder target, and record the address of that branch in *jaddr.
// Constant addresses (c non-zero) emit nothing. Returns map; addr is unused.
int do_map_r_branch(int map, int c, u32 addr, int *jaddr)
{
  if(c) return map;
  emit_test(map,map);
  *jaddr=(int)out; // position of the js, for later patching
  emit_js(0);
  return map;
}
3224
// Combine the address register with a read mapping via emit_leairrx4
// (the name suggests ar = ar + map*4). Nothing is emitted when there is no
// mapping (map negative).
void gen_tlb_addr_r(int ar, int map) {
  if(map<0) return;
  emit_leairrx4(0,ar,map,ar);
}
3230
// Emit the memory_map lookup for a write.
// For a constant address (c non-zero) the entry is read directly when
// can_direct_write(addr) holds; otherwise there is no mapping and -1 is
// returned. For a run-time address, emits map = memory_map[s>>12] and, when
// x is non-zero, ar = s^x. In both mapped cases map is then shifted left by
// 2 and returned. cache is unused.
int do_map_w(int s,int ar,int map,int cache,int x,int c,u32 addr)
{
  if(!c) {
    if(s!=map) emit_mov(s,map);
    emit_shrimm(map,12,map);
    // Schedule this while we wait on the load
    if(x) emit_xorimm(s,x,ar);
    emit_movmem_indexedx4((int)memory_map,map,map);
  }
  else if(can_direct_write(addr)) {
    emit_readword((int)(memory_map+(addr>>12)),map);
  }
  else {
    return -1; // No mapping
  }
  emit_shlimm(map,2,map);
  return map;
}
// Emit a `jc` with a placeholder target and record its address in *jaddr,
// unless the address is a constant that cannot be written directly.
// Presumably the carry tested is the one left by the shift at the end of
// do_map_w -- confirm against the caller. map is unused.
void do_map_w_branch(int map, int c, u32 addr, int *jaddr)
{
  if(c&&!can_direct_write(addr)) return;
  *jaddr=(int)out; // position of the jc, for later patching
  emit_jc(0);
}
3258
// Combine the address register with a write mapping via emit_leairrx1
// (the name suggests ar = ar + map). Nothing is emitted when there is no
// mapping (map negative).
void gen_tlb_addr_w(int ar, int map) {
  if(map<0) return;
  emit_leairrx1(0,ar,map,ar);
}
3264
// Intentionally empty: x86 does not need a mapping constant generated.
void generate_map_const(u32 addr,int reg)
{
  // (other back ends would compute memory_map+(addr>>12) here)
}
3269
3270 /* Special assem */
3271
// Load the hash mask 0xf8 into host register r.
void do_preload_rhash(int r) {
  const int hash_mask=0xf8;
  emit_movimm(hash_mask,r);
}
3275
// Intentionally empty: x86 does not need the hash table base preloaded.
void do_preload_rhtbl(int r)
{
}
3279
// Emit rh = rs & rh, masking rs with the value already held in rh.
void do_rhash(int rs,int rh)
{
  emit_and(rs,rh,rh);
}
3283
// Intentionally empty on x86: the load and the compare are combined into a
// single instruction, emitted by do_miniht_jump.
void do_miniht_load(int ht,int rh)
{
}
3288
do_miniht_jump(int rs,int rh,int ht)3289 void do_miniht_jump(int rs,int rh,int ht) {
3290 emit_cmpmem_indexed(slave?(u32)mini_ht_slave:(u32)mini_ht_master,rh,rs);
3291 emit_jne(jump_vaddr_reg[slave][rs]);
3292 emit_jmpmem_indexed(slave?(u32)mini_ht_slave+4:(u32)mini_ht_master+4,rh);
3293 }
3294
do_miniht_insert(int return_address,int rt,int temp)3295 void do_miniht_insert(int return_address,int rt,int temp) {
3296 emit_movimm(return_address,rt); // PC into link register
3297 //emit_writeword_imm(return_address,(int)&mini_ht[(return_address&0xFF)>>8][0]);
3298 if(slave) emit_writeword(rt,(int)&mini_ht_slave[(return_address&0xFF)>>3][0]);
3299 else emit_writeword(rt,(int)&mini_ht_master[(return_address&0xFF)>>3][0]);
3300 add_to_linker((int)out,return_address,1);
3301 if(slave) emit_writeword_imm(0,(int)&mini_ht_slave[(return_address&0xFF)>>3][1]);
3302 else emit_writeword_imm(0,(int)&mini_ht_master[(return_address&0xFF)>>3][1]);
3303 }
3304
// Write back host registers that hold a value still needed.
// For every host register except EXCLUDE_REG, emits a store of the guest
// register pre[hr] when all of these hold:
//   - a guest register below TBIT is mapped (0 <= reg < TBIT),
//   - its bit in u is clear,
//   - the host register is dirty in dirty_pre but not in dirty.
// entry is unused.
void wb_valid(signed char pre[],signed char entry[],u32 dirty_pre,u32 dirty,u64 u)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    int reg;
    if(hr==EXCLUDE_REG) continue;
    reg=pre[hr];
    // repairs the mangled `&®<TBIT` test, which was `&&reg<TBIT`
    if(reg<0||reg>=TBIT) continue;
    if(!(((~u)>>(reg&63))&1)) continue;
    if(((dirty_pre&~dirty)>>hr)&1) {
      emit_storereg(reg,hr);
    }
  }
}
3324
// Intentionally empty: x86 has no literal pool.
void literal_pool(int n)
{
}
// Intentionally empty: with no literal pool on x86 there is nothing to
// jump over.
void literal_pool_jumpover(int n)
{
}
3328
// CPU-architecture-specific initialization hook; x86 needs none, so this
// is intentionally empty.
void arch_init()
{
}
3331