1 /*
2 * compiler/codegen_x86.cpp - IA-32 and AMD64 code generator
3 *
4 * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS)
5 *
6 * Inspired by Christian Bauer's Basilisk II
7 *
8 * This file is part of the ARAnyM project which builds a new and powerful
9 * TOS/FreeMiNT compatible virtual machine running on almost any hardware.
10 *
11 * JIT compiler m68k -> IA-32 and AMD64
12 *
13 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
14 * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne
15 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 */
31
32 /* This should eventually end up in machdep/, but for now, x86 is the
33 only target, and it's easier this way... */
34
35 #include "flags_x86.h"
36
37 /*************************************************************************
38 * Some basic information about the the target CPU *
39 *************************************************************************/
40
/* Map the generic operand-width tags used throughout this file onto the
 * RR* register-operand classes declared by the code generator headers. */
#define R1 RR1
#define R2 RR2
#define R4 RR4

/* ModRM register-field encodings of the x86 general purpose registers. */
#define EAX_INDEX 0
#define ECX_INDEX 1
#define EDX_INDEX 2
#define EBX_INDEX 3
#define ESP_INDEX 4
#define EBP_INDEX 5
#define ESI_INDEX 6
#define EDI_INDEX 7
#if defined(CPU_x86_64)
/* AMD64 extended registers; addressing these requires a REX prefix. */
#define R8_INDEX 8
#define R9_INDEX 9
#define R10_INDEX 10
#define R11_INDEX 11
#define R12_INDEX 12
#define R13_INDEX 13
#define R14_INDEX 14
#define R15_INDEX 15
#endif
/* XXX this has to match X86_Reg8H_Base + 4 */
/* Synthetic indices for the legacy high-byte registers AH/CH/DH/BH. */
#define AH_INDEX (0x10+4+EAX_INDEX)
#define CH_INDEX (0x10+4+ECX_INDEX)
#define DH_INDEX (0x10+4+EDX_INDEX)
#define BH_INDEX (0x10+4+EBX_INDEX)
68
/* The register in which subroutines return an integer return value */
#define REG_RESULT EAX_INDEX

/* The registers subroutines take their first and second argument in */
#ifdef _WIN32
/* Handle the _fastcall parameters of ECX and EDX */
#define REG_PAR1 ECX_INDEX
#define REG_PAR2 EDX_INDEX
#elif defined(CPU_x86_64)
/* System V AMD64 ABI: first two integer arguments in RDI, RSI. */
#define REG_PAR1 EDI_INDEX
#define REG_PAR2 ESI_INDEX
#else
/* IA-32 regparm-style convention used by the generated code. */
#define REG_PAR1 EAX_INDEX
#define REG_PAR2 EDX_INDEX
#endif

#define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
#ifdef _WIN32
#define REG_PC_TMP ECX_INDEX
#else
#define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
#endif

#define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
			      -1 if any reg will do */
#define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
#define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */

#define STACK_ALIGN 16
/* Offset already on the stack at function entry: the pushed return address. */
#define STACK_OFFSET sizeof(void *)
#ifdef _WIN64
/* In the Microsoft x64 calling convention, it's the caller's responsibility
 * to allocate 32 bytes of "shadow space" on the stack right before calling
 * the function (regardless of the actual number of parameters used). */
#define STACK_SHADOW_SPACE 32
#else
#define STACK_SHADOW_SPACE 0
#endif
107
#if defined(CPU_x86_64)
/* Register R12 (and ESP) cannot be used with simple [r/m + disp32] addressing,
 * since r/m bits 100 implies SIB byte. Simplest fix is to not use these
 * registers. Also note that these registers are listed in the freescratch
 * function as well. */
uae_s8 always_used[] = { 4, 12, -1 };
/* Registers usable as byte / word operands on AMD64 (all but ESP, via REX). */
uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
#else
/* IA-32: ESP is never allocatable; only EAX-EBX have byte sub-registers. */
uae_s8 always_used[] = { 4, -1 };
uae_s8 can_byte[]={0,1,2,3,-1};
uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
#endif
121
#if USE_OPTIMIZED_CALLS
/* Make sure interpretive core does not use cpuopti */
uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
#error FIXME: code not ready
#else
/* cpuopti mutate instruction handlers to assume registers are saved
   by the caller */
/* Only the stack pointer (index 4) is treated as saved across calls. */
uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
#endif
131
/* This *should* be the same as call_saved. But:
   - We might not really know which registers are saved, and which aren't,
     so we need to preserve some, but don't want to rely on everyone else
     also saving those registers
   - Special registers (such like the stack pointer) should not be "preserved"
     by pushing, even though they are "saved" across function calls
*/
#if defined(CPU_x86_64)
#ifdef _WIN64
/* https://msdn.microsoft.com/en-us/library/6t169e9c.aspx:
 * "The registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are
 * considered nonvolatile and must be saved and restored by a function that
 * uses them". Also saving r11 for now (see comment below). */
/* Marked: rbx, rbp, rsi, rdi, r11, r12-r15 (rsp excluded, see above). */
static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1};
#else
/* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
/* preserve r11 because it's generally used to hold pointers to functions */
/* FIXME: not really sure what the point of saving r11 is (??). If functions
 * cannot assume calle preserves it, it will not be used across calls anyway? */
static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
#endif
#else
/* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
#endif
157
/* Whether classes of instructions do or don't clobber the native flags.
 * An empty definition means the instruction class leaves the host flags
 * intact; clobber_flags() forces any cached emulated flag state to be
 * spilled before such an instruction is emitted. */
#define CLOBBER_MOV
#define CLOBBER_LEA
#define CLOBBER_CMOV
#define CLOBBER_POP
#define CLOBBER_PUSH
#define CLOBBER_SUB clobber_flags()
#define CLOBBER_SBB clobber_flags()
#define CLOBBER_CMP clobber_flags()
#define CLOBBER_ADD clobber_flags()
#define CLOBBER_ADC clobber_flags()
#define CLOBBER_AND clobber_flags()
#define CLOBBER_OR clobber_flags()
#define CLOBBER_XOR clobber_flags()

#define CLOBBER_ROL clobber_flags()
#define CLOBBER_ROR clobber_flags()
#define CLOBBER_SHLL clobber_flags()
#define CLOBBER_SHRL clobber_flags()
#define CLOBBER_SHRA clobber_flags()
#define CLOBBER_TEST clobber_flags()
#define CLOBBER_CL16
#define CLOBBER_CL8
#define CLOBBER_SE32
#define CLOBBER_SE16
#define CLOBBER_SE8
#define CLOBBER_ZE32
#define CLOBBER_ZE16
#define CLOBBER_ZE8
/* SW16 is implemented with ROL (see raw_bswap_16) and so touches flags. */
#define CLOBBER_SW16 clobber_flags()
#define CLOBBER_SW32
#define CLOBBER_SETCC
#define CLOBBER_MUL clobber_flags()
#define CLOBBER_BT clobber_flags()
#define CLOBBER_BSF clobber_flags()
193
/* The older code generator is now deprecated. */
#define USE_NEW_RTASM 1

#if USE_NEW_RTASM

#if defined(CPU_x86_64)
#define X86_TARGET_64BIT		1
/* The address override prefix causes a 5 cycles penalty on Intel Core
   processors. Another solution would be to decompose the load in an LEA,
   MOV (to zero-extend), MOV (from memory): is it better? */
/* Prepended to memory-operand emitters so 64-bit builds use 32-bit
   effective addresses (0x67 address-size override). Note the trailing
   comma: "ADDR32 MOVLmr(...)" expands to two comma-joined expressions. */
#define ADDR32 x86_emit_byte(0x67),
#else
#define ADDR32
#endif
#define X86_FLAT_REGISTERS		0
#define X86_OPTIMIZE_ALU		1
#define X86_OPTIMIZE_ROTSHI		1
#include "codegen_x86.h"

/* Thin shims connecting the codegen_x86.h emitters to this JIT's buffer. */
#define x86_emit_byte(B)		emit_byte(B)
#define x86_emit_word(W)		emit_word(W)
#define x86_emit_long(L)		emit_long(L)
#define x86_emit_quad(Q)		emit_quad(Q)
#define x86_get_target()		get_target()
#define x86_emit_failure(MSG)	jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
219
/* Emit the 0x67 address-size override prefix on 64-bit builds; no-op on IA-32. */
static inline void x86_64_addr32(void)
{
#ifdef CPU_x86_64
	emit_byte(0x67);
#endif
}
226
/* Emit a REX prefix if the base register *b is one of R8-R15, folding the
 * register number back into the 3-bit ModRM range and setting REX.B.
 * NOTE(review): only REX.B is handled here — the w, r and x parameters are
 * currently ignored, so callers needing REX.W/R/X cannot rely on this
 * helper; presumably it is only used for simple base-register addressing.
 * Confirm against call sites before extending. */
static inline void x86_64_rex(bool w, uae_u32 *r, uae_u32 *x, uae_u32 *b)
{
#ifdef CPU_x86_64
	int rex_byte = 0x40;
	if (*b >= R8_INDEX) {
		*b -= R8_INDEX;
		rex_byte |= 1;	/* REX.B: extension of the ModRM r/m (base) field */
	}
	if (rex_byte != 0x40) {
		emit_byte(rex_byte);
	}
#endif
}
240
/* Emit the instruction prefixes needed for a memory operand: an optional
 * 0x67 address-size override followed by a REX prefix (see x86_64_rex for
 * its current limitations). Both are no-ops on IA-32 builds. */
static inline void x86_64_prefix(
	bool addr32, bool w, uae_u32 *r, uae_u32 *x, uae_u32 *b)
{
	if (addr32) {
		x86_64_addr32();
	}
	x86_64_rex(w, r, x, b);
}
249
// Some mappings to mark compemu_support calls as only used by compemu
// These are still mainly x86 minded. Should be more CPU independent in the future
// Each compemu_raw_* name forwards 1:1 to the raw_* emitter defined below.
#define compemu_raw_add_l_mi(a,b) raw_add_l_mi(a,b)
#define compemu_raw_and_l_ri(a,b) raw_and_l_ri(a,b)
#define compemu_raw_bswap_32(a) raw_bswap_32(a)
#define compemu_raw_bt_l_ri(a,b) raw_bt_l_ri(a,b)
#define compemu_raw_call(a) raw_call(a)
#define compemu_raw_cmov_l_rm_indexed(a,b,c,d,e) raw_cmov_l_rm_indexed(a,b,c,d,e)
#define compemu_raw_cmp_l_mi(a,b) raw_cmp_l_mi(a,b)
/* NOTE(review): the _mi8 variant maps to the plain raw_cmp_l_mi. */
#define compemu_raw_cmp_l_mi8(a,b) raw_cmp_l_mi(a,b)
#define compemu_raw_jcc_b_oponly(a) raw_jcc_b_oponly(a)
#define compemu_raw_jcc_l_oponly(a) raw_jcc_l_oponly(a)
#define compemu_raw_jl(a) raw_jl(a)
#define compemu_raw_jmp(a) raw_jmp(a)
#define compemu_raw_jmp_m_indexed(a,b,c) raw_jmp_m_indexed(a,b,c)
#define compemu_raw_jmp_r(a) raw_jmp_r(a)
#define compemu_raw_jnz(a) raw_jnz(a)
#define compemu_raw_jz_b_oponly() raw_jz_b_oponly()
#define compemu_raw_lea_l_brr(a,b,c) raw_lea_l_brr(a,b,c)
#define compemu_raw_lea_l_brr_indexed(a,b,c,d,e) raw_lea_l_brr_indexed(a,b,c,d,e)
#define compemu_raw_mov_b_mr(a,b) raw_mov_b_mr(a,b)
#define compemu_raw_mov_l_mi(a,b) raw_mov_l_mi(a,b)
#define compemu_raw_mov_l_mr(a,b) raw_mov_l_mr(a,b)
#define compemu_raw_mov_l_ri(a,b) raw_mov_l_ri(a,b)
#define compemu_raw_mov_l_rm(a,b) raw_mov_l_rm(a,b)
#define compemu_raw_mov_l_rr(a,b) raw_mov_l_rr(a,b)
#define compemu_raw_mov_w_mr(a,b) raw_mov_w_mr(a,b)
#define compemu_raw_sub_l_mi(a,b) raw_sub_l_mi(a,b)
#define compemu_raw_test_l_rr(a,b) raw_test_l_rr(a,b)
#define compemu_raw_zero_extend_16_rr(a,b) raw_zero_extend_16_rr(a,b)
#define compemu_raw_lea_l_rr_indexed(a,b,c,d) raw_lea_l_rr_indexed(a,b,c,d)
281
/* Abort compilation with a diagnostic; installed as x86_emit_failure so the
 * opcode emitters report where (file/line/function) an encoding failed. */
static void jit_fail(const char *msg, const char *file, int line, const char *function)
{
	jit_abort("failure in function %s from file %s at line %d: %s",
			function, file, line, msg);
}
287
/* Stack push/pop emitters. On 64-bit builds the natural-width (quad)
 * encodings are used, since 32-bit push/pop do not exist on AMD64. */
LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
{
#if defined(CPU_x86_64)
	PUSHQr(r);
#else
	PUSHLr(r);
#endif
}
LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))

LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
{
#if defined(CPU_x86_64)
	POPQr(r);
#else
	POPLr(r);
#endif
}
LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))

/* Pop directly into an absolute memory location. */
LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
{
#if defined(CPU_x86_64)
	POPQm(d, X86_NOREG, X86_NOREG, 1);
#else
	POPLm(d, X86_NOREG, X86_NOREG, 1);
#endif
}
LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
317
/* Bit test (BT) and bit test-and-modify (BTC/BTR/BTS) emitters, with the
 * bit number given either as an immediate (_ri) or a register (_rr).
 * Note the emitter macros take (bit, reg) while these wrappers take
 * (reg, bit), hence the swapped argument order. */
LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
{
	BTLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
{
	BTLrr(b, r);
}
LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))

LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
{
	BTCLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
{
	BTCLrr(b, r);
}
LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))

LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
{
	BTRLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
{
	BTRLrr(b, r);
}
LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))

LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
{
	BTSLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
{
	BTSLrr(b, r);
}
LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
365
/* Word subtract-immediate and immediate/memory move emitters.
 * ADDR32 forces 32-bit effective addresses on 64-bit builds. */
LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
{
	SUBWir(i, d);
}
LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))

/* Load a long from absolute address s into register d. */
LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
{
	ADDR32 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))

/* Store immediate s (long/word/byte) to absolute address d. */
LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
{
	ADDR32 MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))

LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
{
	ADDR32 MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))

LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
{
	ADDR32 MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
395
/* Rotate and shift emitters. _ri variants take an immediate count;
 * _rr variants take the count in a byte register (must be CL on x86,
 * see SHIFTCOUNT_NREG). */
LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
{
	ADDR32 ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))

LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
{
	ROLBir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
{
	ROLWir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
{
	ROLLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
{
	ROLLrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
{
	ROLWrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
{
	ROLBrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
{
	SHLLrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
{
	SHLWrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
{
	SHLBrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
{
	RORBir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
{
	RORWir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
467
/* OR-from-memory plus the remaining rotate-right and register-count
 * shift-right (logical SHR / arithmetic SAR) emitters. */
LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
{
	ADDR32 ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
}
LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))

LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
{
	RORLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
{
	RORLrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
{
	RORWrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
{
	RORBrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
{
	SHRLrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
{
	SHRWrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
{
	SHRBrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))

/* Arithmetic (sign-preserving) right shifts. */
LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
{
	SARLrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
{
	SARWrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
{
	SARBrr(r, d);
}
LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
533
/* Shift-by-immediate emitters: SHL / SHR / SAR for long, word and byte. */
LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
{
	SHLLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
{
	SHLWir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
{
	SHLBir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
{
	SHRLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
{
	SHRWir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
{
	SHRBir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
{
	SARLir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
{
	SARWir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
{
	SARBir(i, r);
}
LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
587
/* Flag transfer and condition-code emitters. SAHF/LAHF move flags
 * through AH, hence the dummy AH register operand for the allocator. */
LOWFUNC(WRITE,NONE,1,raw_sahf,(R2))
{
	SAHF();
}
LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))

/* CPUID clobbers EAX-EDX; the dummy operand ties down EAX. */
LOWFUNC(NONE,NONE,1,raw_cpuid,(R4))
{
	CPUID();
}
LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))

LOWFUNC(READ,NONE,1,raw_lahf,(W2))
{
	LAHF();
}
LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))

/* Set byte register / memory byte to 0 or 1 according to condition cc. */
LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
{
	SETCCir(cc, d);
}
LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))

LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
{
	ADDR32 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
617
/* Conditional register-to-register move. When the host CPU lacks CMOV,
 * emit a short jump over a plain MOV instead: the Jcc displacement byte is
 * left as 0 and back-patched once the MOV's end address is known. */
LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
	if (have_cmov)
		CMOVLrr(cc, s, d);
	else { /* replacement using branch and mov */
		uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1;
		JCCSii(cc^1, 0);	/* cc^1 inverts the condition: skip the MOV */
		MOVLrr(s, d);
		*target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
	}
}
LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))

/* Bit-scan-forward: d = index of lowest set bit in s (ZF set if s == 0). */
LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
{
	BSFLrr(s, d);
}
LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
636
/* Sign/zero extension emitters. */
/* NOTE(review): MOVSLQrr is the 64-bit MOVSXD encoding — this emitter is
 * presumably only reached on CPU_x86_64 builds; confirm at call sites. */
LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
{
	MOVSLQrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))

LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
{
	MOVSWLrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))

LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
{
	MOVSBLrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))

LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
{
	MOVZWLrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))

LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
{
	MOVZBLrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
666
/* Multiply emitters. The widening 32x32->64 forms use the one-operand
 * IMUL/MUL, which architecturally requires EAX (low) and EDX (high) —
 * enforced here against MUL_NREG1/MUL_NREG2. */
LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
{
	IMULLrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))

LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
{
	if (d!=MUL_NREG1 || s!=MUL_NREG2) {
		jit_abort("Bad register in IMUL: d=%d, s=%d",d,s);
	}
	IMULLr(s);
}
LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))

LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
{
	if (d!=MUL_NREG1 || s!=MUL_NREG2) {
		jit_abort("Bad register in MUL: d=%d, s=%d",d,s);
	}
	MULLr(s);
}
LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))

/* Unsupported: x86 has no two-operand unsigned 32x32 MUL form. */
LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4, R4))
{
	abort(); /* %^$&%^$%#^ x86! */
}
LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))

LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
{
	MOVBrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))

LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
{
	MOVWrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
708
/* Scaled-index addressing moves: [baser + index*factor] (rrm/mrr) and
 * [base + baser + index*factor] (brrm/bmrr), loads and stores for
 * long/word/byte. ADDR32 keeps addressing 32-bit on 64-bit builds. */
LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	ADDR32 MOVLmr(0, baser, index, factor, d);
}
LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	ADDR32 MOVWmr(0, baser, index, factor, d);
}
LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	ADDR32 MOVBmr(0, baser, index, factor, d);
}
LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))

LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	ADDR32 MOVLrm(s, 0, baser, index, factor);
}
LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	ADDR32 MOVWrm(s, 0, baser, index, factor);
}
LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	ADDR32 MOVBrm(s, 0, baser, index, factor);
}
LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))

LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	ADDR32 MOVLrm(s, base, baser, index, factor);
}
LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
	ADDR32 MOVWrm(s, base, baser, index, factor);
}
LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
	ADDR32 MOVBrm(s, base, baser, index, factor);
}
LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))

LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
	ADDR32 MOVLmr(base, baser, index, factor, d);
}
LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))

LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
	ADDR32 MOVWmr(base, baser, index, factor, d);
}
LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))

LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
	ADDR32 MOVBmr(base, baser, index, factor, d);
}
LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
780
/* Load from [base + index*factor] with no base register, plus its
 * conditional variant (CMOV or branch-over-MOV fallback, displacement
 * byte back-patched as in raw_cmov_l_rr). */
LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
	ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
}
LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))

LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
{
	if (have_cmov)
		ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
	else { /* replacement using branch and mov */
		uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1;
		JCCSii(cond^1, 0);
		ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
		*target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
	}
}
LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
799
800 LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
801 {
802 if (have_cmov)
803 CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
804 else { /* replacement using branch and mov */
805 uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1;
806 JCCSii(cond^1, 0);
807 ADDR32 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
808 *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
809 }
810 }
811 LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
812
/* Base-plus-displacement loads: d = [s + offset], long/word/byte.
 * The rR and brR variants emit identical code here; they differ only in
 * how the higher-level compiler classifies the offset. */
LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
{
	ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))

LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
{
	ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))

LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
{
	ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))

LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
{
	ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))

LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
{
	ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))

LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
{
	ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
848
/* Base-plus-displacement stores: [d + offset] = immediate (Ri) or
 * register (Rr), long/word/byte. */
LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
{
	ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))

LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
{
	ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))

LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
{
	ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))

LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
{
	ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))

LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
{
	ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))

LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
{
	ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
884
/* LEA-based address arithmetic (no flags touched) and the bRr store
 * variants, which emit the same code as the Rr forms above. */
LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
{
	ADDR32 LEALmr(offset, s, X86_NOREG, 1, d);
}
LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))

LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
	ADDR32 LEALmr(offset, s, index, factor, d);
}
LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
	ADDR32 LEALmr(0, s, index, factor, d);
}
LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))

/* d = index * factor (no base register). */
LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
{
	ADDR32 LEALmr(0, X86_NOREG, index, factor, d);
}
LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))

LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
{
	ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))

LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
{
	ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))

LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
{
	ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
926
/* Byte swapping and the plain register/absolute-memory/immediate moves. */
LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
{
	BSWAPLr(r);
}
LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))

/* 16-bit byte swap via ROL by 8; clobbers flags (see CLOBBER_SW16). */
LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
{
	ROLWir(8, r);
}
LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))

LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
{
	MOVLrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))

LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
{
	ADDR32 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))

LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
{
	ADDR32 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))

LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
{
	ADDR32 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))

LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
{
	ADDR32 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))

LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
{
	ADDR32 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
}
LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))

LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
{
	MOVLir(s, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))

LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
{
	MOVWir(s, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))

LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
{
	MOVBir(s, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
992
/* Read-modify-write arithmetic on absolute memory with an immediate:
 * add-with-carry (consumes CF) and plain adds of long/word/byte width. */
LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
{
	ADDR32 ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))

LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
{
	ADDR32 ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))

LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
{
	ADDR32 ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))

LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
{
	ADDR32 ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
}
LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
1016
/* TEST emitters: set flags from d AND operand without storing the result. */
LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
{
	TESTLir(i, d);
}
LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))

LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
{
	TESTLrr(s, d);
}
LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))

LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
{
	TESTWrr(s, d);
}
LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))

LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
{
	TESTBrr(s, d);
}
LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
1040
1041 LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
1042 {
1043 XORLir(i, d);
1044 }
1045 LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
1046
1047 LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1048 {
1049 ANDLir(i, d);
1050 }
1051 LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1052
1053 LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1054 {
1055 ANDWir(i, d);
1056 }
1057 LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1058
1059 LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1060 {
1061 ANDLrr(s, d);
1062 }
1063 LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
1064
1065 LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1066 {
1067 ANDWrr(s, d);
1068 }
1069 LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
1070
1071 LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1072 {
1073 ANDBrr(s, d);
1074 }
1075 LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
1076
1077 LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1078 {
1079 ORLir(i, d);
1080 }
1081 LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
1082
1083 LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1084 {
1085 ORLrr(s, d);
1086 }
1087 LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
1088
1089 LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1090 {
1091 ORWrr(s, d);
1092 }
1093 LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
1094
1095 LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1096 {
1097 ORBrr(s, d);
1098 }
1099 LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
1100
1101 LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1102 {
1103 ADCLrr(s, d);
1104 }
1105 LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
1106
1107 LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1108 {
1109 ADCWrr(s, d);
1110 }
1111 LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
1112
1113 LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1114 {
1115 ADCBrr(s, d);
1116 }
1117 LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
1118
1119 LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1120 {
1121 ADDLrr(s, d);
1122 }
1123 LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
1124
1125 LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1126 {
1127 ADDWrr(s, d);
1128 }
1129 LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
1130
1131 LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1132 {
1133 ADDBrr(s, d);
1134 }
1135 LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
1136
1137 LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1138 {
1139 SUBLir(i, d);
1140 }
1141 LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
1142
1143 LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1144 {
1145 SUBBir(i, d);
1146 }
1147 LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1148
1149 LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1150 {
1151 ADDLir(i, d);
1152 }
1153 LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
1154
1155 LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1156 {
1157 ADDWir(i, d);
1158 }
1159 LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1160
1161 LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1162 {
1163 ADDBir(i, d);
1164 }
1165 LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1166
1167 LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1168 {
1169 SBBLrr(s, d);
1170 }
1171 LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
1172
1173 LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1174 {
1175 SBBWrr(s, d);
1176 }
1177 LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
1178
1179 LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1180 {
1181 SBBBrr(s, d);
1182 }
1183 LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
1184
1185 LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1186 {
1187 SUBLrr(s, d);
1188 }
1189 LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
1190
1191 LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1192 {
1193 SUBWrr(s, d);
1194 }
1195 LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
1196
1197 LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1198 {
1199 SUBBrr(s, d);
1200 }
1201 LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
1202
1203 LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1204 {
1205 CMPLrr(s, d);
1206 }
1207 LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
1208
1209 LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1210 {
1211 CMPLir(i, r);
1212 }
1213 LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
1214
1215 LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1216 {
1217 CMPWrr(s, d);
1218 }
1219 LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1220
1221 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1222 {
1223 ADDR32 CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
1224 }
1225 LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1226
1227 LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1228 {
1229 CMPBir(i, d);
1230 }
1231 LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1232
1233 LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1234 {
1235 CMPBrr(s, d);
1236 }
1237 LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
1238
1239 LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1240 {
1241 ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
1242 }
1243 LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
1244
1245 LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1246 {
1247 XORLrr(s, d);
1248 }
1249 LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
1250
1251 LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1252 {
1253 XORWrr(s, d);
1254 }
1255 LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
1256
1257 LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1258 {
1259 XORBrr(s, d);
1260 }
1261 LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
1262
1263 LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1264 {
1265 ADDR32 SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
1266 }
1267 LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
1268
1269 LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1270 {
1271 ADDR32 CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
1272 }
1273 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
1274
1275 LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1276 {
1277 XCHGLrr(r2, r1);
1278 }
1279 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
1280
1281 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1282 {
1283 XCHGBrr(r2, r1);
1284 }
1285 LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
1286
1287 LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
1288 {
1289 PUSHF();
1290 }
1291 LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
1292
1293 LOWFUNC(WRITE,READ,0,raw_popfl,(void))
1294 {
1295 POPF();
1296 }
1297 LENDFUNC(WRITE,READ,0,raw_popfl,(void))
1298
1299 /* Generate floating-point instructions */
/* Emit FADD with a 64-bit (double) memory operand at absolute address s:
   st(0) += *(double*)s. */
static inline void x86_fadd_m(MEMR s)
{
	ADDR32 FADDLm(s,X86_NOREG,X86_NOREG,1);
}
1304
1305 #else
1306
/* Encoding-size optimizations for the hand-rolled emitters below:
   use the shorter AL/AX/EAX-accumulator forms, imm8 sign-extended
   forms, and the 1-bit shift/rotate forms where applicable. */
const bool optimize_accum = true;
const bool optimize_imm8 = true;
const bool optimize_shift_once = true;
1310
1311 /*************************************************************************
1312 * Actual encoding of the instructions on the target CPU *
1313 *************************************************************************/
1314
/* True if r is the accumulator (EAX) — enables shorter encodings. */
static inline int isaccum(int r)
{
	return (r == EAX_INDEX);
}

/* True if x fits in a sign-extended 8-bit immediate. */
static inline int isbyte(uae_s32 x)
{
	return (x>=-128 && x<=127);
}

/* True if x fits in a sign-extended 16-bit immediate. */
static inline int isword(uae_s32 x)
{
	return (x>=-32768 && x<=32767);
}
1329
/* Stack and bit-test emitters, hand-encoded.  Bit ops use the 0F BA /n
   group for immediate bit numbers and 0F A3/AB/B3/BB for register bit
   numbers; ModRM is always register-direct (0xc0 | reg<<3 | rm). */

/* PUSH r32 (50+r). */
LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
{
	emit_byte(0x50+r);
}
LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))

/* POP r32 (58+r). */
LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
{
	emit_byte(0x58+r);
}
LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))

/* POP m32 at absolute address d (8F /0, disp32 form). */
LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
{
	emit_byte(0x8f);
	emit_byte(0x05);
	emit_long(d);
}
LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))

/* BT r32, imm8 — copies bit i of r into CF. */
LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
{
	emit_byte(0x0f);
	emit_byte(0xba);
	emit_byte(0xe0+r);
	emit_byte(i);
}
LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))

/* BT r32, r32 — bit number in b. */
LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
{
	emit_byte(0x0f);
	emit_byte(0xa3);
	emit_byte(0xc0+8*b+r);
}
LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))

/* BTC r32, imm8 — test bit into CF, then complement it. */
LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
{
	emit_byte(0x0f);
	emit_byte(0xba);
	emit_byte(0xf8+r);
	emit_byte(i);
}
LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))

/* BTC r32, r32. */
LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
{
	emit_byte(0x0f);
	emit_byte(0xbb);
	emit_byte(0xc0+8*b+r);
}
LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))


/* BTR r32, imm8 — test bit into CF, then reset it. */
LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
{
	emit_byte(0x0f);
	emit_byte(0xba);
	emit_byte(0xf0+r);
	emit_byte(i);
}
LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))

/* BTR r32, r32. */
LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
{
	emit_byte(0x0f);
	emit_byte(0xb3);
	emit_byte(0xc0+8*b+r);
}
LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))

/* BTS r32, imm8 — test bit into CF, then set it. */
LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
{
	emit_byte(0x0f);
	emit_byte(0xba);
	emit_byte(0xe8+r);
	emit_byte(i);
}
LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))

/* BTS r32, r32. */
LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
{
	emit_byte(0x0f);
	emit_byte(0xab);
	emit_byte(0xc0+8*b+r);
}
LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
1418
/* SUB r16, imm.  Chooses the shortest encoding: 83 /5 ib (sign-extended
   imm8), 2D iw (AX accumulator short form), or 81 /5 iw (general). */
LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
{
	emit_byte(0x66);	/* operand-size prefix: 16-bit */
	if (isbyte(i)) {
		emit_byte(0x83);
		emit_byte(0xe8+d);
		emit_byte(i);
	}
	else {
		if (optimize_accum && isaccum(d))
			emit_byte(0x2d);	/* SUB AX, imm16 — no ModRM needed */
		else {
			emit_byte(0x81);
			emit_byte(0xe8+d);
		}
		emit_word(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
1438
1439
/* Absolute-address moves and byte rotates; ModRM 0x05 selects the
   [disp32] addressing form. */

/* MOV r32, [s] (8B /r). */
LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
{
	emit_byte(0x8b);
	emit_byte(0x05+8*d);
	emit_long(s);
}
LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))

/* MOV dword [d], imm32 (C7 /0). */
LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
{
	emit_byte(0xc7);
	emit_byte(0x05);
	emit_long(d);
	emit_long(s);
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))

/* MOV word [d], imm16 (66 C7 /0). */
LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
{
	emit_byte(0x66);
	emit_byte(0xc7);
	emit_byte(0x05);
	emit_long(d);
	emit_word(s);
}
LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))

/* MOV byte [d], imm8 (C6 /0). */
LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
{
	emit_byte(0xc6);
	emit_byte(0x05);
	emit_long(d);
	emit_byte(s);
}
LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))

/* ROL byte [d], i — D0 /0 for the shorter rotate-by-1 form,
   C0 /0 ib otherwise. */
LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd0);
		emit_byte(0x05);
		emit_long(d);
	}
	else {
		emit_byte(0xc0);
		emit_byte(0x05);
		emit_long(d);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))

/* ROL r8, i — same by-1 optimization, register-direct ModRM. */
LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd0);
		emit_byte(0xc0+r);
	}
	else {
		emit_byte(0xc0);
		emit_byte(0xc0+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
1505
1506 LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1507 {
1508 emit_byte(0x66);
1509 emit_byte(0xc1);
1510 emit_byte(0xc0+r);
1511 emit_byte(i);
1512 }
1513 LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
1514
/* Rotate/shift emitters.  The *_rr variants use the D2/D3 group which
   always shifts by CL — the r parameter is an annotation for the
   register allocator (it must place the count in CL), not encoded. */

/* ROL r32, i — D1 /0 for by-1, C1 /0 ib otherwise. */
LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd1);
		emit_byte(0xc0+r);
	}
	else {
		emit_byte(0xc1);
		emit_byte(0xc0+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))

/* ROL r32, CL. */
LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
{
	emit_byte(0xd3);
	emit_byte(0xc0+d);
}
LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))

/* ROL r16, CL. */
LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
{
	emit_byte(0x66);
	emit_byte(0xd3);
	emit_byte(0xc0+d);
}
LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))

/* ROL r8, CL. */
LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
{
	emit_byte(0xd2);
	emit_byte(0xc0+d);
}
LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))

/* SHL r32, CL (/4). */
LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
{
	emit_byte(0xd3);
	emit_byte(0xe0+d);
}
LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))

/* SHL r16, CL. */
LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
{
	emit_byte(0x66);
	emit_byte(0xd3);
	emit_byte(0xe0+d);
}
LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))

/* SHL r8, CL. */
LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
{
	emit_byte(0xd2);
	emit_byte(0xe0+d);
}
LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))

/* ROR r8, i (/1) — by-1 short form when possible. */
LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd0);
		emit_byte(0xc8+r);
	}
	else {
		emit_byte(0xc0);
		emit_byte(0xc8+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
1586
1587 LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1588 {
1589 emit_byte(0x66);
1590 emit_byte(0xc1);
1591 emit_byte(0xc8+r);
1592 emit_byte(i);
1593 }
1594 LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
1595
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* OR r32, [s] (0B /r, disp32 absolute). */
LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
{
	emit_byte(0x0b);
	emit_byte(0x05+8*d);
	emit_long(s);
}
LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))

/* ROR r32, i — by-1 short form (D1 /1) when possible. */
LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd1);
		emit_byte(0xc8+r);
	}
	else {
		emit_byte(0xc1);
		emit_byte(0xc8+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))

/* ROR by CL — r is an allocator annotation, count comes from CL. */
LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
{
	emit_byte(0xd3);
	emit_byte(0xc8+d);
}
LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
{
	emit_byte(0x66);
	emit_byte(0xd3);
	emit_byte(0xc8+d);
}
LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
{
	emit_byte(0xd2);
	emit_byte(0xc8+d);
}
LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))

/* SHR by CL (/5). */
LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
{
	emit_byte(0xd3);
	emit_byte(0xe8+d);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
{
	emit_byte(0x66);
	emit_byte(0xd3);
	emit_byte(0xe8+d);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
{
	emit_byte(0xd2);
	emit_byte(0xe8+d);
}
LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))

/* SAR by CL (/7) — arithmetic right shift. */
LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
{
	emit_byte(0xd3);
	emit_byte(0xf8+d);
}
LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
{
	emit_byte(0x66);
	emit_byte(0xd3);
	emit_byte(0xf8+d);
}
LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))

LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
{
	emit_byte(0xd2);
	emit_byte(0xf8+d);
}
LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))

/* SHL r32, i — by-1 short form (D1 /4) when possible. */
LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd1);
		emit_byte(0xe0+r);
	}
	else {
		emit_byte(0xc1);
		emit_byte(0xe0+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
1698
1699 LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1700 {
1701 emit_byte(0x66);
1702 emit_byte(0xc1);
1703 emit_byte(0xe0+r);
1704 emit_byte(i);
1705 }
1706 LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
1707
/* SHL r8, i — by-1 short form (D0 /4) when possible. */
LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd0);
		emit_byte(0xe0+r);
	}
	else {
		emit_byte(0xc0);
		emit_byte(0xe0+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))

/* SHR r32, i — by-1 short form (D1 /5) when possible. */
LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd1);
		emit_byte(0xe8+r);
	}
	else {
		emit_byte(0xc1);
		emit_byte(0xe8+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
1735
1736 LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1737 {
1738 emit_byte(0x66);
1739 emit_byte(0xc1);
1740 emit_byte(0xe8+r);
1741 emit_byte(i);
1742 }
1743 LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
1744
/* SHR r8, i — by-1 short form (D0 /5) when possible. */
LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd0);
		emit_byte(0xe8+r);
	}
	else {
		emit_byte(0xc0);
		emit_byte(0xe8+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))

/* SAR r32, i — by-1 short form (D1 /7) when possible. */
LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd1);
		emit_byte(0xf8+r);
	}
	else {
		emit_byte(0xc1);
		emit_byte(0xf8+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
1772
1773 LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1774 {
1775 emit_byte(0x66);
1776 emit_byte(0xc1);
1777 emit_byte(0xf8+r);
1778 emit_byte(i);
1779 }
1780 LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
1781
/* SAR r8, i — by-1 short form (D0 /7) when possible. */
LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
{
	if (optimize_shift_once && (i == 1)) {
		emit_byte(0xd0);
		emit_byte(0xf8+r);
	}
	else {
		emit_byte(0xc0);
		emit_byte(0xf8+r);
		emit_byte(i);
	}
}
LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))

/* SAHF — load flags from AH.  The dummy parameter only tells the
   register allocator that AH is read; it is not encoded. */
LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
{
	emit_byte(0x9e);
}
LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))

/* CPUID — dummy parameter pins EAX for the allocator. */
LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
{
	emit_byte(0x0f);
	emit_byte(0xa2);
}
LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))

/* LAHF — store flags into AH. */
LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
{
	emit_byte(0x9f);
}
LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))

/* SETcc r8 (0F 90+cc /0). */
LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
{
	emit_byte(0x0f);
	emit_byte(0x90+cc);
	emit_byte(0xc0+d);
}
LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))

/* SETcc byte [d] at an absolute address. */
LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
{
	emit_byte(0x0f);
	emit_byte(0x90+cc);
	emit_byte(0x05);
	emit_long(d);
}
LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))

/* CMOVcc r32, r32; on CPUs without CMOV, fall back to an inverted
   short conditional jump over a plain MOV (same net effect). */
LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
	if (have_cmov) {
		emit_byte(0x0f);
		emit_byte(0x40+cc);
		emit_byte(0xc0+8*d+s);
	}
	else { /* replacement using branch and mov */
		int uncc=(cc^1);	/* x86 condition codes invert by flipping bit 0 */
		emit_byte(0x70+uncc);
		emit_byte(2); /* skip next 2 bytes if not cc=true */
		emit_byte(0x89);
		emit_byte(0xc0+8*s+d);
	}
}
LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
1848
/* BSF r32, r32 — index of lowest set bit (0F BC /r). */
LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
{
	emit_byte(0x0f);
	emit_byte(0xbc);
	emit_byte(0xc0+8*d+s);
}
LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))

/* MOVSX r32, r16 (0F BF). */
LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
{
	emit_byte(0x0f);
	emit_byte(0xbf);
	emit_byte(0xc0+8*d+s);
}
LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))

/* MOVSX r32, r8 (0F BE). */
LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
{
	emit_byte(0x0f);
	emit_byte(0xbe);
	emit_byte(0xc0+8*d+s);
}
LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))

/* MOVZX r32, r16 (0F B7). */
LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
{
	emit_byte(0x0f);
	emit_byte(0xb7);
	emit_byte(0xc0+8*d+s);
}
LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))

/* MOVZX r32, r8 (0F B6). */
LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
{
	emit_byte(0x0f);
	emit_byte(0xb6);
	emit_byte(0xc0+8*d+s);
}
LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))

/* IMUL r32, r32 — 32x32 -> 32 (0F AF /r). */
LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
{
	emit_byte(0x0f);
	emit_byte(0xaf);
	emit_byte(0xc0+8*d+s);
}
LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))

/* Widening signed multiply: the one-operand IMUL encoded here
   (F7 /5 with a fixed ModRM) implies specific registers, so the
   allocator must have placed d/s in MUL_NREG1/MUL_NREG2. */
LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
{
	if (d!=MUL_NREG1 || s!=MUL_NREG2) {
		jit_abort("Bad register in IMUL: d=%d, s=%d\n",d,s);
	}
	emit_byte(0xf7);
	emit_byte(0xea);
}
LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))

/* Widening unsigned multiply (F7 /4) — same fixed-register rule. */
LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
{
	if (d!=MUL_NREG1 || s!=MUL_NREG2) {
		jit_abort("Bad register in MUL: d=%d, s=%d",d,s);
	}
	emit_byte(0xf7);
	emit_byte(0xe2);
}
LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))

/* Unsigned 32x32 -> 32: x86 has no two-operand unsigned MUL, so this
   is deliberately dead — abort() fires before any bytes are emitted. */
LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
{
	abort(); /* %^$&%^$%#^ x86! */
	emit_byte(0x0f);
	emit_byte(0xaf);
	emit_byte(0xc0+8*d+s);
}
LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
1925
/* MOV r8, r8 (88 /r, reg field is the source). */
LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
{
	emit_byte(0x88);
	emit_byte(0xc0+8*s+d);
}
LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))

/* MOV r16, r16 (66 89 /r). */
LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
{
	emit_byte(0x66);
	emit_byte(0x89);
	emit_byte(0xc0+8*s+d);
}
LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))

/* Loads from [baser + index*factor] via a SIB byte.  EBP cannot be a
   SIB base with mod=00, so the isebp path switches to mod=01 (0x40)
   with an explicit zero disp8. */
LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	int isebp=(baser==5)?0x40:0;	/* EBP base needs disp8 form */
	int fi;

	/* SIB scale field: log2(factor). */
	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}


	emit_byte(0x8b);
	emit_byte(0x04+8*d+isebp);
	emit_byte(baser+8*index+0x40*fi);
	if (isebp)
		emit_byte(0x00);
	}
LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

/* Word variant of the above (66 8B). */
LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	int fi;
	int isebp;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}
	isebp=(baser==5)?0x40:0;

	emit_byte(0x66);
	emit_byte(0x8b);
	emit_byte(0x04+8*d+isebp);
	emit_byte(baser+8*index+0x40*fi);
	if (isebp)
		emit_byte(0x00);
}
LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

/* Byte variant (8A). */
LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	int fi;
	int isebp;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}
	isebp=(baser==5)?0x40:0;

	emit_byte(0x8a);
	emit_byte(0x04+8*d+isebp);
	emit_byte(baser+8*index+0x40*fi);
	if (isebp)
		emit_byte(0x00);
}
LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
2007
/* Stores to [baser + index*factor] — mirror of the rrm_indexed loads
   (89/66 89/88, reg field is the source).  Same EBP-base disp8 quirk. */
LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	int fi;
	int isebp;

	/* SIB scale field: log2(factor). */
	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}


	isebp=(baser==5)?0x40:0;

	emit_byte(0x89);
	emit_byte(0x04+8*s+isebp);
	emit_byte(baser+8*index+0x40*fi);
	if (isebp)
		emit_byte(0x00);
}
LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	int fi;
	int isebp;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}
	isebp=(baser==5)?0x40:0;

	emit_byte(0x66);
	emit_byte(0x89);
	emit_byte(0x04+8*s+isebp);
	emit_byte(baser+8*index+0x40*fi);
	if (isebp)
		emit_byte(0x00);
}
LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	int fi;
	int isebp;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}
	isebp=(baser==5)?0x40:0;

	emit_byte(0x88);
	emit_byte(0x04+8*s+isebp);
	emit_byte(baser+8*index+0x40*fi);
	if (isebp)
		emit_byte(0x00);
}
LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))

/* Stores to [base + baser + index*factor] using the mod=10 (disp32)
   SIB form (ModRM 0x84) — no EBP special case needed with disp32. */
LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	int fi;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}

	emit_byte(0x89);
	emit_byte(0x84+8*s);
	emit_byte(baser+8*index+0x40*fi);
	emit_long(base);
}
LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
	int fi;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}

	emit_byte(0x66);
	emit_byte(0x89);
	emit_byte(0x84+8*s);
	emit_byte(baser+8*index+0x40*fi);
	emit_long(base);
}
LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
	int fi;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}

	emit_byte(0x88);
	emit_byte(0x84+8*s);
	emit_byte(baser+8*index+0x40*fi);
	emit_long(base);
}
LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
2134
/* Loads from [base + baser + index*factor] — disp32 SIB form
   (ModRM 0x84), mirrors the bmrr_indexed stores. */
LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int fi;

	/* SIB scale field: log2(factor). */
	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}

	emit_byte(0x8b);
	emit_byte(0x84+8*d);
	emit_byte(baser+8*index+0x40*fi);
	emit_long(base);
}
LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))

LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int fi;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}

	emit_byte(0x66);
	emit_byte(0x8b);
	emit_byte(0x84+8*d);
	emit_byte(baser+8*index+0x40*fi);
	emit_long(base);
}
LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))

LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int fi;

	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default: abort();
	}

	emit_byte(0x8a);
	emit_byte(0x84+8*d);
	emit_byte(baser+8*index+0x40*fi);
	emit_long(base);
}
LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))

/* MOV r32, [base + index*factor] — SIB with no base register
   (base field 5 + mod 00 means disp32 only). */
LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
	int fi;
	switch(factor) {
	case 1: fi=0; break;
	case 2: fi=1; break;
	case 4: fi=2; break;
	case 8: fi=3; break;
	default:
		fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
		abort();
	}
	emit_byte(0x8b);
	emit_byte(0x04+8*d);
	emit_byte(0x05+8*index+64*fi);
	emit_long(base);
}
LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
2211
2212 LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2213 {
2214 int fi;
2215 switch(factor) {
2216 case 1: fi=0; break;
2217 case 2: fi=1; break;
2218 case 4: fi=2; break;
2219 case 8: fi=3; break;
2220 default:
2221 fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
2222 abort();
2223 }
2224 if (have_cmov) {
2225 emit_byte(0x0f);
2226 emit_byte(0x40+cond);
2227 emit_byte(0x04+8*d);
2228 emit_byte(0x05+8*index+64*fi);
2229 emit_long(base);
2230 }
2231 else { /* replacement using branch and mov */
2232 int uncc=(cond^1);
2233 emit_byte(0x70+uncc);
2234 emit_byte(7); /* skip next 7 bytes if not cc=true */
2235 emit_byte(0x8b);
2236 emit_byte(0x04+8*d);
2237 emit_byte(0x05+8*index+64*fi);
2238 emit_long(base);
2239 }
2240 }
2241 LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
2242
/* CMOVcc r32, [mem] (absolute address); without CMOV, an inverted
   short jump over the 6-byte MOV gives the same effect. */
LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
{
	if (have_cmov) {
		emit_byte(0x0f);
		emit_byte(0x40+cond);
		emit_byte(0x05+8*d);
		emit_long(mem);
	}
	else { /* replacement using branch and mov */
		int uncc=(cond^1);	/* flip bit 0 to invert the condition */
		emit_byte(0x70+uncc);
		emit_byte(6); /* skip next 6 bytes if not cc=true */
		emit_byte(0x8b);
		emit_byte(0x05+8*d);
		emit_long(mem);
	}
}
LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
2261
2262 LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2263 {
2264 Dif(!isbyte(offset)) abort();
2265 emit_byte(0x8b);
2266 emit_byte(0x40+8*d+s);
2267 emit_byte(offset);
2268 }
2269 LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
2270
2271 LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2272 {
2273 Dif(!isbyte(offset)) abort();
2274 emit_byte(0x66);
2275 emit_byte(0x8b);
2276 emit_byte(0x40+8*d+s);
2277 emit_byte(offset);
2278 }
2279 LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
2280
2281 LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2282 {
2283 Dif(!isbyte(offset)) abort();
2284 emit_byte(0x8a);
2285 emit_byte(0x40+8*d+s);
2286 emit_byte(offset);
2287 }
2288 LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
2289
2290 LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2291 {
2292 emit_byte(0x8b);
2293 emit_byte(0x80+8*d+s);
2294 emit_long(offset);
2295 }
2296 LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
2297
2298 LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2299 {
2300 emit_byte(0x66);
2301 emit_byte(0x8b);
2302 emit_byte(0x80+8*d+s);
2303 emit_long(offset);
2304 }
2305 LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
2306
/* d = *(u8 *)(s + offset); disp32 form. */
LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
{
    emit_byte(0x8a);
    emit_byte(0x80+8*d+s);
    emit_long(offset);
}
LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
2314
/* *(u32 *)(d + offset) = i; disp8 form only. */
LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
{
    Dif(!isbyte(offset)) abort();
    emit_byte(0xc7);		/* MOV r/m32, imm32 */
    emit_byte(0x40+d);		/* modrm: [d+disp8], /0 */
    emit_byte(offset);
    emit_long(i);
}
LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
2324
/* *(u16 *)(d + offset) = i; disp8 form only. */
LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
{
    Dif(!isbyte(offset)) abort();
    emit_byte(0x66);
    emit_byte(0xc7);
    emit_byte(0x40+d);
    emit_byte(offset);
    emit_word(i);
}
LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
2335
/* *(u8 *)(d + offset) = i; disp8 form only. */
LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
{
    Dif(!isbyte(offset)) abort();
    emit_byte(0xc6);		/* MOV r/m8, imm8 */
    emit_byte(0x40+d);
    emit_byte(offset);
    emit_byte(i);
}
LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
2345
/* *(u32 *)(d + offset) = s; disp8 form only. */
LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
{
    Dif(!isbyte(offset)) abort();
    emit_byte(0x89);		/* MOV r/m32, r32 */
    emit_byte(0x40+8*s+d);	/* modrm: [d+disp8], reg = s */
    emit_byte(offset);
}
LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
2354
/* *(u16 *)(d + offset) = s; disp8 form only. */
LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
{
    Dif(!isbyte(offset)) abort();
    emit_byte(0x66);
    emit_byte(0x89);
    emit_byte(0x40+8*s+d);
    emit_byte(offset);
}
LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
2364
/* *(u8 *)(d + offset) = s; disp8 form only. */
LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
{
    Dif(!isbyte(offset)) abort();
    emit_byte(0x88);		/* MOV r/m8, r8 */
    emit_byte(0x40+8*s+d);
    emit_byte(offset);
}
LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
2373
/* d = s + offset via LEA; picks disp8 or disp32 encoding. */
LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
{
    if (optimize_imm8 && isbyte(offset)) {
	emit_byte(0x8d);	/* LEA r32, m */
	emit_byte(0x40+8*d+s);	/* [s+disp8] */
	emit_byte(offset);
    }
    else {
	emit_byte(0x8d);
	emit_byte(0x80+8*d+s);	/* [s+disp32] */
	emit_long(offset);
    }
}
LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
2388
/* d = s + index*factor + offset via SIB-addressed LEA.
   factor must be 1/2/4/8 (encoded as the SIB scale field). */
LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
    int fi;			/* SIB scale bits (log2 of factor) */

    switch(factor) {
    case 1: fi=0; break;
    case 2: fi=1; break;
    case 4: fi=2; break;
    case 8: fi=3; break;
    default: abort();
    }

    if (optimize_imm8 && isbyte(offset)) {
	emit_byte(0x8d);
	emit_byte(0x44+8*d);		/* modrm: SIB + disp8 */
	emit_byte(0x40*fi+8*index+s);	/* SIB: scale.index.base */
	emit_byte(offset);
    }
    else {
	emit_byte(0x8d);
	emit_byte(0x84+8*d);		/* modrm: SIB + disp32 */
	emit_byte(0x40*fi+8*index+s);
	emit_long(offset);
    }
}
LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
2415
/* d = s + index*factor via SIB-addressed LEA (no displacement).
   EBP cannot be a no-displacement base in x86 encoding, so when s==EBP
   we switch to the disp8 form and append a zero displacement byte. */
LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
    int isebp=(s==5)?0x40:0;	/* force disp8 mode for EBP base */
    int fi;

    switch(factor) {
    case 1: fi=0; break;
    case 2: fi=1; break;
    case 4: fi=2; break;
    case 8: fi=3; break;
    default: abort();
    }

    emit_byte(0x8d);
    emit_byte(0x04+8*d+isebp);
    emit_byte(0x40*fi+8*index+s);
    if (isebp)
	emit_byte(0);		/* the mandatory zero disp8 */
}
LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
2436
/* *(u32 *)(d + offset) = s; picks disp8 or disp32 encoding. */
LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
{
    if (optimize_imm8 && isbyte(offset)) {
	emit_byte(0x89);
	emit_byte(0x40+8*s+d);
	emit_byte(offset);
    }
    else {
	emit_byte(0x89);
	emit_byte(0x80+8*s+d);
	emit_long(offset);
    }
}
LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
2451
2452 LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2453 {
2454 emit_byte(0x66);
2455 emit_byte(0x89);
2456 emit_byte(0x80+8*s+d);
2457 emit_long(offset);
2458 }
2459 LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
2460
/* *(u8 *)(d + offset) = s; picks disp8 or disp32 encoding. */
LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
{
    if (optimize_imm8 && isbyte(offset)) {
	emit_byte(0x88);
	emit_byte(0x40+8*s+d);
	emit_byte(offset);
    }
    else {
	emit_byte(0x88);
	emit_byte(0x80+8*s+d);
	emit_long(offset);
    }
}
LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
2475
/* Byte-swap all 32 bits of r (BSWAP r32). */
LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
{
    emit_byte(0x0f);
    emit_byte(0xc8+r);
}
LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
2482
/* Swap the two low bytes of r: encoded as ROLW $8, r (clobbers flags,
   hence the WRITE flag effect). */
LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
{
    emit_byte(0x66);
    emit_byte(0xc1);		/* shift-group opcode, imm8 count */
    emit_byte(0xc0+r);		/* /0 = ROL, register direct */
    emit_byte(0x08);
}
LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
2491
/* d = s (32-bit register move). */
LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
{
    emit_byte(0x89);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
2498
/* *(u32 *)d = s (store to absolute address). */
LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
{
    emit_byte(0x89);
    emit_byte(0x05+8*s);	/* modrm: disp32 absolute */
    emit_long(d);
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
2506
/* *(u16 *)d = s (store to absolute address). */
LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x89);
    emit_byte(0x05+8*s);
    emit_long(d);
}
LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
2515
/* d = *(u16 *)s (load from absolute address). */
LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
{
    emit_byte(0x66);
    emit_byte(0x8b);
    emit_byte(0x05+8*d);
    emit_long(s);
}
LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
2524
/* *(u8 *)d = s (store to absolute address). */
LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
{
    emit_byte(0x88);
    emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
    emit_long(d);
}
LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
2532
/* d = *(u8 *)s (load from absolute address). */
LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
{
    emit_byte(0x8a);
    emit_byte(0x05+8*d);
    emit_long(s);
}
LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
2540
/* d = imm32 (MOV r32, imm32 short form). */
LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
{
    emit_byte(0xb8+d);
    emit_long(s);
}
LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
2547
/* d = imm16 (low word of d only). */
LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
{
    emit_byte(0x66);
    emit_byte(0xb8+d);
    emit_word(s);
}
LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
2555
/* d = imm8 (MOV r8, imm8 short form). */
LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
{
    emit_byte(0xb0+d);
    emit_byte(s);
}
LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
2562
/* *(u32 *)d += s + C (ADC m32, imm32 — consumes and produces carry). */
LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
{
    emit_byte(0x81);		/* group-1 opcode, imm32 */
    emit_byte(0x15);		/* /2 = ADC, disp32 absolute */
    emit_long(d);
    emit_long(s);
}
LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
2571
/* *(u32 *)d += s; uses the sign-extended imm8 form when it fits. */
LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
{
    if (optimize_imm8 && isbyte(s)) {
	emit_byte(0x83);	/* group-1, imm8 sign-extended */
	emit_byte(0x05);	/* /0 = ADD, disp32 absolute */
	emit_long(d);
	emit_byte(s);
    }
    else {
	emit_byte(0x81);
	emit_byte(0x05);
	emit_long(d);
	emit_long(s);
    }
}
LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
2588
/* *(u16 *)d += s (always the imm16 form). */
LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
{
    emit_byte(0x66);
    emit_byte(0x81);
    emit_byte(0x05);
    emit_long(d);
    emit_word(s);
}
LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
2598
/* *(u8 *)d += s. */
LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
{
    emit_byte(0x80);		/* group-1 byte form */
    emit_byte(0x05);
    emit_long(d);
    emit_byte(s);
}
LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
2607
/* Set flags from d & i (TEST). Uses the 1-byte EAX short form when d is
   the accumulator and that optimization is enabled. */
LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
{
    if (optimize_accum && isaccum(d))
	emit_byte(0xa9);	/* TEST EAX, imm32 */
    else {
	emit_byte(0xf7);
	emit_byte(0xc0+d);	/* /0 = TEST, register direct */
    }
    emit_long(i);
}
LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
2619
/* Set flags from d & s (32-bit TEST). */
LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
{
    emit_byte(0x85);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
2626
/* Set flags from d & s (16-bit TEST). */
LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x85);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
2634
/* Set flags from d & s (8-bit TEST). */
LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
{
    emit_byte(0x84);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
2641
/* d ^= imm32 (always the imm32 form; no imm8/accumulator shortcuts here). */
LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
{
    emit_byte(0x81);
    emit_byte(0xf0+d);		/* /6 = XOR, register direct */
    emit_long(i);
}
LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
2649
/* d &= i; prefers the sign-extended imm8 form, then the 1-byte EAX
   short form, then the general imm32 form. */
LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
{
    if (optimize_imm8 && isbyte(i)) {
	emit_byte(0x83);
	emit_byte(0xe0+d);	/* /4 = AND */
	emit_byte(i);
    }
    else {
	if (optimize_accum && isaccum(d))
	    emit_byte(0x25);	/* AND EAX, imm32 */
	else {
	    emit_byte(0x81);
	    emit_byte(0xe0+d);
	}
	emit_long(i);
    }
}
LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
2668
/* d &= i (16-bit); same encoding choices as raw_and_l_ri. */
LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
{
    emit_byte(0x66);
    if (optimize_imm8 && isbyte(i)) {
	emit_byte(0x83);
	emit_byte(0xe0+d);
	emit_byte(i);
    }
    else {
	if (optimize_accum && isaccum(d))
	    emit_byte(0x25);	/* AND AX, imm16 */
	else {
	    emit_byte(0x81);
	    emit_byte(0xe0+d);
	}
	emit_word(i);
    }
}
LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
2688
/* d &= s (32-bit). */
LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
{
    emit_byte(0x21);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
2695
/* d &= s (16-bit). */
LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x21);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
2703
/* d &= s (8-bit). */
LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
{
    emit_byte(0x20);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
2710
/* d |= i; imm8 / accumulator / imm32 encoding choices as for AND. */
LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
{
    if (optimize_imm8 && isbyte(i)) {
	emit_byte(0x83);
	emit_byte(0xc8+d);	/* /1 = OR */
	emit_byte(i);
    }
    else {
	if (optimize_accum && isaccum(d))
	    emit_byte(0x0d);	/* OR EAX, imm32 */
	else {
	    emit_byte(0x81);
	    emit_byte(0xc8+d);
	}
	emit_long(i);
    }
}
LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
2729
/* d |= s (32-bit). */
LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
{
    emit_byte(0x09);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
2736
/* d |= s (16-bit). */
LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x09);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
2744
/* d |= s (8-bit). */
LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
{
    emit_byte(0x08);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
2751
/* d += s + C (32-bit add with carry). */
LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
{
    emit_byte(0x11);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
2758
/* d += s + C (16-bit add with carry). */
LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x11);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
2766
/* d += s + C (8-bit add with carry). */
LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
{
    emit_byte(0x10);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
2773
/* d += s (32-bit). */
LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
{
    emit_byte(0x01);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
2780
/* d += s (16-bit). */
LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x01);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
2788
/* d += s (8-bit). */
LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
{
    emit_byte(0x00);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
2795
/* d -= i; imm8 form is used unconditionally when i fits in a signed byte
   (note: unlike most siblings, no optimize_imm8 gate here). */
LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
{
    if (isbyte(i)) {
	emit_byte(0x83);
	emit_byte(0xe8+d);	/* /5 = SUB */
	emit_byte(i);
    }
    else {
	if (optimize_accum && isaccum(d))
	    emit_byte(0x2d);	/* SUB EAX, imm32 */
	else {
	    emit_byte(0x81);
	    emit_byte(0xe8+d);
	}
	emit_long(i);
    }
}
LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
2814
/* d -= i (8-bit); 1-byte AL short form when possible. */
LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
{
    if (optimize_accum && isaccum(d))
	emit_byte(0x2c);	/* SUB AL, imm8 */
    else {
	emit_byte(0x80);
	emit_byte(0xe8+d);
    }
    emit_byte(i);
}
LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
2826
/* d += i; imm8 form used unconditionally when i fits in a signed byte. */
LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
{
    if (isbyte(i)) {
	emit_byte(0x83);
	emit_byte(0xc0+d);	/* /0 = ADD */
	emit_byte(i);
    }
    else {
	if (optimize_accum && isaccum(d))
	    emit_byte(0x05);	/* ADD EAX, imm32 */
	else {
	    emit_byte(0x81);
	    emit_byte(0xc0+d);
	}
	emit_long(i);
    }
}
LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
2845
/* d += i (16-bit); same encoding choices as raw_add_l_ri. */
LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
{
    emit_byte(0x66);
    if (isbyte(i)) {
	emit_byte(0x83);
	emit_byte(0xc0+d);
	emit_byte(i);
    }
    else {
	if (optimize_accum && isaccum(d))
	    emit_byte(0x05);	/* ADD AX, imm16 */
	else {
	    emit_byte(0x81);
	    emit_byte(0xc0+d);
	}
	emit_word(i);
    }
}
LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
2865
/* d += i (8-bit); 1-byte AL short form when possible. */
LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
{
    if (optimize_accum && isaccum(d))
	emit_byte(0x04);	/* ADD AL, imm8 */
    else {
	emit_byte(0x80);
	emit_byte(0xc0+d);
    }
    emit_byte(i);
}
LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
2877
/* d -= s + C (32-bit subtract with borrow). */
LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
{
    emit_byte(0x19);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
2884
/* d -= s + C (16-bit subtract with borrow). */
LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x19);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
2892
/* d -= s + C (8-bit subtract with borrow). */
LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
{
    emit_byte(0x18);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
2899
/* d -= s (32-bit). */
LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
{
    emit_byte(0x29);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
2906
/* d -= s (16-bit). */
LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x29);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
2914
/* d -= s (8-bit). */
LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
{
    emit_byte(0x28);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
2921
/* Set flags from d - s (32-bit CMP). */
LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
{
    emit_byte(0x39);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
2928
/* Set flags from r - i; imm8 / accumulator / imm32 encoding choices. */
LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
{
    if (optimize_imm8 && isbyte(i)) {
	emit_byte(0x83);
	emit_byte(0xf8+r);	/* /7 = CMP */
	emit_byte(i);
    }
    else {
	if (optimize_accum && isaccum(r))
	    emit_byte(0x3d);	/* CMP EAX, imm32 */
	else {
	    emit_byte(0x81);
	    emit_byte(0xf8+r);
	}
	emit_long(i);
    }
}
LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
2947
/* Set flags from d - s (16-bit CMP). */
LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x39);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
2955
2956 LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
2957 {
2958 emit_byte(0x80);
2959 emit_byte(0x3d);
2960 emit_long(d);
2961 emit_byte(s);
2962 }
2963 LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
2964
/* Set flags from d - i (8-bit); 1-byte AL short form when possible. */
LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
{
    if (optimize_accum && isaccum(d))
	emit_byte(0x3c);	/* CMP AL, imm8 */
    else {
	emit_byte(0x80);
	emit_byte(0xf8+d);
    }
    emit_byte(i);
}
LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
2976
/* Set flags from d - s (8-bit CMP). */
LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
{
    emit_byte(0x38);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
2983
/* Set flags from d - *(u32 *)(offset + index*factor) via SIB addressing.
   factor must be 1/2/4/8. */
LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
{
    int fi;			/* SIB scale bits */

    switch(factor) {
    case 1: fi=0; break;
    case 2: fi=1; break;
    case 4: fi=2; break;
    case 8: fi=3; break;
    default: abort();
    }
    emit_byte(0x39);
    emit_byte(0x04+8*d);		/* modrm: SIB follows */
    emit_byte(5+8*index+0x40*fi);	/* SIB: base=none(disp32) */
    emit_long(offset);
}
LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
3001
/* d ^= s (32-bit). */
LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
{
    emit_byte(0x31);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
3008
/* d ^= s (16-bit). */
LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
{
    emit_byte(0x66);
    emit_byte(0x31);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
3016
/* d ^= s (8-bit). */
LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
{
    emit_byte(0x30);
    emit_byte(0xc0+8*s+d);
}
LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
3023
/* *(u32 *)d -= s; sign-extended imm8 form when it fits. */
LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
{
    if (optimize_imm8 && isbyte(s)) {
	emit_byte(0x83);
	emit_byte(0x2d);	/* /5 = SUB, disp32 absolute */
	emit_long(d);
	emit_byte(s);
    }
    else {
	emit_byte(0x81);
	emit_byte(0x2d);
	emit_long(d);
	emit_long(s);
    }
}
LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
3040
/* Set flags from *(u32 *)d - s; sign-extended imm8 form when it fits. */
LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
{
    if (optimize_imm8 && isbyte(s)) {
	emit_byte(0x83);
	emit_byte(0x3d);	/* /7 = CMP, disp32 absolute */
	emit_long(d);
	emit_byte(s);
    }
    else {
	emit_byte(0x81);
	emit_byte(0x3d);
	emit_long(d);
	emit_long(s);
    }
}
LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
3057
/* Swap r1 and r2 (32-bit XCHG; symmetric, so operand order is immaterial). */
LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
{
    emit_byte(0x87);
    emit_byte(0xc0+8*r1+r2);
}
LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
3064
3065 LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
3066 {
3067 emit_byte(0x86);
3068 emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
3069 }
3070 LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
3071
3072 /*************************************************************************
3073 * FIXME: mem access modes probably wrong *
3074 *************************************************************************/
3075
/* Push EFLAGS onto the host stack (PUSHF). */
LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
{
    emit_byte(0x9c);
}
LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
3081
/* Pop EFLAGS from the host stack (POPF). */
LOWFUNC(WRITE,READ,0,raw_popfl,(void))
{
    emit_byte(0x9d);
}
LENDFUNC(WRITE,READ,0,raw_popfl,(void))
3087
3088 /* Generate floating-point instructions */
/* FADD st(0), m64real at absolute address s (x87 FPU add from memory). */
static inline void x86_fadd_m(MEMR s)
{
    emit_byte(0xdc);
    emit_byte(0x05);		/* /0 = FADD, disp32 absolute */
    emit_long(s);
}
3095
3096 #endif
3097
3098 /*************************************************************************
3099 * Unoptimizable stuff --- jump *
3100 *************************************************************************/
3101
/* Indirect call through register r (CALL r32). */
static inline void raw_call_r(R4 r)
{
#if USE_NEW_RTASM
    CALLsr(r);
#else
    emit_byte(0xff);
    emit_byte(0xd0+r);		/* /2 = CALL, register direct */
#endif
}
3111
/* Indirect call through the table entry at base + r*m (m must be 1/2/4/8). */
static inline void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
{
#if USE_NEW_RTASM
    ADDR32 CALLsm(base, X86_NOREG, r, m);
#else
    int mu;			/* SIB scale bits */
    switch(m) {
    case 1: mu=0; break;
    case 2: mu=1; break;
    case 4: mu=2; break;
    case 8: mu=3; break;
    default: abort();
    }
    emit_byte(0xff);
    emit_byte(0x14);		/* /2 = CALL, SIB follows */
    emit_byte(0x05+8*r+0x40*mu);
    emit_long(base);
#endif
}
3131
/* Indirect jump through register r (JMP r32). */
static inline void raw_jmp_r(R4 r)
{
#if USE_NEW_RTASM
    JMPsr(r);
#else
    emit_byte(0xff);
    emit_byte(0xe0+r);		/* /4 = JMP, register direct */
#endif
}
3141
/* Indirect jump through the table entry at base + r*m (m must be 1/2/4/8). */
static inline void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
{
#if USE_NEW_RTASM
    ADDR32 JMPsm(base, X86_NOREG, r, m);
#else
    int mu;			/* SIB scale bits */
    switch (m) {
    case 1: mu=0; break;
    case 2: mu=1; break;
    case 4: mu=2; break;
    case 8: mu=3; break;
    default: abort();
    }
    emit_byte(0xff);
    emit_byte(0x24);		/* /4 = JMP, SIB follows */
    emit_byte(0x05+8*r+0x40*mu);
    emit_long(base);
#endif
}
3161
/* Indirect jump through the pointer stored at absolute address base. */
static inline void raw_jmp_m(uae_u32 base)
{
    emit_byte(0xff);
    emit_byte(0x25);		/* /4 = JMP, disp32 absolute */
    emit_long(base);
}
3168
3169
/* Direct near call to absolute target t; the rel32 is computed against
   the current emit position (`target`) plus the 4 displacement bytes. */
static inline void raw_call(uae_u32 t)
{
#if USE_NEW_RTASM
    ADDR32 CALLm(t);
#else
    emit_byte(0xe8);
    emit_long(t-(uintptr)target-4);
#endif
}
3179
/* Direct near jump to absolute target t (rel32-relative encoding). */
static inline void raw_jmp(uae_u32 t)
{
#if USE_NEW_RTASM
    ADDR32 JMPm(t);
#else
    emit_byte(0xe9);
    emit_long(t-(uintptr)target-4);
#endif
}
3189
/* Conditional near jump: JL (signed less) to absolute target t. */
static inline void raw_jl(uae_u32 t)
{
    emit_byte(0x0f);
    emit_byte(0x8c);
    emit_long(t-(uintptr)target-4);
}
3196
/* Conditional near jump: JZ/JE to absolute target t. */
static inline void raw_jz(uae_u32 t)
{
    emit_byte(0x0f);
    emit_byte(0x84);
    emit_long(t-(uintptr)target-4);
}
3203
/* Conditional near jump: JNZ/JNE to absolute target t. */
static inline void raw_jnz(uae_u32 t)
{
    emit_byte(0x0f);
    emit_byte(0x85);
    emit_long(t-(uintptr)target-4);
}
3210
/* Emit only the JNZ near-form opcode; caller emits the rel32 afterwards. */
static inline void raw_jnz_l_oponly(void)
{
    emit_byte(0x0f);
    emit_byte(0x85);
}
3216
/* Emit only the Jcc near-form opcode for condition cc; caller emits rel32. */
static inline void raw_jcc_l_oponly(int cc)
{
    emit_byte(0x0f);
    emit_byte(0x80+cc);
}
3222
/* Emit only the JNZ short-form opcode; caller emits the rel8 afterwards. */
static inline void raw_jnz_b_oponly(void)
{
    emit_byte(0x75);
}
3227
/* Emit only the JZ short-form opcode; caller emits the rel8 afterwards. */
static inline void raw_jz_b_oponly(void)
{
    emit_byte(0x74);
}
3232
/* Emit only the Jcc short-form opcode for condition cc; caller emits rel8. */
static inline void raw_jcc_b_oponly(int cc)
{
    emit_byte(0x70+cc);
}
3237
/* Emit only the JMP near-form opcode; caller emits the rel32 afterwards. */
static inline void raw_jmp_l_oponly(void)
{
    emit_byte(0xe9);
}
3242
/* Emit only the JMP short-form opcode; caller emits the rel8 afterwards. */
static inline void raw_jmp_b_oponly(void)
{
    emit_byte(0xeb);
}
3247
/* Near return (RET). */
static inline void raw_ret(void)
{
    emit_byte(0xc3);
}
3252
/* One-byte NOP. */
static inline void raw_nop(void)
{
    emit_byte(0x90);
}
3257
/* Emit exactly `nbytes` bytes of padding that execute as no-ops.
   x86_64: NOPs preceded by up to three 0x66 prefixes, sized so the nop
   count is balanced (total emitted == nbytes).
   IA-32: table of efficient multi-byte no-op sequences from GNU binutils;
   16-byte chunks first, then one pattern for the remainder. */
static inline void raw_emit_nop_filler(int nbytes)
{

#if defined(CPU_x86_64)
    /* The recommended way to pad 64bit code is to use NOPs preceded by
       maximally four 0x66 prefixes.  Balance the size of nops.  */
    static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
    if (nbytes == 0)
	return;

    int i;
    int nnops = (nbytes + 3) / 4;	/* number of padded NOPs */
    int len = nbytes / nnops;		/* base length of each unit */
    int remains = nbytes - nnops * len;	/* units that get one extra prefix */

    for (i = 0; i < remains; i++) {
	emit_block(prefixes, len);
	raw_nop();
    }
    for (; i < nnops; i++) {
	emit_block(prefixes, len - 1);
	raw_nop();
    }
#else
    /* Source: GNU Binutils 2.12.90.0.15 */
    /* Various efficient no-op patterns for aligning code labels.
       Note: Don't try to assemble the instructions in the comments.
       0L and 0w are not legal.  */
    static const uae_u8 f32_1[] =
	{0x90};									/* nop			*/
    static const uae_u8 f32_2[] =
	{0x89,0xf6};							/* movl %esi,%esi	*/
    static const uae_u8 f32_3[] =
	{0x8d,0x76,0x00};						/* leal 0(%esi),%esi	*/
    static const uae_u8 f32_4[] =
	{0x8d,0x74,0x26,0x00};					/* leal 0(%esi,1),%esi	*/
    static const uae_u8 f32_5[] =
	{0x90,									/* nop			*/
	 0x8d,0x74,0x26,0x00};					/* leal 0(%esi,1),%esi	*/
    static const uae_u8 f32_6[] =
	{0x8d,0xb6,0x00,0x00,0x00,0x00};		/* leal 0L(%esi),%esi	*/
    static const uae_u8 f32_7[] =
	{0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};	/* leal 0L(%esi,1),%esi */
    static const uae_u8 f32_8[] =
	{0x90,									/* nop			*/
	 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};	/* leal 0L(%esi,1),%esi */
    static const uae_u8 f32_9[] =
	{0x89,0xf6,								/* movl %esi,%esi	*/
	 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};	/* leal 0L(%edi,1),%edi */
    static const uae_u8 f32_10[] =
	{0x8d,0x76,0x00,						/* leal 0(%esi),%esi	*/
	 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};	/* leal 0L(%edi,1),%edi */
    static const uae_u8 f32_11[] =
	{0x8d,0x74,0x26,0x00,					/* leal 0(%esi,1),%esi	*/
	 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};	/* leal 0L(%edi,1),%edi */
    static const uae_u8 f32_12[] =
	{0x8d,0xb6,0x00,0x00,0x00,0x00,			/* leal 0L(%esi),%esi	*/
	 0x8d,0xbf,0x00,0x00,0x00,0x00};		/* leal 0L(%edi),%edi	*/
    static const uae_u8 f32_13[] =
	{0x8d,0xb6,0x00,0x00,0x00,0x00,			/* leal 0L(%esi),%esi	*/
	 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};	/* leal 0L(%edi,1),%edi */
    static const uae_u8 f32_14[] =
	{0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,	/* leal 0L(%esi,1),%esi */
	 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};	/* leal 0L(%edi,1),%edi */
    static const uae_u8 f32_15[] =
	{0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,	/* jmp .+15; lotsa nops	*/
	 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
    static const uae_u8 f32_16[] =
	{0xeb,0x0d,0x90,0x90,0x90,0x90,0x90,	/* jmp .+15; lotsa nops	*/
	 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
    static const uae_u8 *const f32_patt[] = {
	f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
	f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
    };

    int nloops = nbytes / 16;
    while (nloops-- > 0)
	emit_block(f32_16, sizeof(f32_16));

    nbytes %= 16;
    if (nbytes)
	emit_block(f32_patt[nbytes - 1], nbytes);
#endif
}
3342
3343
3344 /*************************************************************************
3345 * Flag handling, to and fro UAE flag register *
3346 *************************************************************************/
3347
/* Book-keeping after the emulated flags have been written back to memory:
   mark FLAGTMP as memory-resident and release native register r, which
   must have been holding FLAGTMP exclusively. */
static inline void raw_flags_evicted(int r)
{
    //live.state[FLAGTMP].status=CLEAN;
    live.state[FLAGTMP].status=INMEM;
    live.state[FLAGTMP].realreg=-1;
    /* We just "evicted" FLAGTMP. */
    if (live.nat[r].nholds!=1) {
	/* Huh? */
	abort();
    }
    live.nat[r].nholds=0;
}
3360
#define FLAG_NREG1_FLAGREG 0  /* Set to -1 if any register will do */
/* LAHF/SETcc variant: save host flags for the emulator. Most flags go to
   AH via LAHF; the overflow flag is stored separately with SETO directly
   into FLAGTMP's memory slot, then AH is written to the slot's 2nd byte
   (avoids partial-register stalls). Requires r == EAX. */
static inline void raw_flags_to_reg_FLAGREG(int r)
{
    raw_lahf(0); /* Most flags in AH */
    //raw_setcc(r,0); /* V flag in AL */
    raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);

#if 1   /* Let's avoid those nasty partial register stalls */
    //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
    raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
    raw_flags_evicted(r);
#endif
}
3374
#define FLAG_NREG2_FLAGREG 0  /* Set to -1 if any register will do */
/* SAHF variant: restore host flags from register r. The CMP against -127
   regenerates OF from the stored 0/1 V value before SAHF loads the rest. */
static inline void raw_reg_to_flags_FLAGREG(int r)
{
    raw_cmp_b_ri(r,-127); /* set V */
    raw_sahf(0);
}
3381
3382 #define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
raw_flags_set_zero_FLAGREG(int s,int tmp)3383 static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
3384 {
3385 raw_mov_l_rr(tmp,s);
3386 raw_lahf(s); /* flags into ah */
3387 raw_and_l_ri(s,0xffffbfff);
3388 raw_and_l_ri(tmp,0x00004000);
3389 raw_xor_l_ri(tmp,0x00004000);
3390 raw_or_l(s,tmp);
3391 raw_sahf(s);
3392 }
3393
raw_flags_init_FLAGREG(void)3394 static inline void raw_flags_init_FLAGREG(void) { }
3395
#define FLAG_NREG1_FLAGSTK -1  /* Set to -1 if any register will do */
/* PUSHF/POP variant: save host flags via the stack into register r,
   then store them to FLAGTMP's memory slot. */
static inline void raw_flags_to_reg_FLAGSTK(int r)
{
    raw_pushfl();
    raw_pop_l_r(r);
    raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
    raw_flags_evicted(r);
}
3404
#define FLAG_NREG2_FLAGSTK -1  /* Set to -1 if any register will do */
/* PUSH/POPF variant: restore host flags from register r via the stack. */
static inline void raw_reg_to_flags_FLAGSTK(int r)
{
    raw_push_l_r(r);
    raw_popfl();
}
3411
#define FLAG_NREG3_FLAGSTK -1  /* Set to -1 if any register will do */
/* Force the Z flag (bit 6 of EFLAGS) from tmp while preserving the other
   flags, using the PUSHF/POPF stack path. */
static inline void raw_flags_set_zero_FLAGSTK(int s, int tmp)
{
    raw_mov_l_rr(tmp,s);
    raw_pushfl();
    raw_pop_l_r(s);
    raw_and_l_ri(s,0xffffffbf);		/* clear ZF position */
    raw_and_l_ri(tmp,0x00000040);	/* isolate desired ZF */
    raw_xor_l_ri(tmp,0x00000040);	/* invert (Z is active-low here) */
    raw_or_l(s,tmp);
    raw_push_l_r(s);
    raw_popfl();
}
3425
raw_flags_init_FLAGSTK(void)3426 static inline void raw_flags_init_FLAGSTK(void) { }
3427
3428 #if defined(CPU_x86_64)
3429 /* Try to use the LAHF/SETO method on x86_64 since it is faster.
3430 This can't be the default because some older CPUs don't support
3431 LAHF/SAHF in long mode. */
static int FLAG_NREG1_FLAGGEN = 0;
/* Runtime-selected flag save: LAHF/SETO path when the CPU supports
   LAHF/SAHF in long mode, otherwise fall back to PUSHF/POPF. */
static inline void raw_flags_to_reg_FLAGGEN(int r)
{
    if (have_lahf_lm) {
	// NOTE: the interpreter uses the normal EFLAGS layout
	//   pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
	//   sahf/lahf  CF(8) ZF(14) SF(15) OF( 0)
	assert(r == 0);
	raw_setcc(r,0);			/* V flag in AL */
	raw_lea_l_r_scaled(0,0,8);	/* move it to its EFLAGS location */
	raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
	raw_lahf(0);			/* most flags in AH */
	raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
	raw_flags_evicted(r);
    }
    else
	raw_flags_to_reg_FLAGSTK(r);
}
3450
static int FLAG_NREG2_FLAGGEN = 0;
/* Runtime-selected flag restore, matching raw_flags_to_reg_FLAGGEN. */
static inline void raw_reg_to_flags_FLAGGEN(int r)
{
    if (have_lahf_lm) {
	raw_xchg_b_rr(0,AH_INDEX);
	raw_cmp_b_ri(r,-120); /* set V */
	raw_sahf(0);
    }
    else
	raw_reg_to_flags_FLAGSTK(r);
}
3462
static int FLAG_NREG3_FLAGGEN = 0;
/* Runtime-selected Z-flag forcing, matching the chosen save/restore path. */
static inline void raw_flags_set_zero_FLAGGEN(int s, int tmp)
{
    if (have_lahf_lm)
	raw_flags_set_zero_FLAGREG(s, tmp);
    else
	raw_flags_set_zero_FLAGSTK(s, tmp);
}
3471
raw_flags_init_FLAGGEN(void)3472 static inline void raw_flags_init_FLAGGEN(void)
3473 {
3474 if (have_lahf_lm) {
3475 FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
3476 FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
3477 FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGREG;
3478 }
3479 else {
3480 FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
3481 FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
3482 FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGSTK;
3483 }
3484 }
3485 #endif
3486
/* Select the flag-handling strategy at compile time and glue the generic
   raw_flags_* / FLAG_NREG* names to the chosen _FLAGREG/_FLAGGEN/_FLAGSTK
   implementations above. */
#ifdef SAHF_SETO_PROFITABLE
#define FLAG_SUFFIX FLAGREG
#elif defined CPU_x86_64
#define FLAG_SUFFIX FLAGGEN
#else
#define FLAG_SUFFIX FLAGSTK
#endif

#define FLAG_GLUE_2(x, y)		x ## _ ## y
#define FLAG_GLUE_1(x, y)		FLAG_GLUE_2(x, y)
#define FLAG_GLUE(x)			FLAG_GLUE_1(x, FLAG_SUFFIX)

#define raw_flags_init			FLAG_GLUE(raw_flags_init)
#define FLAG_NREG1			FLAG_GLUE(FLAG_NREG1)
#define raw_flags_to_reg		FLAG_GLUE(raw_flags_to_reg)
#define FLAG_NREG2			FLAG_GLUE(FLAG_NREG2)
#define raw_reg_to_flags		FLAG_GLUE(raw_reg_to_flags)
#define FLAG_NREG3			FLAG_GLUE(FLAG_NREG3)
#define raw_flags_set_zero		FLAG_GLUE(raw_flags_set_zero)
3506
3507 /* Apparently, there are enough instructions between flag store and
3508 flag reload to avoid the partial memory stall */
/* Load the emulated flag register r from its memory slot into native
   register `target` (32-bit load; the byte-wise variant is disabled). */
static inline void raw_load_flagreg(uae_u32 target, uae_u32 r)
{
#if 1
    raw_mov_l_rm(target,(uintptr)live.state[r].mem);
#else
    raw_mov_b_rm(target,(uintptr)live.state[r].mem);
    raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
#endif
}
3518
3519 #ifdef UAE
3520 /* FLAGX is word-sized */
3521 #else
3522 /* FLAGX is byte sized, and we *do* write it at that size */
3523 #endif
/* Load the FLAGX backing store for virtual register r into host register
   `target`, using the narrowest access the target register allows
   (byte where permitted in the non-UAE build, else word, else long). */
static inline void raw_load_flagx(uae_u32 target, uae_u32 r)
{
#ifdef UAE
	if (live.nat[target].canword)
#else
	if (live.nat[target].canbyte)
		raw_mov_b_rm(target,(uintptr)live.state[r].mem);
	else if (live.nat[target].canword)
#endif
		raw_mov_w_rm(target,(uintptr)live.state[r].mem);
	else
		raw_mov_l_rm(target,(uintptr)live.state[r].mem);
}
3537
raw_dec_sp(int off)3538 static inline void raw_dec_sp(int off)
3539 {
3540 if (off) {
3541 #ifdef CPU_x86_64
3542 emit_byte(0x48); /* REX prefix */
3543 #endif
3544 raw_sub_l_ri(ESP_INDEX,off);
3545 }
3546 }
3547
raw_inc_sp(int off)3548 static inline void raw_inc_sp(int off)
3549 {
3550 if (off) {
3551 #ifdef CPU_x86_64
3552 emit_byte(0x48); /* REX prefix */
3553 #endif
3554 raw_add_l_ri(ESP_INDEX,off);
3555 }
3556 }
3557
raw_push_regs_to_preserve(void)3558 static inline void raw_push_regs_to_preserve(void) {
3559 for (int i=N_REGS;i--;) {
3560 if (need_to_preserve[i])
3561 raw_push_l_r(i);
3562 }
3563 }
3564
raw_pop_preserved_regs(void)3565 static inline void raw_pop_preserved_regs(void) {
3566 for (int i=0;i<N_REGS;i++) {
3567 if (need_to_preserve[i])
3568 raw_pop_l_r(i);
3569 }
3570 }
3571
3572 /*************************************************************************
3573 * Handling mistaken direct memory access (removed from ARAnyM sources) *
3574 *************************************************************************/
3575
3576 #ifdef UAE
3577 #include "exception_handler.cpp"
3578 #endif
3579
/* Log the bounds of the JIT translation cache: where compiled code starts,
   the current emit position, and the number of bytes used so far. */
static
void compiler_status() {
	jit_log("compiled code starts at %p, current at %p (size 0x%x)", compiled_code, current_compile_p, (unsigned int)(current_compile_p - compiled_code));
}
3584
3585 /*************************************************************************
3586 * Checking for CPU features *
3587 *************************************************************************/
3588
/* Host-CPU identification, filled in once by raw_init_cpu(). */
struct cpuinfo_x86 {
	uae_u8 x86; // CPU family
	uae_u8 x86_vendor; // CPU vendor
	uae_u8 x86_processor; // CPU canonical processor type
	uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
	uae_u32 x86_hwcap; // Feature bits from CPUID leaf 1 (edx)
	uae_u8 x86_model; // CPU model (incl. extended model bits)
	uae_u8 x86_mask; // CPU stepping
	bool x86_has_xmm2; // SSE2 available (leaf 1 edx bit 26)
	int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
	char x86_vendor_id[16]; // 12-char CPUID vendor string + NUL
	uintptr x86_clflush_size; // Cache-line size in bytes
};
struct cpuinfo_x86 cpuinfo;
3603
/* CPU vendors recognized by x86_get_cpu_vendor(). */
enum {
	X86_VENDOR_INTEL = 0,
	X86_VENDOR_CYRIX = 1,
	X86_VENDOR_AMD = 2,
	X86_VENDOR_UMC = 3,
	X86_VENDOR_NEXGEN = 4,
	X86_VENDOR_CENTAUR = 5,
	X86_VENDOR_RISE = 6,
	X86_VENDOR_TRANSMETA = 7,
	X86_VENDOR_NSC = 8,
	X86_VENDOR_UNKNOWN = 0xff
};
3616
/* Canonical processor types.  Order matters: this enum indexes both
   x86_processor_string_table[] and x86_alignments[]. */
enum {
	X86_PROCESSOR_I386, /* 80386 */
	X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
	X86_PROCESSOR_PENTIUM,
	X86_PROCESSOR_PENTIUMPRO,
	X86_PROCESSOR_K6,
	X86_PROCESSOR_ATHLON,
	X86_PROCESSOR_PENTIUM4,
	X86_PROCESSOR_X86_64,
	X86_PROCESSOR_max
};
3628
/* Human-readable names, indexed by the X86_PROCESSOR_* enum
   (entries must stay in the same order). */
static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
	"80386",
	"80486",
	"Pentium",
	"PentiumPro",
	"K6",
	"Athlon",
	"Pentium4",
	"x86-64"
};
3639
/* Per-processor code alignment tuning, indexed by X86_PROCESSOR_*
   (rows must stay in enum order).  Used when tune_alignment is set. */
static struct ptt {
	const int align_loop;
	const int align_loop_max_skip;
	const int align_jump;
	const int align_jump_max_skip;
	const int align_func;
}
x86_alignments[X86_PROCESSOR_max] = {
	{ 4, 3, 4, 3, 4 }, /* 80386 */
	{ 16, 15, 16, 15, 16 }, /* 80486 */
	{ 16, 7, 16, 7, 16 }, /* Pentium */
	{ 16, 15, 16, 7, 16 }, /* PentiumPro */
	{ 32, 7, 32, 7, 32 }, /* K6 */
	{ 16, 7, 16, 7, 16 }, /* Athlon */
	{ 0, 0, 0, 0, 0 }, /* Pentium4 */
	{ 16, 7, 16, 7, 16 } /* x86-64 */
};
3657
3658 static void
x86_get_cpu_vendor(struct cpuinfo_x86 * c)3659 x86_get_cpu_vendor(struct cpuinfo_x86 *c)
3660 {
3661 char *v = c->x86_vendor_id;
3662
3663 if (!strcmp(v, "GenuineIntel"))
3664 c->x86_vendor = X86_VENDOR_INTEL;
3665 else if (!strcmp(v, "AuthenticAMD"))
3666 c->x86_vendor = X86_VENDOR_AMD;
3667 else if (!strcmp(v, "CyrixInstead"))
3668 c->x86_vendor = X86_VENDOR_CYRIX;
3669 else if (!strcmp(v, "Geode by NSC"))
3670 c->x86_vendor = X86_VENDOR_NSC;
3671 else if (!strcmp(v, "UMC UMC UMC "))
3672 c->x86_vendor = X86_VENDOR_UMC;
3673 else if (!strcmp(v, "CentaurHauls"))
3674 c->x86_vendor = X86_VENDOR_CENTAUR;
3675 else if (!strcmp(v, "NexGenDriven"))
3676 c->x86_vendor = X86_VENDOR_NEXGEN;
3677 else if (!strcmp(v, "RiseRiseRise"))
3678 c->x86_vendor = X86_VENDOR_RISE;
3679 else if (!strcmp(v, "GenuineTMx86") ||
3680 !strcmp(v, "TransmetaCPU"))
3681 c->x86_vendor = X86_VENDOR_TRANSMETA;
3682 else
3683 c->x86_vendor = X86_VENDOR_UNKNOWN;
3684 }
3685
3686 /*
3687 * Generic CPUID function
3688 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
3689 * resulting in stale register contents being returned.
3690 */
3691 /* Some CPUID calls want 'count' to be placed in ecx */
#ifdef __GNUC__
/* Execute CPUID leaf `op` with sub-leaf `count` in ecx, returning all four
   result registers.  Values are passed through memory operands so the
   compiler never needs to keep ebx free across the asm (ebx may be
   reserved, e.g. as the PIC register on ia32). */
static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
{
	uae_u32 _eax, _ebx, _ecx, _edx;
	_eax = op;
	_ecx = count;
	__asm__ __volatile__(
	"   movl %0,%%eax \n"
	"   movl %2,%%ecx \n"
	"   cpuid \n"
	"   movl %%eax,%0 \n"
	"   movl %%ebx,%1 \n"
	"   movl %%ecx,%2 \n"
	"   movl %%edx,%3 \n"
		: "+m" (_eax),
		  "=m" (_ebx),
		  "+m" (_ecx),
		  "=m" (_edx)
		:
		: "eax", "ebx", "ecx", "edx");
	*eax = _eax;
	*ebx = _ebx;
	*ecx = _ecx;
	*edx = _edx;
}
#endif
3718
#ifdef _MSC_VER
/* MSVC variant: use the __cpuidex intrinsic, which takes the sub-leaf
   (ecx) explicitly.  The pre-fill of cpuinfo[] is redundant since
   __cpuidex overwrites all four slots, but is kept for clarity. */
static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
{
	int cpuinfo[4];
	cpuinfo[0] = op;
	cpuinfo[1] = 0;
	cpuinfo[2] = count;
	cpuinfo[3] = 0;
	__cpuidex(cpuinfo, op, count);
	*eax = cpuinfo[0];
	*ebx = cpuinfo[1];
	*ecx = cpuinfo[2];
	*edx = cpuinfo[3];
}
#endif
3734
/* Basic CPUID: execute leaf `op` with sub-leaf 0.  Explicitly zeroing ecx
   matters because some CPUs (e.g. Cyrix MII) leave it untouched, which
   would otherwise return stale register contents (see note above). */
static void
cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
{
	cpuid_count(op, 0, eax, ebx, ecx, edx);
}
3740
/* Probe the host CPU via CPUID and fill the global `cpuinfo`:
   vendor, family/model/stepping, feature flags (CMOV, SSE2, LAHF in
   long mode), cache-line size, and code-alignment tuning.  Finally
   selects the flag-handling scheme via raw_flags_init(). */
static void
raw_init_cpu(void)
{
	struct cpuinfo_x86 *c = &cpuinfo;
	uae_u32 dummy;

	/* Defaults */
	c->x86_processor = X86_PROCESSOR_max;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->cpuid_level = -1;				/* CPUID not detected */
	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0';			/* Unset */
	c->x86_hwcap = 0;
#ifdef CPU_x86_64
	c->x86_clflush_size = 64;
#else
	c->x86_clflush_size = 32;
#endif

	/* Get vendor name.  CPUID leaf 0 returns the 12-byte vendor string
	   in ebx:edx:ecx, hence the 0/8/4 destination offsets below. */
	c->x86_vendor_id[12] = '\0';
	cpuid(0x00000000,
			(uae_u32 *)&c->cpuid_level,
			(uae_u32 *)&c->x86_vendor_id[0],
			(uae_u32 *)&c->x86_vendor_id[8],
			(uae_u32 *)&c->x86_vendor_id[4]);
	x86_get_cpu_vendor(c);

	/* Intel-defined flags: level 0x00000001 */
	c->x86_brand_id = 0;
	if ( c->cpuid_level >= 0x00000001 ) {
		uae_u32 tfms, brand_id;
		cpuid(0x00000001, &tfms, &brand_id, &dummy, &c->x86_hwcap);
		c->x86 = (tfms >> 8) & 15;
		if (c->x86 == 0xf)
			c->x86 += (tfms >> 20) & 0xff; /* extended family */
		c->x86_model = (tfms >> 4) & 15;
		if (c->x86_model == 0xf)
			c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
		c->x86_brand_id = brand_id & 0xff;
		c->x86_mask = tfms & 15;
		/* edx bit 19 = CLFLUSH supported; line size is ebx[15:8] in
		   units of 8 bytes. */
		if (c->x86_hwcap & (1 << 19))
		{
			c->x86_clflush_size = ((brand_id >> 8) & 0xff) * 8;
		}
	} else {
		/* Have CPUID level 0 only - unheard of */
		c->x86 = 4;
	}

	/* AMD-defined flags: level 0x80000001 */
	uae_u32 xlvl;
	cpuid(0x80000000, &xlvl, &dummy, &dummy, &dummy);
	if ( (xlvl & 0xffff0000) == 0x80000000 ) {
		if ( xlvl >= 0x80000001 ) {
			uae_u32 features, extra_features;
			cpuid(0x80000001, &dummy, &dummy, &extra_features, &features);
			/* edx bit 29 = LM (64-bit long mode) */
			if (features & (1 << 29)) {
				/* Assume x86-64 if long mode is supported */
				c->x86_processor = X86_PROCESSOR_X86_64;
			}
			/* ecx bit 0 = LAHF/SAHF usable in 64-bit mode */
			if (extra_features & (1 << 0))
				have_lahf_lm = true;
		}
	}

	/* Canonicalize processor ID */
	switch (c->x86) {
	case 3:
		c->x86_processor = X86_PROCESSOR_I386;
		break;
	case 4:
		c->x86_processor = X86_PROCESSOR_I486;
		break;
	case 5:
		if (c->x86_vendor == X86_VENDOR_AMD)
			c->x86_processor = X86_PROCESSOR_K6;
		else
			c->x86_processor = X86_PROCESSOR_PENTIUM;
		break;
	case 6:
		if (c->x86_vendor == X86_VENDOR_AMD)
			c->x86_processor = X86_PROCESSOR_ATHLON;
		else
			c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
		break;
	case 15:
		/* Only override family-15 detection if long mode did not
		   already mark this as x86-64. */
		if (c->x86_processor == X86_PROCESSOR_max) {
			switch (c->x86_vendor) {
			case X86_VENDOR_INTEL:
				c->x86_processor = X86_PROCESSOR_PENTIUM4;
				break;
			case X86_VENDOR_AMD:
				/* Assume a 32-bit Athlon processor if not in long mode */
				c->x86_processor = X86_PROCESSOR_ATHLON;
				break;
			}
		}
		break;
	}
	if (c->x86_processor == X86_PROCESSOR_max) {
		/* Unrecognized: fall back to the most conservative type and
		   dump what we found for diagnosis. */
		c->x86_processor = X86_PROCESSOR_I386;
		jit_log("Error: unknown processor type");
		jit_log("  Family  : %d", c->x86);
		jit_log("  Model   : %d", c->x86_model);
		jit_log("  Mask    : %d", c->x86_mask);
		jit_log("  Vendor  : %s [%d]", c->x86_vendor_id, c->x86_vendor);
		if (c->x86_brand_id)
			jit_log("  BrandID : %02x", c->x86_brand_id);
	}

	/* Have CMOV support?  (leaf 1 edx bit 15) */
	have_cmov = (c->x86_hwcap & (1 << 15)) != 0;
#if defined(CPU_x86_64)
	if (!have_cmov) {
		jit_abort("x86-64 implementations are bound to have CMOV!");
	}
#endif

	/* SSE2 = leaf 1 edx bit 26 */
	c->x86_has_xmm2 = (c->x86_hwcap & (1 << 26)) != 0;

	/* Can the host CPU suffer from partial register stalls? */
	// non-RAT_STALL mode is currently broken
	have_rat_stall = true; //(c->x86_vendor == X86_VENDOR_INTEL);
#if 0
	/* It appears that partial register writes are a bad idea even on
	   AMD K7 cores, even though they are not supposed to have the
	   dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
	if (c->x86_processor == X86_PROCESSOR_ATHLON)
		have_rat_stall = true;
#endif

	/* Alignments */
	if (tune_alignment) {
		align_loops = x86_alignments[c->x86_processor].align_loop;
		align_jumps = x86_alignments[c->x86_processor].align_jump;
	}

	jit_log("Max CPUID level=%d Processor is %s [%s]",
			c->cpuid_level, c->x86_vendor_id,
			x86_processor_string_table[c->x86_processor]);

	raw_flags_init();
}
3885
3886 #if 0
3887 static void __attribute_noinline__ prevent_redzone_use(void) {}
3888
3889 static bool target_check_bsf(void)
3890 {
3891 bool mismatch = false;
3892 for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
3893 for (int g_CF = 0; g_CF <= 1; g_CF++) {
3894 for (int g_OF = 0; g_OF <= 1; g_OF++) {
3895 for (int g_SF = 0; g_SF <= 1; g_SF++) {
3896 for (int value = -1; value <= 1; value++) {
3897 uintptr flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
3898 intptr tmp = value;
3899 prevent_redzone_use();
3900 __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
3901 : "+r" (flags), "+r" (tmp) : : "cc");
3902 int OF = (flags >> 11) & 1;
3903 int SF = (flags >> 7) & 1;
3904 int ZF = (flags >> 6) & 1;
3905 int CF = flags & 1;
3906 tmp = (value == 0);
3907 if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
3908 mismatch = true;
3909 }
3910 }}}}
3911 if (mismatch)
3912 {
3913 jit_log("Target CPU defines all flags on BSF instruction");
3914 }
3915 return !mismatch;
3916 }
3917 #endif
3918
3919 /*************************************************************************
3920 * FPU stuff *
3921 *************************************************************************/
3922
3923
raw_fp_init(void)3924 static inline void raw_fp_init(void)
3925 {
3926 int i;
3927
3928 for (i=0;i<N_FREGS;i++)
3929 live.spos[i]=-2;
3930 live.tos=-1; /* Stack is empty */
3931 }
3932
/* Pop every value off the x87 stack and reset the stack model.
   Pops pairwise (fcompp) while two or more entries remain, then pops
   the final single entry (fstp st(0)), rather than using FINIT. */
static inline void raw_fp_cleanup_drop(void)
{
#if 0
	/* using FINIT instead of popping all the entries.
	   Seems to have side effects --- there is display corruption in
	   Quake when this is used */
	if (live.tos>1) {
		emit_byte(0x9b);
		emit_byte(0xdb);
		emit_byte(0xe3);
		live.tos=-1;
	}
#endif
	while (live.tos>=1) {
		/* 0xde 0xd9 = fcompp: pop two stack entries at once */
		emit_byte(0xde);
		emit_byte(0xd9);
		live.tos-=2;
	}
	while (live.tos>=0) {
		/* 0xdd 0xd8 = fstp st(0): pop the remaining entry */
		emit_byte(0xdd);
		emit_byte(0xd8);
		live.tos--;
	}
	raw_fp_init();
}
3958
/* Ensure fp register r is at the top of the x87 stack.
   If r is not on the stack, grow the stack by pushing a dummy value
   (fld1) and claim that slot for r.  Otherwise exchange r's slot with
   the current top (fxch) and swap the bookkeeping entries. */
static inline void make_tos(int r)
{
	int p,q;

	if (live.spos[r]<0) { /* Register not yet on stack */
		emit_byte(0xd9);
		emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
		live.tos++;
		live.spos[r]=live.tos;
		live.onstack[live.tos]=r;
		return;
	}
	/* Register is on stack */
	if (live.tos==live.spos[r])
		return;
	p=live.spos[r];
	q=live.onstack[live.tos];

	emit_byte(0xd9);
	emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
	live.onstack[live.tos]=r;	/* r and the old top swap places */
	live.spos[r]=live.tos;
	live.onstack[p]=q;
	live.spos[q]=p;
}
3984
/* Arrange the x87 stack so that r is at the top and r2 directly below it
   (st(0)=r, st(1)=r2). */
static inline void make_tos2(int r, int r2)
{
	int q;

	make_tos(r2); /* Put the reg that's supposed to end up in position2
					 on top */

	if (live.spos[r]<0) { /* Register not yet on stack */
		make_tos(r); /* This will extend the stack */
		return;
	}
	/* Register is on stack */
	emit_byte(0xd9);
	emit_byte(0xc9); /* Move r2 into position 2 (fxch st(1)) */

	/* Update bookkeeping: r2 and whatever was at tos-1 swap slots */
	q=live.onstack[live.tos-1];
	live.onstack[live.tos]=q;
	live.spos[q]=live.tos;
	live.onstack[live.tos-1]=r2;
	live.spos[r2]=live.tos-1;

	make_tos(r); /* And r into 1 */
}
4008
stackpos(int r)4009 static inline int stackpos(int r)
4010 {
4011 if (live.spos[r]<0)
4012 abort();
4013 if (live.tos<live.spos[r]) {
4014 jit_abort("Looking for spos for fnreg %d",r);
4015 }
4016 return live.tos-live.spos[r];
4017 }
4018
4019 /* IMO, calling usereg(r) makes no sense, if the register r should supply our function with
4020 an argument, because I would expect all arguments to be on the stack already, won't they?
4021 Thus, usereg(s) is always useless and also for every FRW d it's too late here now. PeterK
4022 */
/* Ensure fp register r is live on the x87 stack, pushing it to the top
   if it was not present.  (See the note above questioning whether this
   is ever needed for source operands.) */
static inline void usereg(int r)
{
	if (live.spos[r]<0)
		make_tos(r);
}
4028
4029 /* This is called with one FP value in a reg *above* tos, which it will
4030 pop off the stack if necessary */
/* Consume the FP value currently sitting one slot *above* the modelled
   top of stack and make it register r: if r has no stack slot, simply
   claim the new top slot for it; otherwise store the value into r's
   existing slot and pop (fstp). */
static inline void tos_make(int r)
{
	if (live.spos[r]<0) {
		live.tos++;
		live.spos[r]=live.tos;
		live.onstack[live.tos]=r;
		return;
	}
	emit_byte(0xdd);
	emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
												  and pop it*/
}
4043
4044 /* FP helper functions */
/* One-operand x87 memory operations (loads/stores of single, double,
   extended, and integer values at an absolute address).  With
   USE_NEW_RTASM they are routed through the new assembler generators;
   otherwise the raw opcode pair plus a 32-bit absolute address is
   emitted directly. */
#if USE_NEW_RTASM
#define DEFINE_OP(NAME, GEN)			\
static inline void raw_##NAME(uint32 m)	\
{										\
	GEN(m, X86_NOREG, X86_NOREG, 1);	\
}
DEFINE_OP(fstl, FSTLm);
DEFINE_OP(fstpl, FSTPLm);
DEFINE_OP(fldl, FLDLm);
DEFINE_OP(fildl, FILDLm);
DEFINE_OP(fistl, FISTLm);
DEFINE_OP(flds, FLDSm);
DEFINE_OP(fsts, FSTSm);
DEFINE_OP(fstpt, FSTPTm);
DEFINE_OP(fldt, FLDTm);
DEFINE_OP(fistpl, FISTPLm);
#else
#define DEFINE_OP(NAME, OP1, OP2)		\
static inline void raw_##NAME(uint32 m)	\
{										\
	emit_byte(OP1);						\
	emit_byte(OP2);						\
	emit_long(m);						\
}
DEFINE_OP(fstl, 0xdd, 0x15);
DEFINE_OP(fstpl, 0xdd, 0x1d);
DEFINE_OP(fldl, 0xdd, 0x05);
DEFINE_OP(fildl, 0xdb, 0x05);
DEFINE_OP(fistl, 0xdb, 0x15);
DEFINE_OP(flds, 0xd9, 0x05);
DEFINE_OP(fsts, 0xd9, 0x15);
DEFINE_OP(fstpt, 0xdb, 0x3d);
DEFINE_OP(fldt, 0xdb, 0x2d);
DEFINE_OP(fistpl, 0xdb, 0x1d);
#endif
#undef DEFINE_OP
4081
/* Store fp reg r to memory m as a 64-bit double (register stays live). */
LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
{
	make_tos(r);
	raw_fstl(m);
}
LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4088
4089 LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
4090 {
4091 make_tos(r);
4092 raw_fstpl(m);
4093 live.onstack[live.tos]=-1;
4094 live.tos--;
4095 live.spos[r]=-2;
4096 }
4097 LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
4098
/* Load a 64-bit double from memory m into fp reg r. */
LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
{
	raw_fldl(m);
	tos_make(r);
}
LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
4105
/* Load a 32-bit integer from memory m into fp reg r (fild). */
LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
{
	raw_fildl(m);
	tos_make(r);
}
LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
4112
/* Store fp reg r to memory m as a 32-bit integer (fist). */
LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
{
	make_tos(r);
	raw_fistl(m);
}
LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
4119
/* Store fp reg r to memory m as a 32-bit integer, first clamping it to
   [bounds[0], bounds[1]].  Uses fcomi/fcmovnbe, so this requires a CPU
   with CMOV support. */
LOWFUNC(NONE,WRITE,3,raw_fmovi_mrb,(MEMW m, FR r, double *bounds))
{
	/* Clamp value to the given range and convert to integer. */

	int rs;
	usereg(r);
	rs = stackpos(r)+1;	/* +1: the loads below push one extra entry */

	/* Lower bound onto stack */
	raw_fldl((uintptr) &bounds[0]); /* fld double from lower */

	/* Clamp to lower */
	emit_byte(0xdb);
	emit_byte(0xf0+rs); /* fcomi lower,r */
	emit_byte(0x73);
	emit_byte(12); /* jae to writeback: skip the 12 bytes up to raw_fistpl */

	/* Upper bound onto stack */
	emit_byte(0xdd);
	emit_byte(0xd8); /* fstp st(0) */
	raw_fldl((uintptr) &bounds[1]); /* fld double from upper */

	/* Clamp to upper */
	emit_byte(0xdb);
	emit_byte(0xf0+rs); /* fcomi upper,r */
	emit_byte(0xdb);
	emit_byte(0xd0+rs); /* fcmovnbe upper,r */

	/* Store to destination */
	raw_fistpl(m);
}
LENDFUNC(NONE,WRITE,3,raw_fmovi_mrb,(MEMW m, FR r, double *bounds))
4152
/* Load a 32-bit single-precision float from memory m into fp reg r. */
LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
{
	raw_flds(m);
	tos_make(r);
}
LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
4159
/* Store fp reg r to memory m as a 32-bit single-precision float. */
LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
{
	make_tos(r);
	raw_fsts(m);
}
LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
4166
/* Store fp reg r to memory m as an 80-bit extended value, keeping r
   live: the x87 can only store extended precision with a pop (fstp),
   so a copy of r is pushed first and that copy is what gets popped. */
LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
{
	int rs;

	/* Stupid x87 can't write a long double to mem without popping the
	   stack! */
	usereg(r);
	rs=stackpos(r);
	emit_byte(0xd9); /* Get a copy to the top of stack */
	emit_byte(0xc0+rs);

	raw_fstpt(m); /* store and pop it */
}
LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4181
4182 LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
4183 {
4184 make_tos(r);
4185 raw_fstpt(m); /* store and pop it */
4186 live.onstack[live.tos]=-1;
4187 live.tos--;
4188 live.spos[r]=-2;
4189 }
4190 LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
4191
/* Load an 80-bit extended value from memory m into fp reg r. */
LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
{
	raw_fldt(m);
	tos_make(r);
}
LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
4198
/* Load the constant pi into fp reg r (fldpi). */
LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xeb);	/* fldpi */
	tos_make(r);
}
LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
4206
/* Load the constant log10(2) into fp reg r (fldlg2). */
LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xec);	/* fldlg2 */
	tos_make(r);
}
LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
4214
/* Load the constant log2(e) into fp reg r (fldl2e). */
LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xea);	/* fldl2e */
	tos_make(r);
}
LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
4222
/* Load the constant ln(2) into fp reg r (fldln2). */
LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xed);	/* fldln2 */
	tos_make(r);
}
LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
4230
/* Load the constant 1.0 into fp reg r (fld1). */
LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xe8);	/* fld1 */
	tos_make(r);
}
LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
4238
/* Load the constant 0.0 into fp reg r (fldz). */
LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
{
	emit_byte(0xd9);
	emit_byte(0xee);	/* fldz */
	tos_make(r);
}
LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
4246
/* Copy fp reg s into fp reg d.  Fast path: when s is already on top of
   the stack and d already has a slot, a single fst st(dd) suffices;
   otherwise s is duplicated to the top and stored into d. */
LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
{
	int ds;

	usereg(s);
	ds=stackpos(s);
	if (ds==0 && live.spos[d]>=0) {
		/* source is on top of stack, and we already have the dest */
		int dd=stackpos(d);
		emit_byte(0xdd);
		emit_byte(0xd0+dd);	/* fst st(dd) */
	}
	else {
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source on tos */
		tos_make(d); /* store to destination, pop if necessary */
	}
}
LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
4266
/* Load the x87 control word from memory at base+index (fldcw).  The
   x86-64 prefix helper emits a REX byte if the index register needs one. */
LOWFUNC(NONE,READ,2,raw_fldcw_m_indexed,(R4 index, IMM base))
{
	x86_64_prefix(true, false, NULL, NULL, &index);
	emit_byte(0xd9);
	emit_byte(0xa8 + index);	/* fldcw [index + disp32] */
	emit_long(base);
}
LENDFUNC(NONE,READ,2,raw_fldcw_m_indexed,(R4 index, IMM base))
4275
/* d = sqrt(s).  In-place when d == s; otherwise duplicate s to the top,
   apply fsqrt, and store into d. */
LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		usereg(s);
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source */
		emit_byte(0xd9);
		emit_byte(0xfa); /* take square root */
		tos_make(d); /* store to destination */
	}
	else {
		make_tos(d);
		emit_byte(0xd9);
		emit_byte(0xfa); /* take square root */
	}
}
LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
4296
/* d = fabs(s).  In-place when d == s; otherwise via a duplicated copy. */
LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		usereg(s);
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source */
		emit_byte(0xd9);
		emit_byte(0xe1); /* take fabs */
		tos_make(d); /* store to destination */
	}
	else {
		make_tos(d);
		emit_byte(0xd9);
		emit_byte(0xe1); /* take fabs */
	}
}
LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
4317
/* d = round-to-integer(s) using the current x87 rounding mode (frndint).
   In-place when d == s; otherwise via a duplicated copy. */
LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		usereg(s);
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source */
		emit_byte(0xd9);
		emit_byte(0xfc); /* take frndint */
		tos_make(d); /* store to destination */
	}
	else {
		make_tos(d);
		emit_byte(0xd9);
		emit_byte(0xfc); /* take frndint */
	}
}
LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
4338
/* d = cos(s).  In-place when d == s; otherwise via a duplicated copy. */
LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		usereg(s);
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source */
		emit_byte(0xd9);
		emit_byte(0xff); /* take cos */
		tos_make(d); /* store to destination */
	}
	else {
		make_tos(d);
		emit_byte(0xd9);
		emit_byte(0xff); /* take cos */
	}
}
LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
4359
/* d = sin(s).  In-place when d == s; otherwise via a duplicated copy.
   NOTE(review): unlike raw_fcos_rr this does not call usereg(s) before
   stackpos(s), so it appears to rely on s already being live on the
   stack — confirm against callers before changing. */
LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* fld x */
		emit_byte(0xd9);
		emit_byte(0xfe); /* fsin sin(x) */
		tos_make(d); /* store to destination */
	}
	else {
		make_tos(d);
		emit_byte(0xd9);
		emit_byte(0xfe); /* fsin y=sin(x) */
	}
}
LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
4379
4380 static const double one = 1;
4381
/* d = 2^s, computed as 2^frac(x) * 2^int(x): f2xm1 only accepts
   arguments in [-1, 1], so the integer part is split off first and
   recombined with fscale. */
LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
{
	int ds;

	ds=stackpos(s);
	emit_byte(0xd9);
	emit_byte(0xc0+ds); /* fld x */
	emit_byte(0xd9);
	emit_byte(0xfc); /* frndint int(x) */
	emit_byte(0xd9);
	emit_byte(0xc1+ds); /* fld x again (x moved down one slot after the push) */
	emit_byte(0xd8);
	emit_byte(0xe1); /* fsub frac(x) = x - int(x) */
	emit_byte(0xd9);
	emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one); /* Add '1' without using extra stack space */
	emit_byte(0xd9);
	emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x) */
	emit_byte(0xdd);
	emit_byte(0xd9); /* fstp copy & pop */
	tos_make(d); /* store y=2^x */
}
LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
4405
/* d = e^s, via the identity e^x = 2^(x*log2(e)): multiply by log2(e),
   split into integer and fractional parts, apply f2xm1 to the fraction,
   and recombine with fscale. */
LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source */
	}
	emit_byte(0xd9);
	emit_byte(0xea); /* fldl2e log2(e) */
	emit_byte(0xd8);
	emit_byte(0xc9); /* fmul x*log2(e) */
	emit_byte(0xdd);
	emit_byte(0xd1); /* fst copy up */
	emit_byte(0xd9);
	emit_byte(0xfc); /* frndint int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9); /* fxch swap top two elements */
	emit_byte(0xd8);
	emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one); /* Add '1' without using extra stack space */
	emit_byte(0xd9);
	emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
	emit_byte(0xdd);
	emit_byte(0xd9); /* fstp copy & pop */
	if (s!=d)
		tos_make(d); /* store y=e^x */
}
LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
4440
/* d = log2(s), computed via fyl2x as 1 * log2(x). */
LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source */
	}
	emit_byte(0xd9);
	emit_byte(0xe8); /* push '1' */
	emit_byte(0xd9);
	emit_byte(0xc9); /* swap top two */
	emit_byte(0xd9);
	emit_byte(0xf1); /* take 1*log2(x) */
	if (s!=d)
		tos_make(d); /* store to destination */
}
LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
4462
4463
/* d = -s (fchs).  In-place when d == s; otherwise via a duplicated copy. */
LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		usereg(s);
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* duplicate source */
		emit_byte(0xd9);
		emit_byte(0xe0); /* take fchs */
		tos_make(d); /* store to destination */
	}
	else {
		make_tos(d);
		emit_byte(0xd9);
		emit_byte(0xe0); /* take fchs */
	}
}
LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
4484
/* d += s.  If s is already on top, add it into d's slot (fadd st(i),st);
   otherwise bring d to the top and add s into it (fadd st,st(i)). */
LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
{
	int ds;

	usereg(s);
	usereg(d);

	if (live.spos[s]==live.tos) {
		/* Source is on top of stack */
		ds=stackpos(d);
		emit_byte(0xdc);
		emit_byte(0xc0+ds); /* add source to dest*/
	}
	else {
		make_tos(d);
		ds=stackpos(s);

		emit_byte(0xd8);
		emit_byte(0xc0+ds); /* add source to dest*/
	}
}
LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
4507
/* d -= s.  Same slot strategy as raw_fadd_rr; the two opcode forms
   account for the x87's reversed operand encodings for subtraction. */
LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
{
	int ds;

	usereg(s);
	usereg(d);

	if (live.spos[s]==live.tos) {
		/* Source is on top of stack */
		ds=stackpos(d);
		emit_byte(0xdc);
		emit_byte(0xe8+ds); /* sub source from dest*/
	}
	else {
		make_tos(d);
		ds=stackpos(s);

		emit_byte(0xd8);
		emit_byte(0xe0+ds); /* sub src from dest */
	}
}
LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
4530
/* Compare d with s: bring d to the top of the stack, then compare it
   against s's slot.  Result lands in the x87 status word. */
LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
{
	int ds;

	usereg(s);
	usereg(d);

	make_tos(d);
	ds=stackpos(s);

	emit_byte(0xdd);
	emit_byte(0xe0+ds); /* cmp dest with source*/
}
LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
4545
/* d *= s.  Same slot strategy as raw_fadd_rr. */
LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
{
	int ds;

	usereg(s);
	usereg(d);

	if (live.spos[s]==live.tos) {
		/* Source is on top of stack */
		ds=stackpos(d);
		emit_byte(0xdc);
		emit_byte(0xc8+ds); /* mul dest by source*/
	}
	else {
		make_tos(d);
		ds=stackpos(s);

		emit_byte(0xd8);
		emit_byte(0xc8+ds); /* mul dest by source*/
	}
}
LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
4568
/* d /= s.  Same slot strategy as raw_fsub_rr (division also has the
   x87's reversed operand encodings for the two forms). */
LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
{
	int ds;

	usereg(s);
	usereg(d);

	if (live.spos[s]==live.tos) {
		/* Source is on top of stack */
		ds=stackpos(d);
		emit_byte(0xdc);
		emit_byte(0xf8+ds); /* div dest by source */
	}
	else {
		make_tos(d);
		ds=stackpos(s);

		emit_byte(0xd8);
		emit_byte(0xf0+ds); /* div dest by source*/
	}
}
LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
4591
4592 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4593 {
4594 int ds;
4595
4596 usereg(s);
4597 usereg(d);
4598
4599 make_tos2(d,s);
4600 ds=stackpos(s);
4601
4602 if (ds!=1) {
4603 printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
4604 abort();
4605 }
4606 emit_byte(0xd9);
4607 emit_byte(0xf8); /* take rem from dest by source */
4608 }
4609 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
4610
4611 LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4612 {
4613 int ds;
4614
4615 usereg(s);
4616 usereg(d);
4617
4618 make_tos2(d,s);
4619 ds=stackpos(s);
4620
4621 if (ds!=1) {
4622 printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
4623 abort();
4624 }
4625 emit_byte(0xd9);
4626 emit_byte(0xf5); /* take rem1 from dest by source */
4627 }
4628 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
4629
4630
/* Compare fp reg r against 0.0 (ftst); result lands in the x87 status
   word. */
LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
{
	make_tos(r);
	emit_byte(0xd9); /* ftst */
	emit_byte(0xe4);
}
LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
4638
/* d = (e^s) - 1.  Same scheme as raw_fetox_rr, except the final '+1'
   after f2xm1 is omitted so the result stays (2^frac)-1 before scaling.
   NOTE(review): fscale of (2^frac - 1) by 2^int only equals e^x - 1
   exactly when int(x*log2(e)) == 0 — presumably acceptable for the m68k
   FETOXM1 use case; confirm against the FPU emulation layer. */
LOWFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xea); /* fldl2e log2(e) */
	emit_byte(0xd8);
	emit_byte(0xc9); /* fmul x*log2(e) */
	emit_byte(0xdd);
	emit_byte(0xd1); /* fst copy up */
	emit_byte(0xd9);
	emit_byte(0xfc); /* frndint int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9); /* fxch swap top two elements */
	emit_byte(0xd8);
	emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
	emit_byte(0xd9);
	emit_byte(0xfd); /* fscale ((2^frac(x))-1)*2^int(x*log2(e)) */
	emit_byte(0xdd);
	emit_byte(0xd9); /* fstp copy & pop */
	if (s!=d)
		tos_make(d); /* store y=(e^x)-1 */
}
LENDFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s))
4672
/* d = 10^s, via 10^x = 2^(x*log2(10)): multiply by log2(10) (fldl2t),
   split into integer/fraction, f2xm1 + 1 on the fraction, recombine
   with fscale. */
LOWFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds); /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xe9); /* fldl2t log2(10) */
	emit_byte(0xd8);
	emit_byte(0xc9); /* fmul x*log2(10) */
	emit_byte(0xdd);
	emit_byte(0xd1); /* fst copy up */
	emit_byte(0xd9);
	emit_byte(0xfc); /* frndint int(x*log2(10)) */
	emit_byte(0xd9);
	emit_byte(0xc9); /* fxch swap top two elements */
	emit_byte(0xd8);
	emit_byte(0xe1); /* fsub x*log2(10) - int(x*log2(10)) */
	emit_byte(0xd9);
	emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one);
	emit_byte(0xd9);
	emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(10)) */
	emit_byte(0xdd);
	emit_byte(0xd9); /* fstp copy & pop */
	if (s!=d)
		tos_make(d); /* store y=10^x */
}
LENDFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s))
4707
/* d = sin(s), c = cos(s) in one fsincos instruction (which leaves sin
   at st(1) and pushes cos to st(0)).  The branches below place the two
   results depending on whether c and d already have stack slots. */
LOWFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s))
{
	int ds;

	if (s==d) {
		//write_log (_T("FSINCOS src = dest\n"));
		make_tos(s);
		emit_byte(0xd9);
		emit_byte(0xfb); /* fsincos sin(x) push cos(x) */
		tos_make(c); /* store cos(x) to c */
		return;
	}

	ds=stackpos(s);
	emit_byte(0xd9);
	emit_byte(0xc0+ds); /* fld x */
	emit_byte(0xd9);
	emit_byte(0xfb); /* fsincos sin(x) push cos(x) */
	if (live.spos[c]<0) {
		if (live.spos[d]<0) { /* occupy both regs directly */
			live.tos++;
			live.spos[d]=live.tos;
			live.onstack[live.tos]=d; /* sin(x) comes first */
			live.tos++;
			live.spos[c]=live.tos;
			live.onstack[live.tos]=c;
		}
		else {
			emit_byte(0xd9);
			emit_byte(0xc9); /* fxch swap cos(x) with sin(x) */
			emit_byte(0xdd); /* store sin(x) to d & pop */
			emit_byte(0xd8+(live.tos+2)-live.spos[d]);
			live.tos++; /* occupy a reg for cos(x) here */
			live.spos[c]=live.tos;
			live.onstack[live.tos]=c;
		}
	}
	else {
		emit_byte(0xdd); /* store cos(x) to c & pop */
		emit_byte(0xd8+(live.tos+2)-live.spos[c]);
		tos_make(d); /* store sin(x) to destination */
	}
}
LENDFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s))
4752
/*
 * raw_fscale_rr: emit code computing d = d * 2^s (x87 fscale).
 * Fast path: if d is already TOS and s directly below it, a single
 * fscale suffices; otherwise s is brought to TOS, d reloaded on top,
 * scaled, and stored back.
 */
LOWFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s))
{
	int ds;

	if (live.spos[d]==live.tos && live.spos[s]==live.tos-1) {
		//write_log (_T("fscale found x in TOS-1 and y in TOS\n"));
		emit_byte(0xd9);
		emit_byte(0xfd);        /* fscale y*(2^x) */
	}
	else {
		make_tos(s);            /* tos=x */
		ds=stackpos(d);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld y */
		emit_byte(0xd9);
		emit_byte(0xfd);        /* fscale y*(2^x) */
		tos_make(d);            /* store y=y*(2^x) */
	}
}
LENDFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s))
4773
/*
 * raw_ftan_rr: emit code computing d = tan(s).
 * fptan replaces st(0) with tan(x) and pushes 1.0, which is discarded
 * with an fstp st(0).
 */
LOWFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
		emit_byte(0xd9);
		emit_byte(0xf2);        /* fptan tan(x)=y/1.0 */
		emit_byte(0xdd);
		emit_byte(0xd8);        /* fstp pop 1.0 */
		tos_make(d);            /* store to destination */
	}
	else {
		make_tos(d);            /* compute in place */
		emit_byte(0xd9);
		emit_byte(0xf2);        /* fptan tan(x)=y/1.0 */
		emit_byte(0xdd);
		emit_byte(0xd8);        /* fstp pop 1.0 */
	}
}
LENDFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s))
4797
/* REX64: on x86-64, emit the REX.W prefix (0x48) so that the following
   instruction operates on the full 64-bit register (used below to make
   the esp adjustments act on rsp).  Expands to nothing on IA-32. */
#ifdef CPU_x86_64
#define REX64 emit_byte(0x48);
#else
#define REX64
#endif
4803
/*
 * raw_fcuts_r: emit code rounding r to SINGLE precision in place, by
 * storing it as a 32-bit float to the stack and reloading it.  The
 * fwait (0x9b) forces any pending FPU exception before esp is restored.
 */
LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
{
	make_tos(r);            /* TOS = r */
	REX64
	emit_byte(0x83);
	emit_byte(0xc4);
	emit_byte(0xfc);        /* add -4 to esp */
	emit_byte(0xd9);
	emit_byte(0x1c);
	emit_byte(0x24);        /* fstp store r as SINGLE to [esp] and pop */
	emit_byte(0xd9);
	emit_byte(0x04);
	emit_byte(0x24);        /* fld load r as SINGLE from [esp] */
	emit_byte(0x9b);        /* fwait: let the CPU wait on FPU exceptions */
	REX64
	emit_byte(0x83);
	emit_byte(0xc4);
	emit_byte(0x04);        /* add +4 to esp */
}
LENDFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
4824
/*
 * raw_fcut_r: emit code rounding r to DOUBLE precision in place, by
 * storing it as a 64-bit double to the stack and reloading it.  Same
 * scheme as raw_fcuts_r but with an 8-byte slot and DOUBLE loads/stores.
 */
LOWFUNC(NONE,NONE,1,raw_fcut_r,(FRW r))
{
	make_tos(r);            /* TOS = r */
	REX64
	emit_byte(0x83);
	emit_byte(0xc4);
	emit_byte(0xf8);        /* add -8 to esp */
	emit_byte(0xdd);
	emit_byte(0x1c);
	emit_byte(0x24);        /* fstp store r as DOUBLE to [esp] and pop */
	emit_byte(0xdd);
	emit_byte(0x04);
	emit_byte(0x24);        /* fld load r as DOUBLE from [esp] */
	emit_byte(0x9b);        /* fwait: let the CPU wait on FPU exceptions */
	REX64
	emit_byte(0x83);
	emit_byte(0xc4);
	emit_byte(0x08);        /* add +8 to esp */
}
LENDFUNC(NONE,NONE,1,raw_fcut_r,(FRW r))
4845
/*
 * raw_fgetexp_rr: emit code computing d = exponent of s.
 * fxtract replaces st(0) with the unbiased exponent and pushes the
 * significand; the significand is then discarded with fstp st(0).
 */
LOWFUNC(NONE,NONE,2,raw_fgetexp_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
		emit_byte(0xd9);
		emit_byte(0xf4);        /* fxtract exp push man */
		emit_byte(0xdd);
		emit_byte(0xd8);        /* fstp just pop man */
		tos_make(d);            /* store exp to destination */
	}
	else {
		make_tos(d);            /* tos=x=y */
		emit_byte(0xd9);
		emit_byte(0xf4);        /* fxtract exp push man */
		emit_byte(0xdd);
		emit_byte(0xd8);        /* fstp just pop man */
	}
}
LENDFUNC(NONE,NONE,2,raw_fgetexp_rr,(FW d, FR s))
4869
/*
 * raw_fgetman_rr: emit code computing d = mantissa (significand) of s.
 * fxtract pushes the significand above the exponent; fstp st(1) then
 * copies the significand down over the exponent and pops, leaving only
 * the mantissa.
 */
LOWFUNC(NONE,NONE,2,raw_fgetman_rr,(FW d, FR s))
{
	int ds;

	if (d!=s) {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
		emit_byte(0xd9);
		emit_byte(0xf4);        /* fxtract exp push man */
		emit_byte(0xdd);
		emit_byte(0xd9);        /* fstp copy man up & pop */
		tos_make(d);            /* store man to destination */
	}
	else {
		make_tos(d);            /* tos=x=y */
		emit_byte(0xd9);
		emit_byte(0xf4);        /* fxtract exp push man */
		emit_byte(0xdd);
		emit_byte(0xd9);        /* fstp copy man up & pop */
	}
}
LENDFUNC(NONE,NONE,2,raw_fgetman_rr,(FW d, FR s))
4893
/*
 * raw_flogN_rr: emit code computing d = ln(s).
 * fyl2x computes st(1)*log2(st(0)) and pops, so with st(1) = ln(2)
 * (fldln2) this yields ln(2)*log2(x) = ln(x).
 */
LOWFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);            /* operate in place */
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xed);                /* fldln2 logN(2) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap logN(2) with x */
	emit_byte(0xd9);
	emit_byte(0xf1);                /* fyl2x logN(2)*log2(x) */
	if (s!=d)
		tos_make(d);            /* store y=logN(x) */
}
LENDFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s))
4915
/*
 * raw_flogNP1_rr: emit code computing d = ln(1+s).
 * Same scheme as raw_flogN_rr but with fyl2xp1, which computes
 * st(1)*log2(st(0)+1) and is accurate for small s.
 */
LOWFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);            /* operate in place */
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xed);                /* fldln2 logN(2) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap logN(2) with x */
	emit_byte(0xd9);
	emit_byte(0xf9);                /* fyl2xp1 logN(2)*log2(x+1) */
	if (s!=d)
		tos_make(d);            /* store y=logN(x+1) */
}
LENDFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s))
4937
/*
 * raw_flog10_rr: emit code computing d = log10(s).
 * As raw_flogN_rr but seeding st(1) with log10(2) (fldlg2), so
 * fyl2x yields log10(2)*log2(x) = log10(x).
 */
LOWFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);            /* operate in place */
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xec);                /* fldlg2 log10(2) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap log10(2) with x */
	emit_byte(0xd9);
	emit_byte(0xf1);                /* fyl2x log10(2)*log2(x) */
	if (s!=d)
		tos_make(d);            /* store y=log10(x) */
}
LENDFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s))
4959
/*
 * raw_fasin_rr: emit code computing d = asin(s), via the identity
 * asin(x) = atan(x / sqrt(1 - x^2)), using fpatan at the end.
 */
LOWFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s))
{
	int ds;

	ds=stackpos(s);
	emit_byte(0xd9);
	emit_byte(0xc0+ds);     /* fld x */
	emit_byte(0xd8);
	emit_byte(0xc8);        /* fmul x*x */
	emit_byte(0xd9);
	emit_byte(0xe8);        /* fld 1.0 */
	emit_byte(0xde);
	emit_byte(0xe1);        /* fsubrp 1 - (x^2) */
	emit_byte(0xd9);
	emit_byte(0xfa);        /* fsqrt sqrt(1-(x^2)) */
	emit_byte(0xd9);
	emit_byte(0xc1+ds);     /* fld x again (s is one slot deeper now) */
	emit_byte(0xd9);
	emit_byte(0xc9);        /* fxch swap x with sqrt(1-(x^2)) */
	emit_byte(0xd9);
	emit_byte(0xf3);        /* fpatan atan(x/sqrt(1-(x^2))) & pop */
	tos_make(d);            /* store y=asin(x) */
}
LENDFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s))
4984
/* pi/2 as an 80-bit extended-precision constant (loaded with raw_fldt
   in raw_facos_rr below); the mantissa LSB is cleared so that
   pi/2 - asin(1) cancels exactly, giving acos(1) == 0. */
static uae_u32 pihalf[] = {0x2168c234, 0xc90fdaa2, 0x3fff}; // LSB=0 to get acos(1)=0
4986
/*
 * raw_facos_rr: emit code computing d = acos(s), as
 * acos(x) = pi/2 - asin(x), with asin built from fpatan exactly as in
 * raw_fasin_rr and pi/2 taken from the pihalf constant above.
 */
LOWFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s))
{
	int ds;

	ds=stackpos(s);
	emit_byte(0xd9);
	emit_byte(0xc0+ds);     /* fld x */
	emit_byte(0xd8);
	emit_byte(0xc8);        /* fmul x*x */
	emit_byte(0xd9);
	emit_byte(0xe8);        /* fld 1.0 */
	emit_byte(0xde);
	emit_byte(0xe1);        /* fsubrp 1 - (x^2) */
	emit_byte(0xd9);
	emit_byte(0xfa);        /* fsqrt sqrt(1-(x^2)) */
	emit_byte(0xd9);
	emit_byte(0xc1+ds);     /* fld x again (s is one slot deeper now) */
	emit_byte(0xd9);
	emit_byte(0xc9);        /* fxch swap x with sqrt(1-(x^2)) */
	emit_byte(0xd9);
	emit_byte(0xf3);        /* fpatan atan(x/sqrt(1-(x^2))) & pop */
	raw_fldt((uintptr) &pihalf);    /* fld load pi/2 from pihalf */
	emit_byte(0xde);
	emit_byte(0xe1);        /* fsubrp pi/2 - asin(x) & pop */
	tos_make(d);            /* store y=acos(x) */
}
LENDFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s))
5014
/*
 * raw_fatan_rr: emit code computing d = atan(s).
 * fpatan computes atan(st(1)/st(0)) and pops; with st(0) = 1.0 this is
 * simply atan(x).
 */
LOWFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s))
{
	int ds;

	if (s==d)
		make_tos(s);            /* operate in place */
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xe8);                /* fld 1.0 */
	emit_byte(0xd9);
	emit_byte(0xf3);                /* fpatan atan(x)/1 & pop*/
	if (s!=d)
		tos_make(d);            /* store y=atan(x) */
}
LENDFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s))
5034
/*
 * raw_fatanh_rr: emit code computing d = atanh(s), via
 * atanh(x) = 0.5 * ln((1+x)/(1-x)).
 * The ratio is built on the stack, fyl2x with ln(2) turns it into the
 * natural log, and the final fscale by 2^-1 supplies the factor 0.5.
 */
LOWFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s))
{
	int ds;

	ds=stackpos(s);
	emit_byte(0xd9);
	emit_byte(0xc0+ds);     /* fld x */
	emit_byte(0xd9);
	emit_byte(0xe8);        /* fld 1.0 */
	emit_byte(0xdc);
	emit_byte(0xc1);        /* fadd 1 + x */
	emit_byte(0xd8);
	emit_byte(0xe2+ds);     /* fsub 1 - x (from the original s slot) */
	emit_byte(0xde);
	emit_byte(0xf9);        /* fdivp (1+x)/(1-x) */
	emit_byte(0xd9);
	emit_byte(0xed);        /* fldln2 ln(2) */
	emit_byte(0xd9);
	emit_byte(0xc9);        /* fxch swap ln(2) with (1+x)/(1-x) */
	emit_byte(0xd9);
	emit_byte(0xf1);        /* fyl2x ln(2)*log2((1+x)/(1-x)) pop */
	emit_byte(0xd9);
	emit_byte(0xe8);        /* fld 1.0 */
	emit_byte(0xd9);
	emit_byte(0xe0);        /* fchs -1.0 */
	emit_byte(0xd9);
	emit_byte(0xc9);        /* fxch swap */
	emit_byte(0xd9);
	emit_byte(0xfd);        /* fscale ln((1+x)/(1-x)) * 2^(-1) */
	emit_byte(0xdd);
	emit_byte(0xd9);        /* fstp copy & pop */
	tos_make(d);            /* store y=atanh(x) */
}
LENDFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s))
5069
/*
 * raw_fsinh_rr: emit code computing d = sinh(s) = (e^s - e^-s)/2.
 * Both e^-s and e^s are built as 2^(+-s*log2(e)) with the
 * frndint / f2xm1(+1) / fscale sequence; the final fscale by 2^-1
 * divides by two.
 * NOTE(review): tr=live.onstack[live.tos+3] apparently detects a live
 * value in an x87 slot this routine needs; if so (tr>=0) that value is
 * spilled as an 80-bit temporary to [esp] around the computation and
 * restored afterwards -- confirm against the register allocator's
 * stack-map invariants.
 */
LOWFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s))
{
	int ds,tr;

	tr=live.onstack[live.tos+3];
	if (s==d)
		make_tos(s);            /* operate in place */
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xea);                /* fldl2e log2(e) */
	emit_byte(0xd8);
	emit_byte(0xc9);                /* fmul x*log2(e) */
	emit_byte(0xdd);
	emit_byte(0xd1);                /* fst copy x*log2(e) */
	if (tr>=0) {
		/* spill the endangered register to a 12-byte (80-bit) temp */
		emit_byte(0xd9);
		emit_byte(0xca);        /* fxch swap with temp-reg */
		REX64
		emit_byte(0x83);
		emit_byte(0xc4);
		emit_byte(0xf4);        /* add -12 to esp */
		emit_byte(0xdb);
		emit_byte(0x3c);
		emit_byte(0x24);        /* fstp store temp-reg to [esp] & pop */
	}
	/* first pass: e^-x = 2^(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xe0);                /* fchs -x*log2(e) */
	emit_byte(0xd9);
	emit_byte(0xc0);                /* fld -x*log2(e) again */
	emit_byte(0xd9);
	emit_byte(0xfc);                /* frndint int(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd8);
	emit_byte(0xe1);                /* fsub -x*log2(e) - int(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0);                /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one);     /* +1.0 -> 2^frac(x) */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale (2^frac(x))*2^int(x*log2(e)) */
	/* second pass: e^x = 2^(x*log2(e)) from the saved copy */
	emit_byte(0xd9);
	emit_byte(0xca);                /* fxch swap e^-x with x*log2(e) in tr */
	emit_byte(0xdd);
	emit_byte(0xd1);                /* fst copy x*log2(e) */
	emit_byte(0xd9);
	emit_byte(0xfc);                /* frndint int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd8);
	emit_byte(0xe1);                /* fsub x*log2(e) - int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0);                /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one);     /* +1.0 -> 2^frac(x) */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale (2^frac(x))*2^int(x*log2(e)) */
	emit_byte(0xdd);
	emit_byte(0xd9);                /* fstp copy e^x & pop */
	if (tr>=0) {
		/* reload the spilled register and form the difference */
		emit_byte(0xdb);
		emit_byte(0x2c);
		emit_byte(0x24);        /* fld load temp-reg from [esp] */
		emit_byte(0xd9);
		emit_byte(0xca);        /* fxch swap temp-reg with e^-x in tr */
		emit_byte(0xde);
		emit_byte(0xe9);        /* fsubp (e^x)-(e^-x) */
		REX64
		emit_byte(0x83);
		emit_byte(0xc4);
		emit_byte(0x0c);        /* delayed add +12 to esp */
	}
	else {
		emit_byte(0xde);
		emit_byte(0xe1);        /* fsubrp (e^x)-(e^-x) */
	}
	/* halve the difference via fscale with -1 */
	emit_byte(0xd9);
	emit_byte(0xe8);                /* fld 1.0 */
	emit_byte(0xd9);
	emit_byte(0xe0);                /* fchs -1.0 */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale ((e^x)-(e^-x))/2 */
	emit_byte(0xdd);
	emit_byte(0xd9);                /* fstp copy & pop */
	if (s!=d)
		tos_make(d);            /* store y=sinh(x) */
}
LENDFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s))
5162
/*
 * raw_fcosh_rr: emit code computing d = cosh(s) = (e^s + e^-s)/2.
 * Identical structure to raw_fsinh_rr (two 2^y expansions, spill of a
 * live slot when tr>=0, final fscale by 2^-1) but with faddp instead of
 * the subtraction.
 * NOTE(review): see raw_fsinh_rr for the tr=live.onstack[live.tos+3]
 * spill heuristic -- same assumption applies here.
 */
LOWFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s))
{
	int ds,tr;

	tr=live.onstack[live.tos+3];
	if (s==d)
		make_tos(s);            /* operate in place */
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xea);                /* fldl2e log2(e) */
	emit_byte(0xd8);
	emit_byte(0xc9);                /* fmul x*log2(e) */
	emit_byte(0xdd);
	emit_byte(0xd1);                /* fst copy x*log2(e) */
	if (tr>=0) {
		/* spill the endangered register to a 12-byte (80-bit) temp */
		emit_byte(0xd9);
		emit_byte(0xca);        /* fxch swap with temp-reg */
		REX64
		emit_byte(0x83);
		emit_byte(0xc4);
		emit_byte(0xf4);        /* add -12 to esp */
		emit_byte(0xdb);
		emit_byte(0x3c);
		emit_byte(0x24);        /* fstp store temp-reg to [esp] & pop */
	}
	/* first pass: e^-x = 2^(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xe0);                /* fchs -x*log2(e) */
	emit_byte(0xd9);
	emit_byte(0xc0);                /* fld -x*log2(e) again */
	emit_byte(0xd9);
	emit_byte(0xfc);                /* frndint int(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd8);
	emit_byte(0xe1);                /* fsub -x*log2(e) - int(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0);                /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one);     /* +1.0 -> 2^frac(x) */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale (2^frac(x))*2^int(x*log2(e)) */
	/* second pass: e^x = 2^(x*log2(e)) from the saved copy */
	emit_byte(0xd9);
	emit_byte(0xca);                /* fxch swap e^-x with x*log2(e) in tr */
	emit_byte(0xdd);
	emit_byte(0xd1);                /* fst copy x*log2(e) */
	emit_byte(0xd9);
	emit_byte(0xfc);                /* frndint int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd8);
	emit_byte(0xe1);                /* fsub x*log2(e) - int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0);                /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one);     /* +1.0 -> 2^frac(x) */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale (2^frac(x))*2^int(x*log2(e)) */
	emit_byte(0xdd);
	emit_byte(0xd9);                /* fstp copy e^x & pop */
	if (tr>=0) {
		/* reload the spilled register before the addition */
		emit_byte(0xdb);
		emit_byte(0x2c);
		emit_byte(0x24);        /* fld load temp-reg from [esp] */
		emit_byte(0xd9);
		emit_byte(0xca);        /* fxch swap temp-reg with e^-x in tr */
		REX64
		emit_byte(0x83);
		emit_byte(0xc4);
		emit_byte(0x0c);        /* delayed add +12 to esp */
	}
	emit_byte(0xde);
	emit_byte(0xc1);                /* faddp (e^x)+(e^-x) */
	/* halve the sum via fscale with -1 */
	emit_byte(0xd9);
	emit_byte(0xe8);                /* fld 1.0 */
	emit_byte(0xd9);
	emit_byte(0xe0);                /* fchs -1.0 */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale ((e^x)+(e^-x))/2 */
	emit_byte(0xdd);
	emit_byte(0xd9);                /* fstp copy & pop */
	if (s!=d)
		tos_make(d);            /* store y=cosh(x) */
}
LENDFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s))
5251
/*
 * raw_ftanh_rr: emit code computing
 * d = tanh(s) = ((e^s)-(e^-s)) / ((e^s)+(e^-s)).
 * Same e^+-s construction as raw_fsinh_rr/raw_fcosh_rr; both the sum
 * and the difference are formed before the final division.
 * NOTE(review): see raw_fsinh_rr for the tr=live.onstack[live.tos+3]
 * spill heuristic -- same assumption applies here.
 */
LOWFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s))
{
	int ds,tr;

	tr=live.onstack[live.tos+3];
	if (s==d)
		make_tos(s);            /* operate in place */
	else {
		ds=stackpos(s);
		emit_byte(0xd9);
		emit_byte(0xc0+ds);     /* fld x */
	}
	emit_byte(0xd9);
	emit_byte(0xea);                /* fldl2e log2(e) */
	emit_byte(0xd8);
	emit_byte(0xc9);                /* fmul x*log2(e) */
	emit_byte(0xdd);
	emit_byte(0xd1);                /* fst copy x*log2(e) */
	if (tr>=0) {
		/* spill the endangered register to a 12-byte (80-bit) temp */
		emit_byte(0xd9);
		emit_byte(0xca);        /* fxch swap with temp-reg */
		REX64
		emit_byte(0x83);
		emit_byte(0xc4);
		emit_byte(0xf4);        /* add -12 to esp */
		emit_byte(0xdb);
		emit_byte(0x3c);
		emit_byte(0x24);        /* fstp store temp-reg to [esp] & pop */
	}
	/* first pass: e^-x = 2^(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xe0);                /* fchs -x*log2(e) */
	emit_byte(0xd9);
	emit_byte(0xc0);                /* fld -x*log2(e) again */
	emit_byte(0xd9);
	emit_byte(0xfc);                /* frndint int(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd8);
	emit_byte(0xe1);                /* fsub -x*log2(e) - int(-x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0);                /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one);     /* +1.0 -> 2^frac(x) */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale (2^frac(x))*2^int(x*log2(e)) */
	/* second pass: e^x = 2^(x*log2(e)) from the saved copy */
	emit_byte(0xd9);
	emit_byte(0xca);                /* fxch swap e^-x with x*log2(e) */
	emit_byte(0xdd);
	emit_byte(0xd1);                /* fst copy x*log2(e) */
	emit_byte(0xd9);
	emit_byte(0xfc);                /* frndint int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xc9);                /* fxch swap */
	emit_byte(0xd8);
	emit_byte(0xe1);                /* fsub x*log2(e) - int(x*log2(e)) */
	emit_byte(0xd9);
	emit_byte(0xf0);                /* f2xm1 (2^frac(x))-1 */
	x86_fadd_m((uintptr) &one);     /* +1.0 -> 2^frac(x) */
	emit_byte(0xd9);
	emit_byte(0xfd);                /* fscale (2^frac(x))*2^int(x*log2(e)) */
	/* form numerator and denominator */
	emit_byte(0xdd);
	emit_byte(0xd1);                /* fst copy e^x */
	emit_byte(0xd8);
	emit_byte(0xc2);                /* fadd (e^x)+(e^-x) */
	emit_byte(0xd9);
	emit_byte(0xca);                /* fxch swap with e^-x */
	emit_byte(0xde);
	emit_byte(0xe9);                /* fsubp (e^x)-(e^-x) */
	if (tr>=0) {
		/* reload the spilled register before the division */
		emit_byte(0xdb);
		emit_byte(0x2c);
		emit_byte(0x24);        /* fld load temp-reg from [esp] */
		emit_byte(0xd9);
		emit_byte(0xca);        /* fxch swap temp-reg with e^-x in tr */
		emit_byte(0xde);
		emit_byte(0xf9);        /* fdivp ((e^x)-(e^-x))/((e^x)+(e^-x)) */
		REX64
		emit_byte(0x83);
		emit_byte(0xc4);
		emit_byte(0x0c);        /* delayed add +12 to esp */
	}
	else {
		emit_byte(0xde);
		emit_byte(0xf1);        /* fdivrp ((e^x)-(e^-x))/((e^x)+(e^-x)) */
	}
	if (s!=d)
		tos_make(d);            /* store y=tanh(x) */
}
LENDFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s))
5340
/* %eax register is clobbered if target processor doesn't support fucomi:
   the fucom/fstsw-ax/sahf fallback in raw_fflags_into_flags below
   overwrites %eax, so the allocator must treat it as clobbered then. */
#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
#define FFLAG_NREG EAX_INDEX
5344
/*
 * raw_fflags_into_flags: emit code transferring the FPU compare result
 * for register r into the CPU's EFLAGS, by comparing r against a pushed
 * 0.0.  On P6+ cores (have_cmov) a single fucomi writes EFLAGS
 * directly; otherwise fucom + fstsw ax + sahf is used, clobbering %eax
 * (see FFLAG_NREG above).  The x87 stack is restored: r is stored back
 * to its slot and the temporary 0.0 is popped.
 */
static inline void raw_fflags_into_flags(int r)
{
	int p;

	usereg(r);
	p=stackpos(r);

	emit_byte(0xd9);
	emit_byte(0xee);        /* fldz: Push 0 */
	emit_byte(0xd9);
	emit_byte(0xc9+p);      /* fxch st(p+1): bring r to TOS, 0 to r's slot */
	if (have_cmov) {
		// gb-- fucomi is for P6 cores only, not K6-2 then...
		emit_byte(0xdb);
		emit_byte(0xe9+p);      /* fucomi st,st(p+1): compare r with 0 */
	}
	else {
		emit_byte(0xdd);
		emit_byte(0xe1+p);      /* fucom st(p+1): compare r with 0 */
		emit_byte(0x9b);        /* fwait */
		emit_byte(0xdf);
		emit_byte(0xe0);        /* fstsw ax (clobbers %eax) */
		raw_sahf(0);            /* sahf: FPU flags -> EFLAGS */
	}
	emit_byte(0xdd);
	emit_byte(0xd9+p);      /* fstp st(p+1): store value back, and get rid of 0 */
}
5372