1 /*
2   * UAE - The Un*x Amiga Emulator
3   *
4   * MC68881 emulation
5   *
6   * Copyright 1996 Herman ten Brugge
7   * Adapted for JIT compilation (c) Bernd Meyer, 2000
8   * Modified 2005 Peter Keunecke
9   */
10 
#include "sysconfig.h"
#include "sysdeps.h"

#include <string.h>

#include "options.h"
#include "uae/memory.h"
#include "custom.h"
#include "newcpu.h"
#include "ersatz.h"
#include "md-fpp.h"
#include "compemu.h"
21 
22 #if defined(JIT)
23 uae_u32 temp_fp[] = { 0, 0, 0 };  /* To convert between FP and <EA> */
24 
25 /* 128 words, indexed through the low byte of the 68k fpu control word */
26 static const uae_u16 x86_fpucw[] = {
27 	0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* E-RN */
28 	0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* E-RZ */
29 	0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* E-RD */
30 	0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, /* E-RU */
31 
32 	0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, /* S-RN */
33 	0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, /* S-RZ */
34 	0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, /* S-RD */
35 	0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, /* S-RU */
36 
37 	0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, /* D-RN */
38 	0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, /* D-RZ */
39 	0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, /* D-RD */
40 	0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, /* D-RU */
41 
42 	0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* ?-RN */
43 	0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* ?-RZ */
44 	0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* ?-RD */
45 	0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f  /* ?-RU */
46 };
/*
 * Operand size in bytes for each FPU data format code (bits 10-12 of the
 * extension word).  sz1 is used for (An)+ / -(An) stepping of A0-A6;
 * sz2 is used when the register is A7 and for immediate operands, where
 * a byte operand occupies a full word.
 */
static const int sz1[8] = {
	4,  /* 0: long */
	4,  /* 1: single */
	12, /* 2: extended ("long double") */
	12, /* 3: packed decimal (rejected by the callers) */
	2,  /* 4: word */
	8,  /* 5: double */
	1,  /* 6: byte */
	0   /* 7: undefined */
};
static const int sz2[8] = {
	4,  /* 0: long */
	4,  /* 1: single */
	12, /* 2: extended ("long double") */
	12, /* 3: packed decimal (rejected by the callers) */
	2,  /* 4: word */
	8,  /* 5: double */
	2,  /* 6: byte, padded to a word */
	0   /* 7: undefined */
};
49 
/*
 * { minimum, maximum } pairs of the signed 8-, 16- and 32-bit integer
 * ranges.  They are handed to fmovi_mrb() when an FP register is stored
 * as a byte, word or long integer.
 */
static struct {
	double b[2];	/* byte */
	double w[2];	/* word */
	double l[2];	/* long */
} clamp_bounds = {
	{        -128.0,        127.0 },
	{      -32768.0,      32767.0 },
	{ -2147483648.0, 2147483647.0 }
};
59 
60 /* return the required floating point precision or -1 for failure, 0=E, 1=S, 2=D */
comp_fp_get(uae_u32 opcode,uae_u16 extra,int treg)61 STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg)
62 {
63 	int reg = opcode & 7;
64 	int mode = (opcode >> 3) & 7;
65 	int size = (extra >> 10) & 7;
66 
67 	if (size == 3 || size == 7) /* 3 = packed decimal, 7 is not defined */
68 		return -1;
69 	switch (mode) {
70 		case 0: /* Dn */
71 		switch (size) {
72 			case 0: /* Long */
73 			mov_l_mr (uae_p32(temp_fp), reg);
74 			fmovi_rm (treg, uae_p32(temp_fp));
75 			return 2;
76 			case 1: /* Single */
77 			mov_l_mr (uae_p32(temp_fp), reg);
78 			fmovs_rm (treg, uae_p32(temp_fp));
79 			return 1;
80 			case 4: /* Word */
81 			sign_extend_16_rr (S1, reg);
82 			mov_l_mr (uae_p32(temp_fp), S1);
83 			fmovi_rm (treg, uae_p32(temp_fp));
84 			return 1;
85 			case 6: /* Byte */
86 			sign_extend_8_rr (S1, reg);
87 			mov_l_mr (uae_p32(temp_fp), S1);
88 			fmovi_rm (treg, uae_p32(temp_fp));
89 			return 1;
90 			default:
91 			return -1;
92 		}
93 		case 1: /* An,  invalid mode */
94 		return -1;
95 		case 2: /* (An) */
96 		mov_l_rr (S1, reg + 8);
97 		break;
98 		case 3: /* (An)+ */
99 		mov_l_rr (S1, reg + 8);
100 		lea_l_brr (reg + 8, reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
101 		break;
102 		case 4: /* -(An) */
103 		lea_l_brr (reg + 8, reg + 8, -(reg == 7 ? sz2[size] : sz1[size]));
104 		mov_l_rr (S1, reg + 8);
105 		break;
106 		case 5: /* (d16,An)  */
107 		{
108 			uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
109 			mov_l_rr (S1, reg + 8);
110 			lea_l_brr (S1, S1, off);
111 			break;
112 		}
113 		case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
114 		{
115 			uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
116 			calc_disp_ea_020 (reg + 8, dp, S1, S2);
117 			break;
118 		}
119 		case 7:
120 		switch (reg) {
121 			case 0: /* (xxx).W */
122 			{
123 				uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
124 				mov_l_ri (S1, off);
125 				break;
126 			}
127 			case 1: /* (xxx).L */
128 			{
129 				uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
130 				mov_l_ri (S1, off);
131 				break;
132 			}
133 			case 2: /* (d16,PC) */
134 			{
135 				uae_u32 address = start_pc + ((uae_char*) comp_pc_p - (uae_char*) start_pc_p) +
136 					m68k_pc_offset;
137 				uae_s32 PC16off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
138 				mov_l_ri (S1, address + PC16off);
139 				break;
140 			}
141 			case 3: /* (d8,PC,Xn) or (bd,PC,Xn) or ([bd,PC,Xn],od) or ([bd,PC],Xn,od) */
142 			return -1; /* rarely used, fallback to non-JIT */
143 			case 4: /* # < data >; Constants should be converted just once by the JIT */
144 			m68k_pc_offset += sz2[size];
145 			switch (size) {
146 				case 0:
147 				{
148 					uae_s32 li = comp_get_ilong(m68k_pc_offset - 4);
149 					float si = (float)li;
150 
151 					if (li == (int)si) {
152 						//write_log ("converted immediate LONG constant to SINGLE\n");
153 						mov_l_mi(uae_p32(temp_fp), *(uae_u32 *)&si);
154 						fmovs_rm(treg, uae_p32(temp_fp));
155 						return 1;
156 					}
157 					//write_log ("immediate LONG constant\n");
158 					mov_l_mi(uae_p32(temp_fp), *(uae_u32 *)&li);
159 					fmovi_rm(treg, uae_p32(temp_fp));
160 					return 2;
161 				}
162 				case 1:
163 				//write_log (_T("immediate SINGLE constant\n"));
164 				mov_l_mi(uae_p32(temp_fp), comp_get_ilong(m68k_pc_offset - 4));
165 				fmovs_rm(treg, uae_p32(temp_fp));
166 				return 1;
167 				case 2:
168 				//write_log (_T("immediate LONG DOUBLE constant\n"));
169 				mov_l_mi(uae_p32(temp_fp), comp_get_ilong(m68k_pc_offset - 4));
170 				mov_l_mi((uae_p32(temp_fp)) + 4, comp_get_ilong(m68k_pc_offset - 8));
171 				mov_l_mi((uae_p32(temp_fp)) + 8, (uae_u32)comp_get_iword(m68k_pc_offset - 12));
172 				fmov_ext_rm(treg, uae_p32(temp_fp));
173 				return 0;
174 				case 4:
175 				{
176 					float si = (float)(uae_s16)comp_get_iword(m68k_pc_offset-2);
177 
178 					//write_log (_T("converted immediate WORD constant %f to SINGLE\n"), si);
179 					mov_l_mi(uae_p32(temp_fp),*(uae_u32 *)&si);
180 					fmovs_rm(treg,uae_p32(temp_fp));
181 					return 1;
182 				}
183 				case 5:
184 				{
185 					uae_u32 longarray[] = { comp_get_ilong(m68k_pc_offset - 4),
186 						comp_get_ilong(m68k_pc_offset - 8) };
187 					float si = (float)*(double *)longarray;
188 
189 					if (*(double *)longarray == (double)si) {
190 						//write_log (_T("SPEED GAIN: converted a DOUBLE constant to SINGLE\n"));
191 						mov_l_mi(uae_p32(temp_fp), *(uae_u32 *)&si);
192 						fmovs_rm(treg, uae_p32(temp_fp));
193 						return 1;
194 					}
195 					//write_log (_T("immediate DOUBLE constant\n"));
196 					mov_l_mi(uae_p32(temp_fp), longarray[0]);
197 					mov_l_mi((uae_p32(temp_fp)) + 4, longarray[1]);
198 					fmov_rm(treg, uae_p32(temp_fp));
199 					return 2;
200 				}
201 				case 6:
202 				{
203 					float si = (float)(uae_s8)comp_get_ibyte(m68k_pc_offset - 2);
204 
205 					//write_log (_T("converted immediate BYTE constant to SINGLE\n"));
206 					mov_l_mi(uae_p32(temp_fp), *(uae_u32 *)&si);
207 					fmovs_rm(treg, uae_p32(temp_fp));
208 					return 1;
209 				}
210 				default: /* never reached */
211 				return -1;
212 			}
213 			default: /* never reached */
214 			return -1;
215 		}
216 	}
217 
218 	switch (size) {
219 		case 0: /* Long */
220 		readlong (S1, S2, S3);
221 		mov_l_mr (uae_p32(temp_fp), S2);
222 		fmovi_rm (treg, uae_p32(temp_fp));
223 		return 2;
224 		case 1: /* Single */
225 		readlong (S1, S2, S3);
226 		mov_l_mr (uae_p32(temp_fp), S2);
227 		fmovs_rm (treg, uae_p32(temp_fp));
228 		return 1;
229 		case 2: /* Long Double */
230 		readword (S1, S2, S3);
231 		mov_w_mr ((uae_p32(temp_fp)) + 8, S2);
232 		add_l_ri (S1, 4);
233 		readlong (S1, S2, S3);
234 		mov_l_mr ((uae_p32(temp_fp)) + 4, S2);
235 		add_l_ri (S1, 4);
236 		readlong (S1, S2, S3);
237 		mov_l_mr ((uae_p32(temp_fp)), S2);
238 		fmov_ext_rm (treg, uae_p32(temp_fp));
239 		return 0;
240 		case 4: /* Word */
241 		readword (S1, S2, S3);
242 		sign_extend_16_rr (S2, S2);
243 		mov_l_mr (uae_p32(temp_fp), S2);
244 		fmovi_rm (treg, uae_p32(temp_fp));
245 		return 1;
246 		case 5: /* Double */
247 		readlong (S1, S2, S3);
248 		mov_l_mr ((uae_p32(temp_fp)) + 4, S2);
249 		add_l_ri (S1, 4);
250 		readlong (S1, S2, S3);
251 		mov_l_mr ((uae_p32(temp_fp)), S2);
252 		fmov_rm (treg, uae_p32(temp_fp));
253 		return 2;
254 		case 6: /* Byte */
255 		readbyte (S1, S2, S3);
256 		sign_extend_8_rr (S2, S2);
257 		mov_l_mr (uae_p32(temp_fp), S2);
258 		fmovi_rm (treg, uae_p32(temp_fp));
259 		return 1;
260 		default:
261 		return -1;
262 	}
263 	return -1;
264 }
265 
266 /* return of -1 means failure, >=0 means OK */
comp_fp_put(uae_u32 opcode,uae_u16 extra)267 STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra)
268 {
269 	int reg = opcode & 7;
270 	int sreg = (extra >> 7) & 7;
271 	int mode = (opcode >> 3) & 7;
272 	int size = (extra >> 10) & 7;
273 
274 	if (size == 3 || size == 7) /* 3 = packed decimal, 7 is not defined */
275 		return -1;
276 	switch (mode) {
277 		case 0: /* Dn */
278 		switch (size) {
279 			case 0: /* FMOVE.L FPx, Dn */
280 #if USE_X86_FPUCW && 0
281 			if (!(regs.fpcr & 0xf0)) { /* if extended round to nearest */
282 				mov_l_ri(S1,0x10); /* use extended round to zero mode */
283 				fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
284 				fmovi_mrb(uae_p32(temp_fp),sreg, clamp_bounds.l);
285 				mov_l_rm(reg,uae_p32(temp_fp));
286 				mov_l_rm(S1,(uae_u32)&regs.fpcr);
287 				and_l_ri(S1,0xf0); /* restore control word */
288 				fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
289 				return 0;
290 			}
291 #endif
292 			fmovi_mrb (uae_p32(temp_fp), sreg, clamp_bounds.l);
293 			mov_l_rm (reg, uae_p32(temp_fp));
294 			return 0;
295 			case 1: /* FMOVE.S FPx, Dn */
296 			fmovs_mr (uae_p32(temp_fp), sreg);
297 			mov_l_rm (reg, uae_p32(temp_fp));
298 			return 0;
299 			case 4: /* FMOVE.W FPx, Dn */
300 #if USE_X86_FPUCW && 0
301 			if (!(regs.fpcr & 0xf0)) { /* if extended round to nearest */
302 				mov_l_ri(S1,0x10); /* use extended round to zero mode */
303 				fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
304 				fmovi_mrb(uae_p32(temp_fp),sreg, clamp_bounds.w);
305 				mov_w_rm(reg,uae_p32(temp_fp));
306 				mov_l_rm(S1,(uae_u32)&regs.fpcr);
307 				and_l_ri(S1,0xf0); /* restore control word */
308 				fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
309 				return 0;
310 			}
311 #endif
312 			fmovi_mrb (uae_p32(temp_fp), sreg, clamp_bounds.w);
313 			mov_w_rm (reg, uae_p32(temp_fp));
314 			return 0;
315 			case 6: /* FMOVE.B FPx, Dn */
316 #if USE_X86_FPUCW && 0
317 			if (!(regs.fpcr & 0xf0)) { /* if extended round to nearest */
318 				mov_l_ri(S1,0x10); /* use extended round to zero mode */
319 				fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
320 				fmovi_mrb(uae_p32(temp_fp),sreg, clamp_bounds.b);
321 				mov_b_rm(reg,uae_p32(temp_fp));
322 				mov_l_rm(S1,(uae_u32)&regs.fpcr);
323 				and_l_ri(S1,0xf0); /* restore control word */
324 				fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
325 				return 0;
326 			}
327 #endif
328 			fmovi_mrb (uae_p32(temp_fp), sreg, clamp_bounds.b);
329 			mov_b_rm (reg, uae_p32(temp_fp));
330 			return 0;
331 			default:
332 			return -1;
333 		}
334 		case 1: /* An, invalid mode */
335 		return -1;
336 		case 2: /* (An) */
337 		mov_l_rr (S1, reg + 8);
338 		break;
339 		case 3: /* (An)+ */
340 		mov_l_rr (S1, reg + 8);
341 		lea_l_brr (reg + 8, reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
342 		break;
343 		case 4: /* -(An) */
344 		lea_l_brr (reg + 8, reg + 8, -(reg == 7 ? sz2[size] : sz1[size]));
345 		mov_l_rr (S1, reg + 8);
346 		break;
347 		case 5: /* (d16,An) */
348 		{
349 			uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
350 			mov_l_rr (S1, reg + 8);
351 			add_l_ri (S1, off);
352 			break;
353 		}
354 		case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
355 		{
356 			uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
357 			calc_disp_ea_020 (reg + 8, dp, S1, S2);
358 			break;
359 		}
360 		case 7:
361 		switch (reg) {
362 			case 0: /* (xxx).W */
363 			{
364 				uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
365 				mov_l_ri (S1, off);
366 				break;
367 			}
368 			case 1: /* (xxx).L */
369 			{
370 				uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
371 				mov_l_ri (S1, off);
372 				break;
373 			}
374 			default: /* All other modes are not allowed for FPx to <EA> */
375 			write_log (_T ("JIT FMOVE FPx,<EA> Mode is not allowed %04x %04x\n"), opcode, extra);
376 			return -1;
377 		}
378 	}
379 	switch (size) {
380 		case 0: /* Long */
381 		fmovi_mrb (uae_p32(temp_fp), sreg, clamp_bounds.l);
382 		mov_l_rm (S2, uae_p32(temp_fp));
383 		writelong_clobber (S1, S2, S3);
384 		return 0;
385 		case 1: /* Single */
386 		fmovs_mr (uae_p32(temp_fp), sreg);
387 		mov_l_rm (S2, uae_p32(temp_fp));
388 		writelong_clobber (S1, S2, S3);
389 		return 0;
390 		case 2:/* Long Double */
391 		fmov_ext_mr (uae_p32(temp_fp), sreg);
392 		mov_w_rm (S2, uae_p32(temp_fp) + 8);
393 		writeword_clobber (S1, S2, S3);
394 		add_l_ri (S1, 4);
395 		mov_l_rm (S2, uae_p32(temp_fp) + 4);
396 		writelong_clobber (S1, S2, S3);
397 		add_l_ri (S1, 4);
398 		mov_l_rm (S2, uae_p32(temp_fp));
399 		writelong_clobber (S1, S2, S3);
400 		return 0;
401 		case 4: /* Word */
402 		fmovi_mrb (uae_p32(temp_fp), sreg, clamp_bounds.w);
403 		mov_l_rm (S2, uae_p32(temp_fp));
404 		writeword_clobber (S1, S2, S3);
405 		return 0;
406 		case 5: /* Double */
407 		fmov_mr (uae_p32(temp_fp), sreg);
408 		mov_l_rm (S2, uae_p32(temp_fp) + 4);
409 		writelong_clobber (S1, S2, S3);
410 		add_l_ri (S1, 4);
411 		mov_l_rm (S2, uae_p32(temp_fp));
412 		writelong_clobber (S1, S2, S3);
413 		return 0;
414 		case 6: /* Byte */
415 		fmovi_mrb (uae_p32(temp_fp), sreg, clamp_bounds.b);
416 		mov_l_rm (S2, uae_p32(temp_fp));
417 		writebyte (S1, S2, S3);
418 		return 0;
419 		default:
420 		return -1;
421 	}
422 	return -1;
423 }
424 
425 /* return -1 for failure, or register number for success */
comp_fp_adr(uae_u32 opcode)426 STATIC_INLINE int comp_fp_adr (uae_u32 opcode)
427 {
428 	uae_s32 off;
429 	int mode = (opcode >> 3) & 7;
430 	int reg = opcode & 7;
431 
432 	switch (mode) {
433 		case 2:
434 		case 3:
435 		case 4:
436 		mov_l_rr (S1, 8 + reg);
437 		return S1;
438 		case 5:
439 		off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
440 		mov_l_rr (S1, 8 + reg);
441 		add_l_ri (S1, off);
442 		return S1;
443 		case 7:
444 		switch (reg) {
445 			case 0:
446 			off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
447 			mov_l_ri (S1, off);
448 			return S1;
449 			case 1:
450 			off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
451 			mov_l_ri (S1, off);
452 			return S1;
453 		}
454 		default:
455 		return -1;
456 	}
457 }
458 
/* FDBcc has no JIT implementation: the handler only raises FAIL (1), the
 * same signal the other handlers in this file raise when they cannot
 * compile an instruction.  Both arguments are unused. */
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
{
	FAIL (1);
}
464 
/*
 * Emit code for FScc Dn: the low byte of Dn becomes 0xff when the FPU
 * condition holds and 0x00 otherwise.
 *
 * opcode: instruction word; only the data register direct form (bits 3-5
 *         zero) is compiled, everything else raises FAIL (1).
 * extra:  condition predicate; predicates with bit 0x20 set are undefined
 *         and raise FAIL (1).
 *
 * The condition numbers passed to cmov_l_rr() are the x86 conditions listed
 * in the "Floating point conditions" comment further down in this file.
 */
void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
{
	/*
	 * One row per predicate 1..14 (0 = never and 15 = always are handled
	 * separately):
	 *   nan_guard  save the "false" value first and cmov it back on
	 *              condition 10 after the main cmov
	 *   cond       condition of the first cmov
	 *   cond2      condition of an optional second cmov of the "true"
	 *              value, -1 if there is none
	 */
	static const struct {
		signed char nan_guard;
		signed char cond;
		signed char cond2;
	} scc_tab[16] = {
		{ 0, -1, -1 },	/* 0x00: handled separately */
		{ 1,  4, -1 },	/* 0x01 */
		{ 0,  7, -1 },	/* 0x02 */
		{ 0,  3, -1 },	/* 0x03 */
		{ 1,  2, -1 },	/* 0x04 */
		{ 1,  6, -1 },	/* 0x05 */
		{ 0,  5, -1 },	/* 0x06 */
		{ 0, 11, -1 },	/* 0x07 */
		{ 0, 10, -1 },	/* 0x08 */
		{ 0,  4, -1 },	/* 0x09 */
		{ 0, 10,  7 },	/* 0x0a */
		{ 0,  4,  3 },	/* 0x0b */
		{ 0,  2, -1 },	/* 0x0c */
		{ 0,  6, -1 },	/* 0x0d */
		{ 0,  5, 10 },	/* 0x0e */
		{ 0, -1, -1 }	/* 0x0f: handled separately */
	};
	/* according to fpp.c, the 0x10 bit is ignored */
	const int cc = extra & 0x0f;
	const int dreg = opcode & 7;

	if (!currprefs.compfpu) {
		FAIL (1);
		return;
	}

#if DEBUG_FPP
	write_log (_T("JIT: fscc_opp at %08lx\n"), M68K_GETPC);
#endif

	if (extra & 0x20) {  /* only cc from 00 to 1f are defined */
		FAIL (1);
		return;
	}
	if ((opcode & 0x38) != 0) { /* We can only do to integer register */
		FAIL (1);
		return;
	}

	fflags_into_flags (S2);

	mov_l_ri (S1, 255);	/* value for "condition true" */
	mov_l_ri (S4, 0);	/* result, starts out as "condition false" */

	if (cc == 15) {
		/* set always */
		mov_l_rr (S4, S1);
	} else if (cc != 0) {	/* cc == 0: set never, S4 stays 0 */
		if (scc_tab[cc].nan_guard)
			mov_l_rr (S2, S4);
		cmov_l_rr (S4, S1, scc_tab[cc].cond);
		if (scc_tab[cc].nan_guard)
			cmov_l_rr (S4, S2, 10);
		else if (scc_tab[cc].cond2 >= 0)
			cmov_l_rr (S4, S1, scc_tab[cc].cond2);
	}

	/* only the data register form reaches this point */
	mov_b_rr (dreg, S4);
}
531 
/* FTRAPcc has no JIT implementation: the handler only raises FAIL (1), the
 * same signal the other handlers in this file raise when they cannot
 * compile an instruction.  Both arguments are unused. */
void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
{
	FAIL (1);
}
537 
538 extern unsigned long foink3, oink;
539 
/*
 * Emit code for FBcc (branch on FPU condition).
 *
 * opcode: instruction word.  Bit 0x20 set means an undefined condition
 *         (FAIL).  Bit 0x40 selects a 32-bit displacement instead of a
 *         sign-extended 16-bit one.  Bits 0-3 are the condition; bit 0x10
 *         is ignored.
 *
 * PC_P is loaded with the host address of the fall-through instruction and
 * S1 with the host address of the branch target.  Depending on the
 * condition, the choice is made either with cmov_l_rr() on PC_P or by
 * handing both constants to register_branch().  m68k_pc_offset is folded
 * into both addresses and reset to 0.
 *
 * The condition numbers are the x86 conditions listed in the "Floating
 * point conditions" comment following this function.
 */
void comp_fbcc_opp (uae_u32 opcode)
{
	uae_u32 start_68k_offset = m68k_pc_offset;
	uae_u32 off, v1, v2;
	int cc;

	if (!currprefs.compfpu) {
		FAIL (1);
		return;
	}

	// comp_pc_p is expected to be bound to 32-bit addresses
	assert((uintptr) comp_pc_p <= 0xffffffffUL);

	if (opcode & 0x20) {  /* only cc from 00 to 1f are defined */
		FAIL (1);
		return;
	}
	if (!(opcode & 0x40)) {
		off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
	}
	else {
		off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
	}

	/* Note, "off" will sometimes be (unsigned) "negative", so the sum below
	 * can be > 0xffffffff, but the result will be correct due to
	 * wraparound when truncated to 32 bit in the call to mov_l_ri.
	 * The arithmetic is done on uintptr integers rather than on the
	 * pointer itself: stepping a pointer that far outside its object is
	 * undefined, while unsigned integer wraparound is well defined and
	 * yields the same truncated value. */
	mov_l_ri(S1, (uintptr) comp_pc_p + off - (m68k_pc_offset - start_68k_offset));
	mov_l_ri(PC_P, (uintptr) comp_pc_p);

	/* Now they are both constant. Might as well fold in m68k_pc_offset */
	add_l_ri (S1, m68k_pc_offset);
	add_l_ri (PC_P, m68k_pc_offset);
	m68k_pc_offset = 0;

	/* according to fpp.c, the 0x10 bit is ignored
	   (it handles exception handling, which we don't
	   do, anyway ;-) */
	cc = opcode & 0x0f;
	v1 = get_const (PC_P);	/* fall-through address */
	v2 = get_const (S1);	/* branch target address */
	fflags_into_flags (S2);

	switch (cc) {
		case 0: /* jump never */
			break;
		case 1:
			/* take the branch on condition 4, then restore the saved
			 * fall-through address on condition 10 */
			mov_l_rr (S2, PC_P);
			cmov_l_rr (PC_P, S1, 4);
			cmov_l_rr (PC_P, S2, 10);
			break;
		case 2:
			register_branch (v1, v2, 7);
			break;
		case 3:
			register_branch (v1, v2, 3);
			break;
		case 4:
			mov_l_rr (S2, PC_P);
			cmov_l_rr (PC_P, S1, 2);
			cmov_l_rr (PC_P, S2, 10);
			break;
		case 5:
			mov_l_rr (S2, PC_P);
			cmov_l_rr (PC_P, S1, 6);
			cmov_l_rr (PC_P, S2, 10);
			break;
		case 6:
			register_branch (v1, v2, 5);
			break;
		case 7:
			register_branch (v1, v2, 11);
			break;
		case 8:
			register_branch (v1, v2, 10);
			break;
		case 9:
			register_branch (v1, v2, 4);
			break;
		case 10:
			/* branch if either condition holds */
			cmov_l_rr (PC_P, S1, 10);
			cmov_l_rr (PC_P, S1, 7);
			break;
		case 11:
			cmov_l_rr (PC_P, S1, 4);
			cmov_l_rr (PC_P, S1, 3);
			break;
		case 12:
			register_branch (v1, v2, 2);
			break;
		case 13:
			register_branch (v1, v2, 6);
			break;
		case 14:
			cmov_l_rr (PC_P, S1, 5);
			cmov_l_rr (PC_P, S1, 10);
			break;
		case 15: /* jump always */
			mov_l_rr (PC_P, S1);
			break;
	}
}
620 
621 /* Floating point conditions
   The "NotANumber" part could be problematic; however, when NaN is
   encountered, the ftst instruction sets both N and Z to 1 on the x87,
624    so quite often things just fall into place. This is probably not
625    accurate wrt the 68k FPU, but it is *as* accurate as this was before.
626    However, some more thought should go into fixing this stuff up so
627    it accurately emulates the 68k FPU.
628    >=<U
629    0000    0x00: 0                        ---   Never jump
630    0101    0x01: Z                        ---   jump if zero (x86: 4)
631    1000    0x02: !(NotANumber || Z || N)  --- Neither Z nor N set (x86: 7)
632    1101    0x03: Z || !(NotANumber || N); --- Z or !N (x86: 4 and 3)
633    0010    0x04: N && !(NotANumber || Z); --- N and !Z (x86: hard!)
634    0111    0x05: Z || (N && !NotANumber); --- Z or N (x86: 6)
635    1010    0x06: !(NotANumber || Z);      --- not Z (x86: 5)
636    1110    0x07: !NotANumber;             --- not NaN (x86: 11, not parity)
637    0001    0x08: NotANumber;              --- NaN (x86: 10)
638    0101    0x09: NotANumber || Z;         --- Z (x86: 4)
639    1001    0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
640    1101    0x0b: NotANumber || Z || !N;   --- Z or !N (x86: 4 and 3)
641    0011    0x0c: NotANumber || (N && !Z); --- N (x86: 2)
642    0111    0x0d: NotANumber || Z || N;    --- Z or N (x86: 6)
643    1010    0x0e: !Z;                      --- not Z (x86: 5)
644    1111    0x0f: 1;                       --- always
645 
646    This is not how the 68k handles things, though --- it sets Z to 0 and N
647    to the NaN's sign.... ('o' and 'i' denote differences from the above
648    table)
649 
650    >=<U
651    0000    0x00: 0                        ---   Never jump
652    010o    0x01: Z                        ---   jump if zero (x86: 4, not 10)
653    1000    0x02: !(NotANumber || Z || N)  --- Neither Z nor N set (x86: 7)
654    110o    0x03: Z || !(NotANumber || N); --- Z or !N (x86: 3)
655    0010    0x04: N && !(NotANumber || Z); --- N and !Z (x86: 2, not 10)
656    011o    0x05: Z || (N && !NotANumber); --- Z or N (x86: 6, not 10)
657    1010    0x06: !(NotANumber || Z);      --- not Z (x86: 5)
658    1110    0x07: !NotANumber;             --- not NaN (x86: 11, not parity)
659    0001    0x08: NotANumber;              --- NaN (x86: 10)
660    0101    0x09: NotANumber || Z;         --- Z (x86: 4)
661    1001    0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
662    1101    0x0b: NotANumber || Z || !N;   --- Z or !N (x86: 4 and 3)
663    0011    0x0c: NotANumber || (N && !Z); --- N (x86: 2)
664    0111    0x0d: NotANumber || Z || N;    --- Z or N (x86: 6)
665    101i    0x0e: !Z;                      --- not Z (x86: 5 and 10)
666    1111    0x0f: 1;                       --- always
667 
668    Of course, this *still* doesn't mean that the x86 and 68k conditions are
669    equivalent --- the handling of infinities is different, for one thing.
670    On the 68k, +infinity minus +infinity is NotANumber (as it should be). On
671    the x86, it is +infinity, and some exception is raised (which I suspect
672    is promptly ignored) STUPID!
673    The more I learn about their CPUs, the more I detest Intel....
674 
675    You can see this in action if you have "Benoit" (see Aminet) and
676    set the exponent to 16. Wait for a long time, and marvel at the extra black
677    areas outside the center one. That's where Benoit expects NaN, and the x86
678    gives +infinity. [Ooops --- that must have been some kind of bug in my code.
679    it no longer happens, and the resulting graphic looks much better, too]
680 
681    x86 conditions
682    0011    : 2
683    1100    : 3
684    0101    : 4
685    1010    : 5
686    0111    : 6
687    1000    : 7
688    0001    : 10
689    1110    : 11
690    */
/* FSAVE has no JIT implementation: the handler only raises FAIL (1), the
 * same signal the other handlers in this file raise when they cannot
 * compile an instruction.  The opcode argument is unused. */
void comp_fsave_opp (uae_u32 opcode)
{
	FAIL (1);
}
696 
/* FRESTORE has no JIT implementation: the handler only raises FAIL (1), the
 * same signal the other handlers in this file raise when they cannot
 * compile an instruction.  The opcode argument is unused. */
void comp_frestore_opp (uae_u32 opcode)
{
	FAIL (1);
}
702 
703 extern uae_u32 xhex_pi[], xhex_exp_1[], xhex_l2_e[], xhex_ln_2[], xhex_ln_10[];
704 extern uae_u32 xhex_l10_2[], xhex_l10_e[], xhex_1e16[], xhex_1e32[], xhex_1e64[];
705 extern uae_u32 xhex_1e128[], xhex_1e256[], xhex_1e512[], xhex_1e1024[];
706 extern uae_u32 xhex_1e2048[], xhex_1e4096[];
707 extern double fp_1e8;
708 extern float  fp_1e1, fp_1e2, fp_1e4;
709 
comp_fpp_opp(uae_u32 opcode,uae_u16 extra)710 void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
711 {
712 	int reg;
713 	int sreg, prec = 0;
714 	int	dreg = (extra >> 7) & 7;
715 	int source = (extra >> 13) & 7;
716 	int	opmode = extra & 0x7f;
717 
718 	if (!currprefs.compfpu) {
719 		FAIL (1);
720 		return;
721 	}
722 	switch (source) {
723 		case 3: /* FMOVE FPx, <EA> */
724 		if (comp_fp_put (opcode, extra) < 0)
725 			FAIL (1);
726 		return;
727 		case 4: /* FMOVE.L  <EA>, ControlReg */
728 		if (!(opcode & 0x30)) { /* Dn or An */
729 			if (extra & 0x1000) { /* FPCR */
730 				mov_l_mr (uae_p32(&regs.fpcr), opcode & 15);
731 #if USE_X86_FPUCW
732 				mov_l_rr (S1, opcode & 15);
733 				and_l_ri (S1, 0xf0);
734 				fldcw_m_indexed (S1, uae_p32(x86_fpucw));
735 #endif
736 				return;
737 			}
738 			if (extra & 0x0800) { /* FPSR */
739 				FAIL (1);
740 				return;
741 				// set_fpsr(m68k_dreg (regs, opcode & 15));
742 			}
743 			if (extra & 0x0400) { /* FPIAR */
744 				mov_l_mr (uae_p32(&regs.fpiar), opcode & 15); return;
745 			}
746 		}
747 		else if ((opcode & 0x3f) == 0x3c) {
748 			if (extra & 0x1000) { /* FPCR */
749 				uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
750 				mov_l_mi (uae_p32(&regs.fpcr), val);
751 #if USE_X86_FPUCW
752 				mov_l_ri (S1, val & 0xf0);
753 				fldcw_m_indexed (S1, uae_p32(x86_fpucw));
754 #endif
755 				return;
756 			}
757 			if (extra & 0x0800) { /* FPSR */
758 				FAIL (1);
759 				return;
760 			}
761 			if (extra & 0x0400) { /* FPIAR */
762 				uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
763 				mov_l_mi (uae_p32(&regs.fpiar), val);
764 				return;
765 			}
766 		}
767 		FAIL (1);
768 		return;
769 		case 5: /* FMOVE.L  ControlReg, <EA> */
770 		if (!(opcode & 0x30)) { /* Dn or An */
771 			if (extra & 0x1000) { /* FPCR */
772 				mov_l_rm (opcode & 15, uae_p32(&regs.fpcr)); return;
773 			}
774 			if (extra & 0x0800) { /* FPSR */
775 				FAIL (1);
776 				return;
777 			}
778 			if (extra & 0x0400) { /* FPIAR */
779 				mov_l_rm (opcode & 15, uae_p32(&regs.fpiar)); return;
780 			}
781 		}
782 		FAIL (1);
783 		return;
784 		case 6:
785 		case 7:
786 		{
787 			uae_u32 list = 0;
788 			int incr = 0;
789 			if (extra & 0x2000) {
790 				int ad;
791 
792 				/* FMOVEM FPP->memory */
793 				switch ((extra >> 11) & 3) { /* Get out early if failure */
794 					case 0:
795 					case 2:
796 					break;
797 					case 1:
798 					case 3:
799 					default:
800 					FAIL (1); return;
801 				}
802 				ad = comp_fp_adr (opcode);
803 				if (ad < 0) {
804 					m68k_setpc (m68k_getpc () - 4);
805 					op_illg (opcode);
806 					return;
807 				}
808 				switch ((extra >> 11) & 3) {
809 					case 0:	/* static pred */
810 					list = extra & 0xff;
811 					incr = -1;
812 					break;
813 					case 2:	/* static postinc */
814 					list = extra & 0xff;
815 					incr = 1;
816 					break;
817 					case 1:	/* dynamic pred */
818 					case 3:	/* dynamic postinc */
819 					abort ();
820 				}
821 				if (incr < 0) { /* Predecrement */
822 					for (reg = 7; reg >= 0; reg--) {
823 						if (list & 0x80) {
824 							fmov_ext_mr ((uintptr) temp_fp, reg);
825 							sub_l_ri (ad, 4);
826 							mov_l_rm (S2, (uintptr) temp_fp);
827 							writelong_clobber (ad, S2, S3);
828 							sub_l_ri (ad, 4);
829 							mov_l_rm (S2, (uintptr) temp_fp + 4);
830 							writelong_clobber (ad, S2, S3);
831 							sub_l_ri (ad, 4);
832 							mov_w_rm (S2, (uintptr) temp_fp + 8);
833 							writeword_clobber (ad, S2, S3);
834 						}
835 						list <<= 1;
836 					}
837 				} else { /* Postincrement */
838 					for (reg = 0; reg <= 7; reg++) {
839 						if (list & 0x80) {
840 							fmov_ext_mr ((uintptr) temp_fp, reg);
841 							mov_w_rm (S2, (uintptr) temp_fp + 8);
842 							writeword_clobber (ad, S2, S3);
843 							add_l_ri (ad, 4);
844 							mov_l_rm (S2, (uintptr) temp_fp + 4);
845 							writelong_clobber (ad, S2, S3);
846 							add_l_ri (ad, 4);
847 							mov_l_rm (S2, (uintptr) temp_fp);
848 							writelong_clobber (ad, S2, S3);
849 							add_l_ri (ad, 4);
850 						}
851 						list <<= 1;
852 					}
853 				}
854 				if ((opcode & 0x38) == 0x18)
855 					mov_l_rr ((opcode & 7) + 8, ad);
856 				if ((opcode & 0x38) == 0x20)
857 					mov_l_rr ((opcode & 7) + 8, ad);
858 			} else {
859 				/* FMOVEM memory->FPP */
860 
861 				int ad;
862 				switch ((extra >> 11) & 3) { /* Get out early if failure */
863 					case 0:
864 					case 2:
865 					break;
866 					case 1:
867 					case 3:
868 					default:
869 					FAIL (1); return;
870 				}
871 				ad = comp_fp_adr (opcode);
872 				if (ad < 0) {
873 					m68k_setpc (m68k_getpc () - 4);
874 					op_illg (opcode);
875 					return;
876 				}
877 				switch ((extra >> 11) & 3) {
878 					case 0:	/* static pred */
879 					list = extra & 0xff;
880 					incr = -1;
881 					break;
882 					case 2:	/* static postinc */
883 					list = extra & 0xff;
884 					incr = 1;
885 					break;
886 					case 1:	/* dynamic pred */
887 					case 3:	/* dynamic postinc */
888 					abort ();
889 				}
890 
891 				if (incr < 0) {
892 					// not reached
893 					for (reg = 7; reg >= 0; reg--) {
894 						if (list & 0x80) {
895 							sub_l_ri (ad, 4);
896 							readlong (ad, S2, S3);
897 							mov_l_mr ((uintptr) (temp_fp), S2);
898 							sub_l_ri (ad, 4);
899 							readlong (ad, S2, S3);
900 							mov_l_mr ((uintptr) (temp_fp) +4, S2);
901 							sub_l_ri (ad, 4);
902 							readword (ad, S2, S3);
903 							mov_w_mr (((uintptr) temp_fp) + 8, S2);
904 							fmov_ext_rm (reg, (uintptr) (temp_fp));
905 						}
906 						list <<= 1;
907 					}
908 				} else {
909 					for (reg = 0; reg <= 7; reg++) {
910 						if (list & 0x80) {
911 							readword (ad, S2, S3);
912 							mov_w_mr (((uintptr) temp_fp) + 8, S2);
913 							add_l_ri (ad, 4);
914 							readlong (ad, S2, S3);
915 							mov_l_mr ((uintptr) (temp_fp) +4, S2);
916 							add_l_ri (ad, 4);
917 							readlong (ad, S2, S3);
918 							mov_l_mr ((uintptr) (temp_fp), S2);
919 							add_l_ri (ad, 4);
920 							fmov_ext_rm (reg, (uintptr) (temp_fp));
921 						}
922 						list <<= 1;
923 					}
924 				}
925 				if ((opcode & 0x38) == 0x18)
926 					mov_l_rr ((opcode & 7) + 8, ad);
927 				if ((opcode & 0x38) == 0x20)
928 					mov_l_rr ((opcode & 7) + 8, ad);
929 			}
930 		}
931 		return;
932 #if 0
933 		case 6: /* FMOVEM  <EA>, FPx-FPz */
934 		if (!(extra & 0x0800)) {
935 			uae_u32 list = extra & 0xff;
936 			int ad;
937 			if ((ad = comp_fp_adr(opcode)) < 0) {FAIL(1);return;}
938 			while (list) {
939 				if  (extra & 0x1000) { /* postincrement */
940 					readword(ad,S2,S3);
941 					mov_w_mr((uae_p32(temp_fp))+8,S2);
942 					add_l_ri(ad,4);
943 					readlong(ad,S2,S3);
944 					mov_l_mr((uae_u32)(temp_fp)+4,S2);
945 					add_l_ri(ad,4);
946 					readlong(ad,S2,S3);
947 					mov_l_mr((uae_u32)(temp_fp),S2);
948 					add_l_ri(ad,4);
949 					fmov_ext_rm(fpp_movem_index1[list],(uae_u32)(temp_fp));
950 				} else { /* predecrement */
951 					sub_l_ri(ad,4);
952 					readlong(ad,S2,S3);
953 					mov_l_mr((uae_u32)(temp_fp),S2);
954 					sub_l_ri(ad,4);
955 					readlong(ad,S2,S3);
956 					mov_l_mr((uae_u32)(temp_fp)+4,S2);
957 					sub_l_ri(ad,4);
958 					readword(ad,S2,S3);
959 					mov_w_mr((uae_p32(temp_fp))+8,S2);
960 					fmov_ext_rm(fpp_movem_index2[list],(uae_u32)(temp_fp));
961 				}
962 				list = fpp_movem_next[list];
963 			}
964 			if ((opcode & 0x38) == 0x18)
965 				mov_l_rr((opcode & 7)+8,ad);
966 			return;
967 		} /* no break for dynamic register list */
968 		case 7: /* FMOVEM  FPx-FPz, <EA> */
969 		if (!(extra & 0x0800)) {
970 			uae_u32 list = extra & 0xff;
971 			int ad;
972 			if ((ad = comp_fp_adr(opcode)) < 0) {FAIL(1);return;}
973 			while (list) {
974 				if (extra & 0x1000) { /* postincrement */
975 					fmov_ext_mr(uae_p32(temp_fp),fpp_movem_index2[list]);
976 					mov_w_rm(S2,uae_p32(temp_fp)+8);
977 					writeword_clobber(ad,S2,S3);
978 					add_l_ri(ad,4);
979 					mov_l_rm(S2,uae_p32(temp_fp)+4);
980 					writelong_clobber(ad,S2,S3);
981 					add_l_ri(ad,4);
982 					mov_l_rm(S2,uae_p32(temp_fp));
983 					writelong_clobber(ad,S2,S3);
984 					add_l_ri(ad,4);
985 				} else { /* predecrement */
986 					fmov_ext_mr(uae_p32(temp_fp),fpp_movem_index2[list]);
987 					sub_l_ri(ad,4);
988 					mov_l_rm(S2,uae_p32(temp_fp));
989 					writelong_clobber(ad,S2,S3);
990 					sub_l_ri(ad,4);
991 					mov_l_rm(S2,uae_p32(temp_fp)+4);
992 					writelong_clobber(ad,S2,S3);
993 					sub_l_ri(ad,4);
994 					mov_w_rm(S2,uae_p32(temp_fp)+8);
995 					writeword_clobber(ad,S2,S3);
996 				}
997 				list = fpp_movem_next[list];
998 			}
999 			if ((opcode & 0x38) == 0x20)
1000 				mov_l_rr((opcode & 7)+8,ad);
1001 			return;
1002 		} /* no break */
1003 		write_log (_T("fallback from JIT FMOVEM dynamic register list\n"));
1004 		FAIL(1);
1005 		return;
1006 #endif
1007 		case 2: /* from <EA> to FPx */
1008 		dont_care_fflags ();
1009 		if ((extra & 0xfc00) == 0x5c00) { /* FMOVECR */
1010 			//write_log (_T("JIT FMOVECR %x\n"), opmode);
1011 			switch (opmode) {
1012 				case 0x00:
1013 				fmov_pi (dreg);
1014 				break;
1015 				case 0x0b:
1016 				fmov_ext_rm (dreg, uae_p32(&xhex_l10_2));
1017 				break;
1018 				case 0x0c:
1019 				fmov_ext_rm (dreg, uae_p32(&xhex_exp_1));
1020 				break;
1021 				case 0x0d:
1022 				fmov_log2_e (dreg);
1023 				break;
1024 				case 0x0e:
1025 				fmov_ext_rm (dreg, uae_p32(&xhex_l10_e));
1026 				break;
1027 				case 0x0f:
1028 				fmov_0 (dreg);
1029 				break;
1030 				case 0x30:
1031 				fmov_loge_2 (dreg);
1032 				break;
1033 				case 0x31:
1034 				fmov_ext_rm (dreg, uae_p32(&xhex_ln_10));
1035 				break;
1036 				case 0x32:
1037 				fmov_1 (dreg);
1038 				break;
1039 				case 0x33:
1040 				fmovs_rm (dreg, uae_p32(&fp_1e1));
1041 				break;
1042 				case 0x34:
1043 				fmovs_rm (dreg, uae_p32(&fp_1e2));
1044 				break;
1045 				case 0x35:
1046 				fmovs_rm (dreg, uae_p32(&fp_1e4));
1047 				break;
1048 				case 0x36:
1049 				fmov_rm (dreg, uae_p32(&fp_1e8));
1050 				break;
1051 				case 0x37:
1052 				fmov_ext_rm (dreg, uae_p32(&xhex_1e16));
1053 				break;
1054 				case 0x38:
1055 				fmov_ext_rm (dreg, uae_p32(&xhex_1e32));
1056 				break;
1057 				case 0x39:
1058 				fmov_ext_rm (dreg, uae_p32(&xhex_1e64));
1059 				break;
1060 				case 0x3a:
1061 				fmov_ext_rm (dreg, uae_p32(&xhex_1e128));
1062 				break;
1063 				case 0x3b:
1064 				fmov_ext_rm (dreg, uae_p32(&xhex_1e256));
1065 				break;
1066 				case 0x3c:
1067 				fmov_ext_rm (dreg, uae_p32(&xhex_1e512));
1068 				break;
1069 				case 0x3d:
1070 				fmov_ext_rm (dreg, uae_p32(&xhex_1e1024));
1071 				break;
1072 				case 0x3e:
1073 				fmov_ext_rm (dreg, uae_p32(&xhex_1e2048));
1074 				break;
1075 				case 0x3f:
1076 				fmov_ext_rm (dreg, uae_p32(&xhex_1e4096));
1077 				break;
1078 				default:
1079 				FAIL (1);
1080 				return;
1081 			}
1082 			fmov_rr (FP_RESULT, dreg);
1083 			return;
1084 		}
1085 		if (opmode & 0x20) /* two operands, so we need a scratch reg */
1086 			sreg = FS1;
1087 		else /* one operand only, thus we can load the argument into dreg */
1088 			sreg = dreg;
1089 		if ((prec = comp_fp_get (opcode, extra, sreg)) < 0) {
1090 			FAIL (1);
1091 			return;
1092 		}
1093 		if (!opmode) { /* FMOVE  <EA>,FPx */
1094 			fmov_rr (FP_RESULT, dreg);
1095 			return;
1096 		}
1097 		/* no break here for <EA> to dreg */
1098 		case 0: /* directly from sreg to dreg */
1099 		if (!source) { /* no <EA> */
1100 			dont_care_fflags ();
1101 			sreg = (extra >> 10) & 7;
1102 		}
1103 		switch (opmode) {
1104 			case 0x00: /* FMOVE */
1105 			fmov_rr (dreg, sreg);
1106 			break;
1107 			case 0x01: /* FINT */
1108 			frndint_rr (dreg, sreg);
1109 			break;
1110 			case 0x02: /* FSINH */
1111 			fsinh_rr (dreg, sreg);
1112 			break;
1113 			case 0x03: /* FINTRZ */
1114 #if USE_X86_FPUCW /* if we have control over the CW, we can do this */
1115 			if (0 && (regs.fpcr & 0xf0) == 0x10) /* maybe unsafe, because this test is done */
1116 				frndint_rr (dreg, sreg); /* during the JIT compilation and not at runtime */
1117 			else {
1118 				mov_l_ri (S1, 0x10); /* extended round to zero */
1119 				fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1120 				frndint_rr (dreg, sreg);
1121 				mov_l_rm (S1, uae_p32(&regs.fpcr));
1122 				and_l_ri (S1, 0xf0); /* restore control word */
1123 				fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1124 			}
1125 			break;
1126 #endif
1127 			FAIL (1);
1128 			return;
1129 			case 0x04: /* FSQRT */
1130 			fsqrt_rr (dreg, sreg);
1131 			break;
1132 			case 0x06: /* FLOGNP1 */
1133 			flogNP1_rr (dreg, sreg);
1134 			break;
1135 			case 0x08: /* FETOXM1 */
1136 			fetoxM1_rr (dreg, sreg);
1137 			break;
1138 			case 0x09: /* FTANH */
1139 			ftanh_rr (dreg, sreg);
1140 			break;
1141 			case 0x0a: /* FATAN */
1142 			fatan_rr (dreg, sreg);
1143 			break;
1144 			case 0x0c: /* FASIN */
1145 			fasin_rr (dreg, sreg);
1146 			break;
1147 			case 0x0d: /* FATANH */
1148 			fatanh_rr (dreg, sreg);
1149 			break;
1150 			case 0x0e: /* FSIN */
1151 			fsin_rr (dreg, sreg);
1152 			break;
1153 			case 0x0f: /* FTAN */
1154 			ftan_rr (dreg, sreg);
1155 			break;
1156 			case 0x10: /* FETOX */
1157 			fetox_rr (dreg, sreg);
1158 			break;
1159 			case 0x11: /* FTWOTOX */
1160 			ftwotox_rr (dreg, sreg);
1161 			break;
1162 			case 0x12: /* FTENTOX */
1163 			ftentox_rr (dreg, sreg);
1164 			break;
1165 			case 0x14: /* FLOGN */
1166 			flogN_rr (dreg, sreg);
1167 			break;
1168 			case 0x15: /* FLOG10 */
1169 			flog10_rr (dreg, sreg);
1170 			break;
1171 			case 0x16: /* FLOG2 */
1172 			flog2_rr (dreg, sreg);
1173 			break;
1174 			case 0x18: /* FABS */
1175 			fabs_rr (dreg, sreg);
1176 			break;
1177 			case 0x19: /* FCOSH */
1178 			fcosh_rr (dreg, sreg);
1179 			break;
1180 			case 0x1a: /* FNEG */
1181 			fneg_rr (dreg, sreg);
1182 			break;
1183 			case 0x1c: /* FACOS */
1184 #if USE_X86_FPUCW
1185 			if ((regs.fpcr & 0x30) != 0x10) { /* use round to zero */
1186 				mov_l_ri (S1, (regs.fpcr & 0xC0) | 0x10);
1187 				fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1188 				facos_rr (dreg, sreg);
1189 				mov_l_rm (S1, uae_p32(&regs.fpcr));
1190 				and_l_ri (S1, 0xf0); /* restore control word */
1191 				fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1192 				break;
1193 			}
1194 #endif
1195 			facos_rr (dreg, sreg);
1196 			break;
1197 			case 0x1d: /* FCOS */
1198 			fcos_rr (dreg, sreg);
1199 			break;
1200 			case 0x1e: /* FGETEXP */
1201 			fgetexp_rr (dreg, sreg);
1202 			break;
1203 			case 0x1f: /* FGETMAN */
1204 			fgetman_rr (dreg, sreg);
1205 			break;
1206 			case 0x20: /* FDIV */
1207 			fdiv_rr (dreg, sreg);
1208 			break;
1209 			case 0x21: /* FMOD */
1210 			frem_rr (dreg, sreg);
1211 			break;
1212 			case 0x22: /* FADD */
1213 			fadd_rr (dreg, sreg);
1214 			break;
1215 			case 0x23: /* FMUL */
1216 			fmul_rr (dreg, sreg);
1217 			break;
1218 			case 0x24: /* FSGLDIV  is not exactly the same as FSDIV, */
1219 			/* because both operands should be SINGLE precision, too */
1220 			case 0x60: /* FSDIV */
1221 			fdiv_rr (dreg, sreg);
1222 			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
1223 				break;
1224 #if USE_X86_FPUCW
1225 			if ((regs.fpcr & 0xC0) == 0x40) /* if SINGLE precision */
1226 				break;
1227 #endif
1228 			fcuts_r (dreg);
1229 			break;
1230 			case 0x25: /* FREM */
1231 			frem1_rr (dreg, sreg);
1232 			break;
1233 			case 0x26: /* FSCALE */
1234 			fscale_rr (dreg, sreg);
1235 			break;
1236 			case 0x27: /* FSGLMUL is not exactly the same as FSMUL, */
1237 			/* because both operands should be SINGLE precision, too */
1238 			case 0x63: /* FSMUL */
1239 			fmul_rr (dreg, sreg);
1240 			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
1241 				break;
1242 #if USE_X86_FPUCW
1243 			if ((regs.fpcr & 0xC0) == 0x40) /* if SINGLE precision */
1244 				break;
1245 #endif
1246 			fcuts_r (dreg);
1247 			break;
1248 			case 0x28: /* FSUB */
1249 			fsub_rr (dreg, sreg);
1250 			break;
1251 			case 0x30: /* FSINCOS */
1252 			case 0x31:
1253 			case 0x32:
1254 			case 0x33:
1255 			case 0x34:
1256 			case 0x35:
1257 			case 0x36:
1258 			case 0x37:
1259 			if (dreg == (extra & 7))
1260 				fsin_rr (dreg, sreg);
1261 			else
1262 				fsincos_rr (dreg, extra & 7, sreg);
1263 			break;
1264 			case 0x38: /* FCMP */
1265 			fmov_rr (FP_RESULT, dreg);
1266 			fsub_rr (FP_RESULT, sreg);
1267 			return;
1268 			case 0x3a: /* FTST */
1269 			fmov_rr (FP_RESULT, sreg);
1270 			return;
1271 			case 0x40: /* FSMOVE */
1272 			if (prec == 1 || !currprefs.fpu_strict) {
1273 				if (sreg != dreg) /* no <EA> */
1274 					fmov_rr (dreg, sreg);
1275 			}
1276 			else {
1277 				fmovs_mr (uae_p32(temp_fp), sreg);
1278 				fmovs_rm (dreg, uae_p32(temp_fp));
1279 			}
1280 			break;
1281 			case 0x44: /* FDMOVE */
1282 			if (prec || !currprefs.fpu_strict) {
1283 				if (sreg != dreg) /* no <EA> */
1284 					fmov_rr (dreg, sreg);
1285 			}
1286 			else {
1287 				fmov_mr (uae_p32(temp_fp), sreg);
1288 				fmov_rm (dreg, uae_p32(temp_fp));
1289 			}
1290 			break;
1291 			case 0x41: /* FSSQRT */
1292 			fsqrt_rr (dreg, sreg);
1293 			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
1294 				break;
1295 #if USE_X86_FPUCW
1296 			if ((regs.fpcr & 0xC0) == 0x40) /* if SINGLE precision */
1297 				break;
1298 #endif
1299 			fcuts_r (dreg);
1300 			break;
1301 			case 0x45: /* FDSQRT */
1302 			if (!currprefs.fpu_strict) { /* faster, but less strict rounding */
1303 				fsqrt_rr (dreg, sreg);
1304 				break;
1305 			}
1306 #if USE_X86_FPUCW
1307 			if (regs.fpcr & 0xC0) { /* if we don't have EXTENDED precision */
1308 				if ((regs.fpcr & 0xC0) == 0x80) /* if we have DOUBLE */
1309 					fsqrt_rr (dreg, sreg);
				else { /* if we have SINGLE precision, force DOUBLE */
1311 					mov_l_ri (S1, (regs.fpcr & 0x30) | 0x80);
1312 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1313 					fsqrt_rr (dreg, sreg);
1314 					mov_l_rm (S1, uae_p32(&regs.fpcr));
1315 					and_l_ri (S1, 0xf0); /* restore control word */
1316 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1317 				}
1318 				break;
1319 			}
1320 #endif		/* in case of EXTENDED precision, just reduce the result to DOUBLE */
1321 			fsqrt_rr (dreg, sreg);
1322 			fcut_r (dreg);
1323 			break;
1324 			case 0x58: /* FSABS */
1325 			fabs_rr (dreg, sreg);
1326 			if (prec != 1 && currprefs.fpu_strict)
1327 				fcuts_r (dreg);
1328 			break;
1329 			case 0x5a: /* FSNEG */
1330 			fneg_rr (dreg, sreg);
1331 			if (prec != 1 && currprefs.fpu_strict)
1332 				fcuts_r (dreg);
1333 			break;
1334 			case 0x5c: /* FDABS */
1335 			fabs_rr (dreg, sreg);
1336 			if (!prec && currprefs.fpu_strict)
1337 				fcut_r (dreg);
1338 			break;
1339 			case 0x5e: /* FDNEG */
1340 			fneg_rr (dreg, sreg);
1341 			if (!prec && currprefs.fpu_strict)
1342 				fcut_r (dreg);
1343 			break;
1344 			case 0x62: /* FSADD */
1345 			fadd_rr (dreg, sreg);
1346 			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
1347 				break;
1348 #if USE_X86_FPUCW
1349 			if ((regs.fpcr & 0xC0) == 0x40) /* if SINGLE precision */
1350 				break;
1351 #endif
1352 			fcuts_r (dreg);
1353 			break;
1354 			case 0x64: /* FDDIV */
1355 			if (!currprefs.fpu_strict) { /* faster, but less strict rounding */
1356 				fdiv_rr (dreg, sreg);
1357 				break;
1358 			}
1359 #if USE_X86_FPUCW
1360 			if (regs.fpcr & 0xC0) { /* if we don't have EXTENDED precision */
1361 				if ((regs.fpcr & 0xC0) == 0x80) /* if we have DOUBLE */
1362 					fdiv_rr (dreg, sreg);
				else { /* if we have SINGLE precision, force DOUBLE */
1364 					mov_l_ri (S1, (regs.fpcr & 0x30) | 0x80);
1365 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1366 					fdiv_rr (dreg, sreg);
1367 					mov_l_rm (S1, uae_p32(&regs.fpcr));
1368 					and_l_ri (S1, 0xf0); /* restore control word */
1369 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1370 				}
1371 				break;
1372 			}
1373 #endif		/* in case of EXTENDED precision, just reduce the result to DOUBLE */
1374 			fdiv_rr (dreg, sreg);
1375 			fcut_r (dreg);
1376 			break;
1377 			case 0x66: /* FDADD */
1378 			if (!currprefs.fpu_strict) { /* faster, but less strict rounding */
1379 				fadd_rr (dreg, sreg);
1380 				break;
1381 			}
1382 #if USE_X86_FPUCW
1383 			if (regs.fpcr & 0xC0) { /* if we don't have EXTENDED precision */
1384 				if ((regs.fpcr & 0xC0) == 0x80) /* if we have DOUBLE */
1385 					fadd_rr (dreg, sreg);
				else { /* if we have SINGLE precision, force DOUBLE */
1387 					mov_l_ri (S1, (regs.fpcr & 0x30) | 0x80);
1388 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1389 					fadd_rr (dreg, sreg);
1390 					mov_l_rm (S1, uae_p32(&regs.fpcr));
1391 					and_l_ri (S1, 0xf0); /* restore control word */
1392 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1393 				}
1394 				break;
1395 			}
1396 #endif		/* in case of EXTENDED precision, just reduce the result to DOUBLE */
1397 			fadd_rr (dreg, sreg);
1398 			fcut_r (dreg);
1399 			break;
1400 			case 0x67: /* FDMUL */
1401 			if (!currprefs.fpu_strict) { /* faster, but less strict rounding */
1402 				fmul_rr (dreg, sreg);
1403 				break;
1404 			}
1405 #if USE_X86_FPUCW
1406 			if (regs.fpcr & 0xC0) { /* if we don't have EXTENDED precision */
1407 				if ((regs.fpcr & 0xC0) == 0x80) /* if we have DOUBLE */
1408 					fmul_rr (dreg, sreg);
				else { /* if we have SINGLE precision, force DOUBLE */
1410 					mov_l_ri (S1, (regs.fpcr & 0x30) | 0x80);
1411 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1412 					fmul_rr (dreg, sreg);
1413 					mov_l_rm (S1, uae_p32(&regs.fpcr));
1414 					and_l_ri (S1, 0xf0); /* restore control word */
1415 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1416 				}
1417 				break;
1418 			}
1419 #endif		/* in case of EXTENDED precision, just reduce the result to DOUBLE */
1420 			fmul_rr (dreg, sreg);
1421 			fcut_r (dreg);
1422 			break;
1423 			case 0x68: /* FSSUB */
1424 			fsub_rr (dreg, sreg);
1425 			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
1426 				break;
1427 #if USE_X86_FPUCW
1428 			if ((regs.fpcr & 0xC0) == 0x40) /* if SINGLE precision */
1429 				break;
1430 #endif
1431 			fcuts_r (dreg);
1432 			break;
1433 			case 0x6c: /* FDSUB */
1434 			if (!currprefs.fpu_strict) { /* faster, but less strict rounding */
1435 				fsub_rr (dreg, sreg);
1436 				break;
1437 			}
1438 #if USE_X86_FPUCW
1439 			if (regs.fpcr & 0xC0) { /* if we don't have EXTENDED precision */
1440 				if ((regs.fpcr & 0xC0) == 0x80) /* if we have DOUBLE */
1441 					fsub_rr (dreg, sreg);
				else { /* if we have SINGLE precision, force DOUBLE */
1443 					mov_l_ri (S1, (regs.fpcr & 0x30) | 0x80);
1444 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1445 					fsub_rr (dreg, sreg);
1446 					mov_l_rm (S1, uae_p32(&regs.fpcr));
1447 					and_l_ri (S1, 0xf0); /* restore control word */
1448 					fldcw_m_indexed (S1, uae_p32(x86_fpucw));
1449 				}
1450 				break;
1451 			}
1452 #endif		/* in case of EXTENDED precision, just reduce the result to DOUBLE */
1453 			fsub_rr (dreg, sreg);
1454 			fcut_r (dreg);
1455 			break;
1456 			default:
1457 			FAIL (1);
1458 			return;
1459 		}
1460 		fmov_rr (FP_RESULT, dreg);
1461 		return;
1462 		default:
1463 		write_log (_T ("Unsupported JIT-FPU instruction: 0x%04x %04x\n"), opcode, extra);
1464 		FAIL (1);
1465 		return;
1466 	}
1467 }
1468 #endif
1469