/*
 *  Copyright (C) 2002-2010  The DOSBox Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/* $Id: fpu_instructions_x86.h,v 1.7 2009-05-27 09:15:41 qbix79 Exp $ */


// #define WEAK_EXCEPTIONS

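// Layout used throughout this file: the guest FPU registers live in
// fpu.p_regs[] as 80-bit values in 16-byte slots, which is why register
// indices are shifted left by 4 before being used as byte offsets.
// Slot 8 (byte offset 128) is a scratch register used to shuttle operands
// between guest memory and the host x87 stack (see FPU_FLD_F32 and friends
// below). After most operations the freshly read host status word is merged
// into fpu.sw so the emulated condition codes and exception flags track the
// host's. With WEAK_EXCEPTIONS defined, the fclex/fnstsw bookkeeping is
// skipped for speed at the cost of less accurate exception reporting.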

#if defined (_MSC_VER)

#ifdef WEAK_EXCEPTIONS
#define clx
#else
#define clx fclex
#endif

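// FPUD_LOAD: the caller has already copied the memory operand into scratch
// slot p_regs[8]; `op szI` loads it onto the host stack and the result is
// stored as an 80-bit value into guest register `store_to`. The szA
// parameter is only used by the GCC variants further below (AT&T size suffix).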
#ifdef WEAK_EXCEPTIONS
#define FPUD_LOAD(op,szI,szA)			\
		__asm {							\
		__asm	mov		ebx, store_to	\
		__asm	shl		ebx, 4			\
		__asm	op		szI PTR fpu.p_regs[128].m1		\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		}
#else
#define FPUD_LOAD(op,szI,szA)			\
		Bit16u new_sw;					\
		__asm {							\
		__asm	mov		eax, 8			\
		__asm	shl		eax, 4			\
		__asm	mov		ebx, store_to	\
		__asm	shl		ebx, 4			\
		__asm	fclex					\
		__asm	op		szI PTR fpu.p_regs[eax].m1		\
		__asm	fnstsw	new_sw			\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		}								\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

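// FPUD_LOAD_EA: like FPUD_LOAD, but the loaded value is left on the host
// stack so a following *_EA arithmetic macro can consume it directly.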
#ifdef WEAK_EXCEPTIONS
#define FPUD_LOAD_EA(op,szI,szA)		\
		__asm {							\
		__asm	op		szI PTR fpu.p_regs[128].m1		\
		}
#else
#define FPUD_LOAD_EA(op,szI,szA)		\
		Bit16u new_sw;					\
		__asm {							\
		__asm	mov		eax, 8			\
		__asm	shl		eax, 4			\
		__asm	fclex					\
		__asm	op		szI PTR fpu.p_regs[eax].m1		\
		__asm	fnstsw	new_sw			\
		}								\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

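// FPUD_STORE: round/convert st(TOP) into scratch slot p_regs[8] with a
// popping store (`op` is fstp/fistp/fbstp); the caller then writes the slot
// out to guest memory. The emulated control word (cw_mask_all, presumably the
// guest control word with all exceptions masked) is active during the store
// so rounding and precision follow the guest's settings.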
#ifdef WEAK_EXCEPTIONS
#define FPUD_STORE(op,szI,szA)				\
		Bit16u save_cw;						\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	mov		eax, TOP			\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	shl		eax, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	op		szI PTR fpu.p_regs[128].m1		\
		__asm	fldcw	save_cw				\
		}
#else
#define FPUD_STORE(op,szI,szA)				\
		Bit16u new_sw,save_cw;				\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	mov		eax, TOP			\
		__asm	shl		eax, 4				\
		__asm	mov		ebx, 8				\
		__asm	shl		ebx, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx							\
		__asm	op		szI PTR fpu.p_regs[ebx].m1		\
		__asm	fnstsw	new_sw				\
		__asm	fldcw	save_cw				\
		}									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fsin,fcos,f2xm1,fchs,fabs
#define FPUD_TRIG(op)				\
		Bit16u new_sw;				\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx					\
		__asm	op					\
		__asm	fnstsw	new_sw		\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
		}							\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

// handles fsincos
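// After fsincos the C2 bit (0x0400) of the status word signals that the
// argument was out of range; in that case the single remaining host value is
// dropped and no guest push happens (see the check after the asm block).
// FPUD_PTAN below follows the same pattern.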
#define FPUD_SINCOS()				\
		Bit16u new_sw;					\
		__asm {							\
		__asm	mov		eax, TOP		\
		__asm	mov		ebx, eax		\
		__asm	dec     ebx				\
		__asm	and     ebx, 7			\
		__asm	shl		eax, 4			\
		__asm	shl		ebx, 4			\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx						\
		__asm	fsincos					\
		__asm	fnstsw	new_sw			\
		__asm	mov		cx, new_sw		\
		__asm	and		ch, 0x04 		\
		__asm	jnz		argument_too_large1				\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	jmp		end_sincos		\
		__asm	argument_too_large1:	\
		__asm	fstp	st(0)			\
		__asm	end_sincos:				\
		}												\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();

// handles fptan
#define FPUD_PTAN()					\
		Bit16u new_sw;					\
		__asm {							\
		__asm	mov		eax, TOP		\
		__asm	mov		ebx, eax		\
		__asm	dec     ebx				\
		__asm	and     ebx, 7			\
		__asm	shl		eax, 4			\
		__asm	shl		ebx, 4			\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx					\
		__asm	fptan					\
		__asm	fnstsw	new_sw			\
		__asm	mov		cx, new_sw		\
		__asm	and		ch, 0x04 		\
		__asm	jnz		argument_too_large2				\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	jmp		end_ptan		\
		__asm	argument_too_large2:	\
		__asm	fstp	st(0)			\
		__asm	end_ptan:				\
		}												\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();

// handles fxtract
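// fxtract splits st(0) into exponent and significand, pushing one extra
// value: the exponent is written back to the current TOP slot and the
// significand goes into the slot that becomes TOP after FPU_PREP_PUSH().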
#ifdef WEAK_EXCEPTIONS
#define FPUD_XTRACT						\
		__asm {							\
		__asm	mov		eax, TOP		\
		__asm	mov		ebx, eax		\
		__asm	dec     ebx				\
		__asm	and     ebx, 7			\
		__asm	shl		eax, 4			\
		__asm	shl		ebx, 4			\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fxtract					\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
		}												\
		FPU_PREP_PUSH();
#else
#define FPUD_XTRACT						\
		Bit16u new_sw;					\
		__asm {							\
		__asm	mov		eax, TOP		\
		__asm	mov		ebx, eax		\
		__asm	dec     ebx				\
		__asm	and     ebx, 7			\
		__asm	shl		eax, 4			\
		__asm	shl		ebx, 4			\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fclex					\
		__asm	fxtract					\
		__asm	fnstsw	new_sw			\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
		}												\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);			\
		FPU_PREP_PUSH();
#endif

// handles fadd,fmul,fsub,fsubr
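// op1/op2 are guest register indices; both are loaded onto the host stack
// and `op` is the popping form (faddp etc.), so the result ends up back in
// op1. The guest control word is swapped in around the operation so rounding
// and precision control match the emulated settings.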
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH1(op)						\
		Bit16u save_cw;						\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	mov		eax, op1			\
		__asm	shl		eax, 4				\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	mov		ebx, op2			\
		__asm	shl		ebx, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	op		st(1), st(0)		\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}
#else
#define FPUD_ARITH1(op)						\
		Bit16u new_sw,save_cw;				\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	mov		eax, op1			\
		__asm	shl		eax, 4				\
		__asm	mov		ebx, op2			\
		__asm	shl		ebx, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	clx							\
		__asm	op		st(1), st(0)		\
		__asm	fnstsw	new_sw				\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fadd,fmul,fsub,fsubr
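// *_EA variant: the memory operand is already on the host stack (put there
// by one of the FPU_FLD_*_EA helpers); fxch swaps it with the freshly loaded
// guest register so the popping `op` computes reg OP mem, and the result is
// written back to op1.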
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH1_EA(op)					\
		Bit16u save_cw;						\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	mov		eax, op1			\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	shl		eax, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fxch	\
		__asm	op		st(1), st(0)		\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}
#else
#define FPUD_ARITH1_EA(op)					\
		Bit16u new_sw,save_cw;				\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	mov		eax, op1			\
		__asm	shl		eax, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fxch	\
		__asm	clx							\
		__asm	op		st(1), st(0)		\
		__asm	fnstsw	new_sw				\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fsqrt,frndint
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH2(op)						\
		Bit16u save_cw;						\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	mov		eax, TOP			\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	shl		eax, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	op							\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}
#else
#define FPUD_ARITH2(op)						\
		Bit16u new_sw,save_cw;				\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	mov		eax, TOP			\
		__asm	shl		eax, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx							\
		__asm	op							\
		__asm	fnstsw	new_sw				\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fdiv,fdivr
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH3(op)						\
		Bit16u save_cw;						\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	mov		eax, op1			\
		__asm	shl		eax, 4				\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	mov		ebx, op2			\
		__asm	shl		ebx, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	op		st(1), st(0)		\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}
#else
#define FPUD_ARITH3(op)						\
		Bit16u new_sw,save_cw;				\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	mov		eax, op1			\
		__asm	shl		eax, 4				\
		__asm	mov		ebx, op2			\
		__asm	shl		ebx, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fclex						\
		__asm	op		st(1), st(0)		\
		__asm	fnstsw	new_sw				\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

// handles fdiv,fdivr
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH3_EA(op)					\
		Bit16u save_cw;						\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	mov		eax, op1			\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	shl		eax, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fxch	\
		__asm	op		st(1), st(0)		\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}
#else
#define FPUD_ARITH3_EA(op)					\
		Bit16u new_sw,save_cw;				\
		__asm {								\
		__asm	fnstcw	save_cw				\
		__asm	mov		eax, op1			\
		__asm	fldcw	fpu.cw_mask_all		\
		__asm	shl		eax, 4				\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fxch	\
		__asm	fclex						\
		__asm	op		st(1), st(0)		\
		__asm	fnstsw	new_sw				\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
		__asm	fldcw	save_cw				\
		}									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

// handles fprem,fprem1,fscale
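// These need st(1) as well: the register after TOP ((TOP+1)&7, i.e. guest
// st(1)) is loaded first, then st(TOP); the result replaces st(TOP) and the
// extra host copy of st(1) is discarded. The status word is always read
// back, presumably because fprem/fprem1 report completion and quotient bits
// in C0-C3.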
#define FPUD_REMINDER(op)			\
		Bit16u new_sw;				\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	mov		ebx, eax	\
		__asm	inc     ebx			\
		__asm	and     ebx, 7		\
		__asm	shl		ebx, 4		\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fclex				\
		__asm	op					\
		__asm	fnstsw	new_sw		\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fstp	st(0)		\
		}							\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);

// handles fcom,fucom
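// Both operands are loaded and compared with a double-popping instruction
// (fcompp/fucompp), so the host stack is left empty again; only the
// resulting status word is merged back into fpu.sw.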
#define FPUD_COMPARE(op)			\
		Bit16u new_sw;				\
		__asm {						\
		__asm	mov		ebx, op2	\
		__asm	mov		eax, op1	\
		__asm	shl		ebx, 4		\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx					\
		__asm	op					\
		__asm	fnstsw	new_sw		\
		}							\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

#define FPUD_COMPARE_EA(op)			\
		Bit16u new_sw;				\
		__asm {						\
		__asm	mov		eax, op1	\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx					\
		__asm	op					\
		__asm	fnstsw	new_sw		\
		}							\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

// handles fxam,ftst
#define FPUD_EXAMINE(op)			\
		Bit16u new_sw;				\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	clx					\
		__asm	op					\
		__asm	fnstsw	new_sw		\
		__asm	fstp	st(0)		\
		}							\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

// handles fpatan,fyl2xp1
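// Loads guest st(1) and st(0); fpatan/fyl2xp1 pop once on the host, the
// result is written back to the guest st(1) slot, and FPU_FPOP() then drops
// the old guest TOP so the result becomes the new st(0).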
#ifdef WEAK_EXCEPTIONS
#define FPUD_WITH_POP(op)			\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	mov		ebx, eax	\
		__asm	inc     ebx			\
		__asm	and     ebx, 7		\
		__asm	shl		ebx, 4		\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	op					\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		}							\
		FPU_FPOP();
#else
#define FPUD_WITH_POP(op)			\
		Bit16u new_sw;				\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	mov		ebx, eax	\
		__asm	inc     ebx			\
		__asm	and     ebx, 7		\
		__asm	shl		ebx, 4		\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fclex				\
		__asm	op					\
		__asm	fnstsw	new_sw		\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		}								\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);	\
		FPU_FPOP();
#endif

// handles fyl2x
#ifdef WEAK_EXCEPTIONS
#define FPUD_FYL2X(op)				\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	mov		ebx, eax	\
		__asm	inc     ebx			\
		__asm	and     ebx, 7		\
		__asm	shl		ebx, 4		\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	op					\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		}								\
		FPU_FPOP();
#else
#define FPUD_FYL2X(op)				\
		Bit16u new_sw;				\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	mov		ebx, eax	\
		__asm	inc     ebx			\
		__asm	and     ebx, 7		\
		__asm	shl		ebx, 4		\
		__asm	shl		eax, 4		\
		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
		__asm	fclex				\
		__asm	op					\
		__asm	fnstsw	new_sw		\
		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
		}								\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);	\
		FPU_FPOP();
#endif

// load math constants
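// A free guest register is reserved with FPU_PREP_PUSH() first, then the
// constant-load instruction (fld1, fldpi, ...) runs and its value is stored
// into the new TOP slot.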
#define FPUD_LOAD_CONST(op)		\
		FPU_PREP_PUSH();			\
		__asm {						\
		__asm	mov		eax, TOP	\
		__asm	shl		eax, 4		\
		__asm	clx					\
		__asm	op					\
		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
		}							\

#else

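// GCC / AT&T-syntax versions of the same macros, implemented with extended
// inline assembly; register layout and status-word handling mirror the MSVC
// variants above.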
#ifdef WEAK_EXCEPTIONS
#define clx
#else
#define clx "fclex"
#endif

#ifdef WEAK_EXCEPTIONS
#define FPUD_LOAD(op,szI,szA)				\
		__asm__ volatile (					\
			"movl		$128, %%eax		\n"	\
			"shl		$4, %0			\n"	\
			#op #szA "	(%1, %%eax)		\n"	\
			"fstpt		(%1, %0)		"	\
			:								\
			:	"r" (store_to), "r" (fpu.p_regs)	\
			:	"eax", "memory"						\
		);
#else
#define FPUD_LOAD(op,szI,szA)				\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		$8, %%eax		\n"	\
			"shl		$4, %%eax		\n"	\
			"shl		$4, %1			\n"	\
			"fclex						\n"	\
			#op #szA "	(%2, %%eax)		\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %1)		"	\
			:	"=m" (new_sw)				\
			:	"r" (store_to), "r" (fpu.p_regs)	\
			:	"eax", "memory"						\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

#ifdef WEAK_EXCEPTIONS
#define FPUD_LOAD_EA(op,szI,szA)			\
		__asm__ volatile (					\
			"movl		$128, %%eax		\n"	\
			#op #szA "	(%0, %%eax)		\n"	\
			:								\
			:	"r" (fpu.p_regs)			\
			:	"eax", "memory"				\
		);
#else
#define FPUD_LOAD_EA(op,szI,szA)			\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		$8, %%eax		\n"	\
			"shl		$4, %%eax		\n"	\
			"fclex						\n"	\
			#op #szA "	(%1, %%eax)		\n"	\
			"fnstsw		%0				\n"	\
			:	"=m" (new_sw)				\
			:	"r" (fpu.p_regs)			\
			:	"eax", "memory"				\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

#ifdef WEAK_EXCEPTIONS
#define FPUD_STORE(op,szI,szA)				\
		Bit16u save_cw;						\
		__asm__ volatile (					\
			"fnstcw		%0				\n"	\
			"shll		$4, %1			\n"	\
			"fldcw		%3				\n"	\
			"movl		$128, %%eax		\n"	\
			"fldt		(%2, %1)		\n"	\
			#op #szA "	(%2, %%eax)		\n"	\
			"fldcw		%0				"	\
			:	"=m" (save_cw)				\
			:	"r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"eax", "memory"						\
		);
#else
#define FPUD_STORE(op,szI,szA)				\
		Bit16u new_sw,save_cw;				\
		__asm__ volatile (					\
			"fnstcw		%1				\n"	\
			"fldcw		%4				\n"	\
			"shll		$4, %2			\n"	\
			"movl		$8, %%eax		\n"	\
			"shl		$4, %%eax		\n"	\
			"fldt		(%3, %2)		\n"	\
			clx" 						\n"	\
			#op #szA "	(%3, %%eax)		\n"	\
			"fnstsw		%0				\n"	\
			"fldcw		%1				"	\
			:	"=m" (new_sw), "=m" (save_cw)	\
			:	"r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"eax", "memory"						\
		);										\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fsin,fcos,f2xm1,fchs,fabs
#define FPUD_TRIG(op)						\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %1)		\n"	\
			clx" 						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %1)		"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"memory"					\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

// handles fsincos
#define FPUD_SINCOS()					\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		%1, %%eax		\n"	\
			"shll		$4, %1			\n"	\
			"decl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"fldt		(%2, %1)		\n"	\
			clx" 						\n"	\
			"fsincos					\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %%eax)		\n"	\
			"movw		%0, %%ax		\n"	\
			"sahf						\n"	\
			"jp			argument_too_large1		\n"	\
			"fstpt		(%2, %1)		\n"	\
			"argument_too_large1:		"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "cc", "memory"		\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();

// handles fptan
#define FPUD_PTAN()						\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		%1, %%eax		\n"	\
			"shll		$4, %1			\n"	\
			"decl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"fldt		(%2, %1)		\n"	\
			clx" 						\n"	\
			"fptan 						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %%eax)		\n"	\
			"movw		%0, %%ax		\n"	\
			"sahf						\n"	\
			"jp			argument_too_large2		\n"	\
			"fstpt		(%2, %1)		\n"	\
			"argument_too_large2:		"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "cc", "memory"		\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();

// handles fxtract
#ifdef WEAK_EXCEPTIONS
#define FPUD_XTRACT						\
		__asm__ volatile (					\
			"movl		%0, %%eax		\n"	\
			"shll		$4, %0			\n"	\
			"decl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"fldt		(%1, %0)		\n"	\
			"fxtract					\n"	\
			"fstpt		(%1, %%eax)		\n"	\
			"fstpt		(%1, %0)		"	\
			:								\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "memory"				\
		);									\
		FPU_PREP_PUSH();
#else
#define FPUD_XTRACT						\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		%1, %%eax		\n"	\
			"shll		$4, %1			\n"	\
			"decl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"fldt		(%2, %1)		\n"	\
			"fclex						\n"	\
			"fxtract					\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %%eax)		\n"	\
			"fstpt		(%2, %1)		"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "memory"						\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
		FPU_PREP_PUSH();
#endif

// handles fadd,fmul,fsub,fsubr
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH1(op)						\
		Bit16u save_cw;						\
		__asm__ volatile (					\
			"fnstcw		%0				\n"	\
			"fldcw		%4				\n"	\
			"shll		$4, %2			\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%3, %2)		\n"	\
			"fldt		(%3, %1)		\n"	\
			#op"						\n"	\
			"fstpt		(%3, %1)		\n"	\
			"fldcw		%0				"	\
			:	"=m" (save_cw)		\
			:	"r" (op1), "r" (op2), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"				\
		);
#else
#define FPUD_ARITH1(op)						\
		Bit16u new_sw,save_cw;				\
		__asm__ volatile (					\
			"fnstcw		%1				\n"	\
			"fldcw		%5				\n"	\
			"shll		$4, %3			\n"	\
			"shll		$4, %2			\n"	\
			"fldt		(%4, %3)		\n"	\
			"fldt		(%4, %2)		\n"	\
			clx" 						\n"	\
			#op"						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%4, %2)		\n"	\
			"fldcw		%1				"	\
			:	"=m" (new_sw), "=m" (save_cw)		\
			:	"r" (op1), "r" (op2), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"				\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fadd,fmul,fsub,fsubr
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH1_EA(op)					\
		Bit16u save_cw;						\
		__asm__ volatile (					\
			"fnstcw		%0				\n"	\
			"fldcw		%3				\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %1)		\n"	\
			#op"						\n"	\
			"fstpt		(%2, %1)		\n"	\
			"fldcw		%0				"	\
			:	"=m" (save_cw)		\
			:	"r" (op1), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"				\
		);
#else
#define FPUD_ARITH1_EA(op)					\
		Bit16u new_sw,save_cw;				\
		__asm__ volatile (					\
			"fnstcw		%1				\n"	\
			"fldcw		%4				\n"	\
			"shll		$4, %2			\n"	\
			"fldt		(%3, %2)		\n"	\
			clx" 						\n"	\
			#op"						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%3, %2)		\n"	\
			"fldcw		%1				"	\
			:	"=m" (new_sw), "=m" (save_cw)		\
			:	"r" (op1), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"				\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fsqrt,frndint
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH2(op)						\
		Bit16u save_cw;						\
		__asm__ volatile (					\
			"fnstcw		%0				\n"	\
			"fldcw		%3				\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %1)		\n"	\
			#op" 						\n"	\
			"fstpt		(%2, %1)		\n"	\
			"fldcw		%0				"	\
			:	"=m" (save_cw)				\
			:	"r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"				\
		);
#else
#define FPUD_ARITH2(op)						\
		Bit16u new_sw,save_cw;				\
		__asm__ volatile (					\
			"fnstcw		%1				\n"	\
			"fldcw		%4				\n"	\
			"shll		$4, %2			\n"	\
			"fldt		(%3, %2)		\n"	\
			clx" 						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%3, %2)		\n"	\
			"fldcw		%1				"	\
			:	"=m" (new_sw), "=m" (save_cw)	\
			:	"r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"				\
		);										\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
#endif

// handles fdiv,fdivr
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH3(op)						\
		Bit16u save_cw;						\
		__asm__ volatile (					\
			"fnstcw		%0				\n"	\
			"fldcw		%4				\n"	\
			"shll		$4, %2			\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%3, %2)		\n"	\
			"fldt		(%3, %1)		\n"	\
			#op"						\n"	\
			"fstpt		(%3, %1)		\n"	\
			"fldcw		%0				"	\
			:	"=m" (save_cw)				\
			:	"r" (op1), "r" (op2), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"					\
		);
#else
#define FPUD_ARITH3(op)						\
		Bit16u new_sw,save_cw;				\
		__asm__ volatile (					\
			"fnstcw		%1				\n"	\
			"fldcw		%5				\n"	\
			"shll		$4, %3			\n"	\
			"shll		$4, %2			\n"	\
			"fldt		(%4, %3)		\n"	\
			"fldt		(%4, %2)		\n"	\
			"fclex						\n"	\
			#op"						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%4, %2)		\n"	\
			"fldcw		%1				"	\
			:	"=m" (new_sw), "=m" (save_cw)		\
			:	"r" (op1), "r" (op2), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"					\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

// handles fdiv,fdivr
#ifdef WEAK_EXCEPTIONS
#define FPUD_ARITH3_EA(op)					\
		Bit16u save_cw;						\
		__asm__ volatile (					\
			"fnstcw		%0				\n"	\
			"fldcw		%3				\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %1)		\n"	\
			#op"						\n"	\
			"fstpt		(%2, %1)		\n"	\
			"fldcw		%0				"	\
			:	"=m" (save_cw)				\
			:	"r" (op1), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"					\
		);
#else
#define FPUD_ARITH3_EA(op)					\
		Bit16u new_sw,save_cw;				\
		__asm__ volatile (					\
			"fnstcw		%1				\n"	\
			"fldcw		%4				\n"	\
			"shll		$4, %2			\n"	\
			"fldt		(%3, %2)		\n"	\
			"fclex						\n"	\
			#op"						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%3, %2)		\n"	\
			"fldcw		%1				"	\
			:	"=m" (new_sw), "=m" (save_cw)		\
			:	"r" (op1), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
			:	"memory"					\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
#endif

// handles fprem,fprem1,fscale
#define FPUD_REMINDER(op)					\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		%1, %%eax		\n"	\
			"incl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %%eax)		\n"	\
			"fldt		(%2, %1)		\n"	\
			"fclex						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %1)		\n"	\
			"fstp		%%st(0)			"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "memory"						\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);

// handles fcom,fucom
#define FPUD_COMPARE(op)					\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"shll		$4, %2			\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%3, %2)		\n"	\
			"fldt		(%3, %1)		\n"	\
			clx" 						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				"	\
			:	"=m" (new_sw)				\
			:	"r" (op1), "r" (op2), "r" (fpu.p_regs) 		\
			:	"memory"					\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

// handles fcom,fucom
#define FPUD_COMPARE_EA(op)					\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %1)		\n"	\
			clx" 						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				"	\
			:	"=m" (new_sw)				\
			:	"r" (op1), "r" (fpu.p_regs) 		\
			:	"memory"					\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

// handles fxam,ftst
#define FPUD_EXAMINE(op)					\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %1)		\n"	\
			clx" 						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				\n"	\
			"fstp		%%st(0)			"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"memory"				\
		);									\
		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);

// handles fpatan,fyl2xp1
#ifdef WEAK_EXCEPTIONS
#define FPUD_WITH_POP(op)					\
		__asm__ volatile (					\
			"movl		%0, %%eax		\n"	\
			"incl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"shll		$4, %0			\n"	\
			"fldt		(%1, %%eax)		\n"	\
			"fldt		(%1, %0)		\n"	\
			#op" 						\n"	\
			"fstpt		(%1, %%eax)		\n"	\
			:								\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "memory"				\
		);									\
		FPU_FPOP();
#else
#define FPUD_WITH_POP(op)					\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		%1, %%eax		\n"	\
			"incl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %%eax)		\n"	\
			"fldt		(%2, %1)		\n"	\
			"fclex						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %%eax)		\n"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "memory"						\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
		FPU_FPOP();
#endif

// handles fyl2x
#ifdef WEAK_EXCEPTIONS
#define FPUD_FYL2X(op)						\
		__asm__ volatile (					\
			"movl		%0, %%eax		\n"	\
			"incl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"shll		$4, %0			\n"	\
			"fldt		(%1, %%eax)		\n"	\
			"fldt		(%1, %0)		\n"	\
			#op" 						\n"	\
			"fstpt		(%1, %%eax)		\n"	\
			:								\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "memory"				\
		);									\
		FPU_FPOP();
#else
#define FPUD_FYL2X(op)						\
		Bit16u new_sw;						\
		__asm__ volatile (					\
			"movl		%1, %%eax		\n"	\
			"incl		%%eax			\n"	\
			"andl		$7, %%eax		\n"	\
			"shll		$4, %%eax		\n"	\
			"shll		$4, %1			\n"	\
			"fldt		(%2, %%eax)		\n"	\
			"fldt		(%2, %1)		\n"	\
			"fclex						\n"	\
			#op" 						\n"	\
			"fnstsw		%0				\n"	\
			"fstpt		(%2, %%eax)		\n"	\
			:	"=m" (new_sw)				\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"eax", "memory"				\
		);									\
		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
		FPU_FPOP();
#endif

// load math constants
#define FPUD_LOAD_CONST(op)				\
		FPU_PREP_PUSH();					\
		__asm__ volatile (					\
			"shll		$4, %0			\n"	\
			clx" 						\n"	\
			#op" 						\n"	\
			"fstpt		(%1, %0)		\n"	\
			:								\
			:	"r" (TOP), "r" (fpu.p_regs)	\
			:	"memory"					\
		);

#endif

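// Mask applied to the host status word before it is merged into fpu.sw:
// 0x7f00 keeps only the condition codes and the TOP field (weak exceptions),
// 0xffbf keeps everything except the stack-fault bit.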
#ifdef WEAK_EXCEPTIONS
const Bit16u exc_mask=0x7f00;
#else
const Bit16u exc_mask=0xffbf;
#endif

static void FPU_FINIT(void) {
	FPU_SetCW(0x37F);
	fpu.sw=0;
	TOP=FPU_GET_TOP();
	fpu.tags[0]=TAG_Empty;
	fpu.tags[1]=TAG_Empty;
	fpu.tags[2]=TAG_Empty;
	fpu.tags[3]=TAG_Empty;
	fpu.tags[4]=TAG_Empty;
	fpu.tags[5]=TAG_Empty;
	fpu.tags[6]=TAG_Empty;
	fpu.tags[7]=TAG_Empty;
	fpu.tags[8]=TAG_Valid; // is only used by us
}

static void FPU_FCLEX(void){
	fpu.sw&=0x7f00;				//should clear exceptions
}

static void FPU_FNOP(void){
}

static void FPU_PREP_PUSH(void){
	TOP = (TOP - 1) &7;
	fpu.tags[TOP]=TAG_Valid;
}

static void FPU_FPOP(void){
	fpu.tags[TOP]=TAG_Empty;
	TOP = ((TOP+1)&7);
}

static void FPU_FLD_F32(PhysPt addr,Bitu store_to) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	FPUD_LOAD(fld,DWORD,s)
}

static void FPU_FLD_F32_EA(PhysPt addr) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	FPUD_LOAD_EA(fld,DWORD,s)
}

static void FPU_FLD_F64(PhysPt addr,Bitu store_to) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	fpu.p_regs[8].m2 = mem_readd(addr+4);
	FPUD_LOAD(fld,QWORD,l)
}

static void FPU_FLD_F64_EA(PhysPt addr) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	fpu.p_regs[8].m2 = mem_readd(addr+4);
	FPUD_LOAD_EA(fld,QWORD,l)
}

static void FPU_FLD_F80(PhysPt addr) {
	fpu.p_regs[TOP].m1 = mem_readd(addr);
	fpu.p_regs[TOP].m2 = mem_readd(addr+4);
	fpu.p_regs[TOP].m3 = mem_readw(addr+8);
	FPU_SET_C1(0);
}

static void FPU_FLD_I16(PhysPt addr,Bitu store_to) {
	fpu.p_regs[8].m1 = (Bit32u)mem_readw(addr);
	FPUD_LOAD(fild,WORD,)
}

static void FPU_FLD_I16_EA(PhysPt addr) {
	fpu.p_regs[8].m1 = (Bit32u)mem_readw(addr);
	FPUD_LOAD_EA(fild,WORD,)
}

static void FPU_FLD_I32(PhysPt addr,Bitu store_to) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	FPUD_LOAD(fild,DWORD,l)
}

static void FPU_FLD_I32_EA(PhysPt addr) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	FPUD_LOAD_EA(fild,DWORD,l)
}

static void FPU_FLD_I64(PhysPt addr,Bitu store_to) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	fpu.p_regs[8].m2 = mem_readd(addr+4);
	FPUD_LOAD(fild,QWORD,q)
}

static void FPU_FBLD(PhysPt addr,Bitu store_to) {
	fpu.p_regs[8].m1 = mem_readd(addr);
	fpu.p_regs[8].m2 = mem_readd(addr+4);
	fpu.p_regs[8].m3 = mem_readw(addr+8);
	FPUD_LOAD(fbld,TBYTE,)
}

static void FPU_FST_F32(PhysPt addr) {
	FPUD_STORE(fstp,DWORD,s)
	mem_writed(addr,fpu.p_regs[8].m1);
}

static void FPU_FST_F64(PhysPt addr) {
	FPUD_STORE(fstp,QWORD,l)
	mem_writed(addr,fpu.p_regs[8].m1);
	mem_writed(addr+4,fpu.p_regs[8].m2);
}

static void FPU_FST_F80(PhysPt addr) {
	mem_writed(addr,fpu.p_regs[TOP].m1);
	mem_writed(addr+4,fpu.p_regs[TOP].m2);
	mem_writew(addr+8,fpu.p_regs[TOP].m3);
	FPU_SET_C1(0);
}

static void FPU_FST_I16(PhysPt addr) {
	FPUD_STORE(fistp,WORD,)
	mem_writew(addr,(Bit16u)fpu.p_regs[8].m1);
}

static void FPU_FST_I32(PhysPt addr) {
	FPUD_STORE(fistp,DWORD,l)
	mem_writed(addr,fpu.p_regs[8].m1);
}

static void FPU_FST_I64(PhysPt addr) {
	FPUD_STORE(fistp,QWORD,q)
	mem_writed(addr,fpu.p_regs[8].m1);
	mem_writed(addr+4,fpu.p_regs[8].m2);
}

static void FPU_FBST(PhysPt addr) {
	FPUD_STORE(fbstp,TBYTE,)
	mem_writed(addr,fpu.p_regs[8].m1);
	mem_writed(addr+4,fpu.p_regs[8].m2);
	mem_writew(addr+8,fpu.p_regs[8].m3);
}


static void FPU_FSIN(void){
	FPUD_TRIG(fsin)
}

static void FPU_FSINCOS(void){
	FPUD_SINCOS()
}

static void FPU_FCOS(void){
	FPUD_TRIG(fcos)
}

static void FPU_FSQRT(void){
	FPUD_ARITH2(fsqrt)
}

static void FPU_FPATAN(void){
	FPUD_WITH_POP(fpatan)
}

static void FPU_FPTAN(void){
	FPUD_PTAN()
}


static void FPU_FADD(Bitu op1, Bitu op2){
	FPUD_ARITH1(faddp)
}

static void FPU_FADD_EA(Bitu op1){
	FPUD_ARITH1_EA(faddp)
}

static void FPU_FDIV(Bitu op1, Bitu op2){
	FPUD_ARITH3(fdivp)
}

static void FPU_FDIV_EA(Bitu op1){
	FPUD_ARITH3_EA(fdivp)
}

static void FPU_FDIVR(Bitu op1, Bitu op2){
	FPUD_ARITH3(fdivrp)
}

static void FPU_FDIVR_EA(Bitu op1){
	FPUD_ARITH3_EA(fdivrp)
}

static void FPU_FMUL(Bitu op1, Bitu op2){
	FPUD_ARITH1(fmulp)
}

static void FPU_FMUL_EA(Bitu op1){
	FPUD_ARITH1_EA(fmulp)
}

static void FPU_FSUB(Bitu op1, Bitu op2){
	FPUD_ARITH1(fsubp)
}

static void FPU_FSUB_EA(Bitu op1){
	FPUD_ARITH1_EA(fsubp)
}

static void FPU_FSUBR(Bitu op1, Bitu op2){
	FPUD_ARITH1(fsubrp)
}

static void FPU_FSUBR_EA(Bitu op1){
	FPUD_ARITH1_EA(fsubrp)
}

static void FPU_FXCH(Bitu stv, Bitu other){
	FPU_Tag tag = fpu.tags[other];
	fpu.tags[other] = fpu.tags[stv];
	fpu.tags[stv] = tag;

	Bit32u m1s = fpu.p_regs[other].m1;
	Bit32u m2s = fpu.p_regs[other].m2;
	Bit16u m3s = fpu.p_regs[other].m3;
	fpu.p_regs[other].m1 = fpu.p_regs[stv].m1;
	fpu.p_regs[other].m2 = fpu.p_regs[stv].m2;
	fpu.p_regs[other].m3 = fpu.p_regs[stv].m3;
	fpu.p_regs[stv].m1 = m1s;
	fpu.p_regs[stv].m2 = m2s;
	fpu.p_regs[stv].m3 = m3s;

	FPU_SET_C1(0);
}

static void FPU_FST(Bitu stv, Bitu other){
	fpu.tags[other] = fpu.tags[stv];

	fpu.p_regs[other].m1 = fpu.p_regs[stv].m1;
	fpu.p_regs[other].m2 = fpu.p_regs[stv].m2;
	fpu.p_regs[other].m3 = fpu.p_regs[stv].m3;

	FPU_SET_C1(0);
}


static void FPU_FCOM(Bitu op1, Bitu op2){
	FPUD_COMPARE(fcompp)
}

static void FPU_FCOM_EA(Bitu op1){
	FPUD_COMPARE_EA(fcompp)
}

static void FPU_FUCOM(Bitu op1, Bitu op2){
	FPUD_COMPARE(fucompp)
}

static void FPU_FRNDINT(void){
	FPUD_ARITH2(frndint)
}

static void FPU_FPREM(void){
	FPUD_REMINDER(fprem)
}

static void FPU_FPREM1(void){
	FPUD_REMINDER(fprem1)
}

static void FPU_FXAM(void){
	FPUD_EXAMINE(fxam)
	// handle empty registers (C1 set to sign in any way!)
	if(fpu.tags[TOP] == TAG_Empty) {
		FPU_SET_C3(1);FPU_SET_C2(0);FPU_SET_C0(1);
		return;
	}
}

static void FPU_F2XM1(void){
	FPUD_TRIG(f2xm1)
}

static void FPU_FYL2X(void){
	FPUD_FYL2X(fyl2x)
}

static void FPU_FYL2XP1(void){
	FPUD_WITH_POP(fyl2xp1)
}

static void FPU_FSCALE(void){
	FPUD_REMINDER(fscale)
}


static void FPU_FSTENV(PhysPt addr){
	FPU_SET_TOP(TOP);
	if(!cpu.code.big) {
		mem_writew(addr+0,static_cast<Bit16u>(fpu.cw));
		mem_writew(addr+2,static_cast<Bit16u>(fpu.sw));
		mem_writew(addr+4,static_cast<Bit16u>(FPU_GetTag()));
	} else {
		mem_writed(addr+0,static_cast<Bit32u>(fpu.cw));
		mem_writed(addr+4,static_cast<Bit32u>(fpu.sw));
		mem_writed(addr+8,static_cast<Bit32u>(FPU_GetTag()));
	}
}

static void FPU_FLDENV(PhysPt addr){
	Bit16u tag;
	Bit32u tagbig;
	Bitu cw;
	if(!cpu.code.big) {
		cw     = mem_readw(addr+0);
		fpu.sw = mem_readw(addr+2);
		tag    = mem_readw(addr+4);
	} else {
		cw     = mem_readd(addr+0);
		fpu.sw = (Bit16u)mem_readd(addr+4);
		tagbig = mem_readd(addr+8);
		tag    = static_cast<Bit16u>(tagbig);
	}
	FPU_SetTag(tag);
	FPU_SetCW(cw);
	TOP=FPU_GET_TOP();
}

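// FSAVE/FRSTOR image: a 14-byte (16-bit) or 28-byte (32-bit) environment
// area (only cw/sw/tag are actually emulated here) followed by the eight
// registers as 10-byte values in stack order, st(0) first.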
static void FPU_FSAVE(PhysPt addr){
	FPU_FSTENV(addr);
	Bitu start=(cpu.code.big?28:14);
	for(Bitu i=0;i<8;i++){
		mem_writed(addr+start,fpu.p_regs[STV(i)].m1);
		mem_writed(addr+start+4,fpu.p_regs[STV(i)].m2);
		mem_writew(addr+start+8,fpu.p_regs[STV(i)].m3);
		start+=10;
	}
	FPU_FINIT();
}

static void FPU_FRSTOR(PhysPt addr){
	FPU_FLDENV(addr);
	Bitu start=(cpu.code.big?28:14);
	for(Bitu i=0;i<8;i++){
		fpu.p_regs[STV(i)].m1 = mem_readd(addr+start);
		fpu.p_regs[STV(i)].m2 = mem_readd(addr+start+4);
		fpu.p_regs[STV(i)].m3 = mem_readw(addr+start+8);
		start+=10;
	}
}


static void FPU_FXTRACT(void) {
	FPUD_XTRACT
}

static void FPU_FCHS(void){
	FPUD_TRIG(fchs)
}

static void FPU_FABS(void){
	FPUD_TRIG(fabs)
}

static void FPU_FTST(void){
	FPUD_EXAMINE(ftst)
}

static void FPU_FLD1(void){
	FPUD_LOAD_CONST(fld1)
}

static void FPU_FLDL2T(void){
	FPUD_LOAD_CONST(fldl2t)
}

static void FPU_FLDL2E(void){
	FPUD_LOAD_CONST(fldl2e)
}

static void FPU_FLDPI(void){
	FPUD_LOAD_CONST(fldpi)
}

static void FPU_FLDLG2(void){
	FPUD_LOAD_CONST(fldlg2)
}

static void FPU_FLDLN2(void){
	FPUD_LOAD_CONST(fldln2)
}

static void FPU_FLDZ(void){
	FPUD_LOAD_CONST(fldz)
	fpu.tags[TOP]=TAG_Zero;
}