1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - fpu.h *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2010 Ari64 *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22 #ifndef M64P_R4300_FPU_H
23 #define M64P_R4300_FPU_H
24
25 #include <math.h>
26 #include <stdint.h>
27
28 #include "cp1_private.h"
29 #include "r4300.h"
30
31 #ifdef _MSC_VER
32 #define M64P_FPU_INLINE static __inline
33 #include <float.h>
34
/* FE_* rounding-mode names for MSVC builds, where <fenv.h> is not included
 * by this header.  The numeric values double as indices into the msRound
 * table inside the fesetround() replacement. */
typedef enum {
    FE_TONEAREST  = 0,
    FE_TOWARDZERO = 1,
    FE_UPWARD     = 2,
    FE_DOWNWARD   = 3
} eRoundType;
fesetround(eRoundType RoundType)36 static void fesetround(eRoundType RoundType)
37 {
38 static const unsigned int msRound[4] = { _RC_NEAR, _RC_CHOP, _RC_UP, _RC_DOWN };
39 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
40 _controlfp(msRound[RoundType], _MCW_RC);
41 #elif defined(__x86_64__) || defined(_M_X64)
42 _controlfp(msRound[RoundType], _MCW_RC);
43 #else
44 unsigned int oldX87, oldSSE2;
45 __control87_2(msRound[RoundType], _MCW_RC, &oldX87, &oldSSE2);
46 #endif
47 }
round(double x)48 static __inline double round(double x) { return floor(x + 0.5); }
roundf(float x)49 static __inline float roundf(float x) { return (float) floor(x + 0.5); }
trunc(double x)50 static __inline double trunc(double x) { return (double) (int) x; }
truncf(float x)51 static __inline float truncf(float x) { return (float) (int) x; }
52 #define isnan _isnan
53 #else
54 #define M64P_FPU_INLINE static inline
55 #ifndef VITA
56 #include <fenv.h>
57 #endif
58 #endif
59
60 #define FCR31_CMP_BIT UINT32_C(0x800000)
61
62
set_rounding(void)63 M64P_FPU_INLINE void set_rounding(void)
64 {
65 /* TODO skogaby: fix this for real */
66 #ifndef VITA
67 switch(FCR31 & 3)
68 {
69 case 0: /* Round to nearest, or to even if equidistant */
70 fesetround(FE_TONEAREST);
71 break;
72 case 1: /* Truncate (toward 0) */
73 fesetround(FE_TOWARDZERO);
74 break;
75 case 2: /* Round up (toward +Inf) */
76 fesetround(FE_UPWARD);
77 break;
78 case 3: /* Round down (toward -Inf) */
79 fesetround(FE_DOWNWARD);
80 break;
81 }
82 #endif
83 }
84
cvt_s_w(const int32_t * source,float * dest)85 M64P_FPU_INLINE void cvt_s_w(const int32_t *source,float *dest)
86 {
87 set_rounding();
88 *dest = (float) *source;
89 }
cvt_d_w(const int32_t * source,double * dest)90 M64P_FPU_INLINE void cvt_d_w(const int32_t *source,double *dest)
91 {
92 *dest = (double) *source;
93 }
cvt_s_l(const int64_t * source,float * dest)94 M64P_FPU_INLINE void cvt_s_l(const int64_t *source,float *dest)
95 {
96 set_rounding();
97 *dest = (float) *source;
98 }
cvt_d_l(const int64_t * source,double * dest)99 M64P_FPU_INLINE void cvt_d_l(const int64_t *source,double *dest)
100 {
101 set_rounding();
102 *dest = (double) *source;
103 }
cvt_d_s(const float * source,double * dest)104 M64P_FPU_INLINE void cvt_d_s(const float *source,double *dest)
105 {
106 *dest = (double) *source;
107 }
cvt_s_d(const double * source,float * dest)108 M64P_FPU_INLINE void cvt_s_d(const double *source,float *dest)
109 {
110 set_rounding();
111 *dest = (float) *source;
112 }
113
round_l_s(const float * source,int64_t * dest)114 M64P_FPU_INLINE void round_l_s(const float *source,int64_t *dest)
115 {
116 *dest = (int64_t) roundf(*source);
117 }
round_w_s(const float * source,int32_t * dest)118 M64P_FPU_INLINE void round_w_s(const float *source,int32_t *dest)
119 {
120 *dest = (int32_t) roundf(*source);
121 }
trunc_l_s(const float * source,int64_t * dest)122 M64P_FPU_INLINE void trunc_l_s(const float *source,int64_t *dest)
123 {
124 *dest = (int64_t) truncf(*source);
125 }
trunc_w_s(const float * source,int32_t * dest)126 M64P_FPU_INLINE void trunc_w_s(const float *source,int32_t *dest)
127 {
128 *dest = (int32_t) truncf(*source);
129 }
ceil_l_s(const float * source,int64_t * dest)130 M64P_FPU_INLINE void ceil_l_s(const float *source,int64_t *dest)
131 {
132 *dest = (int64_t) ceilf(*source);
133 }
ceil_w_s(const float * source,int32_t * dest)134 M64P_FPU_INLINE void ceil_w_s(const float *source,int32_t *dest)
135 {
136 *dest = (int32_t) ceilf(*source);
137 }
floor_l_s(const float * source,int64_t * dest)138 M64P_FPU_INLINE void floor_l_s(const float *source,int64_t *dest)
139 {
140 *dest = (int64_t) floorf(*source);
141 }
floor_w_s(const float * source,int32_t * dest)142 M64P_FPU_INLINE void floor_w_s(const float *source,int32_t *dest)
143 {
144 *dest = (int32_t) floorf(*source);
145 }
146
round_l_d(const double * source,int64_t * dest)147 M64P_FPU_INLINE void round_l_d(const double *source,int64_t *dest)
148 {
149 *dest = (int64_t) round(*source);
150 }
round_w_d(const double * source,int32_t * dest)151 M64P_FPU_INLINE void round_w_d(const double *source,int32_t *dest)
152 {
153 *dest = (int32_t) round(*source);
154 }
trunc_l_d(const double * source,int64_t * dest)155 M64P_FPU_INLINE void trunc_l_d(const double *source,int64_t *dest)
156 {
157 *dest = (int64_t) trunc(*source);
158 }
trunc_w_d(const double * source,int32_t * dest)159 M64P_FPU_INLINE void trunc_w_d(const double *source,int32_t *dest)
160 {
161 *dest = (int32_t) trunc(*source);
162 }
ceil_l_d(const double * source,int64_t * dest)163 M64P_FPU_INLINE void ceil_l_d(const double *source,int64_t *dest)
164 {
165 *dest = (int64_t) ceil(*source);
166 }
ceil_w_d(const double * source,int32_t * dest)167 M64P_FPU_INLINE void ceil_w_d(const double *source,int32_t *dest)
168 {
169 *dest = (int32_t) ceil(*source);
170 }
floor_l_d(const double * source,int64_t * dest)171 M64P_FPU_INLINE void floor_l_d(const double *source,int64_t *dest)
172 {
173 *dest = (int64_t) floor(*source);
174 }
floor_w_d(const double * source,int32_t * dest)175 M64P_FPU_INLINE void floor_w_d(const double *source,int32_t *dest)
176 {
177 *dest = (int32_t) floor(*source);
178 }
179
cvt_w_s(const float * source,int32_t * dest)180 M64P_FPU_INLINE void cvt_w_s(const float *source,int32_t *dest)
181 {
182 switch(FCR31&3)
183 {
184 case 0: round_w_s(source,dest);return;
185 case 1: trunc_w_s(source,dest);return;
186 case 2: ceil_w_s(source,dest);return;
187 case 3: floor_w_s(source,dest);return;
188 }
189 }
cvt_w_d(const double * source,int32_t * dest)190 M64P_FPU_INLINE void cvt_w_d(const double *source,int32_t *dest)
191 {
192 switch(FCR31&3)
193 {
194 case 0: round_w_d(source,dest);return;
195 case 1: trunc_w_d(source,dest);return;
196 case 2: ceil_w_d(source,dest);return;
197 case 3: floor_w_d(source,dest);return;
198 }
199 }
cvt_l_s(const float * source,int64_t * dest)200 M64P_FPU_INLINE void cvt_l_s(const float *source,int64_t *dest)
201 {
202 switch(FCR31&3)
203 {
204 case 0: round_l_s(source,dest);return;
205 case 1: trunc_l_s(source,dest);return;
206 case 2: ceil_l_s(source,dest);return;
207 case 3: floor_l_s(source,dest);return;
208 }
209 }
cvt_l_d(const double * source,int64_t * dest)210 M64P_FPU_INLINE void cvt_l_d(const double *source,int64_t *dest)
211 {
212 switch(FCR31&3)
213 {
214 case 0: round_l_d(source,dest);return;
215 case 1: trunc_l_d(source,dest);return;
216 case 2: ceil_l_d(source,dest);return;
217 case 3: floor_l_d(source,dest);return;
218 }
219 }
220
c_f_s()221 M64P_FPU_INLINE void c_f_s()
222 {
223 FCR31 &= ~FCR31_CMP_BIT;
224 }
c_un_s(const float * source,const float * target)225 M64P_FPU_INLINE void c_un_s(const float *source,const float *target)
226 {
227 FCR31=(isnan(*source) || isnan(*target)) ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
228 }
229
c_eq_s(const float * source,const float * target)230 M64P_FPU_INLINE void c_eq_s(const float *source,const float *target)
231 {
232 if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
233 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
234 }
c_ueq_s(const float * source,const float * target)235 M64P_FPU_INLINE void c_ueq_s(const float *source,const float *target)
236 {
237 if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
238 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
239 }
240
c_olt_s(const float * source,const float * target)241 M64P_FPU_INLINE void c_olt_s(const float *source,const float *target)
242 {
243 if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
244 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
245 }
c_ult_s(const float * source,const float * target)246 M64P_FPU_INLINE void c_ult_s(const float *source,const float *target)
247 {
248 if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
249 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
250 }
251
c_ole_s(const float * source,const float * target)252 M64P_FPU_INLINE void c_ole_s(const float *source,const float *target)
253 {
254 if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
255 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
256 }
c_ule_s(const float * source,const float * target)257 M64P_FPU_INLINE void c_ule_s(const float *source,const float *target)
258 {
259 if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
260 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
261 }
262
c_sf_s(const float * source,const float * target)263 M64P_FPU_INLINE void c_sf_s(const float *source,const float *target)
264 {
265 //if (isnan(*source) || isnan(*target)) // FIXME - exception
266 FCR31&=~FCR31_CMP_BIT;
267 }
c_ngle_s(const float * source,const float * target)268 M64P_FPU_INLINE void c_ngle_s(const float *source,const float *target)
269 {
270 //if (isnan(*source) || isnan(*target)) // FIXME - exception
271 FCR31&=~FCR31_CMP_BIT;
272 }
273
c_seq_s(const float * source,const float * target)274 M64P_FPU_INLINE void c_seq_s(const float *source,const float *target)
275 {
276 //if (isnan(*source) || isnan(*target)) // FIXME - exception
277 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
278 }
c_ngl_s(const float * source,const float * target)279 M64P_FPU_INLINE void c_ngl_s(const float *source,const float *target)
280 {
281 //if (isnan(*source) || isnan(*target)) // FIXME - exception
282 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
283 }
284
c_lt_s(const float * source,const float * target)285 M64P_FPU_INLINE void c_lt_s(const float *source,const float *target)
286 {
287 //if (isnan(*source) || isnan(*target)) // FIXME - exception
288 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
289 }
c_nge_s(const float * source,const float * target)290 M64P_FPU_INLINE void c_nge_s(const float *source,const float *target)
291 {
292 //if (isnan(*source) || isnan(*target)) // FIXME - exception
293 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
294 }
295
c_le_s(const float * source,const float * target)296 M64P_FPU_INLINE void c_le_s(const float *source,const float *target)
297 {
298 //if (isnan(*source) || isnan(*target)) // FIXME - exception
299 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
300 }
c_ngt_s(const float * source,const float * target)301 M64P_FPU_INLINE void c_ngt_s(const float *source,const float *target)
302 {
303 //if (isnan(*source) || isnan(*target)) // FIXME - exception
304 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
305 }
306
c_f_d()307 M64P_FPU_INLINE void c_f_d()
308 {
309 FCR31 &= ~FCR31_CMP_BIT;
310 }
c_un_d(const double * source,const double * target)311 M64P_FPU_INLINE void c_un_d(const double *source,const double *target)
312 {
313 FCR31=(isnan(*source) || isnan(*target)) ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
314 }
315
c_eq_d(const double * source,const double * target)316 M64P_FPU_INLINE void c_eq_d(const double *source,const double *target)
317 {
318 if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
319 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
320 }
c_ueq_d(const double * source,const double * target)321 M64P_FPU_INLINE void c_ueq_d(const double *source,const double *target)
322 {
323 if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
324 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
325 }
326
c_olt_d(const double * source,const double * target)327 M64P_FPU_INLINE void c_olt_d(const double *source,const double *target)
328 {
329 if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
330 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
331 }
c_ult_d(const double * source,const double * target)332 M64P_FPU_INLINE void c_ult_d(const double *source,const double *target)
333 {
334 if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
335 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
336 }
337
c_ole_d(const double * source,const double * target)338 M64P_FPU_INLINE void c_ole_d(const double *source,const double *target)
339 {
340 if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
341 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
342 }
c_ule_d(const double * source,const double * target)343 M64P_FPU_INLINE void c_ule_d(const double *source,const double *target)
344 {
345 if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
346 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
347 }
348
c_sf_d(const double * source,const double * target)349 M64P_FPU_INLINE void c_sf_d(const double *source,const double *target)
350 {
351 //if (isnan(*source) || isnan(*target)) // FIXME - exception
352 FCR31&=~FCR31_CMP_BIT;
353 }
c_ngle_d(const double * source,const double * target)354 M64P_FPU_INLINE void c_ngle_d(const double *source,const double *target)
355 {
356 //if (isnan(*source) || isnan(*target)) // FIXME - exception
357 FCR31&=~FCR31_CMP_BIT;
358 }
359
c_seq_d(const double * source,const double * target)360 M64P_FPU_INLINE void c_seq_d(const double *source,const double *target)
361 {
362 //if (isnan(*source) || isnan(*target)) // FIXME - exception
363 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
364 }
c_ngl_d(const double * source,const double * target)365 M64P_FPU_INLINE void c_ngl_d(const double *source,const double *target)
366 {
367 //if (isnan(*source) || isnan(*target)) // FIXME - exception
368 FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
369 }
370
c_lt_d(const double * source,const double * target)371 M64P_FPU_INLINE void c_lt_d(const double *source,const double *target)
372 {
373 //if (isnan(*source) || isnan(*target)) // FIXME - exception
374 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
375 }
c_nge_d(const double * source,const double * target)376 M64P_FPU_INLINE void c_nge_d(const double *source,const double *target)
377 {
378 //if (isnan(*source) || isnan(*target)) // FIXME - exception
379 FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
380 }
381
c_le_d(const double * source,const double * target)382 M64P_FPU_INLINE void c_le_d(const double *source,const double *target)
383 {
384 //if (isnan(*source) || isnan(*target)) // FIXME - exception
385 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
386 }
c_ngt_d(const double * source,const double * target)387 M64P_FPU_INLINE void c_ngt_d(const double *source,const double *target)
388 {
389 //if (isnan(*source) || isnan(*target)) // FIXME - exception
390 FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
391 }
392
393
add_s(const float * source1,const float * source2,float * target)394 M64P_FPU_INLINE void add_s(const float *source1,const float *source2,float *target)
395 {
396 set_rounding();
397 *target=(*source1)+(*source2);
398 }
sub_s(const float * source1,const float * source2,float * target)399 M64P_FPU_INLINE void sub_s(const float *source1,const float *source2,float *target)
400 {
401 set_rounding();
402 *target=(*source1)-(*source2);
403 }
mul_s(const float * source1,const float * source2,float * target)404 M64P_FPU_INLINE void mul_s(const float *source1,const float *source2,float *target)
405 {
406 set_rounding();
407 *target=(*source1)*(*source2);
408 }
div_s(const float * source1,const float * source2,float * target)409 M64P_FPU_INLINE void div_s(const float *source1,const float *source2,float *target)
410 {
411 set_rounding();
412 *target=(*source1)/(*source2);
413 }
sqrt_s(const float * source,float * target)414 M64P_FPU_INLINE void sqrt_s(const float *source,float *target)
415 {
416 set_rounding();
417 *target=sqrtf(*source);
418 }
abs_s(const float * source,float * target)419 M64P_FPU_INLINE void abs_s(const float *source,float *target)
420 {
421 *target=fabsf(*source);
422 }
mov_s(const float * source,float * target)423 M64P_FPU_INLINE void mov_s(const float *source,float *target)
424 {
425 *target=*source;
426 }
neg_s(const float * source,float * target)427 M64P_FPU_INLINE void neg_s(const float *source,float *target)
428 {
429 *target=-(*source);
430 }
add_d(const double * source1,const double * source2,double * target)431 M64P_FPU_INLINE void add_d(const double *source1,const double *source2,double *target)
432 {
433 set_rounding();
434 *target=(*source1)+(*source2);
435 }
sub_d(const double * source1,const double * source2,double * target)436 M64P_FPU_INLINE void sub_d(const double *source1,const double *source2,double *target)
437 {
438 set_rounding();
439 *target=(*source1)-(*source2);
440 }
mul_d(const double * source1,const double * source2,double * target)441 M64P_FPU_INLINE void mul_d(const double *source1,const double *source2,double *target)
442 {
443 set_rounding();
444 *target=(*source1)*(*source2);
445 }
div_d(const double * source1,const double * source2,double * target)446 M64P_FPU_INLINE void div_d(const double *source1,const double *source2,double *target)
447 {
448 set_rounding();
449 *target=(*source1)/(*source2);
450 }
sqrt_d(const double * source,double * target)451 M64P_FPU_INLINE void sqrt_d(const double *source,double *target)
452 {
453 set_rounding();
454 *target=sqrt(*source);
455 }
abs_d(const double * source,double * target)456 M64P_FPU_INLINE void abs_d(const double *source,double *target)
457 {
458 *target=fabs(*source);
459 }
mov_d(const double * source,double * target)460 M64P_FPU_INLINE void mov_d(const double *source,double *target)
461 {
462 *target=*source;
463 }
neg_d(const double * source,double * target)464 M64P_FPU_INLINE void neg_d(const double *source,double *target)
465 {
466 *target = -(*source);
467 }
468
469 #endif /* M64P_R4300_FPU_H */
470