1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  *   Mupen64plus - fpu.h                                                   *
3  *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
4  *   Copyright (C) 2010 Ari64                                              *
5  *                                                                         *
6  *   This program is free software; you can redistribute it and/or modify  *
7  *   it under the terms of the GNU General Public License as published by  *
8  *   the Free Software Foundation; either version 2 of the License, or     *
9  *   (at your option) any later version.                                   *
10  *                                                                         *
11  *   This program is distributed in the hope that it will be useful,       *
12  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14  *   GNU General Public License for more details.                          *
15  *                                                                         *
16  *   You should have received a copy of the GNU General Public License     *
17  *   along with this program; if not, write to the                         *
18  *   Free Software Foundation, Inc.,                                       *
19  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
20  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21 
22 #ifndef M64P_R4300_FPU_H
23 #define M64P_R4300_FPU_H
24 
25 #include <math.h>
26 #include <stdint.h>
27 
28 #include "cp1_private.h"
29 #include "r4300.h"
30 
31 #ifdef _MSC_VER
32   #define M64P_FPU_INLINE static __inline
33   #include <float.h>
34 
/* Rounding-mode selectors for the MSVC fesetround() replacement.  The numeric
 * values are used as indices into its table of _RC_* constants. */
typedef enum
{
  FE_TONEAREST  = 0,
  FE_TOWARDZERO = 1,
  FE_UPWARD     = 2,
  FE_DOWNWARD   = 3
} eRoundType;
fesetround(eRoundType RoundType)36   static void fesetround(eRoundType RoundType)
37   {
38     static const unsigned int msRound[4] = { _RC_NEAR, _RC_CHOP, _RC_UP, _RC_DOWN };
39 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
40     _controlfp(msRound[RoundType], _MCW_RC);
41 #elif defined(__x86_64__) || defined(_M_X64)
42     _controlfp(msRound[RoundType], _MCW_RC);
43 #else
44     unsigned int oldX87, oldSSE2;
45     __control87_2(msRound[RoundType], _MCW_RC, &oldX87, &oldSSE2);
46 #endif
47   }
/* MSVC fallback for C99 round(): rounds x to the nearest integer value, with
 * halfway cases rounded away from zero.  NaN, infinities and values whose
 * magnitude is at least 2^52 (already integral) are returned unchanged.
 * The earlier floor(x + 0.5) form rounded negative ties toward +Inf
 * (-2.5 gave -2) and mis-rounded the largest double below 0.5. */
static __inline double round(double x)
{
  double whole;
  double frac;

  if (!(x > -4503599627370496.0 && x < 4503599627370496.0))
  {
    return x;
  }
  whole = (double) (int64_t) x;  /* truncation toward zero; in range here */
  frac = x - whole;              /* exact, and |frac| < 1 */
  if (frac >= 0.5)
  {
    whole += 1.0;
  }
  else if (frac <= -0.5)
  {
    whole -= 1.0;
  }
  return whole;
}
/* MSVC fallback for C99 roundf(): rounds x to the nearest integer value, with
 * halfway cases rounded away from zero.  NaN, infinities and values whose
 * magnitude is at least 2^23 (already integral) are returned unchanged.
 * The earlier floor(x + 0.5) form rounded negative ties toward +Inf
 * (-2.5f gave -2). */
static __inline float roundf(float x)
{
  float whole;
  float frac;

  if (!(x > -8388608.0f && x < 8388608.0f))
  {
    return x;
  }
  whole = (float) (int32_t) x;  /* truncation toward zero; in range here */
  frac = x - whole;             /* exact, and |frac| < 1 */
  if (frac >= 0.5f)
  {
    whole += 1.0f;
  }
  else if (frac <= -0.5f)
  {
    whole -= 1.0f;
  }
  return whole;
}
/* MSVC fallback for C99 trunc(): drops the fractional part of x (rounds
 * toward zero).  NaN, infinities and values whose magnitude is at least 2^52
 * (already integral) are returned unchanged, so no out-of-range integer
 * conversion is ever performed.  The earlier (double)(int) cast overflowed
 * int for any |x| >= 2^31, although this helper feeds 64-bit conversions. */
static __inline double trunc(double x)
{
  if (!(x > -4503599627370496.0 && x < 4503599627370496.0))
  {
    return x;
  }
  return (double) (int64_t) x;
}
/* MSVC fallback for C99 truncf(): drops the fractional part of x (rounds
 * toward zero).  NaN, infinities and values whose magnitude is at least 2^23
 * (already integral) are returned unchanged, so no out-of-range integer
 * conversion is ever performed.  The earlier (float)(int) cast overflowed
 * int for any |x| >= 2^31, although this helper feeds 64-bit conversions. */
static __inline float truncf(float x)
{
  if (!(x > -8388608.0f && x < 8388608.0f))
  {
    return x;
  }
  return (float) (int32_t) x;
}
52   #define isnan _isnan
53 #else
54   #define M64P_FPU_INLINE static inline
55 #ifndef VITA
56   #include <fenv.h>
57 #endif
58 #endif
59 
60 #define FCR31_CMP_BIT UINT32_C(0x800000)
61 
62 
set_rounding(void)63 M64P_FPU_INLINE void set_rounding(void)
64 {
65    /* TODO skogaby: fix this for real */
66 #ifndef VITA
67    switch(FCR31 & 3)
68    {
69       case 0: /* Round to nearest, or to even if equidistant */
70          fesetround(FE_TONEAREST);
71          break;
72       case 1: /* Truncate (toward 0) */
73          fesetround(FE_TOWARDZERO);
74          break;
75       case 2: /* Round up (toward +Inf) */
76          fesetround(FE_UPWARD);
77          break;
78       case 3: /* Round down (toward -Inf) */
79          fesetround(FE_DOWNWARD);
80          break;
81    }
82 #endif
83 }
84 
cvt_s_w(const int32_t * source,float * dest)85 M64P_FPU_INLINE void cvt_s_w(const int32_t *source,float *dest)
86 {
87   set_rounding();
88   *dest = (float) *source;
89 }
cvt_d_w(const int32_t * source,double * dest)90 M64P_FPU_INLINE void cvt_d_w(const int32_t *source,double *dest)
91 {
92   *dest = (double) *source;
93 }
cvt_s_l(const int64_t * source,float * dest)94 M64P_FPU_INLINE void cvt_s_l(const int64_t *source,float *dest)
95 {
96   set_rounding();
97   *dest = (float) *source;
98 }
cvt_d_l(const int64_t * source,double * dest)99 M64P_FPU_INLINE void cvt_d_l(const int64_t *source,double *dest)
100 {
101   set_rounding();
102   *dest = (double) *source;
103 }
cvt_d_s(const float * source,double * dest)104 M64P_FPU_INLINE void cvt_d_s(const float *source,double *dest)
105 {
106   *dest = (double) *source;
107 }
cvt_s_d(const double * source,float * dest)108 M64P_FPU_INLINE void cvt_s_d(const double *source,float *dest)
109 {
110   set_rounding();
111   *dest = (float) *source;
112 }
113 
round_l_s(const float * source,int64_t * dest)114 M64P_FPU_INLINE void round_l_s(const float *source,int64_t *dest)
115 {
116   *dest = (int64_t) roundf(*source);
117 }
round_w_s(const float * source,int32_t * dest)118 M64P_FPU_INLINE void round_w_s(const float *source,int32_t *dest)
119 {
120   *dest = (int32_t) roundf(*source);
121 }
trunc_l_s(const float * source,int64_t * dest)122 M64P_FPU_INLINE void trunc_l_s(const float *source,int64_t *dest)
123 {
124   *dest = (int64_t) truncf(*source);
125 }
trunc_w_s(const float * source,int32_t * dest)126 M64P_FPU_INLINE void trunc_w_s(const float *source,int32_t *dest)
127 {
128   *dest = (int32_t) truncf(*source);
129 }
ceil_l_s(const float * source,int64_t * dest)130 M64P_FPU_INLINE void ceil_l_s(const float *source,int64_t *dest)
131 {
132   *dest = (int64_t) ceilf(*source);
133 }
ceil_w_s(const float * source,int32_t * dest)134 M64P_FPU_INLINE void ceil_w_s(const float *source,int32_t *dest)
135 {
136   *dest = (int32_t) ceilf(*source);
137 }
floor_l_s(const float * source,int64_t * dest)138 M64P_FPU_INLINE void floor_l_s(const float *source,int64_t *dest)
139 {
140   *dest = (int64_t) floorf(*source);
141 }
floor_w_s(const float * source,int32_t * dest)142 M64P_FPU_INLINE void floor_w_s(const float *source,int32_t *dest)
143 {
144   *dest = (int32_t) floorf(*source);
145 }
146 
round_l_d(const double * source,int64_t * dest)147 M64P_FPU_INLINE void round_l_d(const double *source,int64_t *dest)
148 {
149   *dest = (int64_t) round(*source);
150 }
round_w_d(const double * source,int32_t * dest)151 M64P_FPU_INLINE void round_w_d(const double *source,int32_t *dest)
152 {
153   *dest = (int32_t) round(*source);
154 }
trunc_l_d(const double * source,int64_t * dest)155 M64P_FPU_INLINE void trunc_l_d(const double *source,int64_t *dest)
156 {
157   *dest = (int64_t) trunc(*source);
158 }
trunc_w_d(const double * source,int32_t * dest)159 M64P_FPU_INLINE void trunc_w_d(const double *source,int32_t *dest)
160 {
161   *dest = (int32_t) trunc(*source);
162 }
ceil_l_d(const double * source,int64_t * dest)163 M64P_FPU_INLINE void ceil_l_d(const double *source,int64_t *dest)
164 {
165   *dest = (int64_t) ceil(*source);
166 }
ceil_w_d(const double * source,int32_t * dest)167 M64P_FPU_INLINE void ceil_w_d(const double *source,int32_t *dest)
168 {
169   *dest = (int32_t) ceil(*source);
170 }
floor_l_d(const double * source,int64_t * dest)171 M64P_FPU_INLINE void floor_l_d(const double *source,int64_t *dest)
172 {
173   *dest = (int64_t) floor(*source);
174 }
floor_w_d(const double * source,int32_t * dest)175 M64P_FPU_INLINE void floor_w_d(const double *source,int32_t *dest)
176 {
177   *dest = (int32_t) floor(*source);
178 }
179 
cvt_w_s(const float * source,int32_t * dest)180 M64P_FPU_INLINE void cvt_w_s(const float *source,int32_t *dest)
181 {
182   switch(FCR31&3)
183   {
184     case 0: round_w_s(source,dest);return;
185     case 1: trunc_w_s(source,dest);return;
186     case 2: ceil_w_s(source,dest);return;
187     case 3: floor_w_s(source,dest);return;
188   }
189 }
cvt_w_d(const double * source,int32_t * dest)190 M64P_FPU_INLINE void cvt_w_d(const double *source,int32_t *dest)
191 {
192   switch(FCR31&3)
193   {
194     case 0: round_w_d(source,dest);return;
195     case 1: trunc_w_d(source,dest);return;
196     case 2: ceil_w_d(source,dest);return;
197     case 3: floor_w_d(source,dest);return;
198   }
199 }
cvt_l_s(const float * source,int64_t * dest)200 M64P_FPU_INLINE void cvt_l_s(const float *source,int64_t *dest)
201 {
202   switch(FCR31&3)
203   {
204     case 0: round_l_s(source,dest);return;
205     case 1: trunc_l_s(source,dest);return;
206     case 2: ceil_l_s(source,dest);return;
207     case 3: floor_l_s(source,dest);return;
208   }
209 }
cvt_l_d(const double * source,int64_t * dest)210 M64P_FPU_INLINE void cvt_l_d(const double *source,int64_t *dest)
211 {
212   switch(FCR31&3)
213   {
214     case 0: round_l_d(source,dest);return;
215     case 1: trunc_l_d(source,dest);return;
216     case 2: ceil_l_d(source,dest);return;
217     case 3: floor_l_d(source,dest);return;
218   }
219 }
220 
c_f_s()221 M64P_FPU_INLINE void c_f_s()
222 {
223   FCR31 &= ~FCR31_CMP_BIT;
224 }
c_un_s(const float * source,const float * target)225 M64P_FPU_INLINE void c_un_s(const float *source,const float *target)
226 {
227   FCR31=(isnan(*source) || isnan(*target)) ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
228 }
229 
c_eq_s(const float * source,const float * target)230 M64P_FPU_INLINE void c_eq_s(const float *source,const float *target)
231 {
232   if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
233   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
234 }
c_ueq_s(const float * source,const float * target)235 M64P_FPU_INLINE void c_ueq_s(const float *source,const float *target)
236 {
237   if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
238   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
239 }
240 
c_olt_s(const float * source,const float * target)241 M64P_FPU_INLINE void c_olt_s(const float *source,const float *target)
242 {
243   if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
244   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
245 }
c_ult_s(const float * source,const float * target)246 M64P_FPU_INLINE void c_ult_s(const float *source,const float *target)
247 {
248   if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
249   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
250 }
251 
c_ole_s(const float * source,const float * target)252 M64P_FPU_INLINE void c_ole_s(const float *source,const float *target)
253 {
254   if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
255   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
256 }
c_ule_s(const float * source,const float * target)257 M64P_FPU_INLINE void c_ule_s(const float *source,const float *target)
258 {
259   if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
260   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
261 }
262 
c_sf_s(const float * source,const float * target)263 M64P_FPU_INLINE void c_sf_s(const float *source,const float *target)
264 {
265   //if (isnan(*source) || isnan(*target)) // FIXME - exception
266   FCR31&=~FCR31_CMP_BIT;
267 }
c_ngle_s(const float * source,const float * target)268 M64P_FPU_INLINE void c_ngle_s(const float *source,const float *target)
269 {
270   //if (isnan(*source) || isnan(*target)) // FIXME - exception
271   FCR31&=~FCR31_CMP_BIT;
272 }
273 
c_seq_s(const float * source,const float * target)274 M64P_FPU_INLINE void c_seq_s(const float *source,const float *target)
275 {
276   //if (isnan(*source) || isnan(*target)) // FIXME - exception
277   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
278 }
c_ngl_s(const float * source,const float * target)279 M64P_FPU_INLINE void c_ngl_s(const float *source,const float *target)
280 {
281   //if (isnan(*source) || isnan(*target)) // FIXME - exception
282   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
283 }
284 
c_lt_s(const float * source,const float * target)285 M64P_FPU_INLINE void c_lt_s(const float *source,const float *target)
286 {
287   //if (isnan(*source) || isnan(*target)) // FIXME - exception
288   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
289 }
c_nge_s(const float * source,const float * target)290 M64P_FPU_INLINE void c_nge_s(const float *source,const float *target)
291 {
292   //if (isnan(*source) || isnan(*target)) // FIXME - exception
293   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
294 }
295 
c_le_s(const float * source,const float * target)296 M64P_FPU_INLINE void c_le_s(const float *source,const float *target)
297 {
298   //if (isnan(*source) || isnan(*target)) // FIXME - exception
299   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
300 }
c_ngt_s(const float * source,const float * target)301 M64P_FPU_INLINE void c_ngt_s(const float *source,const float *target)
302 {
303   //if (isnan(*source) || isnan(*target)) // FIXME - exception
304   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
305 }
306 
c_f_d()307 M64P_FPU_INLINE void c_f_d()
308 {
309   FCR31 &= ~FCR31_CMP_BIT;
310 }
c_un_d(const double * source,const double * target)311 M64P_FPU_INLINE void c_un_d(const double *source,const double *target)
312 {
313   FCR31=(isnan(*source) || isnan(*target)) ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
314 }
315 
c_eq_d(const double * source,const double * target)316 M64P_FPU_INLINE void c_eq_d(const double *source,const double *target)
317 {
318   if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
319   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
320 }
c_ueq_d(const double * source,const double * target)321 M64P_FPU_INLINE void c_ueq_d(const double *source,const double *target)
322 {
323   if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
324   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
325 }
326 
c_olt_d(const double * source,const double * target)327 M64P_FPU_INLINE void c_olt_d(const double *source,const double *target)
328 {
329   if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
330   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
331 }
c_ult_d(const double * source,const double * target)332 M64P_FPU_INLINE void c_ult_d(const double *source,const double *target)
333 {
334   if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
335   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
336 }
337 
c_ole_d(const double * source,const double * target)338 M64P_FPU_INLINE void c_ole_d(const double *source,const double *target)
339 {
340   if (isnan(*source) || isnan(*target)) {FCR31&=~FCR31_CMP_BIT;return;}
341   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
342 }
c_ule_d(const double * source,const double * target)343 M64P_FPU_INLINE void c_ule_d(const double *source,const double *target)
344 {
345   if (isnan(*source) || isnan(*target)) {FCR31|=FCR31_CMP_BIT;return;}
346   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
347 }
348 
c_sf_d(const double * source,const double * target)349 M64P_FPU_INLINE void c_sf_d(const double *source,const double *target)
350 {
351   //if (isnan(*source) || isnan(*target)) // FIXME - exception
352   FCR31&=~FCR31_CMP_BIT;
353 }
c_ngle_d(const double * source,const double * target)354 M64P_FPU_INLINE void c_ngle_d(const double *source,const double *target)
355 {
356   //if (isnan(*source) || isnan(*target)) // FIXME - exception
357   FCR31&=~FCR31_CMP_BIT;
358 }
359 
c_seq_d(const double * source,const double * target)360 M64P_FPU_INLINE void c_seq_d(const double *source,const double *target)
361 {
362   //if (isnan(*source) || isnan(*target)) // FIXME - exception
363   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
364 }
c_ngl_d(const double * source,const double * target)365 M64P_FPU_INLINE void c_ngl_d(const double *source,const double *target)
366 {
367   //if (isnan(*source) || isnan(*target)) // FIXME - exception
368   FCR31 = *source==*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
369 }
370 
c_lt_d(const double * source,const double * target)371 M64P_FPU_INLINE void c_lt_d(const double *source,const double *target)
372 {
373   //if (isnan(*source) || isnan(*target)) // FIXME - exception
374   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
375 }
c_nge_d(const double * source,const double * target)376 M64P_FPU_INLINE void c_nge_d(const double *source,const double *target)
377 {
378   //if (isnan(*source) || isnan(*target)) // FIXME - exception
379   FCR31 = *source<*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
380 }
381 
c_le_d(const double * source,const double * target)382 M64P_FPU_INLINE void c_le_d(const double *source,const double *target)
383 {
384   //if (isnan(*source) || isnan(*target)) // FIXME - exception
385   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
386 }
c_ngt_d(const double * source,const double * target)387 M64P_FPU_INLINE void c_ngt_d(const double *source,const double *target)
388 {
389   //if (isnan(*source) || isnan(*target)) // FIXME - exception
390   FCR31 = *source<=*target ? FCR31|FCR31_CMP_BIT : FCR31&~FCR31_CMP_BIT;
391 }
392 
393 
add_s(const float * source1,const float * source2,float * target)394 M64P_FPU_INLINE void add_s(const float *source1,const float *source2,float *target)
395 {
396   set_rounding();
397   *target=(*source1)+(*source2);
398 }
sub_s(const float * source1,const float * source2,float * target)399 M64P_FPU_INLINE void sub_s(const float *source1,const float *source2,float *target)
400 {
401   set_rounding();
402   *target=(*source1)-(*source2);
403 }
mul_s(const float * source1,const float * source2,float * target)404 M64P_FPU_INLINE void mul_s(const float *source1,const float *source2,float *target)
405 {
406   set_rounding();
407   *target=(*source1)*(*source2);
408 }
div_s(const float * source1,const float * source2,float * target)409 M64P_FPU_INLINE void div_s(const float *source1,const float *source2,float *target)
410 {
411   set_rounding();
412   *target=(*source1)/(*source2);
413 }
sqrt_s(const float * source,float * target)414 M64P_FPU_INLINE void sqrt_s(const float *source,float *target)
415 {
416   set_rounding();
417   *target=sqrtf(*source);
418 }
abs_s(const float * source,float * target)419 M64P_FPU_INLINE void abs_s(const float *source,float *target)
420 {
421   *target=fabsf(*source);
422 }
mov_s(const float * source,float * target)423 M64P_FPU_INLINE void mov_s(const float *source,float *target)
424 {
425   *target=*source;
426 }
neg_s(const float * source,float * target)427 M64P_FPU_INLINE void neg_s(const float *source,float *target)
428 {
429   *target=-(*source);
430 }
add_d(const double * source1,const double * source2,double * target)431 M64P_FPU_INLINE void add_d(const double *source1,const double *source2,double *target)
432 {
433   set_rounding();
434   *target=(*source1)+(*source2);
435 }
sub_d(const double * source1,const double * source2,double * target)436 M64P_FPU_INLINE void sub_d(const double *source1,const double *source2,double *target)
437 {
438   set_rounding();
439   *target=(*source1)-(*source2);
440 }
mul_d(const double * source1,const double * source2,double * target)441 M64P_FPU_INLINE void mul_d(const double *source1,const double *source2,double *target)
442 {
443   set_rounding();
444   *target=(*source1)*(*source2);
445 }
div_d(const double * source1,const double * source2,double * target)446 M64P_FPU_INLINE void div_d(const double *source1,const double *source2,double *target)
447 {
448   set_rounding();
449   *target=(*source1)/(*source2);
450 }
sqrt_d(const double * source,double * target)451 M64P_FPU_INLINE void sqrt_d(const double *source,double *target)
452 {
453   set_rounding();
454   *target=sqrt(*source);
455 }
abs_d(const double * source,double * target)456 M64P_FPU_INLINE void abs_d(const double *source,double *target)
457 {
458   *target=fabs(*source);
459 }
mov_d(const double * source,double * target)460 M64P_FPU_INLINE void mov_d(const double *source,double *target)
461 {
462   *target=*source;
463 }
neg_d(const double * source,double * target)464 M64P_FPU_INLINE void neg_d(const double *source,double *target)
465 {
466   *target = -(*source);
467 }
468 
469 #endif /* M64P_R4300_FPU_H */
470