1 /*	$NetBSD: systfloat.c,v 1.8 2008/04/28 20:23:04 martin Exp $	*/
2 
3 /* This is a derivative work. */
4 
5 /*-
6  * Copyright (c) 2001 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Ross Harvey.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35 ===============================================================================
36 
37 This C source file is part of TestFloat, Release 2a, a package of programs
38 for testing the correctness of floating-point arithmetic complying to the
39 IEC/IEEE Standard for Floating-Point.
40 
41 Written by John R. Hauser.  More information is available through the Web
42 page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'.
43 
44 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
45 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
46 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
47 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
48 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
49 
50 Derivative works are acceptable, even for commercial purposes, so long as
51 (1) they include prominent notice that the work is derivative, and (2) they
52 include prominent notice akin to these four paragraphs for those parts of
53 this code that are retained.
54 
55 ===============================================================================
56 */
57 
58 #include <sys/cdefs.h>
59 #ifndef __lint
60 __RCSID("$NetBSD: systfloat.c,v 1.8 2008/04/28 20:23:04 martin Exp $");
61 #endif
62 
63 #include <math.h>
64 #include <ieeefp.h>
65 #include "milieu.h"
66 #include "softfloat.h"
67 #include "systfloat.h"
68 #include "systflags.h"
69 #include "systmodes.h"
70 
71 typedef union {
72     float32 f32;
73     float f;
74 } union32;
75 typedef union {
76     float64 f64;
77     double d;
78 } union64;
79 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
80 typedef union {
81     floatx80 fx80;
82     long double ld;
83 } unionx80;
84 #endif
85 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
86 typedef union {
87     float128 f128;
88     long double ld;
89 } union128;
90 #endif
91 
92 fp_except
syst_float_flags_clear(void)93 syst_float_flags_clear(void)
94 {
95     return fpsetsticky(0)
96 	& (FP_X_IMP | FP_X_UFL | FP_X_OFL | FP_X_DZ | FP_X_INV);
97 }
98 
99 void
syst_float_set_rounding_mode(fp_rnd direction)100 syst_float_set_rounding_mode(fp_rnd direction)
101 {
102     fpsetround(direction);
103     fpsetmask(0);
104 }
105 
syst_int32_to_float32(int32 a)106 float32 syst_int32_to_float32( int32 a )
107 {
108     const union32 uz = { .f = a };
109 
110     return uz.f32;
111 
112 }
113 
syst_int32_to_float64(int32 a)114 float64 syst_int32_to_float64( int32 a )
115 {
116     const union64 uz = { .d = a };
117 
118     return uz.f64;
119 
120 }
121 
122 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
123 
syst_int32_to_floatx80(int32 a)124 floatx80 syst_int32_to_floatx80( int32 a )
125 {
126     const unionx80 uz = { .ld = a };
127 
128     return uz.fx80;
129 
130 }
131 
132 #endif
133 
134 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
135 
syst_int32_to_float128(int32 a)136 float128 syst_int32_to_float128( int32 a )
137 {
138     const union128 uz = { .ld = a };
139 
140     return uz.f128;
141 
142 }
143 
144 #endif
145 
146 #ifdef BITS64
147 
syst_int64_to_float32(int64 a)148 float32 syst_int64_to_float32( int64 a )
149 {
150     const union32 uz = { .f = a };
151 
152     return uz.f32;
153 }
154 
syst_int64_to_float64(int64 a)155 float64 syst_int64_to_float64( int64 a )
156 {
157     const union64 uz = { .d = a };
158 
159     return uz.f64;
160 }
161 
162 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
163 
syst_int64_to_floatx80(int64 a)164 floatx80 syst_int64_to_floatx80( int64 a )
165 {
166     const unionx80 uz = { .ld = a };
167 
168     return uz.fx80;
169 }
170 
171 #endif
172 
173 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
174 
syst_int64_to_float128(int64 a)175 float128 syst_int64_to_float128( int64 a )
176 {
177     const union128 uz = { .ld = a };
178 
179     return uz.f128;
180 }
181 
182 #endif
183 
184 #endif
185 
syst_float32_to_int32_round_to_zero(float32 a)186 int32 syst_float32_to_int32_round_to_zero( float32 a )
187 {
188     const union32 uz = { .f32 = a };
189 
190     return uz.f;
191 
192 }
193 
194 #ifdef BITS64
195 
syst_float32_to_int64_round_to_zero(float32 a)196 int64 syst_float32_to_int64_round_to_zero( float32 a )
197 {
198     const union32 uz = { .f32 = a };
199 
200     return uz.f;
201 }
202 
203 #endif
204 
syst_float32_to_float64(float32 a)205 float64 syst_float32_to_float64( float32 a )
206 {
207     const union32 ua = { .f32 = a };
208     union64 uz;
209 
210     uz.d = ua.f;
211     return uz.f64;
212 
213 }
214 
215 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
216 
syst_float32_to_floatx80(float32 a)217 floatx80 syst_float32_to_floatx80( float32 a )
218 {
219     const union32 ua = { .f32 = a };
220     unionx80 uz;
221 
222     uz.ld = ua.f;
223     return uz.fx80;
224 }
225 
226 #endif
227 
228 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
229 
syst_float32_to_float128(float32 a)230 float128 syst_float32_to_float128( float32 a )
231 {
232     const union32 ua = { .f32 = a };
233     union128 ub;
234 
235     ub.ld = ua.f;
236     return ub.f128;
237 }
238 
239 #endif
240 
syst_float32_add(float32 a,float32 b)241 float32 syst_float32_add( float32 a, float32 b )
242 {
243     const union32 ua = { .f32 = a }, ub = { .f32 = b };
244     union32 uz;
245 
246     uz.f = ua.f + ub.f;
247     return uz.f32;
248 }
249 
syst_float32_sub(float32 a,float32 b)250 float32 syst_float32_sub( float32 a, float32 b )
251 {
252     const union32 ua = { .f32 = a }, ub = { .f32 = b };
253     union32 uz;
254 
255     uz.f = ua.f - ub.f;
256     return uz.f32;
257 }
258 
syst_float32_mul(float32 a,float32 b)259 float32 syst_float32_mul( float32 a, float32 b )
260 {
261     const union32 ua = { .f32 = a }, ub = { .f32 = b };
262     union32 uz;
263 
264     uz.f = ua.f * ub.f;
265     return uz.f32;
266 }
267 
syst_float32_div(float32 a,float32 b)268 float32 syst_float32_div( float32 a, float32 b )
269 {
270     const union32 ua = { .f32 = a }, ub = { .f32 = b };
271     union32 uz;
272 
273     uz.f = ua.f / ub.f;
274     return uz.f32;
275 }
276 
syst_float32_eq(float32 a,float32 b)277 flag syst_float32_eq( float32 a, float32 b )
278 {
279     const union32 ua = { .f32 = a }, ub = { .f32 = b };
280 
281     return ua.f == ub.f;
282 }
283 
syst_float32_le(float32 a,float32 b)284 flag syst_float32_le( float32 a, float32 b )
285 {
286     const union32 ua = { .f32 = a }, ub = { .f32 = b };
287 
288     return ua.f <= ub.f;
289 }
290 
syst_float32_lt(float32 a,float32 b)291 flag syst_float32_lt( float32 a, float32 b )
292 {
293     const union32 ua = { .f32 = a }, ub = { .f32 = b };
294 
295     return ua.f < ub.f;
296 }
297 
syst_float64_to_int32_round_to_zero(float64 a)298 int32 syst_float64_to_int32_round_to_zero( float64 a )
299 {
300     const union64 uz = { .f64 = a };
301 
302     return uz.d;
303 }
304 
305 #ifdef BITS64
306 
syst_float64_to_int64_round_to_zero(float64 a)307 int64 syst_float64_to_int64_round_to_zero( float64 a )
308 {
309     const union64 uz = { .f64 = a };
310 
311     return uz.d;
312 }
313 
314 #endif
315 
syst_float64_to_float32(float64 a)316 float32 syst_float64_to_float32( float64 a )
317 {
318     const union64 ua = { .f64 = a };
319     union32 uz;
320 
321     uz.f = ua.d;
322     return uz.f32;
323 }
324 
325 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
326 
syst_float64_to_floatx80(float64 a)327 floatx80 syst_float64_to_floatx80( float64 a )
328 {
329     const union64 ua = { .f64 = a };
330     unionx80 u;
331 
332     u.ld = ua.d;
333     return u.fx80;
334 }
335 
336 #endif
337 
338 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
339 
syst_float64_to_float128(float64 a)340 float128 syst_float64_to_float128( float64 a )
341 {
342     const union64 ua = { .f64 = a };
343     union128 uz;
344 
345     uz.ld = ua.d;
346     return uz.f128;
347 }
348 
349 #endif
350 
syst_float64_add(float64 a,float64 b)351 float64 syst_float64_add( float64 a, float64 b )
352 {
353     const union64 ua = { .f64 = a }, ub = { .f64 = b };
354     union64 uz;
355 
356     uz.d = ua.d + ub.d;
357     return uz.f64;
358 }
359 
syst_float64_sub(float64 a,float64 b)360 float64 syst_float64_sub( float64 a, float64 b )
361 {
362     const union64 ua = { .f64 = a }, ub = { .f64 = b };
363     union64 uz;
364 
365     uz.d = ua.d - ub.d;
366     return uz.f64;
367 }
368 
syst_float64_mul(float64 a,float64 b)369 float64 syst_float64_mul( float64 a, float64 b )
370 {
371     const union64 ua = { .f64 = a }, ub = { .f64 = b };
372     union64 uz;
373 
374     uz.d = ua.d * ub.d;
375     return uz.f64;
376 }
377 
syst_float64_div(float64 a,float64 b)378 float64 syst_float64_div( float64 a, float64 b )
379 {
380     const union64 ua = { .f64 = a }, ub = { .f64 = b };
381     union64 uz;
382 
383     uz.d = ua.d / ub.d;
384     return uz.f64;
385 }
386 
syst_float64_sqrt(float64 a)387 float64 syst_float64_sqrt( float64 a )
388 {
389     const union64 ua = { .f64 = a };
390     union64 uz;
391 
392     uz.d = sqrt(ua.d);
393     return uz.f64;
394 }
395 
syst_float64_eq(float64 a,float64 b)396 flag syst_float64_eq( float64 a, float64 b )
397 {
398     const union64 ua = { .f64 = a }, ub = { .f64 = b };
399 
400     return ua.d == ub.d;
401 }
402 
syst_float64_le(float64 a,float64 b)403 flag syst_float64_le( float64 a, float64 b )
404 {
405     const union64 ua = { .f64 = a }, ub = { .f64 = b };
406 
407     return ua.d <= ub.d;
408 }
409 
syst_float64_lt(float64 a,float64 b)410 flag syst_float64_lt( float64 a, float64 b )
411 {
412     const union64 ua = { .f64 = a }, ub = { .f64 = b };
413 
414     return ua.d < ub.d;
415 }
416 
417 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
418 
syst_floatx80_to_int32_round_to_zero(floatx80 a)419 int32 syst_floatx80_to_int32_round_to_zero( floatx80 a )
420 {
421     const unionx80 uz = { .fx80 = a };
422 
423     return uz.ld;
424 }
425 
426 #ifdef BITS64
427 
syst_floatx80_to_int64_round_to_zero(floatx80 a)428 int64 syst_floatx80_to_int64_round_to_zero( floatx80 a )
429 {
430     const unionx80 uz = { .fx80 = a };
431 
432     return uz.ld;
433 }
434 
435 #endif
436 
syst_floatx80_to_float32(floatx80 a)437 float32 syst_floatx80_to_float32( floatx80 a )
438 {
439     const unionx80 ua = { .fx80 = a };
440     union32 uz;
441 
442     uz.f = ua.ld;
443     return uz.f32;
444 }
445 
syst_floatx80_to_float64(floatx80 a)446 float64 syst_floatx80_to_float64( floatx80 a )
447 {
448     const unionx80 ua = { .fx80 = a };
449     union64 uz;
450 
451     uz.d = ua.ld;
452     return uz.f64;
453 }
454 
syst_floatx80_add(floatx80 a,floatx80 b)455 floatx80 syst_floatx80_add( floatx80 a, floatx80 b )
456 {
457     const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
458     unionx80 uz;
459 
460     uz.ld = ua.ld + ub.ld;
461     return uz.fx80;
462 }
463 
syst_floatx80_sub(floatx80 a,floatx80 b)464 floatx80 syst_floatx80_sub( floatx80 a, floatx80 b )
465 {
466     const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
467     unionx80 uz;
468 
469     uz.ld = ua.ld - ub.ld;
470     return uz.fx80;
471 }
472 
syst_floatx80_mul(floatx80 a,floatx80 b)473 floatx80 syst_floatx80_mul( floatx80 a, floatx80 b )
474 {
475     const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
476     unionx80 uz;
477 
478     uz.ld = ua.ld * ub.ld;
479     return uz.fx80;
480 }
481 
syst_floatx80_div(floatx80 a,floatx80 b)482 floatx80 syst_floatx80_div( floatx80 a, floatx80 b )
483 {
484     const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
485     unionx80 uz;
486 
487     uz.ld = ua.ld / ub.ld;
488     return uz.fx80;
489 }
490 
syst_floatx80_eq(floatx80 a,floatx80 b)491 flag syst_floatx80_eq( floatx80 a, floatx80 b )
492 {
493     const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
494 
495     return ua.ld == ub.ld;
496 }
497 
syst_floatx80_le(floatx80 a,floatx80 b)498 flag syst_floatx80_le( floatx80 a, floatx80 b )
499 {
500     const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
501 
502     return ua.ld <= ub.ld;
503 }
504 
syst_floatx80_lt(floatx80 a,floatx80 b)505 flag syst_floatx80_lt( floatx80 a, floatx80 b )
506 {
507     const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
508 
509     return ua.ld < ub.ld;
510 }
511 
512 #endif
513 
514 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
515 
syst_float128_to_int32_round_to_zero(float128 a)516 int32 syst_float128_to_int32_round_to_zero( float128 a )
517 {
518     const union128 ua = { .f128 = a };
519 
520     return ua.ld;
521 }
522 
523 #ifdef BITS64
524 
syst_float128_to_int64_round_to_zero(float128 a)525 int64 syst_float128_to_int64_round_to_zero( float128 a )
526 {
527     const union128 ua = { .f128 = a };
528 
529     return ua.ld;
530 }
531 
532 #endif
533 
syst_float128_to_float32(float128 a)534 float32 syst_float128_to_float32( float128 a )
535 {
536     const union128 ua = { .f128 = a };
537     union32 uz;
538 
539     uz.f = ua.ld;
540     return uz.f32;
541 
542 }
543 
syst_float128_to_float64(float128 a)544 float64 syst_float128_to_float64( float128 a )
545 {
546     const union128 ua = { .f128 = a };
547     union64 uz;
548 
549     uz.d = ua.ld;
550     return uz.f64;
551 }
552 
syst_float128_add(float128 a,float128 b)553 float128 syst_float128_add( float128 a, float128 b )
554 {
555     const union128 ua = { .f128 = a }, ub = { .f128 = b };
556     union128 uz;
557 
558     uz.ld = ua.ld + ub.ld;
559     return uz.f128;
560 
561 }
562 
syst_float128_sub(float128 a,float128 b)563 float128 syst_float128_sub( float128 a, float128 b )
564 {
565     const union128 ua = { .f128 = a }, ub = { .f128 = b };
566     union128 uz;
567 
568     uz.ld = ua.ld - ub.ld;
569     return uz.f128;
570 }
571 
syst_float128_mul(float128 a,float128 b)572 float128 syst_float128_mul( float128 a, float128 b )
573 {
574     const union128 ua = { .f128 = a }, ub = { .f128 = b };
575     union128 uz;
576 
577     uz.ld = ua.ld * ub.ld;
578     return uz.f128;
579 }
580 
syst_float128_div(float128 a,float128 b)581 float128 syst_float128_div( float128 a, float128 b )
582 {
583     const union128 ua = { .f128 = a }, ub = { .f128 = b };
584     union128 uz;
585 
586     uz.ld = ua.ld / ub.ld;
587     return uz.f128;
588 }
589 
syst_float128_eq(float128 a,float128 b)590 flag syst_float128_eq( float128 a, float128 b )
591 {
592     const union128 ua = { .f128 = a }, ub = { .f128 = b };
593 
594     return ua.ld == ub.ld;
595 }
596 
syst_float128_le(float128 a,float128 b)597 flag syst_float128_le( float128 a, float128 b )
598 {
599     const union128 ua = { .f128 = a }, ub = { .f128 = b };
600 
601     return ua.ld <= ub.ld;
602 }
603 
syst_float128_lt(float128 a,float128 b)604 flag syst_float128_lt( float128 a, float128 b )
605 {
606     const union128 ua = { .f128 = a }, ub = { .f128 = b };
607 
608     return ua.ld < ub.ld;
609 }
610 
611 #endif
612