1 /* $NetBSD: systfloat.c,v 1.8 2008/04/28 20:23:04 martin Exp $ */
2
3 /* This is a derivative work. */
4
5 /*-
6 * Copyright (c) 2001 The NetBSD Foundation, Inc.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to The NetBSD Foundation
10 * by Ross Harvey.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 ===============================================================================
36
37 This C source file is part of TestFloat, Release 2a, a package of programs
38 for testing the correctness of floating-point arithmetic complying to the
39 IEC/IEEE Standard for Floating-Point.
40
41 Written by John R. Hauser. More information is available through the Web
42 page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'.
43
44 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
45 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
46 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
47 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
48 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
49
50 Derivative works are acceptable, even for commercial purposes, so long as
51 (1) they include prominent notice that the work is derivative, and (2) they
52 include prominent notice akin to these four paragraphs for those parts of
53 this code that are retained.
54
55 ===============================================================================
56 */
57
58 #include <sys/cdefs.h>
59 #ifndef __lint
60 __RCSID("$NetBSD: systfloat.c,v 1.8 2008/04/28 20:23:04 martin Exp $");
61 #endif
62
63 #include <math.h>
64 #include <ieeefp.h>
65 #include "milieu.h"
66 #include "softfloat.h"
67 #include "systfloat.h"
68 #include "systflags.h"
69 #include "systmodes.h"
70
71 typedef union {
72 float32 f32;
73 float f;
74 } union32;
75 typedef union {
76 float64 f64;
77 double d;
78 } union64;
79 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
80 typedef union {
81 floatx80 fx80;
82 long double ld;
83 } unionx80;
84 #endif
85 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
86 typedef union {
87 float128 f128;
88 long double ld;
89 } union128;
90 #endif
91
92 fp_except
syst_float_flags_clear(void)93 syst_float_flags_clear(void)
94 {
95 return fpsetsticky(0)
96 & (FP_X_IMP | FP_X_UFL | FP_X_OFL | FP_X_DZ | FP_X_INV);
97 }
98
99 void
syst_float_set_rounding_mode(fp_rnd direction)100 syst_float_set_rounding_mode(fp_rnd direction)
101 {
102 fpsetround(direction);
103 fpsetmask(0);
104 }
105
syst_int32_to_float32(int32 a)106 float32 syst_int32_to_float32( int32 a )
107 {
108 const union32 uz = { .f = a };
109
110 return uz.f32;
111
112 }
113
syst_int32_to_float64(int32 a)114 float64 syst_int32_to_float64( int32 a )
115 {
116 const union64 uz = { .d = a };
117
118 return uz.f64;
119
120 }
121
122 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
123
syst_int32_to_floatx80(int32 a)124 floatx80 syst_int32_to_floatx80( int32 a )
125 {
126 const unionx80 uz = { .ld = a };
127
128 return uz.fx80;
129
130 }
131
132 #endif
133
134 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
135
syst_int32_to_float128(int32 a)136 float128 syst_int32_to_float128( int32 a )
137 {
138 const union128 uz = { .ld = a };
139
140 return uz.f128;
141
142 }
143
144 #endif
145
146 #ifdef BITS64
147
syst_int64_to_float32(int64 a)148 float32 syst_int64_to_float32( int64 a )
149 {
150 const union32 uz = { .f = a };
151
152 return uz.f32;
153 }
154
syst_int64_to_float64(int64 a)155 float64 syst_int64_to_float64( int64 a )
156 {
157 const union64 uz = { .d = a };
158
159 return uz.f64;
160 }
161
162 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
163
syst_int64_to_floatx80(int64 a)164 floatx80 syst_int64_to_floatx80( int64 a )
165 {
166 const unionx80 uz = { .ld = a };
167
168 return uz.fx80;
169 }
170
171 #endif
172
173 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
174
syst_int64_to_float128(int64 a)175 float128 syst_int64_to_float128( int64 a )
176 {
177 const union128 uz = { .ld = a };
178
179 return uz.f128;
180 }
181
182 #endif
183
184 #endif
185
syst_float32_to_int32_round_to_zero(float32 a)186 int32 syst_float32_to_int32_round_to_zero( float32 a )
187 {
188 const union32 uz = { .f32 = a };
189
190 return uz.f;
191
192 }
193
194 #ifdef BITS64
195
syst_float32_to_int64_round_to_zero(float32 a)196 int64 syst_float32_to_int64_round_to_zero( float32 a )
197 {
198 const union32 uz = { .f32 = a };
199
200 return uz.f;
201 }
202
203 #endif
204
syst_float32_to_float64(float32 a)205 float64 syst_float32_to_float64( float32 a )
206 {
207 const union32 ua = { .f32 = a };
208 union64 uz;
209
210 uz.d = ua.f;
211 return uz.f64;
212
213 }
214
215 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
216
syst_float32_to_floatx80(float32 a)217 floatx80 syst_float32_to_floatx80( float32 a )
218 {
219 const union32 ua = { .f32 = a };
220 unionx80 uz;
221
222 uz.ld = ua.f;
223 return uz.fx80;
224 }
225
226 #endif
227
228 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
229
syst_float32_to_float128(float32 a)230 float128 syst_float32_to_float128( float32 a )
231 {
232 const union32 ua = { .f32 = a };
233 union128 ub;
234
235 ub.ld = ua.f;
236 return ub.f128;
237 }
238
239 #endif
240
syst_float32_add(float32 a,float32 b)241 float32 syst_float32_add( float32 a, float32 b )
242 {
243 const union32 ua = { .f32 = a }, ub = { .f32 = b };
244 union32 uz;
245
246 uz.f = ua.f + ub.f;
247 return uz.f32;
248 }
249
syst_float32_sub(float32 a,float32 b)250 float32 syst_float32_sub( float32 a, float32 b )
251 {
252 const union32 ua = { .f32 = a }, ub = { .f32 = b };
253 union32 uz;
254
255 uz.f = ua.f - ub.f;
256 return uz.f32;
257 }
258
syst_float32_mul(float32 a,float32 b)259 float32 syst_float32_mul( float32 a, float32 b )
260 {
261 const union32 ua = { .f32 = a }, ub = { .f32 = b };
262 union32 uz;
263
264 uz.f = ua.f * ub.f;
265 return uz.f32;
266 }
267
syst_float32_div(float32 a,float32 b)268 float32 syst_float32_div( float32 a, float32 b )
269 {
270 const union32 ua = { .f32 = a }, ub = { .f32 = b };
271 union32 uz;
272
273 uz.f = ua.f / ub.f;
274 return uz.f32;
275 }
276
syst_float32_eq(float32 a,float32 b)277 flag syst_float32_eq( float32 a, float32 b )
278 {
279 const union32 ua = { .f32 = a }, ub = { .f32 = b };
280
281 return ua.f == ub.f;
282 }
283
syst_float32_le(float32 a,float32 b)284 flag syst_float32_le( float32 a, float32 b )
285 {
286 const union32 ua = { .f32 = a }, ub = { .f32 = b };
287
288 return ua.f <= ub.f;
289 }
290
syst_float32_lt(float32 a,float32 b)291 flag syst_float32_lt( float32 a, float32 b )
292 {
293 const union32 ua = { .f32 = a }, ub = { .f32 = b };
294
295 return ua.f < ub.f;
296 }
297
syst_float64_to_int32_round_to_zero(float64 a)298 int32 syst_float64_to_int32_round_to_zero( float64 a )
299 {
300 const union64 uz = { .f64 = a };
301
302 return uz.d;
303 }
304
305 #ifdef BITS64
306
syst_float64_to_int64_round_to_zero(float64 a)307 int64 syst_float64_to_int64_round_to_zero( float64 a )
308 {
309 const union64 uz = { .f64 = a };
310
311 return uz.d;
312 }
313
314 #endif
315
syst_float64_to_float32(float64 a)316 float32 syst_float64_to_float32( float64 a )
317 {
318 const union64 ua = { .f64 = a };
319 union32 uz;
320
321 uz.f = ua.d;
322 return uz.f32;
323 }
324
325 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
326
syst_float64_to_floatx80(float64 a)327 floatx80 syst_float64_to_floatx80( float64 a )
328 {
329 const union64 ua = { .f64 = a };
330 unionx80 u;
331
332 u.ld = ua.d;
333 return u.fx80;
334 }
335
336 #endif
337
338 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
339
syst_float64_to_float128(float64 a)340 float128 syst_float64_to_float128( float64 a )
341 {
342 const union64 ua = { .f64 = a };
343 union128 uz;
344
345 uz.ld = ua.d;
346 return uz.f128;
347 }
348
349 #endif
350
syst_float64_add(float64 a,float64 b)351 float64 syst_float64_add( float64 a, float64 b )
352 {
353 const union64 ua = { .f64 = a }, ub = { .f64 = b };
354 union64 uz;
355
356 uz.d = ua.d + ub.d;
357 return uz.f64;
358 }
359
syst_float64_sub(float64 a,float64 b)360 float64 syst_float64_sub( float64 a, float64 b )
361 {
362 const union64 ua = { .f64 = a }, ub = { .f64 = b };
363 union64 uz;
364
365 uz.d = ua.d - ub.d;
366 return uz.f64;
367 }
368
syst_float64_mul(float64 a,float64 b)369 float64 syst_float64_mul( float64 a, float64 b )
370 {
371 const union64 ua = { .f64 = a }, ub = { .f64 = b };
372 union64 uz;
373
374 uz.d = ua.d * ub.d;
375 return uz.f64;
376 }
377
syst_float64_div(float64 a,float64 b)378 float64 syst_float64_div( float64 a, float64 b )
379 {
380 const union64 ua = { .f64 = a }, ub = { .f64 = b };
381 union64 uz;
382
383 uz.d = ua.d / ub.d;
384 return uz.f64;
385 }
386
syst_float64_sqrt(float64 a)387 float64 syst_float64_sqrt( float64 a )
388 {
389 const union64 ua = { .f64 = a };
390 union64 uz;
391
392 uz.d = sqrt(ua.d);
393 return uz.f64;
394 }
395
syst_float64_eq(float64 a,float64 b)396 flag syst_float64_eq( float64 a, float64 b )
397 {
398 const union64 ua = { .f64 = a }, ub = { .f64 = b };
399
400 return ua.d == ub.d;
401 }
402
syst_float64_le(float64 a,float64 b)403 flag syst_float64_le( float64 a, float64 b )
404 {
405 const union64 ua = { .f64 = a }, ub = { .f64 = b };
406
407 return ua.d <= ub.d;
408 }
409
syst_float64_lt(float64 a,float64 b)410 flag syst_float64_lt( float64 a, float64 b )
411 {
412 const union64 ua = { .f64 = a }, ub = { .f64 = b };
413
414 return ua.d < ub.d;
415 }
416
417 #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
418
syst_floatx80_to_int32_round_to_zero(floatx80 a)419 int32 syst_floatx80_to_int32_round_to_zero( floatx80 a )
420 {
421 const unionx80 uz = { .fx80 = a };
422
423 return uz.ld;
424 }
425
426 #ifdef BITS64
427
syst_floatx80_to_int64_round_to_zero(floatx80 a)428 int64 syst_floatx80_to_int64_round_to_zero( floatx80 a )
429 {
430 const unionx80 uz = { .fx80 = a };
431
432 return uz.ld;
433 }
434
435 #endif
436
syst_floatx80_to_float32(floatx80 a)437 float32 syst_floatx80_to_float32( floatx80 a )
438 {
439 const unionx80 ua = { .fx80 = a };
440 union32 uz;
441
442 uz.f = ua.ld;
443 return uz.f32;
444 }
445
syst_floatx80_to_float64(floatx80 a)446 float64 syst_floatx80_to_float64( floatx80 a )
447 {
448 const unionx80 ua = { .fx80 = a };
449 union64 uz;
450
451 uz.d = ua.ld;
452 return uz.f64;
453 }
454
syst_floatx80_add(floatx80 a,floatx80 b)455 floatx80 syst_floatx80_add( floatx80 a, floatx80 b )
456 {
457 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
458 unionx80 uz;
459
460 uz.ld = ua.ld + ub.ld;
461 return uz.fx80;
462 }
463
syst_floatx80_sub(floatx80 a,floatx80 b)464 floatx80 syst_floatx80_sub( floatx80 a, floatx80 b )
465 {
466 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
467 unionx80 uz;
468
469 uz.ld = ua.ld - ub.ld;
470 return uz.fx80;
471 }
472
syst_floatx80_mul(floatx80 a,floatx80 b)473 floatx80 syst_floatx80_mul( floatx80 a, floatx80 b )
474 {
475 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
476 unionx80 uz;
477
478 uz.ld = ua.ld * ub.ld;
479 return uz.fx80;
480 }
481
syst_floatx80_div(floatx80 a,floatx80 b)482 floatx80 syst_floatx80_div( floatx80 a, floatx80 b )
483 {
484 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
485 unionx80 uz;
486
487 uz.ld = ua.ld / ub.ld;
488 return uz.fx80;
489 }
490
syst_floatx80_eq(floatx80 a,floatx80 b)491 flag syst_floatx80_eq( floatx80 a, floatx80 b )
492 {
493 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
494
495 return ua.ld == ub.ld;
496 }
497
syst_floatx80_le(floatx80 a,floatx80 b)498 flag syst_floatx80_le( floatx80 a, floatx80 b )
499 {
500 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
501
502 return ua.ld <= ub.ld;
503 }
504
syst_floatx80_lt(floatx80 a,floatx80 b)505 flag syst_floatx80_lt( floatx80 a, floatx80 b )
506 {
507 const unionx80 ua = { .fx80 = a }, ub = { .fx80 = b };
508
509 return ua.ld < ub.ld;
510 }
511
512 #endif
513
514 #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
515
syst_float128_to_int32_round_to_zero(float128 a)516 int32 syst_float128_to_int32_round_to_zero( float128 a )
517 {
518 const union128 ua = { .f128 = a };
519
520 return ua.ld;
521 }
522
523 #ifdef BITS64
524
syst_float128_to_int64_round_to_zero(float128 a)525 int64 syst_float128_to_int64_round_to_zero( float128 a )
526 {
527 const union128 ua = { .f128 = a };
528
529 return ua.ld;
530 }
531
532 #endif
533
syst_float128_to_float32(float128 a)534 float32 syst_float128_to_float32( float128 a )
535 {
536 const union128 ua = { .f128 = a };
537 union32 uz;
538
539 uz.f = ua.ld;
540 return uz.f32;
541
542 }
543
syst_float128_to_float64(float128 a)544 float64 syst_float128_to_float64( float128 a )
545 {
546 const union128 ua = { .f128 = a };
547 union64 uz;
548
549 uz.d = ua.ld;
550 return uz.f64;
551 }
552
syst_float128_add(float128 a,float128 b)553 float128 syst_float128_add( float128 a, float128 b )
554 {
555 const union128 ua = { .f128 = a }, ub = { .f128 = b };
556 union128 uz;
557
558 uz.ld = ua.ld + ub.ld;
559 return uz.f128;
560
561 }
562
syst_float128_sub(float128 a,float128 b)563 float128 syst_float128_sub( float128 a, float128 b )
564 {
565 const union128 ua = { .f128 = a }, ub = { .f128 = b };
566 union128 uz;
567
568 uz.ld = ua.ld - ub.ld;
569 return uz.f128;
570 }
571
syst_float128_mul(float128 a,float128 b)572 float128 syst_float128_mul( float128 a, float128 b )
573 {
574 const union128 ua = { .f128 = a }, ub = { .f128 = b };
575 union128 uz;
576
577 uz.ld = ua.ld * ub.ld;
578 return uz.f128;
579 }
580
syst_float128_div(float128 a,float128 b)581 float128 syst_float128_div( float128 a, float128 b )
582 {
583 const union128 ua = { .f128 = a }, ub = { .f128 = b };
584 union128 uz;
585
586 uz.ld = ua.ld / ub.ld;
587 return uz.f128;
588 }
589
syst_float128_eq(float128 a,float128 b)590 flag syst_float128_eq( float128 a, float128 b )
591 {
592 const union128 ua = { .f128 = a }, ub = { .f128 = b };
593
594 return ua.ld == ub.ld;
595 }
596
syst_float128_le(float128 a,float128 b)597 flag syst_float128_le( float128 a, float128 b )
598 {
599 const union128 ua = { .f128 = a }, ub = { .f128 = b };
600
601 return ua.ld <= ub.ld;
602 }
603
syst_float128_lt(float128 a,float128 b)604 flag syst_float128_lt( float128 a, float128 b )
605 {
606 const union128 ua = { .f128 = a }, ub = { .f128 = b };
607
608 return ua.ld < ub.ld;
609 }
610
611 #endif
612