xref: /qemu/fpu/softfloat.c (revision c01196bd)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal, s);                  \
136         }                                                               \
137     }
138 
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345              f32_check_fn pre, f32_check_fn post)
346 {
347     union_float32 ua, ub, ur;
348 
349     ua.s = xa;
350     ub.s = xb;
351 
352     if (unlikely(!can_use_fpu(s))) {
353         goto soft;
354     }
355 
356     float32_input_flush2(&ua.s, &ub.s, s);
357     if (unlikely(!pre(ua, ub))) {
358         goto soft;
359     }
360 
361     ur.h = hard(ua.h, ub.h);
362     if (unlikely(f32_is_inf(ur))) {
363         float_raise(float_flag_overflow, s);
364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365         goto soft;
366     }
367     return ur.s;
368 
369  soft:
370     return soft(ua.s, ub.s, s);
371 }
372 
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376              f64_check_fn pre, f64_check_fn post)
377 {
378     union_float64 ua, ub, ur;
379 
380     ua.s = xa;
381     ub.s = xb;
382 
383     if (unlikely(!can_use_fpu(s))) {
384         goto soft;
385     }
386 
387     float64_input_flush2(&ua.s, &ub.s, s);
388     if (unlikely(!pre(ua, ub))) {
389         goto soft;
390     }
391 
392     ur.h = hard(ua.h, ub.h);
393     if (unlikely(f64_is_inf(ur))) {
394         float_raise(float_flag_overflow, s);
395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396         goto soft;
397     }
398     return ur.s;
399 
400  soft:
401     return soft(ua.s, ub.s, s);
402 }
403 
404 /*
405  * Classify a floating point number. Everything above float_class_qnan
406  * is a NaN so cls >= float_class_qnan is any NaN.
407  */
408 
409 typedef enum __attribute__ ((__packed__)) {
410     float_class_unclassified,
411     float_class_zero,
412     float_class_normal,
413     float_class_inf,
414     float_class_qnan,  /* all NaNs from here */
415     float_class_snan,
416 } FloatClass;
417 
418 #define float_cmask(bit)  (1u << (bit))
419 
420 enum {
421     float_cmask_zero    = float_cmask(float_class_zero),
422     float_cmask_normal  = float_cmask(float_class_normal),
423     float_cmask_inf     = float_cmask(float_class_inf),
424     float_cmask_qnan    = float_cmask(float_class_qnan),
425     float_cmask_snan    = float_cmask(float_class_snan),
426 
427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
429 };
430 
431 /* Flags for parts_minmax. */
432 enum {
433     /* Set for minimum; clear for maximum. */
434     minmax_ismin = 1,
435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
436     minmax_isnum = 2,
437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
438     minmax_ismag = 4,
439     /*
440      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
441      * operations.
442      */
443     minmax_isnumber = 8,
444 };
445 
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
448 {
449     return unlikely(c >= float_class_qnan);
450 }
451 
452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
453 {
454     return c == float_class_snan;
455 }
456 
457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
458 {
459     return c == float_class_qnan;
460 }
461 
462 /*
463  * Structure holding all of the decomposed parts of a float.
464  * The exponent is unbiased and the fraction is normalized.
465  *
466  * The fraction words are stored in big-endian word ordering,
467  * so that truncation from a larger format to a smaller format
468  * can be done simply by ignoring subsequent elements.
469  */
470 
471 typedef struct {
472     FloatClass cls;
473     bool sign;
474     int32_t exp;
475     union {
476         /* Routines that know the structure may reference the singular name. */
477         uint64_t frac;
478         /*
479          * Routines expanded with multiple structures reference "hi" and "lo"
480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
481          * both the same word and aliased here.
482          */
483         uint64_t frac_hi;
484         uint64_t frac_lo;
485     };
486 } FloatParts64;
487 
488 typedef struct {
489     FloatClass cls;
490     bool sign;
491     int32_t exp;
492     uint64_t frac_hi;
493     uint64_t frac_lo;
494 } FloatParts128;
495 
496 typedef struct {
497     FloatClass cls;
498     bool sign;
499     int32_t exp;
500     uint64_t frac_hi;
501     uint64_t frac_hm;  /* high-middle */
502     uint64_t frac_lm;  /* low-middle */
503     uint64_t frac_lo;
504 } FloatParts256;
505 
506 /* These apply to the most significant word of each FloatPartsN. */
507 #define DECOMPOSED_BINARY_POINT    63
508 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
509 
510 /* Structure holding all of the relevant parameters for a format.
511  *   exp_size: the size of the exponent field
512  *   exp_bias: the offset applied to the exponent field
513  *   exp_max: the maximum normalised exponent
514  *   frac_size: the size of the fraction field
515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516  * The following are computed based the size of fraction
517  *   round_mask: bits below lsb which must be rounded
518  * The following optional modifiers are available:
519  *   arm_althp: handle ARM Alternative Half Precision
520  */
521 typedef struct {
522     int exp_size;
523     int exp_bias;
524     int exp_re_bias;
525     int exp_max;
526     int frac_size;
527     int frac_shift;
528     bool arm_althp;
529     uint64_t round_mask;
530 } FloatFmt;
531 
532 /* Expand fields based on the size of exponent and fraction */
533 #define FLOAT_PARAMS_(E)                                \
534     .exp_size       = E,                                \
535     .exp_bias       = ((1 << E) - 1) >> 1,              \
536     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
537     .exp_max        = (1 << E) - 1
538 
539 #define FLOAT_PARAMS(E, F)                              \
540     FLOAT_PARAMS_(E),                                   \
541     .frac_size      = F,                                \
542     .frac_shift     = (-F - 1) & 63,                    \
543     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
544 
545 static const FloatFmt float16_params = {
546     FLOAT_PARAMS(5, 10)
547 };
548 
549 static const FloatFmt float16_params_ahp = {
550     FLOAT_PARAMS(5, 10),
551     .arm_althp = true
552 };
553 
554 static const FloatFmt bfloat16_params = {
555     FLOAT_PARAMS(8, 7)
556 };
557 
558 static const FloatFmt float32_params = {
559     FLOAT_PARAMS(8, 23)
560 };
561 
562 static const FloatFmt float64_params = {
563     FLOAT_PARAMS(11, 52)
564 };
565 
566 static const FloatFmt float128_params = {
567     FLOAT_PARAMS(15, 112)
568 };
569 
570 #define FLOATX80_PARAMS(R)              \
571     FLOAT_PARAMS_(15),                  \
572     .frac_size = R == 64 ? 63 : R,      \
573     .frac_shift = 0,                    \
574     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
575 
576 static const FloatFmt floatx80_params[3] = {
577     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
578     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
579     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
580 };
581 
582 /* Unpack a float to parts, but do not canonicalize.  */
583 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
584 {
585     const int f_size = fmt->frac_size;
586     const int e_size = fmt->exp_size;
587 
588     *r = (FloatParts64) {
589         .cls = float_class_unclassified,
590         .sign = extract64(raw, f_size + e_size, 1),
591         .exp = extract64(raw, f_size, e_size),
592         .frac = extract64(raw, 0, f_size)
593     };
594 }
595 
596 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
597 {
598     unpack_raw64(p, &float16_params, f);
599 }
600 
601 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
602 {
603     unpack_raw64(p, &bfloat16_params, f);
604 }
605 
606 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
607 {
608     unpack_raw64(p, &float32_params, f);
609 }
610 
611 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
612 {
613     unpack_raw64(p, &float64_params, f);
614 }
615 
616 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
617 {
618     *p = (FloatParts128) {
619         .cls = float_class_unclassified,
620         .sign = extract32(f.high, 15, 1),
621         .exp = extract32(f.high, 0, 15),
622         .frac_hi = f.low
623     };
624 }
625 
626 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
627 {
628     const int f_size = float128_params.frac_size - 64;
629     const int e_size = float128_params.exp_size;
630 
631     *p = (FloatParts128) {
632         .cls = float_class_unclassified,
633         .sign = extract64(f.high, f_size + e_size, 1),
634         .exp = extract64(f.high, f_size, e_size),
635         .frac_hi = extract64(f.high, 0, f_size),
636         .frac_lo = f.low,
637     };
638 }
639 
640 /* Pack a float from parts, but do not canonicalize.  */
641 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
642 {
643     const int f_size = fmt->frac_size;
644     const int e_size = fmt->exp_size;
645     uint64_t ret;
646 
647     ret = (uint64_t)p->sign << (f_size + e_size);
648     ret = deposit64(ret, f_size, e_size, p->exp);
649     ret = deposit64(ret, 0, f_size, p->frac);
650     return ret;
651 }
652 
653 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
654 {
655     return make_float16(pack_raw64(p, &float16_params));
656 }
657 
658 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
659 {
660     return pack_raw64(p, &bfloat16_params);
661 }
662 
663 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
664 {
665     return make_float32(pack_raw64(p, &float32_params));
666 }
667 
668 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
669 {
670     return make_float64(pack_raw64(p, &float64_params));
671 }
672 
673 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
674 {
675     const int f_size = float128_params.frac_size - 64;
676     const int e_size = float128_params.exp_size;
677     uint64_t hi;
678 
679     hi = (uint64_t)p->sign << (f_size + e_size);
680     hi = deposit64(hi, f_size, e_size, p->exp);
681     hi = deposit64(hi, 0, f_size, p->frac_hi);
682     return make_float128(hi, p->frac_lo);
683 }
684 
685 /*----------------------------------------------------------------------------
686 | Functions and definitions to determine:  (1) whether tininess for underflow
687 | is detected before or after rounding by default, (2) what (if anything)
688 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
689 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
690 | are propagated from function inputs to output.  These details are target-
691 | specific.
692 *----------------------------------------------------------------------------*/
693 #include "softfloat-specialize.c.inc"
694 
695 #define PARTS_GENERIC_64_128(NAME, P) \
696     _Generic((P), FloatParts64 *: parts64_##NAME, \
697                   FloatParts128 *: parts128_##NAME)
698 
699 #define PARTS_GENERIC_64_128_256(NAME, P) \
700     _Generic((P), FloatParts64 *: parts64_##NAME, \
701                   FloatParts128 *: parts128_##NAME, \
702                   FloatParts256 *: parts256_##NAME)
703 
704 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
705 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
706 
707 static void parts64_return_nan(FloatParts64 *a, float_status *s);
708 static void parts128_return_nan(FloatParts128 *a, float_status *s);
709 
710 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
711 
712 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
713                                       float_status *s);
714 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
715                                         float_status *s);
716 
717 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
718 
719 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
720                                              FloatParts64 *c, float_status *s,
721                                              int ab_mask, int abc_mask);
722 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
723                                                FloatParts128 *b,
724                                                FloatParts128 *c,
725                                                float_status *s,
726                                                int ab_mask, int abc_mask);
727 
728 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
729     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
730 
731 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
732                                  const FloatFmt *fmt);
733 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
734                                   const FloatFmt *fmt);
735 
736 #define parts_canonicalize(A, S, F) \
737     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
738 
739 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
740                                    const FloatFmt *fmt);
741 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
742                                     const FloatFmt *fmt);
743 
744 #define parts_uncanon_normal(A, S, F) \
745     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
746 
747 static void parts64_uncanon(FloatParts64 *p, float_status *status,
748                             const FloatFmt *fmt);
749 static void parts128_uncanon(FloatParts128 *p, float_status *status,
750                              const FloatFmt *fmt);
751 
752 #define parts_uncanon(A, S, F) \
753     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
754 
755 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
756 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
757 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
758 
759 #define parts_add_normal(A, B) \
760     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
761 
762 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
763 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
764 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
765 
766 #define parts_sub_normal(A, B) \
767     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
768 
769 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
770                                     float_status *s, bool subtract);
771 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
772                                       float_status *s, bool subtract);
773 
774 #define parts_addsub(A, B, S, Z) \
775     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
776 
777 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
778                                  float_status *s);
779 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
780                                    float_status *s);
781 
782 #define parts_mul(A, B, S) \
783     PARTS_GENERIC_64_128(mul, A)(A, B, S)
784 
785 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
786                                     FloatParts64 *c, int flags,
787                                     float_status *s);
788 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
789                                       FloatParts128 *c, int flags,
790                                       float_status *s);
791 
792 #define parts_muladd(A, B, C, Z, S) \
793     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
794 
795 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
796                                  float_status *s);
797 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
798                                    float_status *s);
799 
800 #define parts_div(A, B, S) \
801     PARTS_GENERIC_64_128(div, A)(A, B, S)
802 
803 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
804                                     uint64_t *mod_quot, float_status *s);
805 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
806                                       uint64_t *mod_quot, float_status *s);
807 
808 #define parts_modrem(A, B, Q, S) \
809     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
810 
811 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
812 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
813 
814 #define parts_sqrt(A, S, F) \
815     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
816 
817 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
818                                         int scale, int frac_size);
819 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
820                                          int scale, int frac_size);
821 
822 #define parts_round_to_int_normal(A, R, C, F) \
823     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
824 
825 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
826                                  int scale, float_status *s,
827                                  const FloatFmt *fmt);
828 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
829                                   int scale, float_status *s,
830                                   const FloatFmt *fmt);
831 
832 #define parts_round_to_int(A, R, C, S, F) \
833     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
834 
835 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
836                                      int scale, int64_t min, int64_t max,
837                                      float_status *s);
838 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
839                                      int scale, int64_t min, int64_t max,
840                                      float_status *s);
841 
842 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
843     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
844 
845 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
846                                       int scale, uint64_t max,
847                                       float_status *s);
848 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
849                                        int scale, uint64_t max,
850                                        float_status *s);
851 
852 #define parts_float_to_uint(P, R, Z, M, S) \
853     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
854 
855 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
856                                             FloatRoundMode rmode,
857                                             int bitsm1, float_status *s);
858 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
859                                              FloatRoundMode rmode,
860                                              int bitsm1, float_status *s);
861 
862 #define parts_float_to_sint_modulo(P, R, M, S) \
863     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
864 
865 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
866                                   int scale, float_status *s);
867 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
868                                    int scale, float_status *s);
869 
870 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
871     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
872 
873 #define parts_sint_to_float(P, I, Z, S) \
874     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
875 
876 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
877                                   int scale, float_status *s);
878 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
879                                    int scale, float_status *s);
880 
881 #define parts_uint_to_float(P, I, Z, S) \
882     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
883 
884 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
885                                     float_status *s, int flags);
886 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
887                                       float_status *s, int flags);
888 
889 #define parts_minmax(A, B, S, F) \
890     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
891 
892 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
893                                      float_status *s, bool q);
894 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
895                                       float_status *s, bool q);
896 
897 #define parts_compare(A, B, S, Q) \
898     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
899 
900 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
901 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
902 
903 #define parts_scalbn(A, N, S) \
904     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
905 
906 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
907 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
908 
909 #define parts_log2(A, S, F) \
910     PARTS_GENERIC_64_128(log2, A)(A, S, F)
911 
912 /*
913  * Helper functions for softfloat-parts.c.inc, per-size operations.
914  */
915 
916 #define FRAC_GENERIC_64_128(NAME, P) \
917     _Generic((P), FloatParts64 *: frac64_##NAME, \
918                   FloatParts128 *: frac128_##NAME)
919 
920 #define FRAC_GENERIC_64_128_256(NAME, P) \
921     _Generic((P), FloatParts64 *: frac64_##NAME, \
922                   FloatParts128 *: frac128_##NAME, \
923                   FloatParts256 *: frac256_##NAME)
924 
925 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
926 {
927     return uadd64_overflow(a->frac, b->frac, &r->frac);
928 }
929 
930 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
931 {
932     bool c = 0;
933     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
934     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
935     return c;
936 }
937 
938 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
939 {
940     bool c = 0;
941     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
942     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
943     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
944     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
945     return c;
946 }
947 
948 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
949 
950 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
951 {
952     return uadd64_overflow(a->frac, c, &r->frac);
953 }
954 
955 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
956 {
957     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
958     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
959 }
960 
961 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
962 
963 static void frac64_allones(FloatParts64 *a)
964 {
965     a->frac = -1;
966 }
967 
968 static void frac128_allones(FloatParts128 *a)
969 {
970     a->frac_hi = a->frac_lo = -1;
971 }
972 
973 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
974 
975 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
976 {
977     return (a->frac == b->frac ? float_relation_equal
978             : a->frac < b->frac ? float_relation_less
979             : float_relation_greater);
980 }
981 
982 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
983 {
984     uint64_t ta = a->frac_hi, tb = b->frac_hi;
985     if (ta == tb) {
986         ta = a->frac_lo, tb = b->frac_lo;
987         if (ta == tb) {
988             return float_relation_equal;
989         }
990     }
991     return ta < tb ? float_relation_less : float_relation_greater;
992 }
993 
994 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
995 
996 static void frac64_clear(FloatParts64 *a)
997 {
998     a->frac = 0;
999 }
1000 
1001 static void frac128_clear(FloatParts128 *a)
1002 {
1003     a->frac_hi = a->frac_lo = 0;
1004 }
1005 
1006 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1007 
1008 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1009 {
1010     uint64_t n1, n0, r, q;
1011     bool ret;
1012 
1013     /*
1014      * We want a 2*N / N-bit division to produce exactly an N-bit
1015      * result, so that we do not lose any precision and so that we
1016      * do not have to renormalize afterward.  If A.frac < B.frac,
1017      * then division would produce an (N-1)-bit result; shift A left
1018      * by one to produce the an N-bit result, and return true to
1019      * decrement the exponent to match.
1020      *
1021      * The udiv_qrnnd algorithm that we're using requires normalization,
1022      * i.e. the msb of the denominator must be set, which is already true.
1023      */
1024     ret = a->frac < b->frac;
1025     if (ret) {
1026         n0 = a->frac;
1027         n1 = 0;
1028     } else {
1029         n0 = a->frac >> 1;
1030         n1 = a->frac << 63;
1031     }
1032     q = udiv_qrnnd(&r, n0, n1, b->frac);
1033 
1034     /* Set lsb if there is a remainder, to set inexact. */
1035     a->frac = q | (r != 0);
1036 
1037     return ret;
1038 }
1039 
1040 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1041 {
1042     uint64_t q0, q1, a0, a1, b0, b1;
1043     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1044     bool ret = false;
1045 
1046     a0 = a->frac_hi, a1 = a->frac_lo;
1047     b0 = b->frac_hi, b1 = b->frac_lo;
1048 
1049     ret = lt128(a0, a1, b0, b1);
1050     if (!ret) {
1051         a1 = shr_double(a0, a1, 1);
1052         a0 = a0 >> 1;
1053     }
1054 
1055     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1056     q0 = estimateDiv128To64(a0, a1, b0);
1057 
1058     /*
1059      * Estimate is high because B1 was not included (unless B1 == 0).
1060      * Reduce quotient and increase remainder until remainder is non-negative.
1061      * This loop will execute 0 to 2 times.
1062      */
1063     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1064     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1065     while (r0 != 0) {
1066         q0--;
1067         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1068     }
1069 
1070     /* Repeat using the remainder, producing a second word of quotient. */
1071     q1 = estimateDiv128To64(r1, r2, b0);
1072     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1073     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1074     while (r1 != 0) {
1075         q1--;
1076         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1077     }
1078 
1079     /* Any remainder indicates inexact; set sticky bit. */
1080     q1 |= (r2 | r3) != 0;
1081 
1082     a->frac_hi = q0;
1083     a->frac_lo = q1;
1084     return ret;
1085 }
1086 
1087 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1088 
1089 static bool frac64_eqz(FloatParts64 *a)
1090 {
1091     return a->frac == 0;
1092 }
1093 
1094 static bool frac128_eqz(FloatParts128 *a)
1095 {
1096     return (a->frac_hi | a->frac_lo) == 0;
1097 }
1098 
1099 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1100 
1101 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1102 {
1103     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1104 }
1105 
1106 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1107 {
1108     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1109                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1110 }
1111 
1112 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1113 
1114 static void frac64_neg(FloatParts64 *a)
1115 {
1116     a->frac = -a->frac;
1117 }
1118 
1119 static void frac128_neg(FloatParts128 *a)
1120 {
1121     bool c = 0;
1122     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1123     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1124 }
1125 
1126 static void frac256_neg(FloatParts256 *a)
1127 {
1128     bool c = 0;
1129     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1130     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1131     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1132     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1133 }
1134 
1135 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1136 
1137 static int frac64_normalize(FloatParts64 *a)
1138 {
1139     if (a->frac) {
1140         int shift = clz64(a->frac);
1141         a->frac <<= shift;
1142         return shift;
1143     }
1144     return 64;
1145 }
1146 
1147 static int frac128_normalize(FloatParts128 *a)
1148 {
1149     if (a->frac_hi) {
1150         int shl = clz64(a->frac_hi);
1151         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1152         a->frac_lo <<= shl;
1153         return shl;
1154     } else if (a->frac_lo) {
1155         int shl = clz64(a->frac_lo);
1156         a->frac_hi = a->frac_lo << shl;
1157         a->frac_lo = 0;
1158         return shl + 64;
1159     }
1160     return 128;
1161 }
1162 
1163 static int frac256_normalize(FloatParts256 *a)
1164 {
1165     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1166     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1167     int ret, shl;
1168 
1169     if (likely(a0)) {
1170         shl = clz64(a0);
1171         if (shl == 0) {
1172             return 0;
1173         }
1174         ret = shl;
1175     } else {
1176         if (a1) {
1177             ret = 64;
1178             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1179         } else if (a2) {
1180             ret = 128;
1181             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1182         } else if (a3) {
1183             ret = 192;
1184             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1185         } else {
1186             ret = 256;
1187             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1188             goto done;
1189         }
1190         shl = clz64(a0);
1191         if (shl == 0) {
1192             goto done;
1193         }
1194         ret += shl;
1195     }
1196 
1197     a0 = shl_double(a0, a1, shl);
1198     a1 = shl_double(a1, a2, shl);
1199     a2 = shl_double(a2, a3, shl);
1200     a3 <<= shl;
1201 
1202  done:
1203     a->frac_hi = a0;
1204     a->frac_hm = a1;
1205     a->frac_lm = a2;
1206     a->frac_lo = a3;
1207     return ret;
1208 }
1209 
1210 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1211 
1212 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1213 {
1214     uint64_t a0, a1, b0, t0, t1, q, quot;
1215     int exp_diff = a->exp - b->exp;
1216     int shift;
1217 
1218     a0 = a->frac;
1219     a1 = 0;
1220 
1221     if (exp_diff < -1) {
1222         if (mod_quot) {
1223             *mod_quot = 0;
1224         }
1225         return;
1226     }
1227     if (exp_diff == -1) {
1228         a0 >>= 1;
1229         exp_diff = 0;
1230     }
1231 
1232     b0 = b->frac;
1233     quot = q = b0 <= a0;
1234     if (q) {
1235         a0 -= b0;
1236     }
1237 
1238     exp_diff -= 64;
1239     while (exp_diff > 0) {
1240         q = estimateDiv128To64(a0, a1, b0);
1241         q = q > 2 ? q - 2 : 0;
1242         mul64To128(b0, q, &t0, &t1);
1243         sub128(a0, a1, t0, t1, &a0, &a1);
1244         shortShift128Left(a0, a1, 62, &a0, &a1);
1245         exp_diff -= 62;
1246         quot = (quot << 62) + q;
1247     }
1248 
1249     exp_diff += 64;
1250     if (exp_diff > 0) {
1251         q = estimateDiv128To64(a0, a1, b0);
1252         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1253         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1254         sub128(a0, a1, t0, t1, &a0, &a1);
1255         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1256         while (le128(t0, t1, a0, a1)) {
1257             ++q;
1258             sub128(a0, a1, t0, t1, &a0, &a1);
1259         }
1260         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1261     } else {
1262         t0 = b0;
1263         t1 = 0;
1264     }
1265 
1266     if (mod_quot) {
1267         *mod_quot = quot;
1268     } else {
1269         sub128(t0, t1, a0, a1, &t0, &t1);
1270         if (lt128(t0, t1, a0, a1) ||
1271             (eq128(t0, t1, a0, a1) && (q & 1))) {
1272             a0 = t0;
1273             a1 = t1;
1274             a->sign = !a->sign;
1275         }
1276     }
1277 
1278     if (likely(a0)) {
1279         shift = clz64(a0);
1280         shortShift128Left(a0, a1, shift, &a0, &a1);
1281     } else if (likely(a1)) {
1282         shift = clz64(a1);
1283         a0 = a1 << shift;
1284         a1 = 0;
1285         shift += 64;
1286     } else {
1287         a->cls = float_class_zero;
1288         return;
1289     }
1290 
1291     a->exp = b->exp + exp_diff - shift;
1292     a->frac = a0 | (a1 != 0);
1293 }
1294 
1295 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1296                            uint64_t *mod_quot)
1297 {
1298     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1299     int exp_diff = a->exp - b->exp;
1300     int shift;
1301 
1302     a0 = a->frac_hi;
1303     a1 = a->frac_lo;
1304     a2 = 0;
1305 
1306     if (exp_diff < -1) {
1307         if (mod_quot) {
1308             *mod_quot = 0;
1309         }
1310         return;
1311     }
1312     if (exp_diff == -1) {
1313         shift128Right(a0, a1, 1, &a0, &a1);
1314         exp_diff = 0;
1315     }
1316 
1317     b0 = b->frac_hi;
1318     b1 = b->frac_lo;
1319 
1320     quot = q = le128(b0, b1, a0, a1);
1321     if (q) {
1322         sub128(a0, a1, b0, b1, &a0, &a1);
1323     }
1324 
1325     exp_diff -= 64;
1326     while (exp_diff > 0) {
1327         q = estimateDiv128To64(a0, a1, b0);
1328         q = q > 4 ? q - 4 : 0;
1329         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1330         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1331         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1332         exp_diff -= 61;
1333         quot = (quot << 61) + q;
1334     }
1335 
1336     exp_diff += 64;
1337     if (exp_diff > 0) {
1338         q = estimateDiv128To64(a0, a1, b0);
1339         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1340         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1341         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1342         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1343         while (le192(t0, t1, t2, a0, a1, a2)) {
1344             ++q;
1345             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1346         }
1347         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1348     } else {
1349         t0 = b0;
1350         t1 = b1;
1351         t2 = 0;
1352     }
1353 
1354     if (mod_quot) {
1355         *mod_quot = quot;
1356     } else {
1357         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1358         if (lt192(t0, t1, t2, a0, a1, a2) ||
1359             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1360             a0 = t0;
1361             a1 = t1;
1362             a2 = t2;
1363             a->sign = !a->sign;
1364         }
1365     }
1366 
1367     if (likely(a0)) {
1368         shift = clz64(a0);
1369         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1370     } else if (likely(a1)) {
1371         shift = clz64(a1);
1372         shortShift128Left(a1, a2, shift, &a0, &a1);
1373         a2 = 0;
1374         shift += 64;
1375     } else if (likely(a2)) {
1376         shift = clz64(a2);
1377         a0 = a2 << shift;
1378         a1 = a2 = 0;
1379         shift += 128;
1380     } else {
1381         a->cls = float_class_zero;
1382         return;
1383     }
1384 
1385     a->exp = b->exp + exp_diff - shift;
1386     a->frac_hi = a0;
1387     a->frac_lo = a1 | (a2 != 0);
1388 }
1389 
1390 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1391 
1392 static void frac64_shl(FloatParts64 *a, int c)
1393 {
1394     a->frac <<= c;
1395 }
1396 
1397 static void frac128_shl(FloatParts128 *a, int c)
1398 {
1399     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1400 
1401     if (c & 64) {
1402         a0 = a1, a1 = 0;
1403     }
1404 
1405     c &= 63;
1406     if (c) {
1407         a0 = shl_double(a0, a1, c);
1408         a1 = a1 << c;
1409     }
1410 
1411     a->frac_hi = a0;
1412     a->frac_lo = a1;
1413 }
1414 
1415 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1416 
1417 static void frac64_shr(FloatParts64 *a, int c)
1418 {
1419     a->frac >>= c;
1420 }
1421 
1422 static void frac128_shr(FloatParts128 *a, int c)
1423 {
1424     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1425 
1426     if (c & 64) {
1427         a1 = a0, a0 = 0;
1428     }
1429 
1430     c &= 63;
1431     if (c) {
1432         a1 = shr_double(a0, a1, c);
1433         a0 = a0 >> c;
1434     }
1435 
1436     a->frac_hi = a0;
1437     a->frac_lo = a1;
1438 }
1439 
1440 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1441 
1442 static void frac64_shrjam(FloatParts64 *a, int c)
1443 {
1444     uint64_t a0 = a->frac;
1445 
1446     if (likely(c != 0)) {
1447         if (likely(c < 64)) {
1448             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1449         } else {
1450             a0 = a0 != 0;
1451         }
1452         a->frac = a0;
1453     }
1454 }
1455 
1456 static void frac128_shrjam(FloatParts128 *a, int c)
1457 {
1458     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1459     uint64_t sticky = 0;
1460 
1461     if (unlikely(c == 0)) {
1462         return;
1463     } else if (likely(c < 64)) {
1464         /* nothing */
1465     } else if (likely(c < 128)) {
1466         sticky = a1;
1467         a1 = a0;
1468         a0 = 0;
1469         c &= 63;
1470         if (c == 0) {
1471             goto done;
1472         }
1473     } else {
1474         sticky = a0 | a1;
1475         a0 = a1 = 0;
1476         goto done;
1477     }
1478 
1479     sticky |= shr_double(a1, 0, c);
1480     a1 = shr_double(a0, a1, c);
1481     a0 = a0 >> c;
1482 
1483  done:
1484     a->frac_lo = a1 | (sticky != 0);
1485     a->frac_hi = a0;
1486 }
1487 
1488 static void frac256_shrjam(FloatParts256 *a, int c)
1489 {
1490     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1491     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1492     uint64_t sticky = 0;
1493 
1494     if (unlikely(c == 0)) {
1495         return;
1496     } else if (likely(c < 64)) {
1497         /* nothing */
1498     } else if (likely(c < 256)) {
1499         if (unlikely(c & 128)) {
1500             sticky |= a2 | a3;
1501             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1502         }
1503         if (unlikely(c & 64)) {
1504             sticky |= a3;
1505             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1506         }
1507         c &= 63;
1508         if (c == 0) {
1509             goto done;
1510         }
1511     } else {
1512         sticky = a0 | a1 | a2 | a3;
1513         a0 = a1 = a2 = a3 = 0;
1514         goto done;
1515     }
1516 
1517     sticky |= shr_double(a3, 0, c);
1518     a3 = shr_double(a2, a3, c);
1519     a2 = shr_double(a1, a2, c);
1520     a1 = shr_double(a0, a1, c);
1521     a0 = a0 >> c;
1522 
1523  done:
1524     a->frac_lo = a3 | (sticky != 0);
1525     a->frac_lm = a2;
1526     a->frac_hm = a1;
1527     a->frac_hi = a0;
1528 }
1529 
1530 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1531 
1532 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1533 {
1534     return usub64_overflow(a->frac, b->frac, &r->frac);
1535 }
1536 
1537 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1538 {
1539     bool c = 0;
1540     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1541     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1542     return c;
1543 }
1544 
1545 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1546 {
1547     bool c = 0;
1548     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1549     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1550     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1551     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1552     return c;
1553 }
1554 
1555 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1556 
1557 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1558 {
1559     r->frac = a->frac_hi | (a->frac_lo != 0);
1560 }
1561 
1562 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1563 {
1564     r->frac_hi = a->frac_hi;
1565     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1566 }
1567 
1568 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1569 
1570 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1571 {
1572     r->frac_hi = a->frac;
1573     r->frac_lo = 0;
1574 }
1575 
1576 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1577 {
1578     r->frac_hi = a->frac_hi;
1579     r->frac_hm = a->frac_lo;
1580     r->frac_lm = 0;
1581     r->frac_lo = 0;
1582 }
1583 
1584 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1585 
1586 /*
1587  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1588  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1589  * and thus MIT licenced.
1590  */
1591 static const uint16_t rsqrt_tab[128] = {
1592     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1593     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1594     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1595     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1596     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1597     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1598     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1599     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1600     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1601     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1602     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1603     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1604     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1605     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1606     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1607     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1608 };
1609 
1610 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1611 #define FloatPartsN    glue(FloatParts,N)
1612 #define FloatPartsW    glue(FloatParts,W)
1613 
1614 #define N 64
1615 #define W 128
1616 
1617 #include "softfloat-parts-addsub.c.inc"
1618 #include "softfloat-parts.c.inc"
1619 
1620 #undef  N
1621 #undef  W
1622 #define N 128
1623 #define W 256
1624 
1625 #include "softfloat-parts-addsub.c.inc"
1626 #include "softfloat-parts.c.inc"
1627 
1628 #undef  N
1629 #undef  W
1630 #define N            256
1631 
1632 #include "softfloat-parts-addsub.c.inc"
1633 
1634 #undef  N
1635 #undef  W
1636 #undef  partsN
1637 #undef  FloatPartsN
1638 #undef  FloatPartsW
1639 
1640 /*
1641  * Pack/unpack routines with a specific FloatFmt.
1642  */
1643 
1644 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1645                                       float_status *s, const FloatFmt *params)
1646 {
1647     float16_unpack_raw(p, f);
1648     parts_canonicalize(p, s, params);
1649 }
1650 
1651 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1652                                      float_status *s)
1653 {
1654     float16a_unpack_canonical(p, f, s, &float16_params);
1655 }
1656 
1657 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1658                                       float_status *s)
1659 {
1660     bfloat16_unpack_raw(p, f);
1661     parts_canonicalize(p, s, &bfloat16_params);
1662 }
1663 
1664 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1665                                              float_status *s,
1666                                              const FloatFmt *params)
1667 {
1668     parts_uncanon(p, s, params);
1669     return float16_pack_raw(p);
1670 }
1671 
1672 static float16 float16_round_pack_canonical(FloatParts64 *p,
1673                                             float_status *s)
1674 {
1675     return float16a_round_pack_canonical(p, s, &float16_params);
1676 }
1677 
1678 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1679                                               float_status *s)
1680 {
1681     parts_uncanon(p, s, &bfloat16_params);
1682     return bfloat16_pack_raw(p);
1683 }
1684 
1685 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1686                                      float_status *s)
1687 {
1688     float32_unpack_raw(p, f);
1689     parts_canonicalize(p, s, &float32_params);
1690 }
1691 
1692 static float32 float32_round_pack_canonical(FloatParts64 *p,
1693                                             float_status *s)
1694 {
1695     parts_uncanon(p, s, &float32_params);
1696     return float32_pack_raw(p);
1697 }
1698 
1699 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1700                                      float_status *s)
1701 {
1702     float64_unpack_raw(p, f);
1703     parts_canonicalize(p, s, &float64_params);
1704 }
1705 
1706 static float64 float64_round_pack_canonical(FloatParts64 *p,
1707                                             float_status *s)
1708 {
1709     parts_uncanon(p, s, &float64_params);
1710     return float64_pack_raw(p);
1711 }
1712 
1713 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1714                                                float_status *s)
1715 {
1716     parts_uncanon(p, s, &float32_params);
1717 
1718     /*
1719      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1720      * We need to adjust the fraction higher so that the least N bits are
1721      * zero, and the fraction is adjacent to the float64 implicit bit.
1722      */
1723     switch (p->cls) {
1724     case float_class_normal:
1725         if (unlikely(p->exp == 0)) {
1726             /*
1727              * The result is denormal for float32, but can be represented
1728              * in normalized form for float64.  Adjust, per canonicalize.
1729              */
1730             int shift = frac_normalize(p);
1731             p->exp = (float32_params.frac_shift -
1732                       float32_params.exp_bias - shift + 1 +
1733                       float64_params.exp_bias);
1734             frac_shr(p, float64_params.frac_shift);
1735         } else {
1736             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1737             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1738         }
1739         break;
1740     case float_class_snan:
1741     case float_class_qnan:
1742         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1743         p->exp = float64_params.exp_max;
1744         break;
1745     case float_class_inf:
1746         p->exp = float64_params.exp_max;
1747         break;
1748     case float_class_zero:
1749         break;
1750     default:
1751         g_assert_not_reached();
1752     }
1753 
1754     return float64_pack_raw(p);
1755 }
1756 
1757 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1758                                       float_status *s)
1759 {
1760     float128_unpack_raw(p, f);
1761     parts_canonicalize(p, s, &float128_params);
1762 }
1763 
1764 static float128 float128_round_pack_canonical(FloatParts128 *p,
1765                                               float_status *s)
1766 {
1767     parts_uncanon(p, s, &float128_params);
1768     return float128_pack_raw(p);
1769 }
1770 
1771 /* Returns false if the encoding is invalid. */
1772 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1773                                       float_status *s)
1774 {
1775     /* Ensure rounding precision is set before beginning. */
1776     switch (s->floatx80_rounding_precision) {
1777     case floatx80_precision_x:
1778     case floatx80_precision_d:
1779     case floatx80_precision_s:
1780         break;
1781     default:
1782         g_assert_not_reached();
1783     }
1784 
1785     if (unlikely(floatx80_invalid_encoding(f))) {
1786         float_raise(float_flag_invalid, s);
1787         return false;
1788     }
1789 
1790     floatx80_unpack_raw(p, f);
1791 
1792     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1793         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1794     } else {
1795         /* The explicit integer bit is ignored, after invalid checks. */
1796         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1797         p->cls = (p->frac_hi == 0 ? float_class_inf
1798                   : parts_is_snan_frac(p->frac_hi, s)
1799                   ? float_class_snan : float_class_qnan);
1800     }
1801     return true;
1802 }
1803 
1804 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1805                                               float_status *s)
1806 {
1807     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1808     uint64_t frac;
1809     int exp;
1810 
1811     switch (p->cls) {
1812     case float_class_normal:
1813         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1814             parts_uncanon_normal(p, s, fmt);
1815             frac = p->frac_hi;
1816             exp = p->exp;
1817         } else {
1818             FloatParts64 p64;
1819 
1820             p64.sign = p->sign;
1821             p64.exp = p->exp;
1822             frac_truncjam(&p64, p);
1823             parts_uncanon_normal(&p64, s, fmt);
1824             frac = p64.frac;
1825             exp = p64.exp;
1826         }
1827         if (exp != fmt->exp_max) {
1828             break;
1829         }
1830         /* rounded to inf -- fall through to set frac correctly */
1831 
1832     case float_class_inf:
1833         /* x86 and m68k differ in the setting of the integer bit. */
1834         frac = floatx80_infinity_low;
1835         exp = fmt->exp_max;
1836         break;
1837 
1838     case float_class_zero:
1839         frac = 0;
1840         exp = 0;
1841         break;
1842 
1843     case float_class_snan:
1844     case float_class_qnan:
1845         /* NaNs have the integer bit set. */
1846         frac = p->frac_hi | (1ull << 63);
1847         exp = fmt->exp_max;
1848         break;
1849 
1850     default:
1851         g_assert_not_reached();
1852     }
1853 
1854     return packFloatx80(p->sign, exp, frac);
1855 }
1856 
1857 /*
1858  * Addition and subtraction
1859  */
1860 
1861 static float16 QEMU_FLATTEN
1862 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1863 {
1864     FloatParts64 pa, pb, *pr;
1865 
1866     float16_unpack_canonical(&pa, a, status);
1867     float16_unpack_canonical(&pb, b, status);
1868     pr = parts_addsub(&pa, &pb, status, subtract);
1869 
1870     return float16_round_pack_canonical(pr, status);
1871 }
1872 
1873 float16 float16_add(float16 a, float16 b, float_status *status)
1874 {
1875     return float16_addsub(a, b, status, false);
1876 }
1877 
1878 float16 float16_sub(float16 a, float16 b, float_status *status)
1879 {
1880     return float16_addsub(a, b, status, true);
1881 }
1882 
1883 static float32 QEMU_SOFTFLOAT_ATTR
1884 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1885 {
1886     FloatParts64 pa, pb, *pr;
1887 
1888     float32_unpack_canonical(&pa, a, status);
1889     float32_unpack_canonical(&pb, b, status);
1890     pr = parts_addsub(&pa, &pb, status, subtract);
1891 
1892     return float32_round_pack_canonical(pr, status);
1893 }
1894 
1895 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1896 {
1897     return soft_f32_addsub(a, b, status, false);
1898 }
1899 
1900 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1901 {
1902     return soft_f32_addsub(a, b, status, true);
1903 }
1904 
1905 static float64 QEMU_SOFTFLOAT_ATTR
1906 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1907 {
1908     FloatParts64 pa, pb, *pr;
1909 
1910     float64_unpack_canonical(&pa, a, status);
1911     float64_unpack_canonical(&pb, b, status);
1912     pr = parts_addsub(&pa, &pb, status, subtract);
1913 
1914     return float64_round_pack_canonical(pr, status);
1915 }
1916 
1917 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1918 {
1919     return soft_f64_addsub(a, b, status, false);
1920 }
1921 
1922 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1923 {
1924     return soft_f64_addsub(a, b, status, true);
1925 }
1926 
1927 static float hard_f32_add(float a, float b)
1928 {
1929     return a + b;
1930 }
1931 
1932 static float hard_f32_sub(float a, float b)
1933 {
1934     return a - b;
1935 }
1936 
1937 static double hard_f64_add(double a, double b)
1938 {
1939     return a + b;
1940 }
1941 
1942 static double hard_f64_sub(double a, double b)
1943 {
1944     return a - b;
1945 }
1946 
1947 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1948 {
1949     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1950         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1951     }
1952     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1953 }
1954 
1955 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1956 {
1957     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1958         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1959     } else {
1960         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1961     }
1962 }
1963 
1964 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1965                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1966 {
1967     return float32_gen2(a, b, s, hard, soft,
1968                         f32_is_zon2, f32_addsubmul_post);
1969 }
1970 
1971 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1972                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1973 {
1974     return float64_gen2(a, b, s, hard, soft,
1975                         f64_is_zon2, f64_addsubmul_post);
1976 }
1977 
1978 float32 QEMU_FLATTEN
1979 float32_add(float32 a, float32 b, float_status *s)
1980 {
1981     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1982 }
1983 
1984 float32 QEMU_FLATTEN
1985 float32_sub(float32 a, float32 b, float_status *s)
1986 {
1987     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1988 }
1989 
1990 float64 QEMU_FLATTEN
1991 float64_add(float64 a, float64 b, float_status *s)
1992 {
1993     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1994 }
1995 
1996 float64 QEMU_FLATTEN
1997 float64_sub(float64 a, float64 b, float_status *s)
1998 {
1999     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2000 }
2001 
2002 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2003                                  bool subtract)
2004 {
2005     FloatParts64 pa, pb, *pr;
2006 
2007     float64_unpack_canonical(&pa, a, status);
2008     float64_unpack_canonical(&pb, b, status);
2009     pr = parts_addsub(&pa, &pb, status, subtract);
2010 
2011     return float64r32_round_pack_canonical(pr, status);
2012 }
2013 
2014 float64 float64r32_add(float64 a, float64 b, float_status *status)
2015 {
2016     return float64r32_addsub(a, b, status, false);
2017 }
2018 
2019 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2020 {
2021     return float64r32_addsub(a, b, status, true);
2022 }
2023 
2024 static bfloat16 QEMU_FLATTEN
2025 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2026 {
2027     FloatParts64 pa, pb, *pr;
2028 
2029     bfloat16_unpack_canonical(&pa, a, status);
2030     bfloat16_unpack_canonical(&pb, b, status);
2031     pr = parts_addsub(&pa, &pb, status, subtract);
2032 
2033     return bfloat16_round_pack_canonical(pr, status);
2034 }
2035 
2036 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2037 {
2038     return bfloat16_addsub(a, b, status, false);
2039 }
2040 
2041 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2042 {
2043     return bfloat16_addsub(a, b, status, true);
2044 }
2045 
2046 static float128 QEMU_FLATTEN
2047 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2048 {
2049     FloatParts128 pa, pb, *pr;
2050 
2051     float128_unpack_canonical(&pa, a, status);
2052     float128_unpack_canonical(&pb, b, status);
2053     pr = parts_addsub(&pa, &pb, status, subtract);
2054 
2055     return float128_round_pack_canonical(pr, status);
2056 }
2057 
2058 float128 float128_add(float128 a, float128 b, float_status *status)
2059 {
2060     return float128_addsub(a, b, status, false);
2061 }
2062 
2063 float128 float128_sub(float128 a, float128 b, float_status *status)
2064 {
2065     return float128_addsub(a, b, status, true);
2066 }
2067 
2068 static floatx80 QEMU_FLATTEN
2069 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2070 {
2071     FloatParts128 pa, pb, *pr;
2072 
2073     if (!floatx80_unpack_canonical(&pa, a, status) ||
2074         !floatx80_unpack_canonical(&pb, b, status)) {
2075         return floatx80_default_nan(status);
2076     }
2077 
2078     pr = parts_addsub(&pa, &pb, status, subtract);
2079     return floatx80_round_pack_canonical(pr, status);
2080 }
2081 
2082 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2083 {
2084     return floatx80_addsub(a, b, status, false);
2085 }
2086 
2087 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2088 {
2089     return floatx80_addsub(a, b, status, true);
2090 }
2091 
2092 /*
2093  * Multiplication
2094  */
2095 
2096 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2097 {
2098     FloatParts64 pa, pb, *pr;
2099 
2100     float16_unpack_canonical(&pa, a, status);
2101     float16_unpack_canonical(&pb, b, status);
2102     pr = parts_mul(&pa, &pb, status);
2103 
2104     return float16_round_pack_canonical(pr, status);
2105 }
2106 
2107 static float32 QEMU_SOFTFLOAT_ATTR
2108 soft_f32_mul(float32 a, float32 b, float_status *status)
2109 {
2110     FloatParts64 pa, pb, *pr;
2111 
2112     float32_unpack_canonical(&pa, a, status);
2113     float32_unpack_canonical(&pb, b, status);
2114     pr = parts_mul(&pa, &pb, status);
2115 
2116     return float32_round_pack_canonical(pr, status);
2117 }
2118 
2119 static float64 QEMU_SOFTFLOAT_ATTR
2120 soft_f64_mul(float64 a, float64 b, float_status *status)
2121 {
2122     FloatParts64 pa, pb, *pr;
2123 
2124     float64_unpack_canonical(&pa, a, status);
2125     float64_unpack_canonical(&pb, b, status);
2126     pr = parts_mul(&pa, &pb, status);
2127 
2128     return float64_round_pack_canonical(pr, status);
2129 }
2130 
2131 static float hard_f32_mul(float a, float b)
2132 {
2133     return a * b;
2134 }
2135 
2136 static double hard_f64_mul(double a, double b)
2137 {
2138     return a * b;
2139 }
2140 
2141 float32 QEMU_FLATTEN
2142 float32_mul(float32 a, float32 b, float_status *s)
2143 {
2144     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2145                         f32_is_zon2, f32_addsubmul_post);
2146 }
2147 
2148 float64 QEMU_FLATTEN
2149 float64_mul(float64 a, float64 b, float_status *s)
2150 {
2151     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2152                         f64_is_zon2, f64_addsubmul_post);
2153 }
2154 
2155 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2156 {
2157     FloatParts64 pa, pb, *pr;
2158 
2159     float64_unpack_canonical(&pa, a, status);
2160     float64_unpack_canonical(&pb, b, status);
2161     pr = parts_mul(&pa, &pb, status);
2162 
2163     return float64r32_round_pack_canonical(pr, status);
2164 }
2165 
2166 bfloat16 QEMU_FLATTEN
2167 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2168 {
2169     FloatParts64 pa, pb, *pr;
2170 
2171     bfloat16_unpack_canonical(&pa, a, status);
2172     bfloat16_unpack_canonical(&pb, b, status);
2173     pr = parts_mul(&pa, &pb, status);
2174 
2175     return bfloat16_round_pack_canonical(pr, status);
2176 }
2177 
2178 float128 QEMU_FLATTEN
2179 float128_mul(float128 a, float128 b, float_status *status)
2180 {
2181     FloatParts128 pa, pb, *pr;
2182 
2183     float128_unpack_canonical(&pa, a, status);
2184     float128_unpack_canonical(&pb, b, status);
2185     pr = parts_mul(&pa, &pb, status);
2186 
2187     return float128_round_pack_canonical(pr, status);
2188 }
2189 
2190 floatx80 QEMU_FLATTEN
2191 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2192 {
2193     FloatParts128 pa, pb, *pr;
2194 
2195     if (!floatx80_unpack_canonical(&pa, a, status) ||
2196         !floatx80_unpack_canonical(&pb, b, status)) {
2197         return floatx80_default_nan(status);
2198     }
2199 
2200     pr = parts_mul(&pa, &pb, status);
2201     return floatx80_round_pack_canonical(pr, status);
2202 }
2203 
2204 /*
2205  * Fused multiply-add
2206  */
2207 
2208 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2209                                     int flags, float_status *status)
2210 {
2211     FloatParts64 pa, pb, pc, *pr;
2212 
2213     float16_unpack_canonical(&pa, a, status);
2214     float16_unpack_canonical(&pb, b, status);
2215     float16_unpack_canonical(&pc, c, status);
2216     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2217 
2218     return float16_round_pack_canonical(pr, status);
2219 }
2220 
2221 static float32 QEMU_SOFTFLOAT_ATTR
2222 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2223                 float_status *status)
2224 {
2225     FloatParts64 pa, pb, pc, *pr;
2226 
2227     float32_unpack_canonical(&pa, a, status);
2228     float32_unpack_canonical(&pb, b, status);
2229     float32_unpack_canonical(&pc, c, status);
2230     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2231 
2232     return float32_round_pack_canonical(pr, status);
2233 }
2234 
2235 static float64 QEMU_SOFTFLOAT_ATTR
2236 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2237                 float_status *status)
2238 {
2239     FloatParts64 pa, pb, pc, *pr;
2240 
2241     float64_unpack_canonical(&pa, a, status);
2242     float64_unpack_canonical(&pb, b, status);
2243     float64_unpack_canonical(&pc, c, status);
2244     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2245 
2246     return float64_round_pack_canonical(pr, status);
2247 }
2248 
2249 static bool force_soft_fma;
2250 
2251 float32 QEMU_FLATTEN
2252 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2253 {
2254     union_float32 ua, ub, uc, ur;
2255 
2256     ua.s = xa;
2257     ub.s = xb;
2258     uc.s = xc;
2259 
2260     if (unlikely(!can_use_fpu(s))) {
2261         goto soft;
2262     }
2263     if (unlikely(flags & float_muladd_halve_result)) {
2264         goto soft;
2265     }
2266 
2267     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2268     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2269         goto soft;
2270     }
2271 
2272     if (unlikely(force_soft_fma)) {
2273         goto soft;
2274     }
2275 
2276     /*
2277      * When (a || b) == 0, there's no need to check for under/over flow,
2278      * since we know the addend is (normal || 0) and the product is 0.
2279      */
2280     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2281         union_float32 up;
2282         bool prod_sign;
2283 
2284         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2285         prod_sign ^= !!(flags & float_muladd_negate_product);
2286         up.s = float32_set_sign(float32_zero, prod_sign);
2287 
2288         if (flags & float_muladd_negate_c) {
2289             uc.h = -uc.h;
2290         }
2291         ur.h = up.h + uc.h;
2292     } else {
2293         union_float32 ua_orig = ua;
2294         union_float32 uc_orig = uc;
2295 
2296         if (flags & float_muladd_negate_product) {
2297             ua.h = -ua.h;
2298         }
2299         if (flags & float_muladd_negate_c) {
2300             uc.h = -uc.h;
2301         }
2302 
2303         ur.h = fmaf(ua.h, ub.h, uc.h);
2304 
2305         if (unlikely(f32_is_inf(ur))) {
2306             float_raise(float_flag_overflow, s);
2307         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2308             ua = ua_orig;
2309             uc = uc_orig;
2310             goto soft;
2311         }
2312     }
2313     if (flags & float_muladd_negate_result) {
2314         return float32_chs(ur.s);
2315     }
2316     return ur.s;
2317 
2318  soft:
2319     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2320 }
2321 
2322 float64 QEMU_FLATTEN
2323 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2324 {
2325     union_float64 ua, ub, uc, ur;
2326 
2327     ua.s = xa;
2328     ub.s = xb;
2329     uc.s = xc;
2330 
2331     if (unlikely(!can_use_fpu(s))) {
2332         goto soft;
2333     }
2334     if (unlikely(flags & float_muladd_halve_result)) {
2335         goto soft;
2336     }
2337 
2338     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2339     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2340         goto soft;
2341     }
2342 
2343     if (unlikely(force_soft_fma)) {
2344         goto soft;
2345     }
2346 
2347     /*
2348      * When (a || b) == 0, there's no need to check for under/over flow,
2349      * since we know the addend is (normal || 0) and the product is 0.
2350      */
2351     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2352         union_float64 up;
2353         bool prod_sign;
2354 
2355         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2356         prod_sign ^= !!(flags & float_muladd_negate_product);
2357         up.s = float64_set_sign(float64_zero, prod_sign);
2358 
2359         if (flags & float_muladd_negate_c) {
2360             uc.h = -uc.h;
2361         }
2362         ur.h = up.h + uc.h;
2363     } else {
2364         union_float64 ua_orig = ua;
2365         union_float64 uc_orig = uc;
2366 
2367         if (flags & float_muladd_negate_product) {
2368             ua.h = -ua.h;
2369         }
2370         if (flags & float_muladd_negate_c) {
2371             uc.h = -uc.h;
2372         }
2373 
2374         ur.h = fma(ua.h, ub.h, uc.h);
2375 
2376         if (unlikely(f64_is_inf(ur))) {
2377             float_raise(float_flag_overflow, s);
2378         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2379             ua = ua_orig;
2380             uc = uc_orig;
2381             goto soft;
2382         }
2383     }
2384     if (flags & float_muladd_negate_result) {
2385         return float64_chs(ur.s);
2386     }
2387     return ur.s;
2388 
2389  soft:
2390     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2391 }
2392 
2393 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2394                           int flags, float_status *status)
2395 {
2396     FloatParts64 pa, pb, pc, *pr;
2397 
2398     float64_unpack_canonical(&pa, a, status);
2399     float64_unpack_canonical(&pb, b, status);
2400     float64_unpack_canonical(&pc, c, status);
2401     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2402 
2403     return float64r32_round_pack_canonical(pr, status);
2404 }
2405 
2406 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2407                                       int flags, float_status *status)
2408 {
2409     FloatParts64 pa, pb, pc, *pr;
2410 
2411     bfloat16_unpack_canonical(&pa, a, status);
2412     bfloat16_unpack_canonical(&pb, b, status);
2413     bfloat16_unpack_canonical(&pc, c, status);
2414     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2415 
2416     return bfloat16_round_pack_canonical(pr, status);
2417 }
2418 
2419 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2420                                       int flags, float_status *status)
2421 {
2422     FloatParts128 pa, pb, pc, *pr;
2423 
2424     float128_unpack_canonical(&pa, a, status);
2425     float128_unpack_canonical(&pb, b, status);
2426     float128_unpack_canonical(&pc, c, status);
2427     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2428 
2429     return float128_round_pack_canonical(pr, status);
2430 }
2431 
2432 /*
2433  * Division
2434  */
2435 
2436 float16 float16_div(float16 a, float16 b, float_status *status)
2437 {
2438     FloatParts64 pa, pb, *pr;
2439 
2440     float16_unpack_canonical(&pa, a, status);
2441     float16_unpack_canonical(&pb, b, status);
2442     pr = parts_div(&pa, &pb, status);
2443 
2444     return float16_round_pack_canonical(pr, status);
2445 }
2446 
2447 static float32 QEMU_SOFTFLOAT_ATTR
2448 soft_f32_div(float32 a, float32 b, float_status *status)
2449 {
2450     FloatParts64 pa, pb, *pr;
2451 
2452     float32_unpack_canonical(&pa, a, status);
2453     float32_unpack_canonical(&pb, b, status);
2454     pr = parts_div(&pa, &pb, status);
2455 
2456     return float32_round_pack_canonical(pr, status);
2457 }
2458 
2459 static float64 QEMU_SOFTFLOAT_ATTR
2460 soft_f64_div(float64 a, float64 b, float_status *status)
2461 {
2462     FloatParts64 pa, pb, *pr;
2463 
2464     float64_unpack_canonical(&pa, a, status);
2465     float64_unpack_canonical(&pb, b, status);
2466     pr = parts_div(&pa, &pb, status);
2467 
2468     return float64_round_pack_canonical(pr, status);
2469 }
2470 
2471 static float hard_f32_div(float a, float b)
2472 {
2473     return a / b;
2474 }
2475 
2476 static double hard_f64_div(double a, double b)
2477 {
2478     return a / b;
2479 }
2480 
2481 static bool f32_div_pre(union_float32 a, union_float32 b)
2482 {
2483     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2484         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2485                fpclassify(b.h) == FP_NORMAL;
2486     }
2487     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2488 }
2489 
2490 static bool f64_div_pre(union_float64 a, union_float64 b)
2491 {
2492     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2493         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2494                fpclassify(b.h) == FP_NORMAL;
2495     }
2496     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2497 }
2498 
2499 static bool f32_div_post(union_float32 a, union_float32 b)
2500 {
2501     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2502         return fpclassify(a.h) != FP_ZERO;
2503     }
2504     return !float32_is_zero(a.s);
2505 }
2506 
2507 static bool f64_div_post(union_float64 a, union_float64 b)
2508 {
2509     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2510         return fpclassify(a.h) != FP_ZERO;
2511     }
2512     return !float64_is_zero(a.s);
2513 }
2514 
2515 float32 QEMU_FLATTEN
2516 float32_div(float32 a, float32 b, float_status *s)
2517 {
2518     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2519                         f32_div_pre, f32_div_post);
2520 }
2521 
2522 float64 QEMU_FLATTEN
2523 float64_div(float64 a, float64 b, float_status *s)
2524 {
2525     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2526                         f64_div_pre, f64_div_post);
2527 }
2528 
2529 float64 float64r32_div(float64 a, float64 b, float_status *status)
2530 {
2531     FloatParts64 pa, pb, *pr;
2532 
2533     float64_unpack_canonical(&pa, a, status);
2534     float64_unpack_canonical(&pb, b, status);
2535     pr = parts_div(&pa, &pb, status);
2536 
2537     return float64r32_round_pack_canonical(pr, status);
2538 }
2539 
2540 bfloat16 QEMU_FLATTEN
2541 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2542 {
2543     FloatParts64 pa, pb, *pr;
2544 
2545     bfloat16_unpack_canonical(&pa, a, status);
2546     bfloat16_unpack_canonical(&pb, b, status);
2547     pr = parts_div(&pa, &pb, status);
2548 
2549     return bfloat16_round_pack_canonical(pr, status);
2550 }
2551 
2552 float128 QEMU_FLATTEN
2553 float128_div(float128 a, float128 b, float_status *status)
2554 {
2555     FloatParts128 pa, pb, *pr;
2556 
2557     float128_unpack_canonical(&pa, a, status);
2558     float128_unpack_canonical(&pb, b, status);
2559     pr = parts_div(&pa, &pb, status);
2560 
2561     return float128_round_pack_canonical(pr, status);
2562 }
2563 
2564 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2565 {
2566     FloatParts128 pa, pb, *pr;
2567 
2568     if (!floatx80_unpack_canonical(&pa, a, status) ||
2569         !floatx80_unpack_canonical(&pb, b, status)) {
2570         return floatx80_default_nan(status);
2571     }
2572 
2573     pr = parts_div(&pa, &pb, status);
2574     return floatx80_round_pack_canonical(pr, status);
2575 }
2576 
2577 /*
2578  * Remainder
2579  */
2580 
2581 float32 float32_rem(float32 a, float32 b, float_status *status)
2582 {
2583     FloatParts64 pa, pb, *pr;
2584 
2585     float32_unpack_canonical(&pa, a, status);
2586     float32_unpack_canonical(&pb, b, status);
2587     pr = parts_modrem(&pa, &pb, NULL, status);
2588 
2589     return float32_round_pack_canonical(pr, status);
2590 }
2591 
2592 float64 float64_rem(float64 a, float64 b, float_status *status)
2593 {
2594     FloatParts64 pa, pb, *pr;
2595 
2596     float64_unpack_canonical(&pa, a, status);
2597     float64_unpack_canonical(&pb, b, status);
2598     pr = parts_modrem(&pa, &pb, NULL, status);
2599 
2600     return float64_round_pack_canonical(pr, status);
2601 }
2602 
2603 float128 float128_rem(float128 a, float128 b, float_status *status)
2604 {
2605     FloatParts128 pa, pb, *pr;
2606 
2607     float128_unpack_canonical(&pa, a, status);
2608     float128_unpack_canonical(&pb, b, status);
2609     pr = parts_modrem(&pa, &pb, NULL, status);
2610 
2611     return float128_round_pack_canonical(pr, status);
2612 }
2613 
2614 /*
2615  * Returns the remainder of the extended double-precision floating-point value
2616  * `a' with respect to the corresponding value `b'.
2617  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2618  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2619  * the remainder based on truncating the quotient toward zero instead and
2620  * *quotient is set to the low 64 bits of the absolute value of the integer
2621  * quotient.
2622  */
2623 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2624                          uint64_t *quotient, float_status *status)
2625 {
2626     FloatParts128 pa, pb, *pr;
2627 
2628     *quotient = 0;
2629     if (!floatx80_unpack_canonical(&pa, a, status) ||
2630         !floatx80_unpack_canonical(&pb, b, status)) {
2631         return floatx80_default_nan(status);
2632     }
2633     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2634 
2635     return floatx80_round_pack_canonical(pr, status);
2636 }
2637 
2638 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2639 {
2640     uint64_t quotient;
2641     return floatx80_modrem(a, b, false, &quotient, status);
2642 }
2643 
2644 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2645 {
2646     uint64_t quotient;
2647     return floatx80_modrem(a, b, true, &quotient, status);
2648 }
2649 
2650 /*
2651  * Float to Float conversions
2652  *
2653  * Returns the result of converting one float format to another. The
2654  * conversion is performed according to the IEC/IEEE Standard for
2655  * Binary Floating-Point Arithmetic.
2656  *
2657  * Usually this only needs to take care of raising invalid exceptions
2658  * and handling the conversion on NaNs.
2659  */
2660 
2661 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2662 {
2663     switch (a->cls) {
2664     case float_class_snan:
2665         float_raise(float_flag_invalid_snan, s);
2666         /* fall through */
2667     case float_class_qnan:
2668         /*
2669          * There is no NaN in the destination format.  Raise Invalid
2670          * and return a zero with the sign of the input NaN.
2671          */
2672         float_raise(float_flag_invalid, s);
2673         a->cls = float_class_zero;
2674         break;
2675 
2676     case float_class_inf:
2677         /*
2678          * There is no Inf in the destination format.  Raise Invalid
2679          * and return the maximum normal with the correct sign.
2680          */
2681         float_raise(float_flag_invalid, s);
2682         a->cls = float_class_normal;
2683         a->exp = float16_params_ahp.exp_max;
2684         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2685                                   float16_params_ahp.frac_size + 1);
2686         break;
2687 
2688     case float_class_normal:
2689     case float_class_zero:
2690         break;
2691 
2692     default:
2693         g_assert_not_reached();
2694     }
2695 }
2696 
2697 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2698 {
2699     if (is_nan(a->cls)) {
2700         parts_return_nan(a, s);
2701     }
2702 }
2703 
2704 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2705 {
2706     if (is_nan(a->cls)) {
2707         parts_return_nan(a, s);
2708     }
2709 }
2710 
2711 #define parts_float_to_float(P, S) \
2712     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2713 
2714 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2715                                         float_status *s)
2716 {
2717     a->cls = b->cls;
2718     a->sign = b->sign;
2719     a->exp = b->exp;
2720 
2721     if (a->cls == float_class_normal) {
2722         frac_truncjam(a, b);
2723     } else if (is_nan(a->cls)) {
2724         /* Discard the low bits of the NaN. */
2725         a->frac = b->frac_hi;
2726         parts_return_nan(a, s);
2727     }
2728 }
2729 
2730 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2731                                        float_status *s)
2732 {
2733     a->cls = b->cls;
2734     a->sign = b->sign;
2735     a->exp = b->exp;
2736     frac_widen(a, b);
2737 
2738     if (is_nan(a->cls)) {
2739         parts_return_nan(a, s);
2740     }
2741 }
2742 
2743 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2744 {
2745     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2746     FloatParts64 p;
2747 
2748     float16a_unpack_canonical(&p, a, s, fmt16);
2749     parts_float_to_float(&p, s);
2750     return float32_round_pack_canonical(&p, s);
2751 }
2752 
2753 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2754 {
2755     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2756     FloatParts64 p;
2757 
2758     float16a_unpack_canonical(&p, a, s, fmt16);
2759     parts_float_to_float(&p, s);
2760     return float64_round_pack_canonical(&p, s);
2761 }
2762 
2763 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2764 {
2765     FloatParts64 p;
2766     const FloatFmt *fmt;
2767 
2768     float32_unpack_canonical(&p, a, s);
2769     if (ieee) {
2770         parts_float_to_float(&p, s);
2771         fmt = &float16_params;
2772     } else {
2773         parts_float_to_ahp(&p, s);
2774         fmt = &float16_params_ahp;
2775     }
2776     return float16a_round_pack_canonical(&p, s, fmt);
2777 }
2778 
2779 static float64 QEMU_SOFTFLOAT_ATTR
2780 soft_float32_to_float64(float32 a, float_status *s)
2781 {
2782     FloatParts64 p;
2783 
2784     float32_unpack_canonical(&p, a, s);
2785     parts_float_to_float(&p, s);
2786     return float64_round_pack_canonical(&p, s);
2787 }
2788 
2789 float64 float32_to_float64(float32 a, float_status *s)
2790 {
2791     if (likely(float32_is_normal(a))) {
2792         /* Widening conversion can never produce inexact results.  */
2793         union_float32 uf;
2794         union_float64 ud;
2795         uf.s = a;
2796         ud.h = uf.h;
2797         return ud.s;
2798     } else if (float32_is_zero(a)) {
2799         return float64_set_sign(float64_zero, float32_is_neg(a));
2800     } else {
2801         return soft_float32_to_float64(a, s);
2802     }
2803 }
2804 
2805 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2806 {
2807     FloatParts64 p;
2808     const FloatFmt *fmt;
2809 
2810     float64_unpack_canonical(&p, a, s);
2811     if (ieee) {
2812         parts_float_to_float(&p, s);
2813         fmt = &float16_params;
2814     } else {
2815         parts_float_to_ahp(&p, s);
2816         fmt = &float16_params_ahp;
2817     }
2818     return float16a_round_pack_canonical(&p, s, fmt);
2819 }
2820 
2821 float32 float64_to_float32(float64 a, float_status *s)
2822 {
2823     FloatParts64 p;
2824 
2825     float64_unpack_canonical(&p, a, s);
2826     parts_float_to_float(&p, s);
2827     return float32_round_pack_canonical(&p, s);
2828 }
2829 
2830 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2831 {
2832     FloatParts64 p;
2833 
2834     bfloat16_unpack_canonical(&p, a, s);
2835     parts_float_to_float(&p, s);
2836     return float32_round_pack_canonical(&p, s);
2837 }
2838 
2839 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2840 {
2841     FloatParts64 p;
2842 
2843     bfloat16_unpack_canonical(&p, a, s);
2844     parts_float_to_float(&p, s);
2845     return float64_round_pack_canonical(&p, s);
2846 }
2847 
2848 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2849 {
2850     FloatParts64 p;
2851 
2852     float32_unpack_canonical(&p, a, s);
2853     parts_float_to_float(&p, s);
2854     return bfloat16_round_pack_canonical(&p, s);
2855 }
2856 
2857 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2858 {
2859     FloatParts64 p;
2860 
2861     float64_unpack_canonical(&p, a, s);
2862     parts_float_to_float(&p, s);
2863     return bfloat16_round_pack_canonical(&p, s);
2864 }
2865 
2866 float32 float128_to_float32(float128 a, float_status *s)
2867 {
2868     FloatParts64 p64;
2869     FloatParts128 p128;
2870 
2871     float128_unpack_canonical(&p128, a, s);
2872     parts_float_to_float_narrow(&p64, &p128, s);
2873     return float32_round_pack_canonical(&p64, s);
2874 }
2875 
2876 float64 float128_to_float64(float128 a, float_status *s)
2877 {
2878     FloatParts64 p64;
2879     FloatParts128 p128;
2880 
2881     float128_unpack_canonical(&p128, a, s);
2882     parts_float_to_float_narrow(&p64, &p128, s);
2883     return float64_round_pack_canonical(&p64, s);
2884 }
2885 
2886 float128 float32_to_float128(float32 a, float_status *s)
2887 {
2888     FloatParts64 p64;
2889     FloatParts128 p128;
2890 
2891     float32_unpack_canonical(&p64, a, s);
2892     parts_float_to_float_widen(&p128, &p64, s);
2893     return float128_round_pack_canonical(&p128, s);
2894 }
2895 
2896 float128 float64_to_float128(float64 a, float_status *s)
2897 {
2898     FloatParts64 p64;
2899     FloatParts128 p128;
2900 
2901     float64_unpack_canonical(&p64, a, s);
2902     parts_float_to_float_widen(&p128, &p64, s);
2903     return float128_round_pack_canonical(&p128, s);
2904 }
2905 
2906 float32 floatx80_to_float32(floatx80 a, float_status *s)
2907 {
2908     FloatParts64 p64;
2909     FloatParts128 p128;
2910 
2911     if (floatx80_unpack_canonical(&p128, a, s)) {
2912         parts_float_to_float_narrow(&p64, &p128, s);
2913     } else {
2914         parts_default_nan(&p64, s);
2915     }
2916     return float32_round_pack_canonical(&p64, s);
2917 }
2918 
2919 float64 floatx80_to_float64(floatx80 a, float_status *s)
2920 {
2921     FloatParts64 p64;
2922     FloatParts128 p128;
2923 
2924     if (floatx80_unpack_canonical(&p128, a, s)) {
2925         parts_float_to_float_narrow(&p64, &p128, s);
2926     } else {
2927         parts_default_nan(&p64, s);
2928     }
2929     return float64_round_pack_canonical(&p64, s);
2930 }
2931 
2932 float128 floatx80_to_float128(floatx80 a, float_status *s)
2933 {
2934     FloatParts128 p;
2935 
2936     if (floatx80_unpack_canonical(&p, a, s)) {
2937         parts_float_to_float(&p, s);
2938     } else {
2939         parts_default_nan(&p, s);
2940     }
2941     return float128_round_pack_canonical(&p, s);
2942 }
2943 
2944 floatx80 float32_to_floatx80(float32 a, float_status *s)
2945 {
2946     FloatParts64 p64;
2947     FloatParts128 p128;
2948 
2949     float32_unpack_canonical(&p64, a, s);
2950     parts_float_to_float_widen(&p128, &p64, s);
2951     return floatx80_round_pack_canonical(&p128, s);
2952 }
2953 
2954 floatx80 float64_to_floatx80(float64 a, float_status *s)
2955 {
2956     FloatParts64 p64;
2957     FloatParts128 p128;
2958 
2959     float64_unpack_canonical(&p64, a, s);
2960     parts_float_to_float_widen(&p128, &p64, s);
2961     return floatx80_round_pack_canonical(&p128, s);
2962 }
2963 
2964 floatx80 float128_to_floatx80(float128 a, float_status *s)
2965 {
2966     FloatParts128 p;
2967 
2968     float128_unpack_canonical(&p, a, s);
2969     parts_float_to_float(&p, s);
2970     return floatx80_round_pack_canonical(&p, s);
2971 }
2972 
2973 /*
2974  * Round to integral value
2975  */
2976 
2977 float16 float16_round_to_int(float16 a, float_status *s)
2978 {
2979     FloatParts64 p;
2980 
2981     float16_unpack_canonical(&p, a, s);
2982     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2983     return float16_round_pack_canonical(&p, s);
2984 }
2985 
2986 float32 float32_round_to_int(float32 a, float_status *s)
2987 {
2988     FloatParts64 p;
2989 
2990     float32_unpack_canonical(&p, a, s);
2991     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2992     return float32_round_pack_canonical(&p, s);
2993 }
2994 
2995 float64 float64_round_to_int(float64 a, float_status *s)
2996 {
2997     FloatParts64 p;
2998 
2999     float64_unpack_canonical(&p, a, s);
3000     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3001     return float64_round_pack_canonical(&p, s);
3002 }
3003 
3004 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3005 {
3006     FloatParts64 p;
3007 
3008     bfloat16_unpack_canonical(&p, a, s);
3009     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3010     return bfloat16_round_pack_canonical(&p, s);
3011 }
3012 
3013 float128 float128_round_to_int(float128 a, float_status *s)
3014 {
3015     FloatParts128 p;
3016 
3017     float128_unpack_canonical(&p, a, s);
3018     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3019     return float128_round_pack_canonical(&p, s);
3020 }
3021 
3022 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3023 {
3024     FloatParts128 p;
3025 
3026     if (!floatx80_unpack_canonical(&p, a, status)) {
3027         return floatx80_default_nan(status);
3028     }
3029 
3030     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3031                        &floatx80_params[status->floatx80_rounding_precision]);
3032     return floatx80_round_pack_canonical(&p, status);
3033 }
3034 
3035 /*
3036  * Floating-point to signed integer conversions
3037  */
3038 
3039 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3040                               float_status *s)
3041 {
3042     FloatParts64 p;
3043 
3044     float16_unpack_canonical(&p, a, s);
3045     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3046 }
3047 
3048 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3049                                 float_status *s)
3050 {
3051     FloatParts64 p;
3052 
3053     float16_unpack_canonical(&p, a, s);
3054     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3055 }
3056 
3057 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3058                                 float_status *s)
3059 {
3060     FloatParts64 p;
3061 
3062     float16_unpack_canonical(&p, a, s);
3063     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3064 }
3065 
3066 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3067                                 float_status *s)
3068 {
3069     FloatParts64 p;
3070 
3071     float16_unpack_canonical(&p, a, s);
3072     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3073 }
3074 
3075 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3076                                 float_status *s)
3077 {
3078     FloatParts64 p;
3079 
3080     float32_unpack_canonical(&p, a, s);
3081     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3082 }
3083 
3084 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3085                                 float_status *s)
3086 {
3087     FloatParts64 p;
3088 
3089     float32_unpack_canonical(&p, a, s);
3090     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3091 }
3092 
3093 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3094                                 float_status *s)
3095 {
3096     FloatParts64 p;
3097 
3098     float32_unpack_canonical(&p, a, s);
3099     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3100 }
3101 
3102 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3103                                 float_status *s)
3104 {
3105     FloatParts64 p;
3106 
3107     float64_unpack_canonical(&p, a, s);
3108     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3109 }
3110 
3111 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3112                                 float_status *s)
3113 {
3114     FloatParts64 p;
3115 
3116     float64_unpack_canonical(&p, a, s);
3117     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3118 }
3119 
3120 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3121                                 float_status *s)
3122 {
3123     FloatParts64 p;
3124 
3125     float64_unpack_canonical(&p, a, s);
3126     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3127 }
3128 
3129 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3130                                  float_status *s)
3131 {
3132     FloatParts64 p;
3133 
3134     bfloat16_unpack_canonical(&p, a, s);
3135     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3136 }
3137 
3138 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3139                                  float_status *s)
3140 {
3141     FloatParts64 p;
3142 
3143     bfloat16_unpack_canonical(&p, a, s);
3144     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3145 }
3146 
3147 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3148                                  float_status *s)
3149 {
3150     FloatParts64 p;
3151 
3152     bfloat16_unpack_canonical(&p, a, s);
3153     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3154 }
3155 
3156 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3157                                         int scale, float_status *s)
3158 {
3159     FloatParts128 p;
3160 
3161     float128_unpack_canonical(&p, a, s);
3162     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3163 }
3164 
3165 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3166                                         int scale, float_status *s)
3167 {
3168     FloatParts128 p;
3169 
3170     float128_unpack_canonical(&p, a, s);
3171     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3172 }
3173 
3174 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3175                                         int scale, float_status *s)
3176 {
3177     int flags = 0;
3178     Int128 r;
3179     FloatParts128 p;
3180 
3181     float128_unpack_canonical(&p, a, s);
3182 
3183     switch (p.cls) {
3184     case float_class_snan:
3185         flags |= float_flag_invalid_snan;
3186         /* fall through */
3187     case float_class_qnan:
3188         flags |= float_flag_invalid;
3189         r = UINT128_MAX;
3190         break;
3191 
3192     case float_class_inf:
3193         flags = float_flag_invalid | float_flag_invalid_cvti;
3194         r = p.sign ? INT128_MIN : INT128_MAX;
3195         break;
3196 
3197     case float_class_zero:
3198         return int128_zero();
3199 
3200     case float_class_normal:
3201         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3202             flags = float_flag_inexact;
3203         }
3204 
3205         if (p.exp < 127) {
3206             int shift = 127 - p.exp;
3207             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3208             if (p.sign) {
3209                 r = int128_neg(r);
3210             }
3211         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3212                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3213             r = INT128_MIN;
3214         } else {
3215             flags = float_flag_invalid | float_flag_invalid_cvti;
3216             r = p.sign ? INT128_MIN : INT128_MAX;
3217         }
3218         break;
3219 
3220     default:
3221         g_assert_not_reached();
3222     }
3223 
3224     float_raise(flags, s);
3225     return r;
3226 }
3227 
3228 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3229                                         int scale, float_status *s)
3230 {
3231     FloatParts128 p;
3232 
3233     if (!floatx80_unpack_canonical(&p, a, s)) {
3234         parts_default_nan(&p, s);
3235     }
3236     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3237 }
3238 
3239 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3240                                         int scale, float_status *s)
3241 {
3242     FloatParts128 p;
3243 
3244     if (!floatx80_unpack_canonical(&p, a, s)) {
3245         parts_default_nan(&p, s);
3246     }
3247     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3248 }
3249 
3250 int8_t float16_to_int8(float16 a, float_status *s)
3251 {
3252     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3253 }
3254 
3255 int16_t float16_to_int16(float16 a, float_status *s)
3256 {
3257     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3258 }
3259 
3260 int32_t float16_to_int32(float16 a, float_status *s)
3261 {
3262     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3263 }
3264 
3265 int64_t float16_to_int64(float16 a, float_status *s)
3266 {
3267     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3268 }
3269 
3270 int16_t float32_to_int16(float32 a, float_status *s)
3271 {
3272     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3273 }
3274 
3275 int32_t float32_to_int32(float32 a, float_status *s)
3276 {
3277     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3278 }
3279 
3280 int64_t float32_to_int64(float32 a, float_status *s)
3281 {
3282     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3283 }
3284 
3285 int16_t float64_to_int16(float64 a, float_status *s)
3286 {
3287     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3288 }
3289 
3290 int32_t float64_to_int32(float64 a, float_status *s)
3291 {
3292     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3293 }
3294 
3295 int64_t float64_to_int64(float64 a, float_status *s)
3296 {
3297     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3298 }
3299 
3300 int32_t float128_to_int32(float128 a, float_status *s)
3301 {
3302     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3303 }
3304 
3305 int64_t float128_to_int64(float128 a, float_status *s)
3306 {
3307     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3308 }
3309 
3310 Int128 float128_to_int128(float128 a, float_status *s)
3311 {
3312     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3313 }
3314 
3315 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3316 {
3317     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3318 }
3319 
3320 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3321 {
3322     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3323 }
3324 
3325 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3326 {
3327     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3328 }
3329 
3330 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3331 {
3332     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3333 }
3334 
3335 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3336 {
3337     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3338 }
3339 
3340 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3341 {
3342     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3343 }
3344 
3345 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3346 {
3347     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3348 }
3349 
3350 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3351 {
3352     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3353 }
3354 
3355 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3356 {
3357     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3358 }
3359 
3360 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3361 {
3362     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3363 }
3364 
3365 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3366 {
3367     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3368 }
3369 
3370 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3371 {
3372     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3373 }
3374 
3375 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3376 {
3377     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3378 }
3379 
3380 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3381 {
3382     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3383 }
3384 
3385 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3386 {
3387     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3388 }
3389 
3390 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3391 {
3392     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3393 }
3394 
3395 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3396 {
3397     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3398 }
3399 
3400 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3401 {
3402     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3403 }
3404 
3405 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3406 {
3407     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3408 }
3409 
3410 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3411 {
3412     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3413 }
3414 
3415 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3416 {
3417     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3418 }
3419 
3420 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3421 {
3422     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3423 }
3424 
3425 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3426                                 float_status *s)
3427 {
3428     FloatParts64 p;
3429 
3430     float64_unpack_canonical(&p, a, s);
3431     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3432 }
3433 
3434 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3435                                 float_status *s)
3436 {
3437     FloatParts64 p;
3438 
3439     float64_unpack_canonical(&p, a, s);
3440     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3441 }
3442 
3443 /*
3444  * Floating-point to unsigned integer conversions
3445  */
3446 
3447 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3448                                 float_status *s)
3449 {
3450     FloatParts64 p;
3451 
3452     float16_unpack_canonical(&p, a, s);
3453     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3454 }
3455 
3456 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3457                                   float_status *s)
3458 {
3459     FloatParts64 p;
3460 
3461     float16_unpack_canonical(&p, a, s);
3462     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3463 }
3464 
3465 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3466                                   float_status *s)
3467 {
3468     FloatParts64 p;
3469 
3470     float16_unpack_canonical(&p, a, s);
3471     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3472 }
3473 
3474 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3475                                   float_status *s)
3476 {
3477     FloatParts64 p;
3478 
3479     float16_unpack_canonical(&p, a, s);
3480     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3481 }
3482 
3483 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3484                                   float_status *s)
3485 {
3486     FloatParts64 p;
3487 
3488     float32_unpack_canonical(&p, a, s);
3489     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3490 }
3491 
3492 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3493                                   float_status *s)
3494 {
3495     FloatParts64 p;
3496 
3497     float32_unpack_canonical(&p, a, s);
3498     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3499 }
3500 
3501 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3502                                   float_status *s)
3503 {
3504     FloatParts64 p;
3505 
3506     float32_unpack_canonical(&p, a, s);
3507     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3508 }
3509 
3510 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3511                                   float_status *s)
3512 {
3513     FloatParts64 p;
3514 
3515     float64_unpack_canonical(&p, a, s);
3516     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3517 }
3518 
3519 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3520                                   float_status *s)
3521 {
3522     FloatParts64 p;
3523 
3524     float64_unpack_canonical(&p, a, s);
3525     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3526 }
3527 
3528 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3529                                   float_status *s)
3530 {
3531     FloatParts64 p;
3532 
3533     float64_unpack_canonical(&p, a, s);
3534     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3535 }
3536 
3537 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3538                                    int scale, float_status *s)
3539 {
3540     FloatParts64 p;
3541 
3542     bfloat16_unpack_canonical(&p, a, s);
3543     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3544 }
3545 
3546 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3547                                    int scale, float_status *s)
3548 {
3549     FloatParts64 p;
3550 
3551     bfloat16_unpack_canonical(&p, a, s);
3552     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3553 }
3554 
3555 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3556                                    int scale, float_status *s)
3557 {
3558     FloatParts64 p;
3559 
3560     bfloat16_unpack_canonical(&p, a, s);
3561     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3562 }
3563 
3564 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3565                                           int scale, float_status *s)
3566 {
3567     FloatParts128 p;
3568 
3569     float128_unpack_canonical(&p, a, s);
3570     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3571 }
3572 
3573 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3574                                           int scale, float_status *s)
3575 {
3576     FloatParts128 p;
3577 
3578     float128_unpack_canonical(&p, a, s);
3579     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3580 }
3581 
3582 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3583                                          int scale, float_status *s)
3584 {
3585     int flags = 0;
3586     Int128 r;
3587     FloatParts128 p;
3588 
3589     float128_unpack_canonical(&p, a, s);
3590 
3591     switch (p.cls) {
3592     case float_class_snan:
3593         flags |= float_flag_invalid_snan;
3594         /* fall through */
3595     case float_class_qnan:
3596         flags |= float_flag_invalid;
3597         r = UINT128_MAX;
3598         break;
3599 
3600     case float_class_inf:
3601         flags = float_flag_invalid | float_flag_invalid_cvti;
3602         r = p.sign ? int128_zero() : UINT128_MAX;
3603         break;
3604 
3605     case float_class_zero:
3606         return int128_zero();
3607 
3608     case float_class_normal:
3609         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3610             flags = float_flag_inexact;
3611             if (p.cls == float_class_zero) {
3612                 r = int128_zero();
3613                 break;
3614             }
3615         }
3616 
3617         if (p.sign) {
3618             flags = float_flag_invalid | float_flag_invalid_cvti;
3619             r = int128_zero();
3620         } else if (p.exp <= 127) {
3621             int shift = 127 - p.exp;
3622             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3623         } else {
3624             flags = float_flag_invalid | float_flag_invalid_cvti;
3625             r = UINT128_MAX;
3626         }
3627         break;
3628 
3629     default:
3630         g_assert_not_reached();
3631     }
3632 
3633     float_raise(flags, s);
3634     return r;
3635 }
3636 
3637 uint8_t float16_to_uint8(float16 a, float_status *s)
3638 {
3639     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3640 }
3641 
3642 uint16_t float16_to_uint16(float16 a, float_status *s)
3643 {
3644     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3645 }
3646 
3647 uint32_t float16_to_uint32(float16 a, float_status *s)
3648 {
3649     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3650 }
3651 
3652 uint64_t float16_to_uint64(float16 a, float_status *s)
3653 {
3654     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3655 }
3656 
3657 uint16_t float32_to_uint16(float32 a, float_status *s)
3658 {
3659     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3660 }
3661 
3662 uint32_t float32_to_uint32(float32 a, float_status *s)
3663 {
3664     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3665 }
3666 
3667 uint64_t float32_to_uint64(float32 a, float_status *s)
3668 {
3669     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3670 }
3671 
3672 uint16_t float64_to_uint16(float64 a, float_status *s)
3673 {
3674     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3675 }
3676 
3677 uint32_t float64_to_uint32(float64 a, float_status *s)
3678 {
3679     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3680 }
3681 
3682 uint64_t float64_to_uint64(float64 a, float_status *s)
3683 {
3684     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3685 }
3686 
3687 uint32_t float128_to_uint32(float128 a, float_status *s)
3688 {
3689     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3690 }
3691 
3692 uint64_t float128_to_uint64(float128 a, float_status *s)
3693 {
3694     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3695 }
3696 
3697 Int128 float128_to_uint128(float128 a, float_status *s)
3698 {
3699     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3700 }
3701 
3702 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3703 {
3704     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3705 }
3706 
3707 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3708 {
3709     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3710 }
3711 
3712 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3713 {
3714     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3715 }
3716 
3717 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3718 {
3719     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3720 }
3721 
3722 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3723 {
3724     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3725 }
3726 
3727 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3728 {
3729     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3730 }
3731 
3732 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3733 {
3734     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3735 }
3736 
3737 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3738 {
3739     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3740 }
3741 
3742 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3743 {
3744     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3745 }
3746 
3747 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3748 {
3749     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3750 }
3751 
3752 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3753 {
3754     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3755 }
3756 
3757 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3758 {
3759     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3760 }
3761 
3762 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3763 {
3764     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3765 }
3766 
3767 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3768 {
3769     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3770 }
3771 
3772 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3773 {
3774     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3775 }
3776 
3777 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3778 {
3779     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3780 }
3781 
3782 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3783 {
3784     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3785 }
3786 
3787 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3788 {
3789     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3790 }
3791 
3792 /*
3793  * Signed integer to floating-point conversions
3794  */
3795 
3796 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3797 {
3798     FloatParts64 p;
3799 
3800     parts_sint_to_float(&p, a, scale, status);
3801     return float16_round_pack_canonical(&p, status);
3802 }
3803 
3804 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3805 {
3806     return int64_to_float16_scalbn(a, scale, status);
3807 }
3808 
3809 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3810 {
3811     return int64_to_float16_scalbn(a, scale, status);
3812 }
3813 
3814 float16 int64_to_float16(int64_t a, float_status *status)
3815 {
3816     return int64_to_float16_scalbn(a, 0, status);
3817 }
3818 
3819 float16 int32_to_float16(int32_t a, float_status *status)
3820 {
3821     return int64_to_float16_scalbn(a, 0, status);
3822 }
3823 
3824 float16 int16_to_float16(int16_t a, float_status *status)
3825 {
3826     return int64_to_float16_scalbn(a, 0, status);
3827 }
3828 
3829 float16 int8_to_float16(int8_t a, float_status *status)
3830 {
3831     return int64_to_float16_scalbn(a, 0, status);
3832 }
3833 
3834 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3835 {
3836     FloatParts64 p;
3837 
3838     /* Without scaling, there are no overflow concerns. */
3839     if (likely(scale == 0) && can_use_fpu(status)) {
3840         union_float32 ur;
3841         ur.h = a;
3842         return ur.s;
3843     }
3844 
3845     parts64_sint_to_float(&p, a, scale, status);
3846     return float32_round_pack_canonical(&p, status);
3847 }
3848 
3849 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3850 {
3851     return int64_to_float32_scalbn(a, scale, status);
3852 }
3853 
3854 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3855 {
3856     return int64_to_float32_scalbn(a, scale, status);
3857 }
3858 
3859 float32 int64_to_float32(int64_t a, float_status *status)
3860 {
3861     return int64_to_float32_scalbn(a, 0, status);
3862 }
3863 
3864 float32 int32_to_float32(int32_t a, float_status *status)
3865 {
3866     return int64_to_float32_scalbn(a, 0, status);
3867 }
3868 
3869 float32 int16_to_float32(int16_t a, float_status *status)
3870 {
3871     return int64_to_float32_scalbn(a, 0, status);
3872 }
3873 
3874 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3875 {
3876     FloatParts64 p;
3877 
3878     /* Without scaling, there are no overflow concerns. */
3879     if (likely(scale == 0) && can_use_fpu(status)) {
3880         union_float64 ur;
3881         ur.h = a;
3882         return ur.s;
3883     }
3884 
3885     parts_sint_to_float(&p, a, scale, status);
3886     return float64_round_pack_canonical(&p, status);
3887 }
3888 
3889 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3890 {
3891     return int64_to_float64_scalbn(a, scale, status);
3892 }
3893 
3894 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3895 {
3896     return int64_to_float64_scalbn(a, scale, status);
3897 }
3898 
3899 float64 int64_to_float64(int64_t a, float_status *status)
3900 {
3901     return int64_to_float64_scalbn(a, 0, status);
3902 }
3903 
3904 float64 int32_to_float64(int32_t a, float_status *status)
3905 {
3906     return int64_to_float64_scalbn(a, 0, status);
3907 }
3908 
3909 float64 int16_to_float64(int16_t a, float_status *status)
3910 {
3911     return int64_to_float64_scalbn(a, 0, status);
3912 }
3913 
3914 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3915 {
3916     FloatParts64 p;
3917 
3918     parts_sint_to_float(&p, a, scale, status);
3919     return bfloat16_round_pack_canonical(&p, status);
3920 }
3921 
3922 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3923 {
3924     return int64_to_bfloat16_scalbn(a, scale, status);
3925 }
3926 
3927 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3928 {
3929     return int64_to_bfloat16_scalbn(a, scale, status);
3930 }
3931 
3932 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3933 {
3934     return int64_to_bfloat16_scalbn(a, 0, status);
3935 }
3936 
3937 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3938 {
3939     return int64_to_bfloat16_scalbn(a, 0, status);
3940 }
3941 
3942 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3943 {
3944     return int64_to_bfloat16_scalbn(a, 0, status);
3945 }
3946 
3947 float128 int128_to_float128(Int128 a, float_status *status)
3948 {
3949     FloatParts128 p = { };
3950     int shift;
3951 
3952     if (int128_nz(a)) {
3953         p.cls = float_class_normal;
3954         if (!int128_nonneg(a)) {
3955             p.sign = true;
3956             a = int128_neg(a);
3957         }
3958 
3959         shift = clz64(int128_gethi(a));
3960         if (shift == 64) {
3961             shift += clz64(int128_getlo(a));
3962         }
3963 
3964         p.exp = 127 - shift;
3965         a = int128_lshift(a, shift);
3966 
3967         p.frac_hi = int128_gethi(a);
3968         p.frac_lo = int128_getlo(a);
3969     } else {
3970         p.cls = float_class_zero;
3971     }
3972 
3973     return float128_round_pack_canonical(&p, status);
3974 }
3975 
3976 float128 int64_to_float128(int64_t a, float_status *status)
3977 {
3978     FloatParts128 p;
3979 
3980     parts_sint_to_float(&p, a, 0, status);
3981     return float128_round_pack_canonical(&p, status);
3982 }
3983 
3984 float128 int32_to_float128(int32_t a, float_status *status)
3985 {
3986     return int64_to_float128(a, status);
3987 }
3988 
3989 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3990 {
3991     FloatParts128 p;
3992 
3993     parts_sint_to_float(&p, a, 0, status);
3994     return floatx80_round_pack_canonical(&p, status);
3995 }
3996 
3997 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3998 {
3999     return int64_to_floatx80(a, status);
4000 }
4001 
4002 /*
4003  * Unsigned Integer to floating-point conversions
4004  */
4005 
4006 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4007 {
4008     FloatParts64 p;
4009 
4010     parts_uint_to_float(&p, a, scale, status);
4011     return float16_round_pack_canonical(&p, status);
4012 }
4013 
4014 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4015 {
4016     return uint64_to_float16_scalbn(a, scale, status);
4017 }
4018 
4019 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4020 {
4021     return uint64_to_float16_scalbn(a, scale, status);
4022 }
4023 
4024 float16 uint64_to_float16(uint64_t a, float_status *status)
4025 {
4026     return uint64_to_float16_scalbn(a, 0, status);
4027 }
4028 
4029 float16 uint32_to_float16(uint32_t a, float_status *status)
4030 {
4031     return uint64_to_float16_scalbn(a, 0, status);
4032 }
4033 
4034 float16 uint16_to_float16(uint16_t a, float_status *status)
4035 {
4036     return uint64_to_float16_scalbn(a, 0, status);
4037 }
4038 
4039 float16 uint8_to_float16(uint8_t a, float_status *status)
4040 {
4041     return uint64_to_float16_scalbn(a, 0, status);
4042 }
4043 
4044 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4045 {
4046     FloatParts64 p;
4047 
4048     /* Without scaling, there are no overflow concerns. */
4049     if (likely(scale == 0) && can_use_fpu(status)) {
4050         union_float32 ur;
4051         ur.h = a;
4052         return ur.s;
4053     }
4054 
4055     parts_uint_to_float(&p, a, scale, status);
4056     return float32_round_pack_canonical(&p, status);
4057 }
4058 
4059 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4060 {
4061     return uint64_to_float32_scalbn(a, scale, status);
4062 }
4063 
4064 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4065 {
4066     return uint64_to_float32_scalbn(a, scale, status);
4067 }
4068 
4069 float32 uint64_to_float32(uint64_t a, float_status *status)
4070 {
4071     return uint64_to_float32_scalbn(a, 0, status);
4072 }
4073 
4074 float32 uint32_to_float32(uint32_t a, float_status *status)
4075 {
4076     return uint64_to_float32_scalbn(a, 0, status);
4077 }
4078 
4079 float32 uint16_to_float32(uint16_t a, float_status *status)
4080 {
4081     return uint64_to_float32_scalbn(a, 0, status);
4082 }
4083 
4084 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4085 {
4086     FloatParts64 p;
4087 
4088     /* Without scaling, there are no overflow concerns. */
4089     if (likely(scale == 0) && can_use_fpu(status)) {
4090         union_float64 ur;
4091         ur.h = a;
4092         return ur.s;
4093     }
4094 
4095     parts_uint_to_float(&p, a, scale, status);
4096     return float64_round_pack_canonical(&p, status);
4097 }
4098 
4099 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4100 {
4101     return uint64_to_float64_scalbn(a, scale, status);
4102 }
4103 
4104 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4105 {
4106     return uint64_to_float64_scalbn(a, scale, status);
4107 }
4108 
4109 float64 uint64_to_float64(uint64_t a, float_status *status)
4110 {
4111     return uint64_to_float64_scalbn(a, 0, status);
4112 }
4113 
4114 float64 uint32_to_float64(uint32_t a, float_status *status)
4115 {
4116     return uint64_to_float64_scalbn(a, 0, status);
4117 }
4118 
4119 float64 uint16_to_float64(uint16_t a, float_status *status)
4120 {
4121     return uint64_to_float64_scalbn(a, 0, status);
4122 }
4123 
4124 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4125 {
4126     FloatParts64 p;
4127 
4128     parts_uint_to_float(&p, a, scale, status);
4129     return bfloat16_round_pack_canonical(&p, status);
4130 }
4131 
4132 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4133 {
4134     return uint64_to_bfloat16_scalbn(a, scale, status);
4135 }
4136 
4137 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4138 {
4139     return uint64_to_bfloat16_scalbn(a, scale, status);
4140 }
4141 
4142 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4143 {
4144     return uint64_to_bfloat16_scalbn(a, 0, status);
4145 }
4146 
4147 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4148 {
4149     return uint64_to_bfloat16_scalbn(a, 0, status);
4150 }
4151 
4152 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4153 {
4154     return uint64_to_bfloat16_scalbn(a, 0, status);
4155 }
4156 
4157 float128 uint64_to_float128(uint64_t a, float_status *status)
4158 {
4159     FloatParts128 p;
4160 
4161     parts_uint_to_float(&p, a, 0, status);
4162     return float128_round_pack_canonical(&p, status);
4163 }
4164 
4165 float128 uint128_to_float128(Int128 a, float_status *status)
4166 {
4167     FloatParts128 p = { };
4168     int shift;
4169 
4170     if (int128_nz(a)) {
4171         p.cls = float_class_normal;
4172 
4173         shift = clz64(int128_gethi(a));
4174         if (shift == 64) {
4175             shift += clz64(int128_getlo(a));
4176         }
4177 
4178         p.exp = 127 - shift;
4179         a = int128_lshift(a, shift);
4180 
4181         p.frac_hi = int128_gethi(a);
4182         p.frac_lo = int128_getlo(a);
4183     } else {
4184         p.cls = float_class_zero;
4185     }
4186 
4187     return float128_round_pack_canonical(&p, status);
4188 }
4189 
4190 /*
4191  * Minimum and maximum
4192  */
4193 
4194 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4195 {
4196     FloatParts64 pa, pb, *pr;
4197 
4198     float16_unpack_canonical(&pa, a, s);
4199     float16_unpack_canonical(&pb, b, s);
4200     pr = parts_minmax(&pa, &pb, s, flags);
4201 
4202     return float16_round_pack_canonical(pr, s);
4203 }
4204 
4205 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4206                                 float_status *s, int flags)
4207 {
4208     FloatParts64 pa, pb, *pr;
4209 
4210     bfloat16_unpack_canonical(&pa, a, s);
4211     bfloat16_unpack_canonical(&pb, b, s);
4212     pr = parts_minmax(&pa, &pb, s, flags);
4213 
4214     return bfloat16_round_pack_canonical(pr, s);
4215 }
4216 
4217 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4218 {
4219     FloatParts64 pa, pb, *pr;
4220 
4221     float32_unpack_canonical(&pa, a, s);
4222     float32_unpack_canonical(&pb, b, s);
4223     pr = parts_minmax(&pa, &pb, s, flags);
4224 
4225     return float32_round_pack_canonical(pr, s);
4226 }
4227 
4228 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4229 {
4230     FloatParts64 pa, pb, *pr;
4231 
4232     float64_unpack_canonical(&pa, a, s);
4233     float64_unpack_canonical(&pb, b, s);
4234     pr = parts_minmax(&pa, &pb, s, flags);
4235 
4236     return float64_round_pack_canonical(pr, s);
4237 }
4238 
4239 static float128 float128_minmax(float128 a, float128 b,
4240                                 float_status *s, int flags)
4241 {
4242     FloatParts128 pa, pb, *pr;
4243 
4244     float128_unpack_canonical(&pa, a, s);
4245     float128_unpack_canonical(&pb, b, s);
4246     pr = parts_minmax(&pa, &pb, s, flags);
4247 
4248     return float128_round_pack_canonical(pr, s);
4249 }
4250 
4251 #define MINMAX_1(type, name, flags) \
4252     type type##_##name(type a, type b, float_status *s) \
4253     { return type##_minmax(a, b, s, flags); }
4254 
4255 #define MINMAX_2(type) \
4256     MINMAX_1(type, max, 0)                                                \
4257     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4258     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4259     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4260     MINMAX_1(type, min, minmax_ismin)                                     \
4261     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4262     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4263     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4264 
4265 MINMAX_2(float16)
4266 MINMAX_2(bfloat16)
4267 MINMAX_2(float32)
4268 MINMAX_2(float64)
4269 MINMAX_2(float128)
4270 
4271 #undef MINMAX_1
4272 #undef MINMAX_2
4273 
4274 /*
4275  * Floating point compare
4276  */
4277 
4278 static FloatRelation QEMU_FLATTEN
4279 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4280 {
4281     FloatParts64 pa, pb;
4282 
4283     float16_unpack_canonical(&pa, a, s);
4284     float16_unpack_canonical(&pb, b, s);
4285     return parts_compare(&pa, &pb, s, is_quiet);
4286 }
4287 
4288 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4289 {
4290     return float16_do_compare(a, b, s, false);
4291 }
4292 
4293 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4294 {
4295     return float16_do_compare(a, b, s, true);
4296 }
4297 
4298 static FloatRelation QEMU_SOFTFLOAT_ATTR
4299 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4300 {
4301     FloatParts64 pa, pb;
4302 
4303     float32_unpack_canonical(&pa, a, s);
4304     float32_unpack_canonical(&pb, b, s);
4305     return parts_compare(&pa, &pb, s, is_quiet);
4306 }
4307 
4308 static FloatRelation QEMU_FLATTEN
4309 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4310 {
4311     union_float32 ua, ub;
4312 
4313     ua.s = xa;
4314     ub.s = xb;
4315 
4316     if (QEMU_NO_HARDFLOAT) {
4317         goto soft;
4318     }
4319 
4320     float32_input_flush2(&ua.s, &ub.s, s);
4321     if (isgreaterequal(ua.h, ub.h)) {
4322         if (isgreater(ua.h, ub.h)) {
4323             return float_relation_greater;
4324         }
4325         return float_relation_equal;
4326     }
4327     if (likely(isless(ua.h, ub.h))) {
4328         return float_relation_less;
4329     }
4330     /*
4331      * The only condition remaining is unordered.
4332      * Fall through to set flags.
4333      */
4334  soft:
4335     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4336 }
4337 
4338 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4339 {
4340     return float32_hs_compare(a, b, s, false);
4341 }
4342 
4343 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4344 {
4345     return float32_hs_compare(a, b, s, true);
4346 }
4347 
4348 static FloatRelation QEMU_SOFTFLOAT_ATTR
4349 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4350 {
4351     FloatParts64 pa, pb;
4352 
4353     float64_unpack_canonical(&pa, a, s);
4354     float64_unpack_canonical(&pb, b, s);
4355     return parts_compare(&pa, &pb, s, is_quiet);
4356 }
4357 
4358 static FloatRelation QEMU_FLATTEN
4359 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4360 {
4361     union_float64 ua, ub;
4362 
4363     ua.s = xa;
4364     ub.s = xb;
4365 
4366     if (QEMU_NO_HARDFLOAT) {
4367         goto soft;
4368     }
4369 
4370     float64_input_flush2(&ua.s, &ub.s, s);
4371     if (isgreaterequal(ua.h, ub.h)) {
4372         if (isgreater(ua.h, ub.h)) {
4373             return float_relation_greater;
4374         }
4375         return float_relation_equal;
4376     }
4377     if (likely(isless(ua.h, ub.h))) {
4378         return float_relation_less;
4379     }
4380     /*
4381      * The only condition remaining is unordered.
4382      * Fall through to set flags.
4383      */
4384  soft:
4385     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4386 }
4387 
4388 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4389 {
4390     return float64_hs_compare(a, b, s, false);
4391 }
4392 
4393 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4394 {
4395     return float64_hs_compare(a, b, s, true);
4396 }
4397 
4398 static FloatRelation QEMU_FLATTEN
4399 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4400 {
4401     FloatParts64 pa, pb;
4402 
4403     bfloat16_unpack_canonical(&pa, a, s);
4404     bfloat16_unpack_canonical(&pb, b, s);
4405     return parts_compare(&pa, &pb, s, is_quiet);
4406 }
4407 
4408 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4409 {
4410     return bfloat16_do_compare(a, b, s, false);
4411 }
4412 
4413 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4414 {
4415     return bfloat16_do_compare(a, b, s, true);
4416 }
4417 
4418 static FloatRelation QEMU_FLATTEN
4419 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4420 {
4421     FloatParts128 pa, pb;
4422 
4423     float128_unpack_canonical(&pa, a, s);
4424     float128_unpack_canonical(&pb, b, s);
4425     return parts_compare(&pa, &pb, s, is_quiet);
4426 }
4427 
4428 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4429 {
4430     return float128_do_compare(a, b, s, false);
4431 }
4432 
4433 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4434 {
4435     return float128_do_compare(a, b, s, true);
4436 }
4437 
4438 static FloatRelation QEMU_FLATTEN
4439 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4440 {
4441     FloatParts128 pa, pb;
4442 
4443     if (!floatx80_unpack_canonical(&pa, a, s) ||
4444         !floatx80_unpack_canonical(&pb, b, s)) {
4445         return float_relation_unordered;
4446     }
4447     return parts_compare(&pa, &pb, s, is_quiet);
4448 }
4449 
4450 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4451 {
4452     return floatx80_do_compare(a, b, s, false);
4453 }
4454 
4455 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4456 {
4457     return floatx80_do_compare(a, b, s, true);
4458 }
4459 
4460 /*
4461  * Scale by 2**N
4462  */
4463 
4464 float16 float16_scalbn(float16 a, int n, float_status *status)
4465 {
4466     FloatParts64 p;
4467 
4468     float16_unpack_canonical(&p, a, status);
4469     parts_scalbn(&p, n, status);
4470     return float16_round_pack_canonical(&p, status);
4471 }
4472 
4473 float32 float32_scalbn(float32 a, int n, float_status *status)
4474 {
4475     FloatParts64 p;
4476 
4477     float32_unpack_canonical(&p, a, status);
4478     parts_scalbn(&p, n, status);
4479     return float32_round_pack_canonical(&p, status);
4480 }
4481 
4482 float64 float64_scalbn(float64 a, int n, float_status *status)
4483 {
4484     FloatParts64 p;
4485 
4486     float64_unpack_canonical(&p, a, status);
4487     parts_scalbn(&p, n, status);
4488     return float64_round_pack_canonical(&p, status);
4489 }
4490 
4491 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4492 {
4493     FloatParts64 p;
4494 
4495     bfloat16_unpack_canonical(&p, a, status);
4496     parts_scalbn(&p, n, status);
4497     return bfloat16_round_pack_canonical(&p, status);
4498 }
4499 
4500 float128 float128_scalbn(float128 a, int n, float_status *status)
4501 {
4502     FloatParts128 p;
4503 
4504     float128_unpack_canonical(&p, a, status);
4505     parts_scalbn(&p, n, status);
4506     return float128_round_pack_canonical(&p, status);
4507 }
4508 
4509 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4510 {
4511     FloatParts128 p;
4512 
4513     if (!floatx80_unpack_canonical(&p, a, status)) {
4514         return floatx80_default_nan(status);
4515     }
4516     parts_scalbn(&p, n, status);
4517     return floatx80_round_pack_canonical(&p, status);
4518 }
4519 
4520 /*
4521  * Square Root
4522  */
4523 
4524 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4525 {
4526     FloatParts64 p;
4527 
4528     float16_unpack_canonical(&p, a, status);
4529     parts_sqrt(&p, status, &float16_params);
4530     return float16_round_pack_canonical(&p, status);
4531 }
4532 
4533 static float32 QEMU_SOFTFLOAT_ATTR
4534 soft_f32_sqrt(float32 a, float_status *status)
4535 {
4536     FloatParts64 p;
4537 
4538     float32_unpack_canonical(&p, a, status);
4539     parts_sqrt(&p, status, &float32_params);
4540     return float32_round_pack_canonical(&p, status);
4541 }
4542 
4543 static float64 QEMU_SOFTFLOAT_ATTR
4544 soft_f64_sqrt(float64 a, float_status *status)
4545 {
4546     FloatParts64 p;
4547 
4548     float64_unpack_canonical(&p, a, status);
4549     parts_sqrt(&p, status, &float64_params);
4550     return float64_round_pack_canonical(&p, status);
4551 }
4552 
4553 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4554 {
4555     union_float32 ua, ur;
4556 
4557     ua.s = xa;
4558     if (unlikely(!can_use_fpu(s))) {
4559         goto soft;
4560     }
4561 
4562     float32_input_flush1(&ua.s, s);
4563     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4564         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4565                        fpclassify(ua.h) == FP_ZERO) ||
4566                      signbit(ua.h))) {
4567             goto soft;
4568         }
4569     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4570                         float32_is_neg(ua.s))) {
4571         goto soft;
4572     }
4573     ur.h = sqrtf(ua.h);
4574     return ur.s;
4575 
4576  soft:
4577     return soft_f32_sqrt(ua.s, s);
4578 }
4579 
4580 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4581 {
4582     union_float64 ua, ur;
4583 
4584     ua.s = xa;
4585     if (unlikely(!can_use_fpu(s))) {
4586         goto soft;
4587     }
4588 
4589     float64_input_flush1(&ua.s, s);
4590     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4591         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4592                        fpclassify(ua.h) == FP_ZERO) ||
4593                      signbit(ua.h))) {
4594             goto soft;
4595         }
4596     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4597                         float64_is_neg(ua.s))) {
4598         goto soft;
4599     }
4600     ur.h = sqrt(ua.h);
4601     return ur.s;
4602 
4603  soft:
4604     return soft_f64_sqrt(ua.s, s);
4605 }
4606 
4607 float64 float64r32_sqrt(float64 a, float_status *status)
4608 {
4609     FloatParts64 p;
4610 
4611     float64_unpack_canonical(&p, a, status);
4612     parts_sqrt(&p, status, &float64_params);
4613     return float64r32_round_pack_canonical(&p, status);
4614 }
4615 
4616 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4617 {
4618     FloatParts64 p;
4619 
4620     bfloat16_unpack_canonical(&p, a, status);
4621     parts_sqrt(&p, status, &bfloat16_params);
4622     return bfloat16_round_pack_canonical(&p, status);
4623 }
4624 
4625 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4626 {
4627     FloatParts128 p;
4628 
4629     float128_unpack_canonical(&p, a, status);
4630     parts_sqrt(&p, status, &float128_params);
4631     return float128_round_pack_canonical(&p, status);
4632 }
4633 
4634 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4635 {
4636     FloatParts128 p;
4637 
4638     if (!floatx80_unpack_canonical(&p, a, s)) {
4639         return floatx80_default_nan(s);
4640     }
4641     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4642     return floatx80_round_pack_canonical(&p, s);
4643 }
4644 
4645 /*
4646  * log2
4647  */
4648 float32 float32_log2(float32 a, float_status *status)
4649 {
4650     FloatParts64 p;
4651 
4652     float32_unpack_canonical(&p, a, status);
4653     parts_log2(&p, status, &float32_params);
4654     return float32_round_pack_canonical(&p, status);
4655 }
4656 
4657 float64 float64_log2(float64 a, float_status *status)
4658 {
4659     FloatParts64 p;
4660 
4661     float64_unpack_canonical(&p, a, status);
4662     parts_log2(&p, status, &float64_params);
4663     return float64_round_pack_canonical(&p, status);
4664 }
4665 
4666 /*----------------------------------------------------------------------------
4667 | The pattern for a default generated NaN.
4668 *----------------------------------------------------------------------------*/
4669 
4670 float16 float16_default_nan(float_status *status)
4671 {
4672     FloatParts64 p;
4673 
4674     parts_default_nan(&p, status);
4675     p.frac >>= float16_params.frac_shift;
4676     return float16_pack_raw(&p);
4677 }
4678 
4679 float32 float32_default_nan(float_status *status)
4680 {
4681     FloatParts64 p;
4682 
4683     parts_default_nan(&p, status);
4684     p.frac >>= float32_params.frac_shift;
4685     return float32_pack_raw(&p);
4686 }
4687 
4688 float64 float64_default_nan(float_status *status)
4689 {
4690     FloatParts64 p;
4691 
4692     parts_default_nan(&p, status);
4693     p.frac >>= float64_params.frac_shift;
4694     return float64_pack_raw(&p);
4695 }
4696 
4697 float128 float128_default_nan(float_status *status)
4698 {
4699     FloatParts128 p;
4700 
4701     parts_default_nan(&p, status);
4702     frac_shr(&p, float128_params.frac_shift);
4703     return float128_pack_raw(&p);
4704 }
4705 
4706 bfloat16 bfloat16_default_nan(float_status *status)
4707 {
4708     FloatParts64 p;
4709 
4710     parts_default_nan(&p, status);
4711     p.frac >>= bfloat16_params.frac_shift;
4712     return bfloat16_pack_raw(&p);
4713 }
4714 
4715 /*----------------------------------------------------------------------------
4716 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4717 *----------------------------------------------------------------------------*/
4718 
4719 float16 float16_silence_nan(float16 a, float_status *status)
4720 {
4721     FloatParts64 p;
4722 
4723     float16_unpack_raw(&p, a);
4724     p.frac <<= float16_params.frac_shift;
4725     parts_silence_nan(&p, status);
4726     p.frac >>= float16_params.frac_shift;
4727     return float16_pack_raw(&p);
4728 }
4729 
4730 float32 float32_silence_nan(float32 a, float_status *status)
4731 {
4732     FloatParts64 p;
4733 
4734     float32_unpack_raw(&p, a);
4735     p.frac <<= float32_params.frac_shift;
4736     parts_silence_nan(&p, status);
4737     p.frac >>= float32_params.frac_shift;
4738     return float32_pack_raw(&p);
4739 }
4740 
4741 float64 float64_silence_nan(float64 a, float_status *status)
4742 {
4743     FloatParts64 p;
4744 
4745     float64_unpack_raw(&p, a);
4746     p.frac <<= float64_params.frac_shift;
4747     parts_silence_nan(&p, status);
4748     p.frac >>= float64_params.frac_shift;
4749     return float64_pack_raw(&p);
4750 }
4751 
4752 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4753 {
4754     FloatParts64 p;
4755 
4756     bfloat16_unpack_raw(&p, a);
4757     p.frac <<= bfloat16_params.frac_shift;
4758     parts_silence_nan(&p, status);
4759     p.frac >>= bfloat16_params.frac_shift;
4760     return bfloat16_pack_raw(&p);
4761 }
4762 
4763 float128 float128_silence_nan(float128 a, float_status *status)
4764 {
4765     FloatParts128 p;
4766 
4767     float128_unpack_raw(&p, a);
4768     frac_shl(&p, float128_params.frac_shift);
4769     parts_silence_nan(&p, status);
4770     frac_shr(&p, float128_params.frac_shift);
4771     return float128_pack_raw(&p);
4772 }
4773 
4774 /*----------------------------------------------------------------------------
4775 | If `a' is denormal and we are in flush-to-zero mode then set the
4776 | input-denormal exception and return zero. Otherwise just return the value.
4777 *----------------------------------------------------------------------------*/
4778 
4779 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4780 {
4781     if (p.exp == 0 && p.frac != 0) {
4782         float_raise(float_flag_input_denormal, status);
4783         return true;
4784     }
4785 
4786     return false;
4787 }
4788 
4789 float16 float16_squash_input_denormal(float16 a, float_status *status)
4790 {
4791     if (status->flush_inputs_to_zero) {
4792         FloatParts64 p;
4793 
4794         float16_unpack_raw(&p, a);
4795         if (parts_squash_denormal(p, status)) {
4796             return float16_set_sign(float16_zero, p.sign);
4797         }
4798     }
4799     return a;
4800 }
4801 
4802 float32 float32_squash_input_denormal(float32 a, float_status *status)
4803 {
4804     if (status->flush_inputs_to_zero) {
4805         FloatParts64 p;
4806 
4807         float32_unpack_raw(&p, a);
4808         if (parts_squash_denormal(p, status)) {
4809             return float32_set_sign(float32_zero, p.sign);
4810         }
4811     }
4812     return a;
4813 }
4814 
4815 float64 float64_squash_input_denormal(float64 a, float_status *status)
4816 {
4817     if (status->flush_inputs_to_zero) {
4818         FloatParts64 p;
4819 
4820         float64_unpack_raw(&p, a);
4821         if (parts_squash_denormal(p, status)) {
4822             return float64_set_sign(float64_zero, p.sign);
4823         }
4824     }
4825     return a;
4826 }
4827 
4828 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4829 {
4830     if (status->flush_inputs_to_zero) {
4831         FloatParts64 p;
4832 
4833         bfloat16_unpack_raw(&p, a);
4834         if (parts_squash_denormal(p, status)) {
4835             return bfloat16_set_sign(bfloat16_zero, p.sign);
4836         }
4837     }
4838     return a;
4839 }
4840 
4841 /*----------------------------------------------------------------------------
4842 | Normalizes the subnormal extended double-precision floating-point value
4843 | represented by the denormalized significand `aSig'.  The normalized exponent
4844 | and significand are stored at the locations pointed to by `zExpPtr' and
4845 | `zSigPtr', respectively.
4846 *----------------------------------------------------------------------------*/
4847 
4848 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4849                                 uint64_t *zSigPtr)
4850 {
4851     int8_t shiftCount;
4852 
4853     shiftCount = clz64(aSig);
4854     *zSigPtr = aSig<<shiftCount;
4855     *zExpPtr = 1 - shiftCount;
4856 }
4857 
4858 /*----------------------------------------------------------------------------
4859 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4860 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4861 | and returns the proper extended double-precision floating-point value
4862 | corresponding to the abstract input.  Ordinarily, the abstract value is
4863 | rounded and packed into the extended double-precision format, with the
4864 | inexact exception raised if the abstract input cannot be represented
4865 | exactly.  However, if the abstract value is too large, the overflow and
4866 | inexact exceptions are raised and an infinity or maximal finite value is
4867 | returned.  If the abstract value is too small, the input value is rounded to
4868 | a subnormal number, and the underflow and inexact exceptions are raised if
4869 | the abstract input cannot be represented exactly as a subnormal extended
4870 | double-precision floating-point number.
4871 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4872 | the result is rounded to the same number of bits as single or double
4873 | precision, respectively.  Otherwise, the result is rounded to the full
4874 | precision of the extended double-precision format.
4875 |     The input significand must be normalized or smaller.  If the input
4876 | significand is not normalized, `zExp' must be 0; in that case, the result
4877 | returned is a subnormal number, and it must not require rounding.  The
4878 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4879 | Floating-Point Arithmetic.
4880 *----------------------------------------------------------------------------*/
4881 
4882 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4883                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4884                               float_status *status)
4885 {
4886     FloatRoundMode roundingMode;
4887     bool roundNearestEven, increment, isTiny;
4888     int64_t roundIncrement, roundMask, roundBits;
4889 
4890     roundingMode = status->float_rounding_mode;
4891     roundNearestEven = ( roundingMode == float_round_nearest_even );
4892     switch (roundingPrecision) {
4893     case floatx80_precision_x:
4894         goto precision80;
4895     case floatx80_precision_d:
4896         roundIncrement = UINT64_C(0x0000000000000400);
4897         roundMask = UINT64_C(0x00000000000007FF);
4898         break;
4899     case floatx80_precision_s:
4900         roundIncrement = UINT64_C(0x0000008000000000);
4901         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4902         break;
4903     default:
4904         g_assert_not_reached();
4905     }
4906     zSig0 |= ( zSig1 != 0 );
4907     switch (roundingMode) {
4908     case float_round_nearest_even:
4909     case float_round_ties_away:
4910         break;
4911     case float_round_to_zero:
4912         roundIncrement = 0;
4913         break;
4914     case float_round_up:
4915         roundIncrement = zSign ? 0 : roundMask;
4916         break;
4917     case float_round_down:
4918         roundIncrement = zSign ? roundMask : 0;
4919         break;
4920     default:
4921         abort();
4922     }
4923     roundBits = zSig0 & roundMask;
4924     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4925         if (    ( 0x7FFE < zExp )
4926              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4927            ) {
4928             goto overflow;
4929         }
4930         if ( zExp <= 0 ) {
4931             if (status->flush_to_zero) {
4932                 float_raise(float_flag_output_denormal, status);
4933                 return packFloatx80(zSign, 0, 0);
4934             }
4935             isTiny = status->tininess_before_rounding
4936                   || (zExp < 0 )
4937                   || (zSig0 <= zSig0 + roundIncrement);
4938             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4939             zExp = 0;
4940             roundBits = zSig0 & roundMask;
4941             if (isTiny && roundBits) {
4942                 float_raise(float_flag_underflow, status);
4943             }
4944             if (roundBits) {
4945                 float_raise(float_flag_inexact, status);
4946             }
4947             zSig0 += roundIncrement;
4948             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4949             roundIncrement = roundMask + 1;
4950             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4951                 roundMask |= roundIncrement;
4952             }
4953             zSig0 &= ~ roundMask;
4954             return packFloatx80( zSign, zExp, zSig0 );
4955         }
4956     }
4957     if (roundBits) {
4958         float_raise(float_flag_inexact, status);
4959     }
4960     zSig0 += roundIncrement;
4961     if ( zSig0 < roundIncrement ) {
4962         ++zExp;
4963         zSig0 = UINT64_C(0x8000000000000000);
4964     }
4965     roundIncrement = roundMask + 1;
4966     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4967         roundMask |= roundIncrement;
4968     }
4969     zSig0 &= ~ roundMask;
4970     if ( zSig0 == 0 ) zExp = 0;
4971     return packFloatx80( zSign, zExp, zSig0 );
4972  precision80:
4973     switch (roundingMode) {
4974     case float_round_nearest_even:
4975     case float_round_ties_away:
4976         increment = ((int64_t)zSig1 < 0);
4977         break;
4978     case float_round_to_zero:
4979         increment = 0;
4980         break;
4981     case float_round_up:
4982         increment = !zSign && zSig1;
4983         break;
4984     case float_round_down:
4985         increment = zSign && zSig1;
4986         break;
4987     default:
4988         abort();
4989     }
4990     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4991         if (    ( 0x7FFE < zExp )
4992              || (    ( zExp == 0x7FFE )
4993                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4994                   && increment
4995                 )
4996            ) {
4997             roundMask = 0;
4998  overflow:
4999             float_raise(float_flag_overflow | float_flag_inexact, status);
5000             if (    ( roundingMode == float_round_to_zero )
5001                  || ( zSign && ( roundingMode == float_round_up ) )
5002                  || ( ! zSign && ( roundingMode == float_round_down ) )
5003                ) {
5004                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5005             }
5006             return packFloatx80(zSign,
5007                                 floatx80_infinity_high,
5008                                 floatx80_infinity_low);
5009         }
5010         if ( zExp <= 0 ) {
5011             isTiny = status->tininess_before_rounding
5012                   || (zExp < 0)
5013                   || !increment
5014                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5015             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5016             zExp = 0;
5017             if (isTiny && zSig1) {
5018                 float_raise(float_flag_underflow, status);
5019             }
5020             if (zSig1) {
5021                 float_raise(float_flag_inexact, status);
5022             }
5023             switch (roundingMode) {
5024             case float_round_nearest_even:
5025             case float_round_ties_away:
5026                 increment = ((int64_t)zSig1 < 0);
5027                 break;
5028             case float_round_to_zero:
5029                 increment = 0;
5030                 break;
5031             case float_round_up:
5032                 increment = !zSign && zSig1;
5033                 break;
5034             case float_round_down:
5035                 increment = zSign && zSig1;
5036                 break;
5037             default:
5038                 abort();
5039             }
5040             if ( increment ) {
5041                 ++zSig0;
5042                 if (!(zSig1 << 1) && roundNearestEven) {
5043                     zSig0 &= ~1;
5044                 }
5045                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5046             }
5047             return packFloatx80( zSign, zExp, zSig0 );
5048         }
5049     }
5050     if (zSig1) {
5051         float_raise(float_flag_inexact, status);
5052     }
5053     if ( increment ) {
5054         ++zSig0;
5055         if ( zSig0 == 0 ) {
5056             ++zExp;
5057             zSig0 = UINT64_C(0x8000000000000000);
5058         }
5059         else {
5060             if (!(zSig1 << 1) && roundNearestEven) {
5061                 zSig0 &= ~1;
5062             }
5063         }
5064     }
5065     else {
5066         if ( zSig0 == 0 ) zExp = 0;
5067     }
5068     return packFloatx80( zSign, zExp, zSig0 );
5069 
5070 }
5071 
5072 /*----------------------------------------------------------------------------
5073 | Takes an abstract floating-point value having sign `zSign', exponent
5074 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5075 | and returns the proper extended double-precision floating-point value
5076 | corresponding to the abstract input.  This routine is just like
5077 | `roundAndPackFloatx80' except that the input significand does not have to be
5078 | normalized.
5079 *----------------------------------------------------------------------------*/
5080 
5081 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5082                                        bool zSign, int32_t zExp,
5083                                        uint64_t zSig0, uint64_t zSig1,
5084                                        float_status *status)
5085 {
5086     int8_t shiftCount;
5087 
5088     if ( zSig0 == 0 ) {
5089         zSig0 = zSig1;
5090         zSig1 = 0;
5091         zExp -= 64;
5092     }
5093     shiftCount = clz64(zSig0);
5094     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5095     zExp -= shiftCount;
5096     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5097                                 zSig0, zSig1, status);
5098 
5099 }
5100 
5101 /*----------------------------------------------------------------------------
5102 | Returns the binary exponential of the single-precision floating-point value
5103 | `a'. The operation is performed according to the IEC/IEEE Standard for
5104 | Binary Floating-Point Arithmetic.
5105 |
5106 | Uses the following identities:
5107 |
5108 | 1. -------------------------------------------------------------------------
5109 |      x    x*ln(2)
5110 |     2  = e
5111 |
5112 | 2. -------------------------------------------------------------------------
5113 |                      2     3     4     5           n
5114 |      x        x     x     x     x     x           x
5115 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5116 |               1!    2!    3!    4!    5!          n!
5117 *----------------------------------------------------------------------------*/
5118 
5119 static const float64 float32_exp2_coefficients[15] =
5120 {
5121     const_float64( 0x3ff0000000000000ll ), /*  1 */
5122     const_float64( 0x3fe0000000000000ll ), /*  2 */
5123     const_float64( 0x3fc5555555555555ll ), /*  3 */
5124     const_float64( 0x3fa5555555555555ll ), /*  4 */
5125     const_float64( 0x3f81111111111111ll ), /*  5 */
5126     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5127     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5128     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5129     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5130     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5131     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5132     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5133     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5134     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5135     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5136 };
5137 
5138 float32 float32_exp2(float32 a, float_status *status)
5139 {
5140     FloatParts64 xp, xnp, tp, rp;
5141     int i;
5142 
5143     float32_unpack_canonical(&xp, a, status);
5144     if (unlikely(xp.cls != float_class_normal)) {
5145         switch (xp.cls) {
5146         case float_class_snan:
5147         case float_class_qnan:
5148             parts_return_nan(&xp, status);
5149             return float32_round_pack_canonical(&xp, status);
5150         case float_class_inf:
5151             return xp.sign ? float32_zero : a;
5152         case float_class_zero:
5153             return float32_one;
5154         default:
5155             break;
5156         }
5157         g_assert_not_reached();
5158     }
5159 
5160     float_raise(float_flag_inexact, status);
5161 
5162     float64_unpack_canonical(&tp, float64_ln2, status);
5163     xp = *parts_mul(&xp, &tp, status);
5164     xnp = xp;
5165 
5166     float64_unpack_canonical(&rp, float64_one, status);
5167     for (i = 0 ; i < 15 ; i++) {
5168         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5169         rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5170         xnp = *parts_mul(&xnp, &xp, status);
5171     }
5172 
5173     return float32_round_pack_canonical(&rp, status);
5174 }
5175 
5176 /*----------------------------------------------------------------------------
5177 | Rounds the extended double-precision floating-point value `a'
5178 | to the precision provided by floatx80_rounding_precision and returns the
5179 | result as an extended double-precision floating-point value.
5180 | The operation is performed according to the IEC/IEEE Standard for Binary
5181 | Floating-Point Arithmetic.
5182 *----------------------------------------------------------------------------*/
5183 
5184 floatx80 floatx80_round(floatx80 a, float_status *status)
5185 {
5186     FloatParts128 p;
5187 
5188     if (!floatx80_unpack_canonical(&p, a, status)) {
5189         return floatx80_default_nan(status);
5190     }
5191     return floatx80_round_pack_canonical(&p, status);
5192 }
5193 
5194 static void __attribute__((constructor)) softfloat_init(void)
5195 {
5196     union_float64 ua, ub, uc, ur;
5197 
5198     if (QEMU_NO_HARDFLOAT) {
5199         return;
5200     }
5201     /*
5202      * Test that the host's FMA is not obviously broken. For example,
5203      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5204      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5205      */
5206     ua.s = 0x0020000000000001ULL;
5207     ub.s = 0x3ca0000000000000ULL;
5208     uc.s = 0x0020000000000000ULL;
5209     ur.h = fma(ua.h, ub.h, uc.h);
5210     if (ur.s != 0x0020000000000001ULL) {
5211         force_soft_fma = true;
5212     }
5213 }
5214