xref: /qemu/fpu/softfloat.c (revision ab930e80)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal, s);                  \
136         }                                                               \
137     }
138 
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345              f32_check_fn pre, f32_check_fn post)
346 {
347     union_float32 ua, ub, ur;
348 
349     ua.s = xa;
350     ub.s = xb;
351 
352     if (unlikely(!can_use_fpu(s))) {
353         goto soft;
354     }
355 
356     float32_input_flush2(&ua.s, &ub.s, s);
357     if (unlikely(!pre(ua, ub))) {
358         goto soft;
359     }
360 
361     ur.h = hard(ua.h, ub.h);
362     if (unlikely(f32_is_inf(ur))) {
363         float_raise(float_flag_overflow, s);
364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365         goto soft;
366     }
367     return ur.s;
368 
369  soft:
370     return soft(ua.s, ub.s, s);
371 }
372 
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376              f64_check_fn pre, f64_check_fn post)
377 {
378     union_float64 ua, ub, ur;
379 
380     ua.s = xa;
381     ub.s = xb;
382 
383     if (unlikely(!can_use_fpu(s))) {
384         goto soft;
385     }
386 
387     float64_input_flush2(&ua.s, &ub.s, s);
388     if (unlikely(!pre(ua, ub))) {
389         goto soft;
390     }
391 
392     ur.h = hard(ua.h, ub.h);
393     if (unlikely(f64_is_inf(ur))) {
394         float_raise(float_flag_overflow, s);
395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396         goto soft;
397     }
398     return ur.s;
399 
400  soft:
401     return soft(ua.s, ub.s, s);
402 }
403 
404 /*
405  * Classify a floating point number. Everything above float_class_qnan
406  * is a NaN so cls >= float_class_qnan is any NaN.
407  */
408 
409 typedef enum __attribute__ ((__packed__)) {
410     float_class_unclassified,
411     float_class_zero,
412     float_class_normal,
413     float_class_inf,
414     float_class_qnan,  /* all NaNs from here */
415     float_class_snan,
416 } FloatClass;
417 
418 #define float_cmask(bit)  (1u << (bit))
419 
420 enum {
421     float_cmask_zero    = float_cmask(float_class_zero),
422     float_cmask_normal  = float_cmask(float_class_normal),
423     float_cmask_inf     = float_cmask(float_class_inf),
424     float_cmask_qnan    = float_cmask(float_class_qnan),
425     float_cmask_snan    = float_cmask(float_class_snan),
426 
427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
429 };
430 
431 /* Flags for parts_minmax. */
432 enum {
433     /* Set for minimum; clear for maximum. */
434     minmax_ismin = 1,
435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
436     minmax_isnum = 2,
437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
438     minmax_ismag = 4,
439     /*
440      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
441      * operations.
442      */
443     minmax_isnumber = 8,
444 };
445 
446 /* Simple helpers for checking if, or what kind of, NaN we have */
447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
448 {
449     return unlikely(c >= float_class_qnan);
450 }
451 
452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
453 {
454     return c == float_class_snan;
455 }
456 
457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
458 {
459     return c == float_class_qnan;
460 }
461 
462 /*
463  * Structure holding all of the decomposed parts of a float.
464  * The exponent is unbiased and the fraction is normalized.
465  *
466  * The fraction words are stored in big-endian word ordering,
467  * so that truncation from a larger format to a smaller format
468  * can be done simply by ignoring subsequent elements.
469  */
470 
471 typedef struct {
472     FloatClass cls;
473     bool sign;
474     int32_t exp;
475     union {
476         /* Routines that know the structure may reference the singular name. */
477         uint64_t frac;
478         /*
479          * Routines expanded with multiple structures reference "hi" and "lo"
480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
481          * both the same word and aliased here.
482          */
483         uint64_t frac_hi;
484         uint64_t frac_lo;
485     };
486 } FloatParts64;
487 
488 typedef struct {
489     FloatClass cls;
490     bool sign;
491     int32_t exp;
492     uint64_t frac_hi;
493     uint64_t frac_lo;
494 } FloatParts128;
495 
496 typedef struct {
497     FloatClass cls;
498     bool sign;
499     int32_t exp;
500     uint64_t frac_hi;
501     uint64_t frac_hm;  /* high-middle */
502     uint64_t frac_lm;  /* low-middle */
503     uint64_t frac_lo;
504 } FloatParts256;
505 
506 /* These apply to the most significant word of each FloatPartsN. */
507 #define DECOMPOSED_BINARY_POINT    63
508 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
509 
510 /* Structure holding all of the relevant parameters for a format.
511  *   exp_size: the size of the exponent field
512  *   exp_bias: the offset applied to the exponent field
513  *   exp_max: the maximum normalised exponent
514  *   frac_size: the size of the fraction field
515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516  * The following are computed based the size of fraction
517  *   round_mask: bits below lsb which must be rounded
518  * The following optional modifiers are available:
519  *   arm_althp: handle ARM Alternative Half Precision
520  */
521 typedef struct {
522     int exp_size;
523     int exp_bias;
524     int exp_re_bias;
525     int exp_max;
526     int frac_size;
527     int frac_shift;
528     bool arm_althp;
529     uint64_t round_mask;
530 } FloatFmt;
531 
532 /* Expand fields based on the size of exponent and fraction */
533 #define FLOAT_PARAMS_(E)                                \
534     .exp_size       = E,                                \
535     .exp_bias       = ((1 << E) - 1) >> 1,              \
536     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
537     .exp_max        = (1 << E) - 1
538 
539 #define FLOAT_PARAMS(E, F)                              \
540     FLOAT_PARAMS_(E),                                   \
541     .frac_size      = F,                                \
542     .frac_shift     = (-F - 1) & 63,                    \
543     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
544 
545 static const FloatFmt float16_params = {
546     FLOAT_PARAMS(5, 10)
547 };
548 
549 static const FloatFmt float16_params_ahp = {
550     FLOAT_PARAMS(5, 10),
551     .arm_althp = true
552 };
553 
554 static const FloatFmt bfloat16_params = {
555     FLOAT_PARAMS(8, 7)
556 };
557 
558 static const FloatFmt float32_params = {
559     FLOAT_PARAMS(8, 23)
560 };
561 
562 static const FloatFmt float64_params = {
563     FLOAT_PARAMS(11, 52)
564 };
565 
566 static const FloatFmt float128_params = {
567     FLOAT_PARAMS(15, 112)
568 };
569 
570 #define FLOATX80_PARAMS(R)              \
571     FLOAT_PARAMS_(15),                  \
572     .frac_size = R == 64 ? 63 : R,      \
573     .frac_shift = 0,                    \
574     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
575 
576 static const FloatFmt floatx80_params[3] = {
577     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
578     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
579     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
580 };
581 
582 /* Unpack a float to parts, but do not canonicalize.  */
583 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
584 {
585     const int f_size = fmt->frac_size;
586     const int e_size = fmt->exp_size;
587 
588     *r = (FloatParts64) {
589         .cls = float_class_unclassified,
590         .sign = extract64(raw, f_size + e_size, 1),
591         .exp = extract64(raw, f_size, e_size),
592         .frac = extract64(raw, 0, f_size)
593     };
594 }
595 
596 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
597 {
598     unpack_raw64(p, &float16_params, f);
599 }
600 
601 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
602 {
603     unpack_raw64(p, &bfloat16_params, f);
604 }
605 
606 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
607 {
608     unpack_raw64(p, &float32_params, f);
609 }
610 
611 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
612 {
613     unpack_raw64(p, &float64_params, f);
614 }
615 
616 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
617 {
618     *p = (FloatParts128) {
619         .cls = float_class_unclassified,
620         .sign = extract32(f.high, 15, 1),
621         .exp = extract32(f.high, 0, 15),
622         .frac_hi = f.low
623     };
624 }
625 
626 static void float128_unpack_raw(FloatParts128 *p, float128 f)
627 {
628     const int f_size = float128_params.frac_size - 64;
629     const int e_size = float128_params.exp_size;
630 
631     *p = (FloatParts128) {
632         .cls = float_class_unclassified,
633         .sign = extract64(f.high, f_size + e_size, 1),
634         .exp = extract64(f.high, f_size, e_size),
635         .frac_hi = extract64(f.high, 0, f_size),
636         .frac_lo = f.low,
637     };
638 }
639 
640 /* Pack a float from parts, but do not canonicalize.  */
641 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
642 {
643     const int f_size = fmt->frac_size;
644     const int e_size = fmt->exp_size;
645     uint64_t ret;
646 
647     ret = (uint64_t)p->sign << (f_size + e_size);
648     ret = deposit64(ret, f_size, e_size, p->exp);
649     ret = deposit64(ret, 0, f_size, p->frac);
650     return ret;
651 }
652 
653 static inline float16 float16_pack_raw(const FloatParts64 *p)
654 {
655     return make_float16(pack_raw64(p, &float16_params));
656 }
657 
658 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
659 {
660     return pack_raw64(p, &bfloat16_params);
661 }
662 
663 static inline float32 float32_pack_raw(const FloatParts64 *p)
664 {
665     return make_float32(pack_raw64(p, &float32_params));
666 }
667 
668 static inline float64 float64_pack_raw(const FloatParts64 *p)
669 {
670     return make_float64(pack_raw64(p, &float64_params));
671 }
672 
673 static float128 float128_pack_raw(const FloatParts128 *p)
674 {
675     const int f_size = float128_params.frac_size - 64;
676     const int e_size = float128_params.exp_size;
677     uint64_t hi;
678 
679     hi = (uint64_t)p->sign << (f_size + e_size);
680     hi = deposit64(hi, f_size, e_size, p->exp);
681     hi = deposit64(hi, 0, f_size, p->frac_hi);
682     return make_float128(hi, p->frac_lo);
683 }
684 
685 /*----------------------------------------------------------------------------
686 | Functions and definitions to determine:  (1) whether tininess for underflow
687 | is detected before or after rounding by default, (2) what (if anything)
688 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
689 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
690 | are propagated from function inputs to output.  These details are target-
691 | specific.
692 *----------------------------------------------------------------------------*/
693 #include "softfloat-specialize.c.inc"
694 
695 #define PARTS_GENERIC_64_128(NAME, P) \
696     _Generic((P), FloatParts64 *: parts64_##NAME, \
697                   FloatParts128 *: parts128_##NAME)
698 
699 #define PARTS_GENERIC_64_128_256(NAME, P) \
700     _Generic((P), FloatParts64 *: parts64_##NAME, \
701                   FloatParts128 *: parts128_##NAME, \
702                   FloatParts256 *: parts256_##NAME)
703 
704 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
705 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
706 
707 static void parts64_return_nan(FloatParts64 *a, float_status *s);
708 static void parts128_return_nan(FloatParts128 *a, float_status *s);
709 
710 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
711 
712 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
713                                       float_status *s);
714 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
715                                         float_status *s);
716 
717 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
718 
719 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
720                                              FloatParts64 *c, float_status *s,
721                                              int ab_mask, int abc_mask);
722 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
723                                                FloatParts128 *b,
724                                                FloatParts128 *c,
725                                                float_status *s,
726                                                int ab_mask, int abc_mask);
727 
728 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
729     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
730 
731 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
732                                  const FloatFmt *fmt);
733 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
734                                   const FloatFmt *fmt);
735 
736 #define parts_canonicalize(A, S, F) \
737     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
738 
739 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
740                                    const FloatFmt *fmt);
741 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
742                                     const FloatFmt *fmt);
743 
744 #define parts_uncanon_normal(A, S, F) \
745     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
746 
747 static void parts64_uncanon(FloatParts64 *p, float_status *status,
748                             const FloatFmt *fmt);
749 static void parts128_uncanon(FloatParts128 *p, float_status *status,
750                              const FloatFmt *fmt);
751 
752 #define parts_uncanon(A, S, F) \
753     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
754 
755 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
756 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
757 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
758 
759 #define parts_add_normal(A, B) \
760     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
761 
762 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
763 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
764 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
765 
766 #define parts_sub_normal(A, B) \
767     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
768 
769 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
770                                     float_status *s, bool subtract);
771 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
772                                       float_status *s, bool subtract);
773 
774 #define parts_addsub(A, B, S, Z) \
775     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
776 
777 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
778                                  float_status *s);
779 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
780                                    float_status *s);
781 
782 #define parts_mul(A, B, S) \
783     PARTS_GENERIC_64_128(mul, A)(A, B, S)
784 
785 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
786                                     FloatParts64 *c, int flags,
787                                     float_status *s);
788 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
789                                       FloatParts128 *c, int flags,
790                                       float_status *s);
791 
792 #define parts_muladd(A, B, C, Z, S) \
793     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
794 
795 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
796                                  float_status *s);
797 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
798                                    float_status *s);
799 
800 #define parts_div(A, B, S) \
801     PARTS_GENERIC_64_128(div, A)(A, B, S)
802 
803 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
804                                     uint64_t *mod_quot, float_status *s);
805 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
806                                       uint64_t *mod_quot, float_status *s);
807 
808 #define parts_modrem(A, B, Q, S) \
809     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
810 
811 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
812 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
813 
814 #define parts_sqrt(A, S, F) \
815     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
816 
817 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
818                                         int scale, int frac_size);
819 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
820                                          int scale, int frac_size);
821 
822 #define parts_round_to_int_normal(A, R, C, F) \
823     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
824 
825 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
826                                  int scale, float_status *s,
827                                  const FloatFmt *fmt);
828 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
829                                   int scale, float_status *s,
830                                   const FloatFmt *fmt);
831 
832 #define parts_round_to_int(A, R, C, S, F) \
833     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
834 
835 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
836                                      int scale, int64_t min, int64_t max,
837                                      float_status *s);
838 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
839                                      int scale, int64_t min, int64_t max,
840                                      float_status *s);
841 
842 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
843     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
844 
845 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
846                                       int scale, uint64_t max,
847                                       float_status *s);
848 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
849                                        int scale, uint64_t max,
850                                        float_status *s);
851 
852 #define parts_float_to_uint(P, R, Z, M, S) \
853     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
854 
855 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
856                                   int scale, float_status *s);
857 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
858                                    int scale, float_status *s);
859 
860 #define parts_sint_to_float(P, I, Z, S) \
861     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
862 
863 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
864                                   int scale, float_status *s);
865 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
866                                    int scale, float_status *s);
867 
868 #define parts_uint_to_float(P, I, Z, S) \
869     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
870 
871 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
872                                     float_status *s, int flags);
873 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
874                                       float_status *s, int flags);
875 
876 #define parts_minmax(A, B, S, F) \
877     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
878 
879 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
880                                      float_status *s, bool q);
881 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
882                                       float_status *s, bool q);
883 
884 #define parts_compare(A, B, S, Q) \
885     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
886 
887 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
888 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
889 
890 #define parts_scalbn(A, N, S) \
891     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
892 
893 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
894 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
895 
896 #define parts_log2(A, S, F) \
897     PARTS_GENERIC_64_128(log2, A)(A, S, F)
898 
899 /*
900  * Helper functions for softfloat-parts.c.inc, per-size operations.
901  */
902 
903 #define FRAC_GENERIC_64_128(NAME, P) \
904     _Generic((P), FloatParts64 *: frac64_##NAME, \
905                   FloatParts128 *: frac128_##NAME)
906 
907 #define FRAC_GENERIC_64_128_256(NAME, P) \
908     _Generic((P), FloatParts64 *: frac64_##NAME, \
909                   FloatParts128 *: frac128_##NAME, \
910                   FloatParts256 *: frac256_##NAME)
911 
912 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
913 {
914     return uadd64_overflow(a->frac, b->frac, &r->frac);
915 }
916 
917 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
918 {
919     bool c = 0;
920     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
921     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
922     return c;
923 }
924 
925 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
926 {
927     bool c = 0;
928     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
929     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
930     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
931     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
932     return c;
933 }
934 
935 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
936 
937 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
938 {
939     return uadd64_overflow(a->frac, c, &r->frac);
940 }
941 
942 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
943 {
944     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
945     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
946 }
947 
948 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
949 
950 static void frac64_allones(FloatParts64 *a)
951 {
952     a->frac = -1;
953 }
954 
955 static void frac128_allones(FloatParts128 *a)
956 {
957     a->frac_hi = a->frac_lo = -1;
958 }
959 
960 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
961 
962 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
963 {
964     return (a->frac == b->frac ? float_relation_equal
965             : a->frac < b->frac ? float_relation_less
966             : float_relation_greater);
967 }
968 
969 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
970 {
971     uint64_t ta = a->frac_hi, tb = b->frac_hi;
972     if (ta == tb) {
973         ta = a->frac_lo, tb = b->frac_lo;
974         if (ta == tb) {
975             return float_relation_equal;
976         }
977     }
978     return ta < tb ? float_relation_less : float_relation_greater;
979 }
980 
981 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
982 
983 static void frac64_clear(FloatParts64 *a)
984 {
985     a->frac = 0;
986 }
987 
988 static void frac128_clear(FloatParts128 *a)
989 {
990     a->frac_hi = a->frac_lo = 0;
991 }
992 
993 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
994 
995 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
996 {
997     uint64_t n1, n0, r, q;
998     bool ret;
999 
1000     /*
1001      * We want a 2*N / N-bit division to produce exactly an N-bit
1002      * result, so that we do not lose any precision and so that we
1003      * do not have to renormalize afterward.  If A.frac < B.frac,
1004      * then division would produce an (N-1)-bit result; shift A left
1005      * by one to produce the an N-bit result, and return true to
1006      * decrement the exponent to match.
1007      *
1008      * The udiv_qrnnd algorithm that we're using requires normalization,
1009      * i.e. the msb of the denominator must be set, which is already true.
1010      */
1011     ret = a->frac < b->frac;
1012     if (ret) {
1013         n0 = a->frac;
1014         n1 = 0;
1015     } else {
1016         n0 = a->frac >> 1;
1017         n1 = a->frac << 63;
1018     }
1019     q = udiv_qrnnd(&r, n0, n1, b->frac);
1020 
1021     /* Set lsb if there is a remainder, to set inexact. */
1022     a->frac = q | (r != 0);
1023 
1024     return ret;
1025 }
1026 
1027 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1028 {
1029     uint64_t q0, q1, a0, a1, b0, b1;
1030     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1031     bool ret = false;
1032 
1033     a0 = a->frac_hi, a1 = a->frac_lo;
1034     b0 = b->frac_hi, b1 = b->frac_lo;
1035 
1036     ret = lt128(a0, a1, b0, b1);
1037     if (!ret) {
1038         a1 = shr_double(a0, a1, 1);
1039         a0 = a0 >> 1;
1040     }
1041 
1042     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1043     q0 = estimateDiv128To64(a0, a1, b0);
1044 
1045     /*
1046      * Estimate is high because B1 was not included (unless B1 == 0).
1047      * Reduce quotient and increase remainder until remainder is non-negative.
1048      * This loop will execute 0 to 2 times.
1049      */
1050     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1051     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1052     while (r0 != 0) {
1053         q0--;
1054         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1055     }
1056 
1057     /* Repeat using the remainder, producing a second word of quotient. */
1058     q1 = estimateDiv128To64(r1, r2, b0);
1059     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1060     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1061     while (r1 != 0) {
1062         q1--;
1063         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1064     }
1065 
1066     /* Any remainder indicates inexact; set sticky bit. */
1067     q1 |= (r2 | r3) != 0;
1068 
1069     a->frac_hi = q0;
1070     a->frac_lo = q1;
1071     return ret;
1072 }
1073 
1074 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1075 
1076 static bool frac64_eqz(FloatParts64 *a)
1077 {
1078     return a->frac == 0;
1079 }
1080 
1081 static bool frac128_eqz(FloatParts128 *a)
1082 {
1083     return (a->frac_hi | a->frac_lo) == 0;
1084 }
1085 
1086 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1087 
1088 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1089 {
1090     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1091 }
1092 
1093 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1094 {
1095     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1096                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1097 }
1098 
1099 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1100 
1101 static void frac64_neg(FloatParts64 *a)
1102 {
1103     a->frac = -a->frac;
1104 }
1105 
1106 static void frac128_neg(FloatParts128 *a)
1107 {
1108     bool c = 0;
1109     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1110     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1111 }
1112 
1113 static void frac256_neg(FloatParts256 *a)
1114 {
1115     bool c = 0;
1116     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1117     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1118     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1119     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1120 }
1121 
1122 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1123 
1124 static int frac64_normalize(FloatParts64 *a)
1125 {
1126     if (a->frac) {
1127         int shift = clz64(a->frac);
1128         a->frac <<= shift;
1129         return shift;
1130     }
1131     return 64;
1132 }
1133 
1134 static int frac128_normalize(FloatParts128 *a)
1135 {
1136     if (a->frac_hi) {
1137         int shl = clz64(a->frac_hi);
1138         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1139         a->frac_lo <<= shl;
1140         return shl;
1141     } else if (a->frac_lo) {
1142         int shl = clz64(a->frac_lo);
1143         a->frac_hi = a->frac_lo << shl;
1144         a->frac_lo = 0;
1145         return shl + 64;
1146     }
1147     return 128;
1148 }
1149 
1150 static int frac256_normalize(FloatParts256 *a)
1151 {
1152     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1153     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1154     int ret, shl;
1155 
1156     if (likely(a0)) {
1157         shl = clz64(a0);
1158         if (shl == 0) {
1159             return 0;
1160         }
1161         ret = shl;
1162     } else {
1163         if (a1) {
1164             ret = 64;
1165             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1166         } else if (a2) {
1167             ret = 128;
1168             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1169         } else if (a3) {
1170             ret = 192;
1171             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1172         } else {
1173             ret = 256;
1174             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1175             goto done;
1176         }
1177         shl = clz64(a0);
1178         if (shl == 0) {
1179             goto done;
1180         }
1181         ret += shl;
1182     }
1183 
1184     a0 = shl_double(a0, a1, shl);
1185     a1 = shl_double(a1, a2, shl);
1186     a2 = shl_double(a2, a3, shl);
1187     a3 <<= shl;
1188 
1189  done:
1190     a->frac_hi = a0;
1191     a->frac_hm = a1;
1192     a->frac_lm = a2;
1193     a->frac_lo = a3;
1194     return ret;
1195 }
1196 
1197 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1198 
1199 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1200 {
1201     uint64_t a0, a1, b0, t0, t1, q, quot;
1202     int exp_diff = a->exp - b->exp;
1203     int shift;
1204 
1205     a0 = a->frac;
1206     a1 = 0;
1207 
1208     if (exp_diff < -1) {
1209         if (mod_quot) {
1210             *mod_quot = 0;
1211         }
1212         return;
1213     }
1214     if (exp_diff == -1) {
1215         a0 >>= 1;
1216         exp_diff = 0;
1217     }
1218 
1219     b0 = b->frac;
1220     quot = q = b0 <= a0;
1221     if (q) {
1222         a0 -= b0;
1223     }
1224 
1225     exp_diff -= 64;
1226     while (exp_diff > 0) {
1227         q = estimateDiv128To64(a0, a1, b0);
1228         q = q > 2 ? q - 2 : 0;
1229         mul64To128(b0, q, &t0, &t1);
1230         sub128(a0, a1, t0, t1, &a0, &a1);
1231         shortShift128Left(a0, a1, 62, &a0, &a1);
1232         exp_diff -= 62;
1233         quot = (quot << 62) + q;
1234     }
1235 
1236     exp_diff += 64;
1237     if (exp_diff > 0) {
1238         q = estimateDiv128To64(a0, a1, b0);
1239         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1240         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1241         sub128(a0, a1, t0, t1, &a0, &a1);
1242         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1243         while (le128(t0, t1, a0, a1)) {
1244             ++q;
1245             sub128(a0, a1, t0, t1, &a0, &a1);
1246         }
1247         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1248     } else {
1249         t0 = b0;
1250         t1 = 0;
1251     }
1252 
1253     if (mod_quot) {
1254         *mod_quot = quot;
1255     } else {
1256         sub128(t0, t1, a0, a1, &t0, &t1);
1257         if (lt128(t0, t1, a0, a1) ||
1258             (eq128(t0, t1, a0, a1) && (q & 1))) {
1259             a0 = t0;
1260             a1 = t1;
1261             a->sign = !a->sign;
1262         }
1263     }
1264 
1265     if (likely(a0)) {
1266         shift = clz64(a0);
1267         shortShift128Left(a0, a1, shift, &a0, &a1);
1268     } else if (likely(a1)) {
1269         shift = clz64(a1);
1270         a0 = a1 << shift;
1271         a1 = 0;
1272         shift += 64;
1273     } else {
1274         a->cls = float_class_zero;
1275         return;
1276     }
1277 
1278     a->exp = b->exp + exp_diff - shift;
1279     a->frac = a0 | (a1 != 0);
1280 }
1281 
1282 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1283                            uint64_t *mod_quot)
1284 {
1285     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1286     int exp_diff = a->exp - b->exp;
1287     int shift;
1288 
1289     a0 = a->frac_hi;
1290     a1 = a->frac_lo;
1291     a2 = 0;
1292 
1293     if (exp_diff < -1) {
1294         if (mod_quot) {
1295             *mod_quot = 0;
1296         }
1297         return;
1298     }
1299     if (exp_diff == -1) {
1300         shift128Right(a0, a1, 1, &a0, &a1);
1301         exp_diff = 0;
1302     }
1303 
1304     b0 = b->frac_hi;
1305     b1 = b->frac_lo;
1306 
1307     quot = q = le128(b0, b1, a0, a1);
1308     if (q) {
1309         sub128(a0, a1, b0, b1, &a0, &a1);
1310     }
1311 
1312     exp_diff -= 64;
1313     while (exp_diff > 0) {
1314         q = estimateDiv128To64(a0, a1, b0);
1315         q = q > 4 ? q - 4 : 0;
1316         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1317         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1318         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1319         exp_diff -= 61;
1320         quot = (quot << 61) + q;
1321     }
1322 
1323     exp_diff += 64;
1324     if (exp_diff > 0) {
1325         q = estimateDiv128To64(a0, a1, b0);
1326         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1327         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1328         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1329         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1330         while (le192(t0, t1, t2, a0, a1, a2)) {
1331             ++q;
1332             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1333         }
1334         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1335     } else {
1336         t0 = b0;
1337         t1 = b1;
1338         t2 = 0;
1339     }
1340 
1341     if (mod_quot) {
1342         *mod_quot = quot;
1343     } else {
1344         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1345         if (lt192(t0, t1, t2, a0, a1, a2) ||
1346             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1347             a0 = t0;
1348             a1 = t1;
1349             a2 = t2;
1350             a->sign = !a->sign;
1351         }
1352     }
1353 
1354     if (likely(a0)) {
1355         shift = clz64(a0);
1356         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1357     } else if (likely(a1)) {
1358         shift = clz64(a1);
1359         shortShift128Left(a1, a2, shift, &a0, &a1);
1360         a2 = 0;
1361         shift += 64;
1362     } else if (likely(a2)) {
1363         shift = clz64(a2);
1364         a0 = a2 << shift;
1365         a1 = a2 = 0;
1366         shift += 128;
1367     } else {
1368         a->cls = float_class_zero;
1369         return;
1370     }
1371 
1372     a->exp = b->exp + exp_diff - shift;
1373     a->frac_hi = a0;
1374     a->frac_lo = a1 | (a2 != 0);
1375 }
1376 
1377 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1378 
1379 static void frac64_shl(FloatParts64 *a, int c)
1380 {
1381     a->frac <<= c;
1382 }
1383 
1384 static void frac128_shl(FloatParts128 *a, int c)
1385 {
1386     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1387 
1388     if (c & 64) {
1389         a0 = a1, a1 = 0;
1390     }
1391 
1392     c &= 63;
1393     if (c) {
1394         a0 = shl_double(a0, a1, c);
1395         a1 = a1 << c;
1396     }
1397 
1398     a->frac_hi = a0;
1399     a->frac_lo = a1;
1400 }
1401 
1402 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1403 
1404 static void frac64_shr(FloatParts64 *a, int c)
1405 {
1406     a->frac >>= c;
1407 }
1408 
1409 static void frac128_shr(FloatParts128 *a, int c)
1410 {
1411     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1412 
1413     if (c & 64) {
1414         a1 = a0, a0 = 0;
1415     }
1416 
1417     c &= 63;
1418     if (c) {
1419         a1 = shr_double(a0, a1, c);
1420         a0 = a0 >> c;
1421     }
1422 
1423     a->frac_hi = a0;
1424     a->frac_lo = a1;
1425 }
1426 
1427 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1428 
1429 static void frac64_shrjam(FloatParts64 *a, int c)
1430 {
1431     uint64_t a0 = a->frac;
1432 
1433     if (likely(c != 0)) {
1434         if (likely(c < 64)) {
1435             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1436         } else {
1437             a0 = a0 != 0;
1438         }
1439         a->frac = a0;
1440     }
1441 }
1442 
1443 static void frac128_shrjam(FloatParts128 *a, int c)
1444 {
1445     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1446     uint64_t sticky = 0;
1447 
1448     if (unlikely(c == 0)) {
1449         return;
1450     } else if (likely(c < 64)) {
1451         /* nothing */
1452     } else if (likely(c < 128)) {
1453         sticky = a1;
1454         a1 = a0;
1455         a0 = 0;
1456         c &= 63;
1457         if (c == 0) {
1458             goto done;
1459         }
1460     } else {
1461         sticky = a0 | a1;
1462         a0 = a1 = 0;
1463         goto done;
1464     }
1465 
1466     sticky |= shr_double(a1, 0, c);
1467     a1 = shr_double(a0, a1, c);
1468     a0 = a0 >> c;
1469 
1470  done:
1471     a->frac_lo = a1 | (sticky != 0);
1472     a->frac_hi = a0;
1473 }
1474 
1475 static void frac256_shrjam(FloatParts256 *a, int c)
1476 {
1477     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1478     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1479     uint64_t sticky = 0;
1480 
1481     if (unlikely(c == 0)) {
1482         return;
1483     } else if (likely(c < 64)) {
1484         /* nothing */
1485     } else if (likely(c < 256)) {
1486         if (unlikely(c & 128)) {
1487             sticky |= a2 | a3;
1488             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1489         }
1490         if (unlikely(c & 64)) {
1491             sticky |= a3;
1492             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1493         }
1494         c &= 63;
1495         if (c == 0) {
1496             goto done;
1497         }
1498     } else {
1499         sticky = a0 | a1 | a2 | a3;
1500         a0 = a1 = a2 = a3 = 0;
1501         goto done;
1502     }
1503 
1504     sticky |= shr_double(a3, 0, c);
1505     a3 = shr_double(a2, a3, c);
1506     a2 = shr_double(a1, a2, c);
1507     a1 = shr_double(a0, a1, c);
1508     a0 = a0 >> c;
1509 
1510  done:
1511     a->frac_lo = a3 | (sticky != 0);
1512     a->frac_lm = a2;
1513     a->frac_hm = a1;
1514     a->frac_hi = a0;
1515 }
1516 
1517 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1518 
1519 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1520 {
1521     return usub64_overflow(a->frac, b->frac, &r->frac);
1522 }
1523 
1524 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1525 {
1526     bool c = 0;
1527     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1528     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1529     return c;
1530 }
1531 
1532 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1533 {
1534     bool c = 0;
1535     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1536     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1537     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1538     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1539     return c;
1540 }
1541 
1542 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1543 
1544 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1545 {
1546     r->frac = a->frac_hi | (a->frac_lo != 0);
1547 }
1548 
1549 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1550 {
1551     r->frac_hi = a->frac_hi;
1552     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1553 }
1554 
1555 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1556 
1557 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1558 {
1559     r->frac_hi = a->frac;
1560     r->frac_lo = 0;
1561 }
1562 
1563 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1564 {
1565     r->frac_hi = a->frac_hi;
1566     r->frac_hm = a->frac_lo;
1567     r->frac_lm = 0;
1568     r->frac_lo = 0;
1569 }
1570 
1571 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1572 
1573 /*
1574  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1575  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1576  * and thus MIT licenced.
1577  */
1578 static const uint16_t rsqrt_tab[128] = {
1579     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1580     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1581     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1582     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1583     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1584     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1585     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1586     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1587     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1588     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1589     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1590     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1591     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1592     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1593     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1594     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1595 };
1596 
1597 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1598 #define FloatPartsN    glue(FloatParts,N)
1599 #define FloatPartsW    glue(FloatParts,W)
1600 
1601 #define N 64
1602 #define W 128
1603 
1604 #include "softfloat-parts-addsub.c.inc"
1605 #include "softfloat-parts.c.inc"
1606 
1607 #undef  N
1608 #undef  W
1609 #define N 128
1610 #define W 256
1611 
1612 #include "softfloat-parts-addsub.c.inc"
1613 #include "softfloat-parts.c.inc"
1614 
1615 #undef  N
1616 #undef  W
1617 #define N            256
1618 
1619 #include "softfloat-parts-addsub.c.inc"
1620 
1621 #undef  N
1622 #undef  W
1623 #undef  partsN
1624 #undef  FloatPartsN
1625 #undef  FloatPartsW
1626 
1627 /*
1628  * Pack/unpack routines with a specific FloatFmt.
1629  */
1630 
1631 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1632                                       float_status *s, const FloatFmt *params)
1633 {
1634     float16_unpack_raw(p, f);
1635     parts_canonicalize(p, s, params);
1636 }
1637 
1638 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1639                                      float_status *s)
1640 {
1641     float16a_unpack_canonical(p, f, s, &float16_params);
1642 }
1643 
1644 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1645                                       float_status *s)
1646 {
1647     bfloat16_unpack_raw(p, f);
1648     parts_canonicalize(p, s, &bfloat16_params);
1649 }
1650 
1651 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1652                                              float_status *s,
1653                                              const FloatFmt *params)
1654 {
1655     parts_uncanon(p, s, params);
1656     return float16_pack_raw(p);
1657 }
1658 
1659 static float16 float16_round_pack_canonical(FloatParts64 *p,
1660                                             float_status *s)
1661 {
1662     return float16a_round_pack_canonical(p, s, &float16_params);
1663 }
1664 
1665 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1666                                               float_status *s)
1667 {
1668     parts_uncanon(p, s, &bfloat16_params);
1669     return bfloat16_pack_raw(p);
1670 }
1671 
1672 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1673                                      float_status *s)
1674 {
1675     float32_unpack_raw(p, f);
1676     parts_canonicalize(p, s, &float32_params);
1677 }
1678 
1679 static float32 float32_round_pack_canonical(FloatParts64 *p,
1680                                             float_status *s)
1681 {
1682     parts_uncanon(p, s, &float32_params);
1683     return float32_pack_raw(p);
1684 }
1685 
1686 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1687                                      float_status *s)
1688 {
1689     float64_unpack_raw(p, f);
1690     parts_canonicalize(p, s, &float64_params);
1691 }
1692 
1693 static float64 float64_round_pack_canonical(FloatParts64 *p,
1694                                             float_status *s)
1695 {
1696     parts_uncanon(p, s, &float64_params);
1697     return float64_pack_raw(p);
1698 }
1699 
1700 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1701                                                float_status *s)
1702 {
1703     parts_uncanon(p, s, &float32_params);
1704 
1705     /*
1706      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1707      * We need to adjust the fraction higher so that the least N bits are
1708      * zero, and the fraction is adjacent to the float64 implicit bit.
1709      */
1710     switch (p->cls) {
1711     case float_class_normal:
1712         if (unlikely(p->exp == 0)) {
1713             /*
1714              * The result is denormal for float32, but can be represented
1715              * in normalized form for float64.  Adjust, per canonicalize.
1716              */
1717             int shift = frac_normalize(p);
1718             p->exp = (float32_params.frac_shift -
1719                       float32_params.exp_bias - shift + 1 +
1720                       float64_params.exp_bias);
1721             frac_shr(p, float64_params.frac_shift);
1722         } else {
1723             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1724             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1725         }
1726         break;
1727     case float_class_snan:
1728     case float_class_qnan:
1729         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1730         p->exp = float64_params.exp_max;
1731         break;
1732     case float_class_inf:
1733         p->exp = float64_params.exp_max;
1734         break;
1735     case float_class_zero:
1736         break;
1737     default:
1738         g_assert_not_reached();
1739     }
1740 
1741     return float64_pack_raw(p);
1742 }
1743 
1744 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1745                                       float_status *s)
1746 {
1747     float128_unpack_raw(p, f);
1748     parts_canonicalize(p, s, &float128_params);
1749 }
1750 
1751 static float128 float128_round_pack_canonical(FloatParts128 *p,
1752                                               float_status *s)
1753 {
1754     parts_uncanon(p, s, &float128_params);
1755     return float128_pack_raw(p);
1756 }
1757 
1758 /* Returns false if the encoding is invalid. */
1759 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1760                                       float_status *s)
1761 {
1762     /* Ensure rounding precision is set before beginning. */
1763     switch (s->floatx80_rounding_precision) {
1764     case floatx80_precision_x:
1765     case floatx80_precision_d:
1766     case floatx80_precision_s:
1767         break;
1768     default:
1769         g_assert_not_reached();
1770     }
1771 
1772     if (unlikely(floatx80_invalid_encoding(f))) {
1773         float_raise(float_flag_invalid, s);
1774         return false;
1775     }
1776 
1777     floatx80_unpack_raw(p, f);
1778 
1779     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1780         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1781     } else {
1782         /* The explicit integer bit is ignored, after invalid checks. */
1783         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1784         p->cls = (p->frac_hi == 0 ? float_class_inf
1785                   : parts_is_snan_frac(p->frac_hi, s)
1786                   ? float_class_snan : float_class_qnan);
1787     }
1788     return true;
1789 }
1790 
1791 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1792                                               float_status *s)
1793 {
1794     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1795     uint64_t frac;
1796     int exp;
1797 
1798     switch (p->cls) {
1799     case float_class_normal:
1800         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1801             parts_uncanon_normal(p, s, fmt);
1802             frac = p->frac_hi;
1803             exp = p->exp;
1804         } else {
1805             FloatParts64 p64;
1806 
1807             p64.sign = p->sign;
1808             p64.exp = p->exp;
1809             frac_truncjam(&p64, p);
1810             parts_uncanon_normal(&p64, s, fmt);
1811             frac = p64.frac;
1812             exp = p64.exp;
1813         }
1814         if (exp != fmt->exp_max) {
1815             break;
1816         }
1817         /* rounded to inf -- fall through to set frac correctly */
1818 
1819     case float_class_inf:
1820         /* x86 and m68k differ in the setting of the integer bit. */
1821         frac = floatx80_infinity_low;
1822         exp = fmt->exp_max;
1823         break;
1824 
1825     case float_class_zero:
1826         frac = 0;
1827         exp = 0;
1828         break;
1829 
1830     case float_class_snan:
1831     case float_class_qnan:
1832         /* NaNs have the integer bit set. */
1833         frac = p->frac_hi | (1ull << 63);
1834         exp = fmt->exp_max;
1835         break;
1836 
1837     default:
1838         g_assert_not_reached();
1839     }
1840 
1841     return packFloatx80(p->sign, exp, frac);
1842 }
1843 
1844 /*
1845  * Addition and subtraction
1846  */
1847 
1848 static float16 QEMU_FLATTEN
1849 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1850 {
1851     FloatParts64 pa, pb, *pr;
1852 
1853     float16_unpack_canonical(&pa, a, status);
1854     float16_unpack_canonical(&pb, b, status);
1855     pr = parts_addsub(&pa, &pb, status, subtract);
1856 
1857     return float16_round_pack_canonical(pr, status);
1858 }
1859 
1860 float16 float16_add(float16 a, float16 b, float_status *status)
1861 {
1862     return float16_addsub(a, b, status, false);
1863 }
1864 
1865 float16 float16_sub(float16 a, float16 b, float_status *status)
1866 {
1867     return float16_addsub(a, b, status, true);
1868 }
1869 
1870 static float32 QEMU_SOFTFLOAT_ATTR
1871 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1872 {
1873     FloatParts64 pa, pb, *pr;
1874 
1875     float32_unpack_canonical(&pa, a, status);
1876     float32_unpack_canonical(&pb, b, status);
1877     pr = parts_addsub(&pa, &pb, status, subtract);
1878 
1879     return float32_round_pack_canonical(pr, status);
1880 }
1881 
1882 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1883 {
1884     return soft_f32_addsub(a, b, status, false);
1885 }
1886 
1887 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1888 {
1889     return soft_f32_addsub(a, b, status, true);
1890 }
1891 
1892 static float64 QEMU_SOFTFLOAT_ATTR
1893 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1894 {
1895     FloatParts64 pa, pb, *pr;
1896 
1897     float64_unpack_canonical(&pa, a, status);
1898     float64_unpack_canonical(&pb, b, status);
1899     pr = parts_addsub(&pa, &pb, status, subtract);
1900 
1901     return float64_round_pack_canonical(pr, status);
1902 }
1903 
1904 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1905 {
1906     return soft_f64_addsub(a, b, status, false);
1907 }
1908 
1909 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1910 {
1911     return soft_f64_addsub(a, b, status, true);
1912 }
1913 
1914 static float hard_f32_add(float a, float b)
1915 {
1916     return a + b;
1917 }
1918 
1919 static float hard_f32_sub(float a, float b)
1920 {
1921     return a - b;
1922 }
1923 
1924 static double hard_f64_add(double a, double b)
1925 {
1926     return a + b;
1927 }
1928 
1929 static double hard_f64_sub(double a, double b)
1930 {
1931     return a - b;
1932 }
1933 
1934 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1935 {
1936     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1937         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1938     }
1939     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1940 }
1941 
1942 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1943 {
1944     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1945         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1946     } else {
1947         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1948     }
1949 }
1950 
1951 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1952                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1953 {
1954     return float32_gen2(a, b, s, hard, soft,
1955                         f32_is_zon2, f32_addsubmul_post);
1956 }
1957 
1958 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1959                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1960 {
1961     return float64_gen2(a, b, s, hard, soft,
1962                         f64_is_zon2, f64_addsubmul_post);
1963 }
1964 
1965 float32 QEMU_FLATTEN
1966 float32_add(float32 a, float32 b, float_status *s)
1967 {
1968     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1969 }
1970 
1971 float32 QEMU_FLATTEN
1972 float32_sub(float32 a, float32 b, float_status *s)
1973 {
1974     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1975 }
1976 
1977 float64 QEMU_FLATTEN
1978 float64_add(float64 a, float64 b, float_status *s)
1979 {
1980     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1981 }
1982 
1983 float64 QEMU_FLATTEN
1984 float64_sub(float64 a, float64 b, float_status *s)
1985 {
1986     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1987 }
1988 
1989 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1990                                  bool subtract)
1991 {
1992     FloatParts64 pa, pb, *pr;
1993 
1994     float64_unpack_canonical(&pa, a, status);
1995     float64_unpack_canonical(&pb, b, status);
1996     pr = parts_addsub(&pa, &pb, status, subtract);
1997 
1998     return float64r32_round_pack_canonical(pr, status);
1999 }
2000 
2001 float64 float64r32_add(float64 a, float64 b, float_status *status)
2002 {
2003     return float64r32_addsub(a, b, status, false);
2004 }
2005 
2006 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2007 {
2008     return float64r32_addsub(a, b, status, true);
2009 }
2010 
2011 static bfloat16 QEMU_FLATTEN
2012 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2013 {
2014     FloatParts64 pa, pb, *pr;
2015 
2016     bfloat16_unpack_canonical(&pa, a, status);
2017     bfloat16_unpack_canonical(&pb, b, status);
2018     pr = parts_addsub(&pa, &pb, status, subtract);
2019 
2020     return bfloat16_round_pack_canonical(pr, status);
2021 }
2022 
2023 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2024 {
2025     return bfloat16_addsub(a, b, status, false);
2026 }
2027 
2028 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2029 {
2030     return bfloat16_addsub(a, b, status, true);
2031 }
2032 
2033 static float128 QEMU_FLATTEN
2034 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2035 {
2036     FloatParts128 pa, pb, *pr;
2037 
2038     float128_unpack_canonical(&pa, a, status);
2039     float128_unpack_canonical(&pb, b, status);
2040     pr = parts_addsub(&pa, &pb, status, subtract);
2041 
2042     return float128_round_pack_canonical(pr, status);
2043 }
2044 
2045 float128 float128_add(float128 a, float128 b, float_status *status)
2046 {
2047     return float128_addsub(a, b, status, false);
2048 }
2049 
2050 float128 float128_sub(float128 a, float128 b, float_status *status)
2051 {
2052     return float128_addsub(a, b, status, true);
2053 }
2054 
2055 static floatx80 QEMU_FLATTEN
2056 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2057 {
2058     FloatParts128 pa, pb, *pr;
2059 
2060     if (!floatx80_unpack_canonical(&pa, a, status) ||
2061         !floatx80_unpack_canonical(&pb, b, status)) {
2062         return floatx80_default_nan(status);
2063     }
2064 
2065     pr = parts_addsub(&pa, &pb, status, subtract);
2066     return floatx80_round_pack_canonical(pr, status);
2067 }
2068 
2069 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2070 {
2071     return floatx80_addsub(a, b, status, false);
2072 }
2073 
2074 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2075 {
2076     return floatx80_addsub(a, b, status, true);
2077 }
2078 
2079 /*
2080  * Multiplication
2081  */
2082 
2083 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2084 {
2085     FloatParts64 pa, pb, *pr;
2086 
2087     float16_unpack_canonical(&pa, a, status);
2088     float16_unpack_canonical(&pb, b, status);
2089     pr = parts_mul(&pa, &pb, status);
2090 
2091     return float16_round_pack_canonical(pr, status);
2092 }
2093 
2094 static float32 QEMU_SOFTFLOAT_ATTR
2095 soft_f32_mul(float32 a, float32 b, float_status *status)
2096 {
2097     FloatParts64 pa, pb, *pr;
2098 
2099     float32_unpack_canonical(&pa, a, status);
2100     float32_unpack_canonical(&pb, b, status);
2101     pr = parts_mul(&pa, &pb, status);
2102 
2103     return float32_round_pack_canonical(pr, status);
2104 }
2105 
2106 static float64 QEMU_SOFTFLOAT_ATTR
2107 soft_f64_mul(float64 a, float64 b, float_status *status)
2108 {
2109     FloatParts64 pa, pb, *pr;
2110 
2111     float64_unpack_canonical(&pa, a, status);
2112     float64_unpack_canonical(&pb, b, status);
2113     pr = parts_mul(&pa, &pb, status);
2114 
2115     return float64_round_pack_canonical(pr, status);
2116 }
2117 
2118 static float hard_f32_mul(float a, float b)
2119 {
2120     return a * b;
2121 }
2122 
2123 static double hard_f64_mul(double a, double b)
2124 {
2125     return a * b;
2126 }
2127 
2128 float32 QEMU_FLATTEN
2129 float32_mul(float32 a, float32 b, float_status *s)
2130 {
2131     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2132                         f32_is_zon2, f32_addsubmul_post);
2133 }
2134 
2135 float64 QEMU_FLATTEN
2136 float64_mul(float64 a, float64 b, float_status *s)
2137 {
2138     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2139                         f64_is_zon2, f64_addsubmul_post);
2140 }
2141 
2142 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2143 {
2144     FloatParts64 pa, pb, *pr;
2145 
2146     float64_unpack_canonical(&pa, a, status);
2147     float64_unpack_canonical(&pb, b, status);
2148     pr = parts_mul(&pa, &pb, status);
2149 
2150     return float64r32_round_pack_canonical(pr, status);
2151 }
2152 
2153 bfloat16 QEMU_FLATTEN
2154 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2155 {
2156     FloatParts64 pa, pb, *pr;
2157 
2158     bfloat16_unpack_canonical(&pa, a, status);
2159     bfloat16_unpack_canonical(&pb, b, status);
2160     pr = parts_mul(&pa, &pb, status);
2161 
2162     return bfloat16_round_pack_canonical(pr, status);
2163 }
2164 
2165 float128 QEMU_FLATTEN
2166 float128_mul(float128 a, float128 b, float_status *status)
2167 {
2168     FloatParts128 pa, pb, *pr;
2169 
2170     float128_unpack_canonical(&pa, a, status);
2171     float128_unpack_canonical(&pb, b, status);
2172     pr = parts_mul(&pa, &pb, status);
2173 
2174     return float128_round_pack_canonical(pr, status);
2175 }
2176 
2177 floatx80 QEMU_FLATTEN
2178 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2179 {
2180     FloatParts128 pa, pb, *pr;
2181 
2182     if (!floatx80_unpack_canonical(&pa, a, status) ||
2183         !floatx80_unpack_canonical(&pb, b, status)) {
2184         return floatx80_default_nan(status);
2185     }
2186 
2187     pr = parts_mul(&pa, &pb, status);
2188     return floatx80_round_pack_canonical(pr, status);
2189 }
2190 
2191 /*
2192  * Fused multiply-add
2193  */
2194 
2195 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2196                                     int flags, float_status *status)
2197 {
2198     FloatParts64 pa, pb, pc, *pr;
2199 
2200     float16_unpack_canonical(&pa, a, status);
2201     float16_unpack_canonical(&pb, b, status);
2202     float16_unpack_canonical(&pc, c, status);
2203     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2204 
2205     return float16_round_pack_canonical(pr, status);
2206 }
2207 
2208 static float32 QEMU_SOFTFLOAT_ATTR
2209 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2210                 float_status *status)
2211 {
2212     FloatParts64 pa, pb, pc, *pr;
2213 
2214     float32_unpack_canonical(&pa, a, status);
2215     float32_unpack_canonical(&pb, b, status);
2216     float32_unpack_canonical(&pc, c, status);
2217     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2218 
2219     return float32_round_pack_canonical(pr, status);
2220 }
2221 
2222 static float64 QEMU_SOFTFLOAT_ATTR
2223 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2224                 float_status *status)
2225 {
2226     FloatParts64 pa, pb, pc, *pr;
2227 
2228     float64_unpack_canonical(&pa, a, status);
2229     float64_unpack_canonical(&pb, b, status);
2230     float64_unpack_canonical(&pc, c, status);
2231     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2232 
2233     return float64_round_pack_canonical(pr, status);
2234 }
2235 
2236 static bool force_soft_fma;
2237 
2238 float32 QEMU_FLATTEN
2239 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2240 {
2241     union_float32 ua, ub, uc, ur;
2242 
2243     ua.s = xa;
2244     ub.s = xb;
2245     uc.s = xc;
2246 
2247     if (unlikely(!can_use_fpu(s))) {
2248         goto soft;
2249     }
2250     if (unlikely(flags & float_muladd_halve_result)) {
2251         goto soft;
2252     }
2253 
2254     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2255     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2256         goto soft;
2257     }
2258 
2259     if (unlikely(force_soft_fma)) {
2260         goto soft;
2261     }
2262 
2263     /*
2264      * When (a || b) == 0, there's no need to check for under/over flow,
2265      * since we know the addend is (normal || 0) and the product is 0.
2266      */
2267     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2268         union_float32 up;
2269         bool prod_sign;
2270 
2271         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2272         prod_sign ^= !!(flags & float_muladd_negate_product);
2273         up.s = float32_set_sign(float32_zero, prod_sign);
2274 
2275         if (flags & float_muladd_negate_c) {
2276             uc.h = -uc.h;
2277         }
2278         ur.h = up.h + uc.h;
2279     } else {
2280         union_float32 ua_orig = ua;
2281         union_float32 uc_orig = uc;
2282 
2283         if (flags & float_muladd_negate_product) {
2284             ua.h = -ua.h;
2285         }
2286         if (flags & float_muladd_negate_c) {
2287             uc.h = -uc.h;
2288         }
2289 
2290         ur.h = fmaf(ua.h, ub.h, uc.h);
2291 
2292         if (unlikely(f32_is_inf(ur))) {
2293             float_raise(float_flag_overflow, s);
2294         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2295             ua = ua_orig;
2296             uc = uc_orig;
2297             goto soft;
2298         }
2299     }
2300     if (flags & float_muladd_negate_result) {
2301         return float32_chs(ur.s);
2302     }
2303     return ur.s;
2304 
2305  soft:
2306     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2307 }
2308 
2309 float64 QEMU_FLATTEN
2310 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2311 {
2312     union_float64 ua, ub, uc, ur;
2313 
2314     ua.s = xa;
2315     ub.s = xb;
2316     uc.s = xc;
2317 
2318     if (unlikely(!can_use_fpu(s))) {
2319         goto soft;
2320     }
2321     if (unlikely(flags & float_muladd_halve_result)) {
2322         goto soft;
2323     }
2324 
2325     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2326     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2327         goto soft;
2328     }
2329 
2330     if (unlikely(force_soft_fma)) {
2331         goto soft;
2332     }
2333 
2334     /*
2335      * When (a || b) == 0, there's no need to check for under/over flow,
2336      * since we know the addend is (normal || 0) and the product is 0.
2337      */
2338     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2339         union_float64 up;
2340         bool prod_sign;
2341 
2342         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2343         prod_sign ^= !!(flags & float_muladd_negate_product);
2344         up.s = float64_set_sign(float64_zero, prod_sign);
2345 
2346         if (flags & float_muladd_negate_c) {
2347             uc.h = -uc.h;
2348         }
2349         ur.h = up.h + uc.h;
2350     } else {
2351         union_float64 ua_orig = ua;
2352         union_float64 uc_orig = uc;
2353 
2354         if (flags & float_muladd_negate_product) {
2355             ua.h = -ua.h;
2356         }
2357         if (flags & float_muladd_negate_c) {
2358             uc.h = -uc.h;
2359         }
2360 
2361         ur.h = fma(ua.h, ub.h, uc.h);
2362 
2363         if (unlikely(f64_is_inf(ur))) {
2364             float_raise(float_flag_overflow, s);
2365         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2366             ua = ua_orig;
2367             uc = uc_orig;
2368             goto soft;
2369         }
2370     }
2371     if (flags & float_muladd_negate_result) {
2372         return float64_chs(ur.s);
2373     }
2374     return ur.s;
2375 
2376  soft:
2377     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2378 }
2379 
2380 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2381                           int flags, float_status *status)
2382 {
2383     FloatParts64 pa, pb, pc, *pr;
2384 
2385     float64_unpack_canonical(&pa, a, status);
2386     float64_unpack_canonical(&pb, b, status);
2387     float64_unpack_canonical(&pc, c, status);
2388     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2389 
2390     return float64r32_round_pack_canonical(pr, status);
2391 }
2392 
2393 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2394                                       int flags, float_status *status)
2395 {
2396     FloatParts64 pa, pb, pc, *pr;
2397 
2398     bfloat16_unpack_canonical(&pa, a, status);
2399     bfloat16_unpack_canonical(&pb, b, status);
2400     bfloat16_unpack_canonical(&pc, c, status);
2401     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2402 
2403     return bfloat16_round_pack_canonical(pr, status);
2404 }
2405 
2406 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2407                                       int flags, float_status *status)
2408 {
2409     FloatParts128 pa, pb, pc, *pr;
2410 
2411     float128_unpack_canonical(&pa, a, status);
2412     float128_unpack_canonical(&pb, b, status);
2413     float128_unpack_canonical(&pc, c, status);
2414     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2415 
2416     return float128_round_pack_canonical(pr, status);
2417 }
2418 
2419 /*
2420  * Division
2421  */
2422 
2423 float16 float16_div(float16 a, float16 b, float_status *status)
2424 {
2425     FloatParts64 pa, pb, *pr;
2426 
2427     float16_unpack_canonical(&pa, a, status);
2428     float16_unpack_canonical(&pb, b, status);
2429     pr = parts_div(&pa, &pb, status);
2430 
2431     return float16_round_pack_canonical(pr, status);
2432 }
2433 
2434 static float32 QEMU_SOFTFLOAT_ATTR
2435 soft_f32_div(float32 a, float32 b, float_status *status)
2436 {
2437     FloatParts64 pa, pb, *pr;
2438 
2439     float32_unpack_canonical(&pa, a, status);
2440     float32_unpack_canonical(&pb, b, status);
2441     pr = parts_div(&pa, &pb, status);
2442 
2443     return float32_round_pack_canonical(pr, status);
2444 }
2445 
2446 static float64 QEMU_SOFTFLOAT_ATTR
2447 soft_f64_div(float64 a, float64 b, float_status *status)
2448 {
2449     FloatParts64 pa, pb, *pr;
2450 
2451     float64_unpack_canonical(&pa, a, status);
2452     float64_unpack_canonical(&pb, b, status);
2453     pr = parts_div(&pa, &pb, status);
2454 
2455     return float64_round_pack_canonical(pr, status);
2456 }
2457 
2458 static float hard_f32_div(float a, float b)
2459 {
2460     return a / b;
2461 }
2462 
2463 static double hard_f64_div(double a, double b)
2464 {
2465     return a / b;
2466 }
2467 
2468 static bool f32_div_pre(union_float32 a, union_float32 b)
2469 {
2470     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2471         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2472                fpclassify(b.h) == FP_NORMAL;
2473     }
2474     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2475 }
2476 
2477 static bool f64_div_pre(union_float64 a, union_float64 b)
2478 {
2479     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2480         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2481                fpclassify(b.h) == FP_NORMAL;
2482     }
2483     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2484 }
2485 
2486 static bool f32_div_post(union_float32 a, union_float32 b)
2487 {
2488     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2489         return fpclassify(a.h) != FP_ZERO;
2490     }
2491     return !float32_is_zero(a.s);
2492 }
2493 
2494 static bool f64_div_post(union_float64 a, union_float64 b)
2495 {
2496     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2497         return fpclassify(a.h) != FP_ZERO;
2498     }
2499     return !float64_is_zero(a.s);
2500 }
2501 
2502 float32 QEMU_FLATTEN
2503 float32_div(float32 a, float32 b, float_status *s)
2504 {
2505     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2506                         f32_div_pre, f32_div_post);
2507 }
2508 
2509 float64 QEMU_FLATTEN
2510 float64_div(float64 a, float64 b, float_status *s)
2511 {
2512     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2513                         f64_div_pre, f64_div_post);
2514 }
2515 
2516 float64 float64r32_div(float64 a, float64 b, float_status *status)
2517 {
2518     FloatParts64 pa, pb, *pr;
2519 
2520     float64_unpack_canonical(&pa, a, status);
2521     float64_unpack_canonical(&pb, b, status);
2522     pr = parts_div(&pa, &pb, status);
2523 
2524     return float64r32_round_pack_canonical(pr, status);
2525 }
2526 
2527 bfloat16 QEMU_FLATTEN
2528 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2529 {
2530     FloatParts64 pa, pb, *pr;
2531 
2532     bfloat16_unpack_canonical(&pa, a, status);
2533     bfloat16_unpack_canonical(&pb, b, status);
2534     pr = parts_div(&pa, &pb, status);
2535 
2536     return bfloat16_round_pack_canonical(pr, status);
2537 }
2538 
2539 float128 QEMU_FLATTEN
2540 float128_div(float128 a, float128 b, float_status *status)
2541 {
2542     FloatParts128 pa, pb, *pr;
2543 
2544     float128_unpack_canonical(&pa, a, status);
2545     float128_unpack_canonical(&pb, b, status);
2546     pr = parts_div(&pa, &pb, status);
2547 
2548     return float128_round_pack_canonical(pr, status);
2549 }
2550 
2551 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2552 {
2553     FloatParts128 pa, pb, *pr;
2554 
2555     if (!floatx80_unpack_canonical(&pa, a, status) ||
2556         !floatx80_unpack_canonical(&pb, b, status)) {
2557         return floatx80_default_nan(status);
2558     }
2559 
2560     pr = parts_div(&pa, &pb, status);
2561     return floatx80_round_pack_canonical(pr, status);
2562 }
2563 
2564 /*
2565  * Remainder
2566  */
2567 
2568 float32 float32_rem(float32 a, float32 b, float_status *status)
2569 {
2570     FloatParts64 pa, pb, *pr;
2571 
2572     float32_unpack_canonical(&pa, a, status);
2573     float32_unpack_canonical(&pb, b, status);
2574     pr = parts_modrem(&pa, &pb, NULL, status);
2575 
2576     return float32_round_pack_canonical(pr, status);
2577 }
2578 
2579 float64 float64_rem(float64 a, float64 b, float_status *status)
2580 {
2581     FloatParts64 pa, pb, *pr;
2582 
2583     float64_unpack_canonical(&pa, a, status);
2584     float64_unpack_canonical(&pb, b, status);
2585     pr = parts_modrem(&pa, &pb, NULL, status);
2586 
2587     return float64_round_pack_canonical(pr, status);
2588 }
2589 
2590 float128 float128_rem(float128 a, float128 b, float_status *status)
2591 {
2592     FloatParts128 pa, pb, *pr;
2593 
2594     float128_unpack_canonical(&pa, a, status);
2595     float128_unpack_canonical(&pb, b, status);
2596     pr = parts_modrem(&pa, &pb, NULL, status);
2597 
2598     return float128_round_pack_canonical(pr, status);
2599 }
2600 
2601 /*
2602  * Returns the remainder of the extended double-precision floating-point value
2603  * `a' with respect to the corresponding value `b'.
2604  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2605  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2606  * the remainder based on truncating the quotient toward zero instead and
2607  * *quotient is set to the low 64 bits of the absolute value of the integer
2608  * quotient.
2609  */
2610 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2611                          uint64_t *quotient, float_status *status)
2612 {
2613     FloatParts128 pa, pb, *pr;
2614 
2615     *quotient = 0;
2616     if (!floatx80_unpack_canonical(&pa, a, status) ||
2617         !floatx80_unpack_canonical(&pb, b, status)) {
2618         return floatx80_default_nan(status);
2619     }
2620     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2621 
2622     return floatx80_round_pack_canonical(pr, status);
2623 }
2624 
2625 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2626 {
2627     uint64_t quotient;
2628     return floatx80_modrem(a, b, false, &quotient, status);
2629 }
2630 
2631 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2632 {
2633     uint64_t quotient;
2634     return floatx80_modrem(a, b, true, &quotient, status);
2635 }
2636 
2637 /*
2638  * Float to Float conversions
2639  *
2640  * Returns the result of converting one float format to another. The
2641  * conversion is performed according to the IEC/IEEE Standard for
2642  * Binary Floating-Point Arithmetic.
2643  *
2644  * Usually this only needs to take care of raising invalid exceptions
2645  * and handling the conversion on NaNs.
2646  */
2647 
2648 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2649 {
2650     switch (a->cls) {
2651     case float_class_snan:
2652         float_raise(float_flag_invalid_snan, s);
2653         /* fall through */
2654     case float_class_qnan:
2655         /*
2656          * There is no NaN in the destination format.  Raise Invalid
2657          * and return a zero with the sign of the input NaN.
2658          */
2659         float_raise(float_flag_invalid, s);
2660         a->cls = float_class_zero;
2661         break;
2662 
2663     case float_class_inf:
2664         /*
2665          * There is no Inf in the destination format.  Raise Invalid
2666          * and return the maximum normal with the correct sign.
2667          */
2668         float_raise(float_flag_invalid, s);
2669         a->cls = float_class_normal;
2670         a->exp = float16_params_ahp.exp_max;
2671         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2672                                   float16_params_ahp.frac_size + 1);
2673         break;
2674 
2675     case float_class_normal:
2676     case float_class_zero:
2677         break;
2678 
2679     default:
2680         g_assert_not_reached();
2681     }
2682 }
2683 
2684 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2685 {
2686     if (is_nan(a->cls)) {
2687         parts_return_nan(a, s);
2688     }
2689 }
2690 
2691 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2692 {
2693     if (is_nan(a->cls)) {
2694         parts_return_nan(a, s);
2695     }
2696 }
2697 
2698 #define parts_float_to_float(P, S) \
2699     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2700 
2701 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2702                                         float_status *s)
2703 {
2704     a->cls = b->cls;
2705     a->sign = b->sign;
2706     a->exp = b->exp;
2707 
2708     if (a->cls == float_class_normal) {
2709         frac_truncjam(a, b);
2710     } else if (is_nan(a->cls)) {
2711         /* Discard the low bits of the NaN. */
2712         a->frac = b->frac_hi;
2713         parts_return_nan(a, s);
2714     }
2715 }
2716 
2717 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2718                                        float_status *s)
2719 {
2720     a->cls = b->cls;
2721     a->sign = b->sign;
2722     a->exp = b->exp;
2723     frac_widen(a, b);
2724 
2725     if (is_nan(a->cls)) {
2726         parts_return_nan(a, s);
2727     }
2728 }
2729 
2730 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2731 {
2732     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2733     FloatParts64 p;
2734 
2735     float16a_unpack_canonical(&p, a, s, fmt16);
2736     parts_float_to_float(&p, s);
2737     return float32_round_pack_canonical(&p, s);
2738 }
2739 
2740 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2741 {
2742     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2743     FloatParts64 p;
2744 
2745     float16a_unpack_canonical(&p, a, s, fmt16);
2746     parts_float_to_float(&p, s);
2747     return float64_round_pack_canonical(&p, s);
2748 }
2749 
2750 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2751 {
2752     FloatParts64 p;
2753     const FloatFmt *fmt;
2754 
2755     float32_unpack_canonical(&p, a, s);
2756     if (ieee) {
2757         parts_float_to_float(&p, s);
2758         fmt = &float16_params;
2759     } else {
2760         parts_float_to_ahp(&p, s);
2761         fmt = &float16_params_ahp;
2762     }
2763     return float16a_round_pack_canonical(&p, s, fmt);
2764 }
2765 
2766 static float64 QEMU_SOFTFLOAT_ATTR
2767 soft_float32_to_float64(float32 a, float_status *s)
2768 {
2769     FloatParts64 p;
2770 
2771     float32_unpack_canonical(&p, a, s);
2772     parts_float_to_float(&p, s);
2773     return float64_round_pack_canonical(&p, s);
2774 }
2775 
2776 float64 float32_to_float64(float32 a, float_status *s)
2777 {
2778     if (likely(float32_is_normal(a))) {
2779         /* Widening conversion can never produce inexact results.  */
2780         union_float32 uf;
2781         union_float64 ud;
2782         uf.s = a;
2783         ud.h = uf.h;
2784         return ud.s;
2785     } else if (float32_is_zero(a)) {
2786         return float64_set_sign(float64_zero, float32_is_neg(a));
2787     } else {
2788         return soft_float32_to_float64(a, s);
2789     }
2790 }
2791 
2792 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2793 {
2794     FloatParts64 p;
2795     const FloatFmt *fmt;
2796 
2797     float64_unpack_canonical(&p, a, s);
2798     if (ieee) {
2799         parts_float_to_float(&p, s);
2800         fmt = &float16_params;
2801     } else {
2802         parts_float_to_ahp(&p, s);
2803         fmt = &float16_params_ahp;
2804     }
2805     return float16a_round_pack_canonical(&p, s, fmt);
2806 }
2807 
2808 float32 float64_to_float32(float64 a, float_status *s)
2809 {
2810     FloatParts64 p;
2811 
2812     float64_unpack_canonical(&p, a, s);
2813     parts_float_to_float(&p, s);
2814     return float32_round_pack_canonical(&p, s);
2815 }
2816 
2817 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2818 {
2819     FloatParts64 p;
2820 
2821     bfloat16_unpack_canonical(&p, a, s);
2822     parts_float_to_float(&p, s);
2823     return float32_round_pack_canonical(&p, s);
2824 }
2825 
2826 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2827 {
2828     FloatParts64 p;
2829 
2830     bfloat16_unpack_canonical(&p, a, s);
2831     parts_float_to_float(&p, s);
2832     return float64_round_pack_canonical(&p, s);
2833 }
2834 
2835 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2836 {
2837     FloatParts64 p;
2838 
2839     float32_unpack_canonical(&p, a, s);
2840     parts_float_to_float(&p, s);
2841     return bfloat16_round_pack_canonical(&p, s);
2842 }
2843 
2844 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2845 {
2846     FloatParts64 p;
2847 
2848     float64_unpack_canonical(&p, a, s);
2849     parts_float_to_float(&p, s);
2850     return bfloat16_round_pack_canonical(&p, s);
2851 }
2852 
2853 float32 float128_to_float32(float128 a, float_status *s)
2854 {
2855     FloatParts64 p64;
2856     FloatParts128 p128;
2857 
2858     float128_unpack_canonical(&p128, a, s);
2859     parts_float_to_float_narrow(&p64, &p128, s);
2860     return float32_round_pack_canonical(&p64, s);
2861 }
2862 
2863 float64 float128_to_float64(float128 a, float_status *s)
2864 {
2865     FloatParts64 p64;
2866     FloatParts128 p128;
2867 
2868     float128_unpack_canonical(&p128, a, s);
2869     parts_float_to_float_narrow(&p64, &p128, s);
2870     return float64_round_pack_canonical(&p64, s);
2871 }
2872 
2873 float128 float32_to_float128(float32 a, float_status *s)
2874 {
2875     FloatParts64 p64;
2876     FloatParts128 p128;
2877 
2878     float32_unpack_canonical(&p64, a, s);
2879     parts_float_to_float_widen(&p128, &p64, s);
2880     return float128_round_pack_canonical(&p128, s);
2881 }
2882 
2883 float128 float64_to_float128(float64 a, float_status *s)
2884 {
2885     FloatParts64 p64;
2886     FloatParts128 p128;
2887 
2888     float64_unpack_canonical(&p64, a, s);
2889     parts_float_to_float_widen(&p128, &p64, s);
2890     return float128_round_pack_canonical(&p128, s);
2891 }
2892 
2893 float32 floatx80_to_float32(floatx80 a, float_status *s)
2894 {
2895     FloatParts64 p64;
2896     FloatParts128 p128;
2897 
2898     if (floatx80_unpack_canonical(&p128, a, s)) {
2899         parts_float_to_float_narrow(&p64, &p128, s);
2900     } else {
2901         parts_default_nan(&p64, s);
2902     }
2903     return float32_round_pack_canonical(&p64, s);
2904 }
2905 
2906 float64 floatx80_to_float64(floatx80 a, float_status *s)
2907 {
2908     FloatParts64 p64;
2909     FloatParts128 p128;
2910 
2911     if (floatx80_unpack_canonical(&p128, a, s)) {
2912         parts_float_to_float_narrow(&p64, &p128, s);
2913     } else {
2914         parts_default_nan(&p64, s);
2915     }
2916     return float64_round_pack_canonical(&p64, s);
2917 }
2918 
2919 float128 floatx80_to_float128(floatx80 a, float_status *s)
2920 {
2921     FloatParts128 p;
2922 
2923     if (floatx80_unpack_canonical(&p, a, s)) {
2924         parts_float_to_float(&p, s);
2925     } else {
2926         parts_default_nan(&p, s);
2927     }
2928     return float128_round_pack_canonical(&p, s);
2929 }
2930 
2931 floatx80 float32_to_floatx80(float32 a, float_status *s)
2932 {
2933     FloatParts64 p64;
2934     FloatParts128 p128;
2935 
2936     float32_unpack_canonical(&p64, a, s);
2937     parts_float_to_float_widen(&p128, &p64, s);
2938     return floatx80_round_pack_canonical(&p128, s);
2939 }
2940 
2941 floatx80 float64_to_floatx80(float64 a, float_status *s)
2942 {
2943     FloatParts64 p64;
2944     FloatParts128 p128;
2945 
2946     float64_unpack_canonical(&p64, a, s);
2947     parts_float_to_float_widen(&p128, &p64, s);
2948     return floatx80_round_pack_canonical(&p128, s);
2949 }
2950 
2951 floatx80 float128_to_floatx80(float128 a, float_status *s)
2952 {
2953     FloatParts128 p;
2954 
2955     float128_unpack_canonical(&p, a, s);
2956     parts_float_to_float(&p, s);
2957     return floatx80_round_pack_canonical(&p, s);
2958 }
2959 
2960 /*
2961  * Round to integral value
2962  */
2963 
2964 float16 float16_round_to_int(float16 a, float_status *s)
2965 {
2966     FloatParts64 p;
2967 
2968     float16_unpack_canonical(&p, a, s);
2969     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2970     return float16_round_pack_canonical(&p, s);
2971 }
2972 
2973 float32 float32_round_to_int(float32 a, float_status *s)
2974 {
2975     FloatParts64 p;
2976 
2977     float32_unpack_canonical(&p, a, s);
2978     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2979     return float32_round_pack_canonical(&p, s);
2980 }
2981 
2982 float64 float64_round_to_int(float64 a, float_status *s)
2983 {
2984     FloatParts64 p;
2985 
2986     float64_unpack_canonical(&p, a, s);
2987     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2988     return float64_round_pack_canonical(&p, s);
2989 }
2990 
2991 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2992 {
2993     FloatParts64 p;
2994 
2995     bfloat16_unpack_canonical(&p, a, s);
2996     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2997     return bfloat16_round_pack_canonical(&p, s);
2998 }
2999 
3000 float128 float128_round_to_int(float128 a, float_status *s)
3001 {
3002     FloatParts128 p;
3003 
3004     float128_unpack_canonical(&p, a, s);
3005     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3006     return float128_round_pack_canonical(&p, s);
3007 }
3008 
3009 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3010 {
3011     FloatParts128 p;
3012 
3013     if (!floatx80_unpack_canonical(&p, a, status)) {
3014         return floatx80_default_nan(status);
3015     }
3016 
3017     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3018                        &floatx80_params[status->floatx80_rounding_precision]);
3019     return floatx80_round_pack_canonical(&p, status);
3020 }
3021 
3022 /*
3023  * Floating-point to signed integer conversions
3024  */
3025 
3026 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3027                               float_status *s)
3028 {
3029     FloatParts64 p;
3030 
3031     float16_unpack_canonical(&p, a, s);
3032     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3033 }
3034 
3035 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3036                                 float_status *s)
3037 {
3038     FloatParts64 p;
3039 
3040     float16_unpack_canonical(&p, a, s);
3041     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3042 }
3043 
3044 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3045                                 float_status *s)
3046 {
3047     FloatParts64 p;
3048 
3049     float16_unpack_canonical(&p, a, s);
3050     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3051 }
3052 
3053 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3054                                 float_status *s)
3055 {
3056     FloatParts64 p;
3057 
3058     float16_unpack_canonical(&p, a, s);
3059     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3060 }
3061 
3062 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3063                                 float_status *s)
3064 {
3065     FloatParts64 p;
3066 
3067     float32_unpack_canonical(&p, a, s);
3068     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3069 }
3070 
3071 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3072                                 float_status *s)
3073 {
3074     FloatParts64 p;
3075 
3076     float32_unpack_canonical(&p, a, s);
3077     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3078 }
3079 
3080 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3081                                 float_status *s)
3082 {
3083     FloatParts64 p;
3084 
3085     float32_unpack_canonical(&p, a, s);
3086     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3087 }
3088 
3089 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3090                                 float_status *s)
3091 {
3092     FloatParts64 p;
3093 
3094     float64_unpack_canonical(&p, a, s);
3095     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3096 }
3097 
3098 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3099                                 float_status *s)
3100 {
3101     FloatParts64 p;
3102 
3103     float64_unpack_canonical(&p, a, s);
3104     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3105 }
3106 
3107 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3108                                 float_status *s)
3109 {
3110     FloatParts64 p;
3111 
3112     float64_unpack_canonical(&p, a, s);
3113     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3114 }
3115 
3116 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3117                                  float_status *s)
3118 {
3119     FloatParts64 p;
3120 
3121     bfloat16_unpack_canonical(&p, a, s);
3122     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3123 }
3124 
3125 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3126                                  float_status *s)
3127 {
3128     FloatParts64 p;
3129 
3130     bfloat16_unpack_canonical(&p, a, s);
3131     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3132 }
3133 
3134 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3135                                  float_status *s)
3136 {
3137     FloatParts64 p;
3138 
3139     bfloat16_unpack_canonical(&p, a, s);
3140     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3141 }
3142 
3143 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3144                                         int scale, float_status *s)
3145 {
3146     FloatParts128 p;
3147 
3148     float128_unpack_canonical(&p, a, s);
3149     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3150 }
3151 
3152 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3153                                         int scale, float_status *s)
3154 {
3155     FloatParts128 p;
3156 
3157     float128_unpack_canonical(&p, a, s);
3158     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3159 }
3160 
3161 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3162                                         int scale, float_status *s)
3163 {
3164     int flags = 0;
3165     Int128 r;
3166     FloatParts128 p;
3167 
3168     float128_unpack_canonical(&p, a, s);
3169 
3170     switch (p.cls) {
3171     case float_class_snan:
3172         flags |= float_flag_invalid_snan;
3173         /* fall through */
3174     case float_class_qnan:
3175         flags |= float_flag_invalid;
3176         r = UINT128_MAX;
3177         break;
3178 
3179     case float_class_inf:
3180         flags = float_flag_invalid | float_flag_invalid_cvti;
3181         r = p.sign ? INT128_MIN : INT128_MAX;
3182         break;
3183 
3184     case float_class_zero:
3185         return int128_zero();
3186 
3187     case float_class_normal:
3188         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3189             flags = float_flag_inexact;
3190         }
3191 
3192         if (p.exp < 127) {
3193             int shift = 127 - p.exp;
3194             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3195             if (p.sign) {
3196                 r = int128_neg(r);
3197             }
3198         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3199                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3200             r = INT128_MIN;
3201         } else {
3202             flags = float_flag_invalid | float_flag_invalid_cvti;
3203             r = p.sign ? INT128_MIN : INT128_MAX;
3204         }
3205         break;
3206 
3207     default:
3208         g_assert_not_reached();
3209     }
3210 
3211     float_raise(flags, s);
3212     return r;
3213 }
3214 
3215 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3216                                         int scale, float_status *s)
3217 {
3218     FloatParts128 p;
3219 
3220     if (!floatx80_unpack_canonical(&p, a, s)) {
3221         parts_default_nan(&p, s);
3222     }
3223     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3224 }
3225 
3226 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3227                                         int scale, float_status *s)
3228 {
3229     FloatParts128 p;
3230 
3231     if (!floatx80_unpack_canonical(&p, a, s)) {
3232         parts_default_nan(&p, s);
3233     }
3234     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3235 }
3236 
3237 int8_t float16_to_int8(float16 a, float_status *s)
3238 {
3239     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3240 }
3241 
3242 int16_t float16_to_int16(float16 a, float_status *s)
3243 {
3244     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3245 }
3246 
3247 int32_t float16_to_int32(float16 a, float_status *s)
3248 {
3249     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3250 }
3251 
3252 int64_t float16_to_int64(float16 a, float_status *s)
3253 {
3254     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3255 }
3256 
3257 int16_t float32_to_int16(float32 a, float_status *s)
3258 {
3259     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3260 }
3261 
3262 int32_t float32_to_int32(float32 a, float_status *s)
3263 {
3264     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3265 }
3266 
3267 int64_t float32_to_int64(float32 a, float_status *s)
3268 {
3269     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3270 }
3271 
3272 int16_t float64_to_int16(float64 a, float_status *s)
3273 {
3274     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3275 }
3276 
3277 int32_t float64_to_int32(float64 a, float_status *s)
3278 {
3279     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3280 }
3281 
3282 int64_t float64_to_int64(float64 a, float_status *s)
3283 {
3284     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3285 }
3286 
3287 int32_t float128_to_int32(float128 a, float_status *s)
3288 {
3289     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3290 }
3291 
3292 int64_t float128_to_int64(float128 a, float_status *s)
3293 {
3294     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3295 }
3296 
3297 Int128 float128_to_int128(float128 a, float_status *s)
3298 {
3299     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3300 }
3301 
3302 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3303 {
3304     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3305 }
3306 
3307 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3308 {
3309     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3310 }
3311 
3312 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3313 {
3314     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3315 }
3316 
3317 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3318 {
3319     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3320 }
3321 
3322 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3323 {
3324     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3325 }
3326 
3327 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3328 {
3329     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3330 }
3331 
3332 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3333 {
3334     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3335 }
3336 
3337 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3338 {
3339     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3340 }
3341 
3342 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3343 {
3344     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3345 }
3346 
3347 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3348 {
3349     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3350 }
3351 
3352 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3353 {
3354     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3355 }
3356 
3357 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3358 {
3359     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3360 }
3361 
3362 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3363 {
3364     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3365 }
3366 
3367 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3368 {
3369     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3370 }
3371 
3372 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3373 {
3374     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3375 }
3376 
3377 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3378 {
3379     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3380 }
3381 
3382 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3383 {
3384     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3385 }
3386 
3387 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3388 {
3389     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3390 }
3391 
3392 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3393 {
3394     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3395 }
3396 
3397 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3398 {
3399     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3400 }
3401 
3402 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3403 {
3404     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3405 }
3406 
3407 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3408 {
3409     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3410 }
3411 
3412 /*
3413  * Floating-point to unsigned integer conversions
3414  */
3415 
3416 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3417                                 float_status *s)
3418 {
3419     FloatParts64 p;
3420 
3421     float16_unpack_canonical(&p, a, s);
3422     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3423 }
3424 
3425 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3426                                   float_status *s)
3427 {
3428     FloatParts64 p;
3429 
3430     float16_unpack_canonical(&p, a, s);
3431     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3432 }
3433 
3434 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3435                                   float_status *s)
3436 {
3437     FloatParts64 p;
3438 
3439     float16_unpack_canonical(&p, a, s);
3440     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3441 }
3442 
3443 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3444                                   float_status *s)
3445 {
3446     FloatParts64 p;
3447 
3448     float16_unpack_canonical(&p, a, s);
3449     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3450 }
3451 
3452 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3453                                   float_status *s)
3454 {
3455     FloatParts64 p;
3456 
3457     float32_unpack_canonical(&p, a, s);
3458     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3459 }
3460 
3461 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3462                                   float_status *s)
3463 {
3464     FloatParts64 p;
3465 
3466     float32_unpack_canonical(&p, a, s);
3467     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3468 }
3469 
3470 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3471                                   float_status *s)
3472 {
3473     FloatParts64 p;
3474 
3475     float32_unpack_canonical(&p, a, s);
3476     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3477 }
3478 
3479 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3480                                   float_status *s)
3481 {
3482     FloatParts64 p;
3483 
3484     float64_unpack_canonical(&p, a, s);
3485     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3486 }
3487 
3488 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3489                                   float_status *s)
3490 {
3491     FloatParts64 p;
3492 
3493     float64_unpack_canonical(&p, a, s);
3494     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3495 }
3496 
3497 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3498                                   float_status *s)
3499 {
3500     FloatParts64 p;
3501 
3502     float64_unpack_canonical(&p, a, s);
3503     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3504 }
3505 
3506 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3507                                    int scale, float_status *s)
3508 {
3509     FloatParts64 p;
3510 
3511     bfloat16_unpack_canonical(&p, a, s);
3512     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3513 }
3514 
3515 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3516                                    int scale, float_status *s)
3517 {
3518     FloatParts64 p;
3519 
3520     bfloat16_unpack_canonical(&p, a, s);
3521     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3522 }
3523 
3524 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3525                                    int scale, float_status *s)
3526 {
3527     FloatParts64 p;
3528 
3529     bfloat16_unpack_canonical(&p, a, s);
3530     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3531 }
3532 
3533 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3534                                           int scale, float_status *s)
3535 {
3536     FloatParts128 p;
3537 
3538     float128_unpack_canonical(&p, a, s);
3539     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3540 }
3541 
3542 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3543                                           int scale, float_status *s)
3544 {
3545     FloatParts128 p;
3546 
3547     float128_unpack_canonical(&p, a, s);
3548     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3549 }
3550 
3551 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3552                                          int scale, float_status *s)
3553 {
3554     int flags = 0;
3555     Int128 r;
3556     FloatParts128 p;
3557 
3558     float128_unpack_canonical(&p, a, s);
3559 
3560     switch (p.cls) {
3561     case float_class_snan:
3562         flags |= float_flag_invalid_snan;
3563         /* fall through */
3564     case float_class_qnan:
3565         flags |= float_flag_invalid;
3566         r = UINT128_MAX;
3567         break;
3568 
3569     case float_class_inf:
3570         flags = float_flag_invalid | float_flag_invalid_cvti;
3571         r = p.sign ? int128_zero() : UINT128_MAX;
3572         break;
3573 
3574     case float_class_zero:
3575         return int128_zero();
3576 
3577     case float_class_normal:
3578         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3579             flags = float_flag_inexact;
3580             if (p.cls == float_class_zero) {
3581                 r = int128_zero();
3582                 break;
3583             }
3584         }
3585 
3586         if (p.sign) {
3587             flags = float_flag_invalid | float_flag_invalid_cvti;
3588             r = int128_zero();
3589         } else if (p.exp <= 127) {
3590             int shift = 127 - p.exp;
3591             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3592         } else {
3593             flags = float_flag_invalid | float_flag_invalid_cvti;
3594             r = UINT128_MAX;
3595         }
3596         break;
3597 
3598     default:
3599         g_assert_not_reached();
3600     }
3601 
3602     float_raise(flags, s);
3603     return r;
3604 }
3605 
3606 uint8_t float16_to_uint8(float16 a, float_status *s)
3607 {
3608     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3609 }
3610 
3611 uint16_t float16_to_uint16(float16 a, float_status *s)
3612 {
3613     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3614 }
3615 
3616 uint32_t float16_to_uint32(float16 a, float_status *s)
3617 {
3618     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3619 }
3620 
3621 uint64_t float16_to_uint64(float16 a, float_status *s)
3622 {
3623     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3624 }
3625 
3626 uint16_t float32_to_uint16(float32 a, float_status *s)
3627 {
3628     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3629 }
3630 
3631 uint32_t float32_to_uint32(float32 a, float_status *s)
3632 {
3633     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3634 }
3635 
3636 uint64_t float32_to_uint64(float32 a, float_status *s)
3637 {
3638     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3639 }
3640 
3641 uint16_t float64_to_uint16(float64 a, float_status *s)
3642 {
3643     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3644 }
3645 
3646 uint32_t float64_to_uint32(float64 a, float_status *s)
3647 {
3648     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3649 }
3650 
3651 uint64_t float64_to_uint64(float64 a, float_status *s)
3652 {
3653     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3654 }
3655 
3656 uint32_t float128_to_uint32(float128 a, float_status *s)
3657 {
3658     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3659 }
3660 
3661 uint64_t float128_to_uint64(float128 a, float_status *s)
3662 {
3663     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3664 }
3665 
3666 Int128 float128_to_uint128(float128 a, float_status *s)
3667 {
3668     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3669 }
3670 
3671 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3672 {
3673     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3674 }
3675 
3676 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3677 {
3678     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3679 }
3680 
3681 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3682 {
3683     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3684 }
3685 
3686 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3687 {
3688     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3689 }
3690 
3691 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3692 {
3693     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3694 }
3695 
3696 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3697 {
3698     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3699 }
3700 
3701 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3702 {
3703     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3704 }
3705 
3706 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3707 {
3708     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3709 }
3710 
3711 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3712 {
3713     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3714 }
3715 
3716 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3717 {
3718     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3719 }
3720 
3721 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3722 {
3723     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3724 }
3725 
3726 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3727 {
3728     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3729 }
3730 
3731 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3732 {
3733     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3734 }
3735 
3736 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3737 {
3738     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3739 }
3740 
3741 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3742 {
3743     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3744 }
3745 
3746 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3747 {
3748     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3749 }
3750 
3751 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3752 {
3753     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3754 }
3755 
3756 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3757 {
3758     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3759 }
3760 
3761 /*
3762  * Signed integer to floating-point conversions
3763  */
3764 
3765 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3766 {
3767     FloatParts64 p;
3768 
3769     parts_sint_to_float(&p, a, scale, status);
3770     return float16_round_pack_canonical(&p, status);
3771 }
3772 
3773 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3774 {
3775     return int64_to_float16_scalbn(a, scale, status);
3776 }
3777 
3778 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3779 {
3780     return int64_to_float16_scalbn(a, scale, status);
3781 }
3782 
3783 float16 int64_to_float16(int64_t a, float_status *status)
3784 {
3785     return int64_to_float16_scalbn(a, 0, status);
3786 }
3787 
3788 float16 int32_to_float16(int32_t a, float_status *status)
3789 {
3790     return int64_to_float16_scalbn(a, 0, status);
3791 }
3792 
3793 float16 int16_to_float16(int16_t a, float_status *status)
3794 {
3795     return int64_to_float16_scalbn(a, 0, status);
3796 }
3797 
3798 float16 int8_to_float16(int8_t a, float_status *status)
3799 {
3800     return int64_to_float16_scalbn(a, 0, status);
3801 }
3802 
3803 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3804 {
3805     FloatParts64 p;
3806 
3807     /* Without scaling, there are no overflow concerns. */
3808     if (likely(scale == 0) && can_use_fpu(status)) {
3809         union_float32 ur;
3810         ur.h = a;
3811         return ur.s;
3812     }
3813 
3814     parts64_sint_to_float(&p, a, scale, status);
3815     return float32_round_pack_canonical(&p, status);
3816 }
3817 
3818 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3819 {
3820     return int64_to_float32_scalbn(a, scale, status);
3821 }
3822 
3823 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3824 {
3825     return int64_to_float32_scalbn(a, scale, status);
3826 }
3827 
3828 float32 int64_to_float32(int64_t a, float_status *status)
3829 {
3830     return int64_to_float32_scalbn(a, 0, status);
3831 }
3832 
3833 float32 int32_to_float32(int32_t a, float_status *status)
3834 {
3835     return int64_to_float32_scalbn(a, 0, status);
3836 }
3837 
3838 float32 int16_to_float32(int16_t a, float_status *status)
3839 {
3840     return int64_to_float32_scalbn(a, 0, status);
3841 }
3842 
3843 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3844 {
3845     FloatParts64 p;
3846 
3847     /* Without scaling, there are no overflow concerns. */
3848     if (likely(scale == 0) && can_use_fpu(status)) {
3849         union_float64 ur;
3850         ur.h = a;
3851         return ur.s;
3852     }
3853 
3854     parts_sint_to_float(&p, a, scale, status);
3855     return float64_round_pack_canonical(&p, status);
3856 }
3857 
3858 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3859 {
3860     return int64_to_float64_scalbn(a, scale, status);
3861 }
3862 
3863 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3864 {
3865     return int64_to_float64_scalbn(a, scale, status);
3866 }
3867 
3868 float64 int64_to_float64(int64_t a, float_status *status)
3869 {
3870     return int64_to_float64_scalbn(a, 0, status);
3871 }
3872 
3873 float64 int32_to_float64(int32_t a, float_status *status)
3874 {
3875     return int64_to_float64_scalbn(a, 0, status);
3876 }
3877 
3878 float64 int16_to_float64(int16_t a, float_status *status)
3879 {
3880     return int64_to_float64_scalbn(a, 0, status);
3881 }
3882 
3883 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3884 {
3885     FloatParts64 p;
3886 
3887     parts_sint_to_float(&p, a, scale, status);
3888     return bfloat16_round_pack_canonical(&p, status);
3889 }
3890 
3891 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3892 {
3893     return int64_to_bfloat16_scalbn(a, scale, status);
3894 }
3895 
3896 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3897 {
3898     return int64_to_bfloat16_scalbn(a, scale, status);
3899 }
3900 
3901 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3902 {
3903     return int64_to_bfloat16_scalbn(a, 0, status);
3904 }
3905 
3906 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3907 {
3908     return int64_to_bfloat16_scalbn(a, 0, status);
3909 }
3910 
3911 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3912 {
3913     return int64_to_bfloat16_scalbn(a, 0, status);
3914 }
3915 
3916 float128 int128_to_float128(Int128 a, float_status *status)
3917 {
3918     FloatParts128 p = { };
3919     int shift;
3920 
3921     if (int128_nz(a)) {
3922         p.cls = float_class_normal;
3923         if (!int128_nonneg(a)) {
3924             p.sign = true;
3925             a = int128_neg(a);
3926         }
3927 
3928         shift = clz64(int128_gethi(a));
3929         if (shift == 64) {
3930             shift += clz64(int128_getlo(a));
3931         }
3932 
3933         p.exp = 127 - shift;
3934         a = int128_lshift(a, shift);
3935 
3936         p.frac_hi = int128_gethi(a);
3937         p.frac_lo = int128_getlo(a);
3938     } else {
3939         p.cls = float_class_zero;
3940     }
3941 
3942     return float128_round_pack_canonical(&p, status);
3943 }
3944 
3945 float128 int64_to_float128(int64_t a, float_status *status)
3946 {
3947     FloatParts128 p;
3948 
3949     parts_sint_to_float(&p, a, 0, status);
3950     return float128_round_pack_canonical(&p, status);
3951 }
3952 
3953 float128 int32_to_float128(int32_t a, float_status *status)
3954 {
3955     return int64_to_float128(a, status);
3956 }
3957 
3958 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3959 {
3960     FloatParts128 p;
3961 
3962     parts_sint_to_float(&p, a, 0, status);
3963     return floatx80_round_pack_canonical(&p, status);
3964 }
3965 
3966 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3967 {
3968     return int64_to_floatx80(a, status);
3969 }
3970 
3971 /*
3972  * Unsigned Integer to floating-point conversions
3973  */
3974 
3975 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3976 {
3977     FloatParts64 p;
3978 
3979     parts_uint_to_float(&p, a, scale, status);
3980     return float16_round_pack_canonical(&p, status);
3981 }
3982 
3983 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3984 {
3985     return uint64_to_float16_scalbn(a, scale, status);
3986 }
3987 
3988 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3989 {
3990     return uint64_to_float16_scalbn(a, scale, status);
3991 }
3992 
3993 float16 uint64_to_float16(uint64_t a, float_status *status)
3994 {
3995     return uint64_to_float16_scalbn(a, 0, status);
3996 }
3997 
3998 float16 uint32_to_float16(uint32_t a, float_status *status)
3999 {
4000     return uint64_to_float16_scalbn(a, 0, status);
4001 }
4002 
4003 float16 uint16_to_float16(uint16_t a, float_status *status)
4004 {
4005     return uint64_to_float16_scalbn(a, 0, status);
4006 }
4007 
4008 float16 uint8_to_float16(uint8_t a, float_status *status)
4009 {
4010     return uint64_to_float16_scalbn(a, 0, status);
4011 }
4012 
4013 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4014 {
4015     FloatParts64 p;
4016 
4017     /* Without scaling, there are no overflow concerns. */
4018     if (likely(scale == 0) && can_use_fpu(status)) {
4019         union_float32 ur;
4020         ur.h = a;
4021         return ur.s;
4022     }
4023 
4024     parts_uint_to_float(&p, a, scale, status);
4025     return float32_round_pack_canonical(&p, status);
4026 }
4027 
4028 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4029 {
4030     return uint64_to_float32_scalbn(a, scale, status);
4031 }
4032 
4033 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4034 {
4035     return uint64_to_float32_scalbn(a, scale, status);
4036 }
4037 
4038 float32 uint64_to_float32(uint64_t a, float_status *status)
4039 {
4040     return uint64_to_float32_scalbn(a, 0, status);
4041 }
4042 
4043 float32 uint32_to_float32(uint32_t a, float_status *status)
4044 {
4045     return uint64_to_float32_scalbn(a, 0, status);
4046 }
4047 
4048 float32 uint16_to_float32(uint16_t a, float_status *status)
4049 {
4050     return uint64_to_float32_scalbn(a, 0, status);
4051 }
4052 
4053 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4054 {
4055     FloatParts64 p;
4056 
4057     /* Without scaling, there are no overflow concerns. */
4058     if (likely(scale == 0) && can_use_fpu(status)) {
4059         union_float64 ur;
4060         ur.h = a;
4061         return ur.s;
4062     }
4063 
4064     parts_uint_to_float(&p, a, scale, status);
4065     return float64_round_pack_canonical(&p, status);
4066 }
4067 
4068 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4069 {
4070     return uint64_to_float64_scalbn(a, scale, status);
4071 }
4072 
4073 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4074 {
4075     return uint64_to_float64_scalbn(a, scale, status);
4076 }
4077 
4078 float64 uint64_to_float64(uint64_t a, float_status *status)
4079 {
4080     return uint64_to_float64_scalbn(a, 0, status);
4081 }
4082 
4083 float64 uint32_to_float64(uint32_t a, float_status *status)
4084 {
4085     return uint64_to_float64_scalbn(a, 0, status);
4086 }
4087 
4088 float64 uint16_to_float64(uint16_t a, float_status *status)
4089 {
4090     return uint64_to_float64_scalbn(a, 0, status);
4091 }
4092 
4093 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4094 {
4095     FloatParts64 p;
4096 
4097     parts_uint_to_float(&p, a, scale, status);
4098     return bfloat16_round_pack_canonical(&p, status);
4099 }
4100 
4101 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4102 {
4103     return uint64_to_bfloat16_scalbn(a, scale, status);
4104 }
4105 
4106 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4107 {
4108     return uint64_to_bfloat16_scalbn(a, scale, status);
4109 }
4110 
4111 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4112 {
4113     return uint64_to_bfloat16_scalbn(a, 0, status);
4114 }
4115 
4116 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4117 {
4118     return uint64_to_bfloat16_scalbn(a, 0, status);
4119 }
4120 
4121 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4122 {
4123     return uint64_to_bfloat16_scalbn(a, 0, status);
4124 }
4125 
4126 float128 uint64_to_float128(uint64_t a, float_status *status)
4127 {
4128     FloatParts128 p;
4129 
4130     parts_uint_to_float(&p, a, 0, status);
4131     return float128_round_pack_canonical(&p, status);
4132 }
4133 
4134 float128 uint128_to_float128(Int128 a, float_status *status)
4135 {
4136     FloatParts128 p = { };
4137     int shift;
4138 
4139     if (int128_nz(a)) {
4140         p.cls = float_class_normal;
4141 
4142         shift = clz64(int128_gethi(a));
4143         if (shift == 64) {
4144             shift += clz64(int128_getlo(a));
4145         }
4146 
4147         p.exp = 127 - shift;
4148         a = int128_lshift(a, shift);
4149 
4150         p.frac_hi = int128_gethi(a);
4151         p.frac_lo = int128_getlo(a);
4152     } else {
4153         p.cls = float_class_zero;
4154     }
4155 
4156     return float128_round_pack_canonical(&p, status);
4157 }
4158 
4159 /*
4160  * Minimum and maximum
4161  */
4162 
4163 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4164 {
4165     FloatParts64 pa, pb, *pr;
4166 
4167     float16_unpack_canonical(&pa, a, s);
4168     float16_unpack_canonical(&pb, b, s);
4169     pr = parts_minmax(&pa, &pb, s, flags);
4170 
4171     return float16_round_pack_canonical(pr, s);
4172 }
4173 
4174 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4175                                 float_status *s, int flags)
4176 {
4177     FloatParts64 pa, pb, *pr;
4178 
4179     bfloat16_unpack_canonical(&pa, a, s);
4180     bfloat16_unpack_canonical(&pb, b, s);
4181     pr = parts_minmax(&pa, &pb, s, flags);
4182 
4183     return bfloat16_round_pack_canonical(pr, s);
4184 }
4185 
4186 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4187 {
4188     FloatParts64 pa, pb, *pr;
4189 
4190     float32_unpack_canonical(&pa, a, s);
4191     float32_unpack_canonical(&pb, b, s);
4192     pr = parts_minmax(&pa, &pb, s, flags);
4193 
4194     return float32_round_pack_canonical(pr, s);
4195 }
4196 
4197 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4198 {
4199     FloatParts64 pa, pb, *pr;
4200 
4201     float64_unpack_canonical(&pa, a, s);
4202     float64_unpack_canonical(&pb, b, s);
4203     pr = parts_minmax(&pa, &pb, s, flags);
4204 
4205     return float64_round_pack_canonical(pr, s);
4206 }
4207 
4208 static float128 float128_minmax(float128 a, float128 b,
4209                                 float_status *s, int flags)
4210 {
4211     FloatParts128 pa, pb, *pr;
4212 
4213     float128_unpack_canonical(&pa, a, s);
4214     float128_unpack_canonical(&pb, b, s);
4215     pr = parts_minmax(&pa, &pb, s, flags);
4216 
4217     return float128_round_pack_canonical(pr, s);
4218 }
4219 
4220 #define MINMAX_1(type, name, flags) \
4221     type type##_##name(type a, type b, float_status *s) \
4222     { return type##_minmax(a, b, s, flags); }
4223 
4224 #define MINMAX_2(type) \
4225     MINMAX_1(type, max, 0)                                                \
4226     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4227     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4228     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4229     MINMAX_1(type, min, minmax_ismin)                                     \
4230     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4231     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4232     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4233 
4234 MINMAX_2(float16)
4235 MINMAX_2(bfloat16)
4236 MINMAX_2(float32)
4237 MINMAX_2(float64)
4238 MINMAX_2(float128)
4239 
4240 #undef MINMAX_1
4241 #undef MINMAX_2
4242 
4243 /*
4244  * Floating point compare
4245  */
4246 
4247 static FloatRelation QEMU_FLATTEN
4248 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4249 {
4250     FloatParts64 pa, pb;
4251 
4252     float16_unpack_canonical(&pa, a, s);
4253     float16_unpack_canonical(&pb, b, s);
4254     return parts_compare(&pa, &pb, s, is_quiet);
4255 }
4256 
4257 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4258 {
4259     return float16_do_compare(a, b, s, false);
4260 }
4261 
4262 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4263 {
4264     return float16_do_compare(a, b, s, true);
4265 }
4266 
4267 static FloatRelation QEMU_SOFTFLOAT_ATTR
4268 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4269 {
4270     FloatParts64 pa, pb;
4271 
4272     float32_unpack_canonical(&pa, a, s);
4273     float32_unpack_canonical(&pb, b, s);
4274     return parts_compare(&pa, &pb, s, is_quiet);
4275 }
4276 
4277 static FloatRelation QEMU_FLATTEN
4278 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4279 {
4280     union_float32 ua, ub;
4281 
4282     ua.s = xa;
4283     ub.s = xb;
4284 
4285     if (QEMU_NO_HARDFLOAT) {
4286         goto soft;
4287     }
4288 
4289     float32_input_flush2(&ua.s, &ub.s, s);
4290     if (isgreaterequal(ua.h, ub.h)) {
4291         if (isgreater(ua.h, ub.h)) {
4292             return float_relation_greater;
4293         }
4294         return float_relation_equal;
4295     }
4296     if (likely(isless(ua.h, ub.h))) {
4297         return float_relation_less;
4298     }
4299     /*
4300      * The only condition remaining is unordered.
4301      * Fall through to set flags.
4302      */
4303  soft:
4304     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4305 }
4306 
4307 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4308 {
4309     return float32_hs_compare(a, b, s, false);
4310 }
4311 
4312 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4313 {
4314     return float32_hs_compare(a, b, s, true);
4315 }
4316 
4317 static FloatRelation QEMU_SOFTFLOAT_ATTR
4318 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4319 {
4320     FloatParts64 pa, pb;
4321 
4322     float64_unpack_canonical(&pa, a, s);
4323     float64_unpack_canonical(&pb, b, s);
4324     return parts_compare(&pa, &pb, s, is_quiet);
4325 }
4326 
4327 static FloatRelation QEMU_FLATTEN
4328 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4329 {
4330     union_float64 ua, ub;
4331 
4332     ua.s = xa;
4333     ub.s = xb;
4334 
4335     if (QEMU_NO_HARDFLOAT) {
4336         goto soft;
4337     }
4338 
4339     float64_input_flush2(&ua.s, &ub.s, s);
4340     if (isgreaterequal(ua.h, ub.h)) {
4341         if (isgreater(ua.h, ub.h)) {
4342             return float_relation_greater;
4343         }
4344         return float_relation_equal;
4345     }
4346     if (likely(isless(ua.h, ub.h))) {
4347         return float_relation_less;
4348     }
4349     /*
4350      * The only condition remaining is unordered.
4351      * Fall through to set flags.
4352      */
4353  soft:
4354     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4355 }
4356 
4357 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4358 {
4359     return float64_hs_compare(a, b, s, false);
4360 }
4361 
4362 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4363 {
4364     return float64_hs_compare(a, b, s, true);
4365 }
4366 
4367 static FloatRelation QEMU_FLATTEN
4368 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4369 {
4370     FloatParts64 pa, pb;
4371 
4372     bfloat16_unpack_canonical(&pa, a, s);
4373     bfloat16_unpack_canonical(&pb, b, s);
4374     return parts_compare(&pa, &pb, s, is_quiet);
4375 }
4376 
4377 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4378 {
4379     return bfloat16_do_compare(a, b, s, false);
4380 }
4381 
4382 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4383 {
4384     return bfloat16_do_compare(a, b, s, true);
4385 }
4386 
4387 static FloatRelation QEMU_FLATTEN
4388 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4389 {
4390     FloatParts128 pa, pb;
4391 
4392     float128_unpack_canonical(&pa, a, s);
4393     float128_unpack_canonical(&pb, b, s);
4394     return parts_compare(&pa, &pb, s, is_quiet);
4395 }
4396 
4397 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4398 {
4399     return float128_do_compare(a, b, s, false);
4400 }
4401 
4402 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4403 {
4404     return float128_do_compare(a, b, s, true);
4405 }
4406 
4407 static FloatRelation QEMU_FLATTEN
4408 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4409 {
4410     FloatParts128 pa, pb;
4411 
4412     if (!floatx80_unpack_canonical(&pa, a, s) ||
4413         !floatx80_unpack_canonical(&pb, b, s)) {
4414         return float_relation_unordered;
4415     }
4416     return parts_compare(&pa, &pb, s, is_quiet);
4417 }
4418 
4419 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4420 {
4421     return floatx80_do_compare(a, b, s, false);
4422 }
4423 
4424 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4425 {
4426     return floatx80_do_compare(a, b, s, true);
4427 }
4428 
4429 /*
4430  * Scale by 2**N
4431  */
4432 
4433 float16 float16_scalbn(float16 a, int n, float_status *status)
4434 {
4435     FloatParts64 p;
4436 
4437     float16_unpack_canonical(&p, a, status);
4438     parts_scalbn(&p, n, status);
4439     return float16_round_pack_canonical(&p, status);
4440 }
4441 
4442 float32 float32_scalbn(float32 a, int n, float_status *status)
4443 {
4444     FloatParts64 p;
4445 
4446     float32_unpack_canonical(&p, a, status);
4447     parts_scalbn(&p, n, status);
4448     return float32_round_pack_canonical(&p, status);
4449 }
4450 
4451 float64 float64_scalbn(float64 a, int n, float_status *status)
4452 {
4453     FloatParts64 p;
4454 
4455     float64_unpack_canonical(&p, a, status);
4456     parts_scalbn(&p, n, status);
4457     return float64_round_pack_canonical(&p, status);
4458 }
4459 
4460 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4461 {
4462     FloatParts64 p;
4463 
4464     bfloat16_unpack_canonical(&p, a, status);
4465     parts_scalbn(&p, n, status);
4466     return bfloat16_round_pack_canonical(&p, status);
4467 }
4468 
4469 float128 float128_scalbn(float128 a, int n, float_status *status)
4470 {
4471     FloatParts128 p;
4472 
4473     float128_unpack_canonical(&p, a, status);
4474     parts_scalbn(&p, n, status);
4475     return float128_round_pack_canonical(&p, status);
4476 }
4477 
4478 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4479 {
4480     FloatParts128 p;
4481 
4482     if (!floatx80_unpack_canonical(&p, a, status)) {
4483         return floatx80_default_nan(status);
4484     }
4485     parts_scalbn(&p, n, status);
4486     return floatx80_round_pack_canonical(&p, status);
4487 }
4488 
4489 /*
4490  * Square Root
4491  */
4492 
4493 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4494 {
4495     FloatParts64 p;
4496 
4497     float16_unpack_canonical(&p, a, status);
4498     parts_sqrt(&p, status, &float16_params);
4499     return float16_round_pack_canonical(&p, status);
4500 }
4501 
4502 static float32 QEMU_SOFTFLOAT_ATTR
4503 soft_f32_sqrt(float32 a, float_status *status)
4504 {
4505     FloatParts64 p;
4506 
4507     float32_unpack_canonical(&p, a, status);
4508     parts_sqrt(&p, status, &float32_params);
4509     return float32_round_pack_canonical(&p, status);
4510 }
4511 
4512 static float64 QEMU_SOFTFLOAT_ATTR
4513 soft_f64_sqrt(float64 a, float_status *status)
4514 {
4515     FloatParts64 p;
4516 
4517     float64_unpack_canonical(&p, a, status);
4518     parts_sqrt(&p, status, &float64_params);
4519     return float64_round_pack_canonical(&p, status);
4520 }
4521 
4522 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4523 {
4524     union_float32 ua, ur;
4525 
4526     ua.s = xa;
4527     if (unlikely(!can_use_fpu(s))) {
4528         goto soft;
4529     }
4530 
4531     float32_input_flush1(&ua.s, s);
4532     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4533         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4534                        fpclassify(ua.h) == FP_ZERO) ||
4535                      signbit(ua.h))) {
4536             goto soft;
4537         }
4538     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4539                         float32_is_neg(ua.s))) {
4540         goto soft;
4541     }
4542     ur.h = sqrtf(ua.h);
4543     return ur.s;
4544 
4545  soft:
4546     return soft_f32_sqrt(ua.s, s);
4547 }
4548 
4549 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4550 {
4551     union_float64 ua, ur;
4552 
4553     ua.s = xa;
4554     if (unlikely(!can_use_fpu(s))) {
4555         goto soft;
4556     }
4557 
4558     float64_input_flush1(&ua.s, s);
4559     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4560         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4561                        fpclassify(ua.h) == FP_ZERO) ||
4562                      signbit(ua.h))) {
4563             goto soft;
4564         }
4565     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4566                         float64_is_neg(ua.s))) {
4567         goto soft;
4568     }
4569     ur.h = sqrt(ua.h);
4570     return ur.s;
4571 
4572  soft:
4573     return soft_f64_sqrt(ua.s, s);
4574 }
4575 
4576 float64 float64r32_sqrt(float64 a, float_status *status)
4577 {
4578     FloatParts64 p;
4579 
4580     float64_unpack_canonical(&p, a, status);
4581     parts_sqrt(&p, status, &float64_params);
4582     return float64r32_round_pack_canonical(&p, status);
4583 }
4584 
4585 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4586 {
4587     FloatParts64 p;
4588 
4589     bfloat16_unpack_canonical(&p, a, status);
4590     parts_sqrt(&p, status, &bfloat16_params);
4591     return bfloat16_round_pack_canonical(&p, status);
4592 }
4593 
4594 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4595 {
4596     FloatParts128 p;
4597 
4598     float128_unpack_canonical(&p, a, status);
4599     parts_sqrt(&p, status, &float128_params);
4600     return float128_round_pack_canonical(&p, status);
4601 }
4602 
4603 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4604 {
4605     FloatParts128 p;
4606 
4607     if (!floatx80_unpack_canonical(&p, a, s)) {
4608         return floatx80_default_nan(s);
4609     }
4610     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4611     return floatx80_round_pack_canonical(&p, s);
4612 }
4613 
4614 /*
4615  * log2
4616  */
4617 float32 float32_log2(float32 a, float_status *status)
4618 {
4619     FloatParts64 p;
4620 
4621     float32_unpack_canonical(&p, a, status);
4622     parts_log2(&p, status, &float32_params);
4623     return float32_round_pack_canonical(&p, status);
4624 }
4625 
4626 float64 float64_log2(float64 a, float_status *status)
4627 {
4628     FloatParts64 p;
4629 
4630     float64_unpack_canonical(&p, a, status);
4631     parts_log2(&p, status, &float64_params);
4632     return float64_round_pack_canonical(&p, status);
4633 }
4634 
4635 /*----------------------------------------------------------------------------
4636 | The pattern for a default generated NaN.
4637 *----------------------------------------------------------------------------*/
4638 
4639 float16 float16_default_nan(float_status *status)
4640 {
4641     FloatParts64 p;
4642 
4643     parts_default_nan(&p, status);
4644     p.frac >>= float16_params.frac_shift;
4645     return float16_pack_raw(&p);
4646 }
4647 
4648 float32 float32_default_nan(float_status *status)
4649 {
4650     FloatParts64 p;
4651 
4652     parts_default_nan(&p, status);
4653     p.frac >>= float32_params.frac_shift;
4654     return float32_pack_raw(&p);
4655 }
4656 
4657 float64 float64_default_nan(float_status *status)
4658 {
4659     FloatParts64 p;
4660 
4661     parts_default_nan(&p, status);
4662     p.frac >>= float64_params.frac_shift;
4663     return float64_pack_raw(&p);
4664 }
4665 
4666 float128 float128_default_nan(float_status *status)
4667 {
4668     FloatParts128 p;
4669 
4670     parts_default_nan(&p, status);
4671     frac_shr(&p, float128_params.frac_shift);
4672     return float128_pack_raw(&p);
4673 }
4674 
4675 bfloat16 bfloat16_default_nan(float_status *status)
4676 {
4677     FloatParts64 p;
4678 
4679     parts_default_nan(&p, status);
4680     p.frac >>= bfloat16_params.frac_shift;
4681     return bfloat16_pack_raw(&p);
4682 }
4683 
4684 /*----------------------------------------------------------------------------
4685 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4686 *----------------------------------------------------------------------------*/
4687 
4688 float16 float16_silence_nan(float16 a, float_status *status)
4689 {
4690     FloatParts64 p;
4691 
4692     float16_unpack_raw(&p, a);
4693     p.frac <<= float16_params.frac_shift;
4694     parts_silence_nan(&p, status);
4695     p.frac >>= float16_params.frac_shift;
4696     return float16_pack_raw(&p);
4697 }
4698 
4699 float32 float32_silence_nan(float32 a, float_status *status)
4700 {
4701     FloatParts64 p;
4702 
4703     float32_unpack_raw(&p, a);
4704     p.frac <<= float32_params.frac_shift;
4705     parts_silence_nan(&p, status);
4706     p.frac >>= float32_params.frac_shift;
4707     return float32_pack_raw(&p);
4708 }
4709 
4710 float64 float64_silence_nan(float64 a, float_status *status)
4711 {
4712     FloatParts64 p;
4713 
4714     float64_unpack_raw(&p, a);
4715     p.frac <<= float64_params.frac_shift;
4716     parts_silence_nan(&p, status);
4717     p.frac >>= float64_params.frac_shift;
4718     return float64_pack_raw(&p);
4719 }
4720 
4721 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4722 {
4723     FloatParts64 p;
4724 
4725     bfloat16_unpack_raw(&p, a);
4726     p.frac <<= bfloat16_params.frac_shift;
4727     parts_silence_nan(&p, status);
4728     p.frac >>= bfloat16_params.frac_shift;
4729     return bfloat16_pack_raw(&p);
4730 }
4731 
4732 float128 float128_silence_nan(float128 a, float_status *status)
4733 {
4734     FloatParts128 p;
4735 
4736     float128_unpack_raw(&p, a);
4737     frac_shl(&p, float128_params.frac_shift);
4738     parts_silence_nan(&p, status);
4739     frac_shr(&p, float128_params.frac_shift);
4740     return float128_pack_raw(&p);
4741 }
4742 
4743 /*----------------------------------------------------------------------------
4744 | If `a' is denormal and we are in flush-to-zero mode then set the
4745 | input-denormal exception and return zero. Otherwise just return the value.
4746 *----------------------------------------------------------------------------*/
4747 
4748 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4749 {
4750     if (p.exp == 0 && p.frac != 0) {
4751         float_raise(float_flag_input_denormal, status);
4752         return true;
4753     }
4754 
4755     return false;
4756 }
4757 
4758 float16 float16_squash_input_denormal(float16 a, float_status *status)
4759 {
4760     if (status->flush_inputs_to_zero) {
4761         FloatParts64 p;
4762 
4763         float16_unpack_raw(&p, a);
4764         if (parts_squash_denormal(p, status)) {
4765             return float16_set_sign(float16_zero, p.sign);
4766         }
4767     }
4768     return a;
4769 }
4770 
4771 float32 float32_squash_input_denormal(float32 a, float_status *status)
4772 {
4773     if (status->flush_inputs_to_zero) {
4774         FloatParts64 p;
4775 
4776         float32_unpack_raw(&p, a);
4777         if (parts_squash_denormal(p, status)) {
4778             return float32_set_sign(float32_zero, p.sign);
4779         }
4780     }
4781     return a;
4782 }
4783 
4784 float64 float64_squash_input_denormal(float64 a, float_status *status)
4785 {
4786     if (status->flush_inputs_to_zero) {
4787         FloatParts64 p;
4788 
4789         float64_unpack_raw(&p, a);
4790         if (parts_squash_denormal(p, status)) {
4791             return float64_set_sign(float64_zero, p.sign);
4792         }
4793     }
4794     return a;
4795 }
4796 
4797 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4798 {
4799     if (status->flush_inputs_to_zero) {
4800         FloatParts64 p;
4801 
4802         bfloat16_unpack_raw(&p, a);
4803         if (parts_squash_denormal(p, status)) {
4804             return bfloat16_set_sign(bfloat16_zero, p.sign);
4805         }
4806     }
4807     return a;
4808 }
4809 
4810 /*----------------------------------------------------------------------------
4811 | Normalizes the subnormal extended double-precision floating-point value
4812 | represented by the denormalized significand `aSig'.  The normalized exponent
4813 | and significand are stored at the locations pointed to by `zExpPtr' and
4814 | `zSigPtr', respectively.
4815 *----------------------------------------------------------------------------*/
4816 
4817 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4818                                 uint64_t *zSigPtr)
4819 {
4820     int8_t shiftCount;
4821 
4822     shiftCount = clz64(aSig);
4823     *zSigPtr = aSig<<shiftCount;
4824     *zExpPtr = 1 - shiftCount;
4825 }
4826 
4827 /*----------------------------------------------------------------------------
4828 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4829 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4830 | and returns the proper extended double-precision floating-point value
4831 | corresponding to the abstract input.  Ordinarily, the abstract value is
4832 | rounded and packed into the extended double-precision format, with the
4833 | inexact exception raised if the abstract input cannot be represented
4834 | exactly.  However, if the abstract value is too large, the overflow and
4835 | inexact exceptions are raised and an infinity or maximal finite value is
4836 | returned.  If the abstract value is too small, the input value is rounded to
4837 | a subnormal number, and the underflow and inexact exceptions are raised if
4838 | the abstract input cannot be represented exactly as a subnormal extended
4839 | double-precision floating-point number.
4840 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4841 | the result is rounded to the same number of bits as single or double
4842 | precision, respectively.  Otherwise, the result is rounded to the full
4843 | precision of the extended double-precision format.
4844 |     The input significand must be normalized or smaller.  If the input
4845 | significand is not normalized, `zExp' must be 0; in that case, the result
4846 | returned is a subnormal number, and it must not require rounding.  The
4847 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4848 | Floating-Point Arithmetic.
4849 *----------------------------------------------------------------------------*/
4850 
4851 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4852                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4853                               float_status *status)
4854 {
4855     FloatRoundMode roundingMode;
4856     bool roundNearestEven, increment, isTiny;
4857     int64_t roundIncrement, roundMask, roundBits;
4858 
4859     roundingMode = status->float_rounding_mode;
4860     roundNearestEven = ( roundingMode == float_round_nearest_even );
4861     switch (roundingPrecision) {
4862     case floatx80_precision_x:
4863         goto precision80;
4864     case floatx80_precision_d:
4865         roundIncrement = UINT64_C(0x0000000000000400);
4866         roundMask = UINT64_C(0x00000000000007FF);
4867         break;
4868     case floatx80_precision_s:
4869         roundIncrement = UINT64_C(0x0000008000000000);
4870         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4871         break;
4872     default:
4873         g_assert_not_reached();
4874     }
4875     zSig0 |= ( zSig1 != 0 );
4876     switch (roundingMode) {
4877     case float_round_nearest_even:
4878     case float_round_ties_away:
4879         break;
4880     case float_round_to_zero:
4881         roundIncrement = 0;
4882         break;
4883     case float_round_up:
4884         roundIncrement = zSign ? 0 : roundMask;
4885         break;
4886     case float_round_down:
4887         roundIncrement = zSign ? roundMask : 0;
4888         break;
4889     default:
4890         abort();
4891     }
4892     roundBits = zSig0 & roundMask;
4893     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4894         if (    ( 0x7FFE < zExp )
4895              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4896            ) {
4897             goto overflow;
4898         }
4899         if ( zExp <= 0 ) {
4900             if (status->flush_to_zero) {
4901                 float_raise(float_flag_output_denormal, status);
4902                 return packFloatx80(zSign, 0, 0);
4903             }
4904             isTiny = status->tininess_before_rounding
4905                   || (zExp < 0 )
4906                   || (zSig0 <= zSig0 + roundIncrement);
4907             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4908             zExp = 0;
4909             roundBits = zSig0 & roundMask;
4910             if (isTiny && roundBits) {
4911                 float_raise(float_flag_underflow, status);
4912             }
4913             if (roundBits) {
4914                 float_raise(float_flag_inexact, status);
4915             }
4916             zSig0 += roundIncrement;
4917             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4918             roundIncrement = roundMask + 1;
4919             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4920                 roundMask |= roundIncrement;
4921             }
4922             zSig0 &= ~ roundMask;
4923             return packFloatx80( zSign, zExp, zSig0 );
4924         }
4925     }
4926     if (roundBits) {
4927         float_raise(float_flag_inexact, status);
4928     }
4929     zSig0 += roundIncrement;
4930     if ( zSig0 < roundIncrement ) {
4931         ++zExp;
4932         zSig0 = UINT64_C(0x8000000000000000);
4933     }
4934     roundIncrement = roundMask + 1;
4935     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4936         roundMask |= roundIncrement;
4937     }
4938     zSig0 &= ~ roundMask;
4939     if ( zSig0 == 0 ) zExp = 0;
4940     return packFloatx80( zSign, zExp, zSig0 );
4941  precision80:
4942     switch (roundingMode) {
4943     case float_round_nearest_even:
4944     case float_round_ties_away:
4945         increment = ((int64_t)zSig1 < 0);
4946         break;
4947     case float_round_to_zero:
4948         increment = 0;
4949         break;
4950     case float_round_up:
4951         increment = !zSign && zSig1;
4952         break;
4953     case float_round_down:
4954         increment = zSign && zSig1;
4955         break;
4956     default:
4957         abort();
4958     }
4959     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4960         if (    ( 0x7FFE < zExp )
4961              || (    ( zExp == 0x7FFE )
4962                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4963                   && increment
4964                 )
4965            ) {
4966             roundMask = 0;
4967  overflow:
4968             float_raise(float_flag_overflow | float_flag_inexact, status);
4969             if (    ( roundingMode == float_round_to_zero )
4970                  || ( zSign && ( roundingMode == float_round_up ) )
4971                  || ( ! zSign && ( roundingMode == float_round_down ) )
4972                ) {
4973                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4974             }
4975             return packFloatx80(zSign,
4976                                 floatx80_infinity_high,
4977                                 floatx80_infinity_low);
4978         }
4979         if ( zExp <= 0 ) {
4980             isTiny = status->tininess_before_rounding
4981                   || (zExp < 0)
4982                   || !increment
4983                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4984             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4985             zExp = 0;
4986             if (isTiny && zSig1) {
4987                 float_raise(float_flag_underflow, status);
4988             }
4989             if (zSig1) {
4990                 float_raise(float_flag_inexact, status);
4991             }
4992             switch (roundingMode) {
4993             case float_round_nearest_even:
4994             case float_round_ties_away:
4995                 increment = ((int64_t)zSig1 < 0);
4996                 break;
4997             case float_round_to_zero:
4998                 increment = 0;
4999                 break;
5000             case float_round_up:
5001                 increment = !zSign && zSig1;
5002                 break;
5003             case float_round_down:
5004                 increment = zSign && zSig1;
5005                 break;
5006             default:
5007                 abort();
5008             }
5009             if ( increment ) {
5010                 ++zSig0;
5011                 if (!(zSig1 << 1) && roundNearestEven) {
5012                     zSig0 &= ~1;
5013                 }
5014                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5015             }
5016             return packFloatx80( zSign, zExp, zSig0 );
5017         }
5018     }
5019     if (zSig1) {
5020         float_raise(float_flag_inexact, status);
5021     }
5022     if ( increment ) {
5023         ++zSig0;
5024         if ( zSig0 == 0 ) {
5025             ++zExp;
5026             zSig0 = UINT64_C(0x8000000000000000);
5027         }
5028         else {
5029             if (!(zSig1 << 1) && roundNearestEven) {
5030                 zSig0 &= ~1;
5031             }
5032         }
5033     }
5034     else {
5035         if ( zSig0 == 0 ) zExp = 0;
5036     }
5037     return packFloatx80( zSign, zExp, zSig0 );
5038 
5039 }
5040 
5041 /*----------------------------------------------------------------------------
5042 | Takes an abstract floating-point value having sign `zSign', exponent
5043 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5044 | and returns the proper extended double-precision floating-point value
5045 | corresponding to the abstract input.  This routine is just like
5046 | `roundAndPackFloatx80' except that the input significand does not have to be
5047 | normalized.
5048 *----------------------------------------------------------------------------*/
5049 
5050 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5051                                        bool zSign, int32_t zExp,
5052                                        uint64_t zSig0, uint64_t zSig1,
5053                                        float_status *status)
5054 {
5055     int8_t shiftCount;
5056 
5057     if ( zSig0 == 0 ) {
5058         zSig0 = zSig1;
5059         zSig1 = 0;
5060         zExp -= 64;
5061     }
5062     shiftCount = clz64(zSig0);
5063     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5064     zExp -= shiftCount;
5065     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5066                                 zSig0, zSig1, status);
5067 
5068 }
5069 
5070 /*----------------------------------------------------------------------------
5071 | Returns the binary exponential of the single-precision floating-point value
5072 | `a'. The operation is performed according to the IEC/IEEE Standard for
5073 | Binary Floating-Point Arithmetic.
5074 |
5075 | Uses the following identities:
5076 |
5077 | 1. -------------------------------------------------------------------------
5078 |      x    x*ln(2)
5079 |     2  = e
5080 |
5081 | 2. -------------------------------------------------------------------------
5082 |                      2     3     4     5           n
5083 |      x        x     x     x     x     x           x
5084 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5085 |               1!    2!    3!    4!    5!          n!
5086 *----------------------------------------------------------------------------*/
5087 
5088 static const float64 float32_exp2_coefficients[15] =
5089 {
5090     const_float64( 0x3ff0000000000000ll ), /*  1 */
5091     const_float64( 0x3fe0000000000000ll ), /*  2 */
5092     const_float64( 0x3fc5555555555555ll ), /*  3 */
5093     const_float64( 0x3fa5555555555555ll ), /*  4 */
5094     const_float64( 0x3f81111111111111ll ), /*  5 */
5095     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5096     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5097     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5098     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5099     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5100     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5101     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5102     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5103     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5104     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5105 };
5106 
5107 float32 float32_exp2(float32 a, float_status *status)
5108 {
5109     FloatParts64 xp, xnp, tp, rp;
5110     int i;
5111 
5112     float32_unpack_canonical(&xp, a, status);
5113     if (unlikely(xp.cls != float_class_normal)) {
5114         switch (xp.cls) {
5115         case float_class_snan:
5116         case float_class_qnan:
5117             parts_return_nan(&xp, status);
5118             return float32_round_pack_canonical(&xp, status);
5119         case float_class_inf:
5120             return xp.sign ? float32_zero : a;
5121         case float_class_zero:
5122             return float32_one;
5123         default:
5124             break;
5125         }
5126         g_assert_not_reached();
5127     }
5128 
5129     float_raise(float_flag_inexact, status);
5130 
5131     float64_unpack_canonical(&tp, float64_ln2, status);
5132     xp = *parts_mul(&xp, &tp, status);
5133     xnp = xp;
5134 
5135     float64_unpack_canonical(&rp, float64_one, status);
5136     for (i = 0 ; i < 15 ; i++) {
5137         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5138         rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5139         xnp = *parts_mul(&xnp, &xp, status);
5140     }
5141 
5142     return float32_round_pack_canonical(&rp, status);
5143 }
5144 
5145 /*----------------------------------------------------------------------------
5146 | Rounds the extended double-precision floating-point value `a'
5147 | to the precision provided by floatx80_rounding_precision and returns the
5148 | result as an extended double-precision floating-point value.
5149 | The operation is performed according to the IEC/IEEE Standard for Binary
5150 | Floating-Point Arithmetic.
5151 *----------------------------------------------------------------------------*/
5152 
5153 floatx80 floatx80_round(floatx80 a, float_status *status)
5154 {
5155     FloatParts128 p;
5156 
5157     if (!floatx80_unpack_canonical(&p, a, status)) {
5158         return floatx80_default_nan(status);
5159     }
5160     return floatx80_round_pack_canonical(&p, status);
5161 }
5162 
5163 static void __attribute__((constructor)) softfloat_init(void)
5164 {
5165     union_float64 ua, ub, uc, ur;
5166 
5167     if (QEMU_NO_HARDFLOAT) {
5168         return;
5169     }
5170     /*
5171      * Test that the host's FMA is not obviously broken. For example,
5172      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5173      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5174      */
5175     ua.s = 0x0020000000000001ULL;
5176     ub.s = 0x3ca0000000000000ULL;
5177     uc.s = 0x0020000000000000ULL;
5178     ur.h = fma(ua.h, ub.h, uc.h);
5179     if (ur.s != 0x0020000000000001ULL) {
5180         force_soft_fma = true;
5181     }
5182 }
5183