xref: /qemu/fpu/softfloat-parts.c.inc (revision ca61e750)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40                                     float_status *s)
41{
42    if (is_snan(a->cls) || is_snan(b->cls)) {
43        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
44    }
45
46    if (s->default_nan_mode) {
47        parts_default_nan(a, s);
48    } else {
49        int cmp = frac_cmp(a, b);
50        if (cmp == 0) {
51            cmp = a->sign < b->sign;
52        }
53
54        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
55            a = b;
56        }
57        if (is_snan(a->cls)) {
58            parts_silence_nan(a, s);
59        }
60    }
61    return a;
62}
63
64static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65                                            FloatPartsN *c, float_status *s,
66                                            int ab_mask, int abc_mask)
67{
68    int which;
69
70    if (unlikely(abc_mask & float_cmask_snan)) {
71        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
72    }
73
74    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75                          ab_mask == float_cmask_infzero, s);
76
77    if (s->default_nan_mode || which == 3) {
78        /*
79         * Note that this check is after pickNaNMulAdd so that function
80         * has an opportunity to set the Invalid flag for infzero.
81         */
82        parts_default_nan(a, s);
83        return a;
84    }
85
86    switch (which) {
87    case 0:
88        break;
89    case 1:
90        a = b;
91        break;
92    case 2:
93        a = c;
94        break;
95    default:
96        g_assert_not_reached();
97    }
98    if (is_snan(a->cls)) {
99        parts_silence_nan(a, s);
100    }
101    return a;
102}
103
104/*
105 * Canonicalize the FloatParts structure.  Determine the class,
106 * unbias the exponent, and normalize the fraction.
107 */
108static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
109                                 const FloatFmt *fmt)
110{
111    if (unlikely(p->exp == 0)) {
112        if (likely(frac_eqz(p))) {
113            p->cls = float_class_zero;
114        } else if (status->flush_inputs_to_zero) {
115            float_raise(float_flag_input_denormal, status);
116            p->cls = float_class_zero;
117            frac_clear(p);
118        } else {
119            int shift = frac_normalize(p);
120            p->cls = float_class_normal;
121            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
122        }
123    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
124        p->cls = float_class_normal;
125        p->exp -= fmt->exp_bias;
126        frac_shl(p, fmt->frac_shift);
127        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
128    } else if (likely(frac_eqz(p))) {
129        p->cls = float_class_inf;
130    } else {
131        frac_shl(p, fmt->frac_shift);
132        p->cls = (parts_is_snan_frac(p->frac_hi, status)
133                  ? float_class_snan : float_class_qnan);
134    }
135}
136
137/*
138 * Round and uncanonicalize a floating-point number by parts. There
139 * are FRAC_SHIFT bits that may require rounding at the bottom of the
140 * fraction; these bits will be removed. The exponent will be biased
141 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
142 */
143static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
144                                   const FloatFmt *fmt)
145{
146    const int exp_max = fmt->exp_max;
147    const int frac_shift = fmt->frac_shift;
148    const uint64_t round_mask = fmt->round_mask;
149    const uint64_t frac_lsb = round_mask + 1;
150    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
151    const uint64_t roundeven_mask = round_mask | frac_lsb;
152    uint64_t inc;
153    bool overflow_norm = false;
154    int exp, flags = 0;
155
156    switch (s->float_rounding_mode) {
157    case float_round_nearest_even:
158        if (N > 64 && frac_lsb == 0) {
159            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
160                   ? frac_lsbm1 : 0);
161        } else {
162            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
163                   ? frac_lsbm1 : 0);
164        }
165        break;
166    case float_round_ties_away:
167        inc = frac_lsbm1;
168        break;
169    case float_round_to_zero:
170        overflow_norm = true;
171        inc = 0;
172        break;
173    case float_round_up:
174        inc = p->sign ? 0 : round_mask;
175        overflow_norm = p->sign;
176        break;
177    case float_round_down:
178        inc = p->sign ? round_mask : 0;
179        overflow_norm = !p->sign;
180        break;
181    case float_round_to_odd:
182        overflow_norm = true;
183        /* fall through */
184    case float_round_to_odd_inf:
185        if (N > 64 && frac_lsb == 0) {
186            inc = p->frac_hi & 1 ? 0 : round_mask;
187        } else {
188            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
189        }
190        break;
191    default:
192        g_assert_not_reached();
193    }
194
195    exp = p->exp + fmt->exp_bias;
196    if (likely(exp > 0)) {
197        if (p->frac_lo & round_mask) {
198            flags |= float_flag_inexact;
199            if (frac_addi(p, p, inc)) {
200                frac_shr(p, 1);
201                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
202                exp++;
203            }
204            p->frac_lo &= ~round_mask;
205        }
206
207        if (fmt->arm_althp) {
208            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
209            if (unlikely(exp > exp_max)) {
210                /* Overflow.  Return the maximum normal.  */
211                flags = float_flag_invalid;
212                exp = exp_max;
213                frac_allones(p);
214                p->frac_lo &= ~round_mask;
215            }
216        } else if (unlikely(exp >= exp_max)) {
217            flags |= float_flag_overflow | float_flag_inexact;
218            if (overflow_norm) {
219                exp = exp_max - 1;
220                frac_allones(p);
221                p->frac_lo &= ~round_mask;
222            } else {
223                p->cls = float_class_inf;
224                exp = exp_max;
225                frac_clear(p);
226            }
227        }
228        frac_shr(p, frac_shift);
229    } else if (s->flush_to_zero) {
230        flags |= float_flag_output_denormal;
231        p->cls = float_class_zero;
232        exp = 0;
233        frac_clear(p);
234    } else {
235        bool is_tiny = s->tininess_before_rounding || exp < 0;
236
237        if (!is_tiny) {
238            FloatPartsN discard;
239            is_tiny = !frac_addi(&discard, p, inc);
240        }
241
242        frac_shrjam(p, 1 - exp);
243
244        if (p->frac_lo & round_mask) {
245            /* Need to recompute round-to-even/round-to-odd. */
246            switch (s->float_rounding_mode) {
247            case float_round_nearest_even:
248                if (N > 64 && frac_lsb == 0) {
249                    inc = ((p->frac_hi & 1) ||
250                           (p->frac_lo & round_mask) != frac_lsbm1
251                           ? frac_lsbm1 : 0);
252                } else {
253                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
254                           ? frac_lsbm1 : 0);
255                }
256                break;
257            case float_round_to_odd:
258            case float_round_to_odd_inf:
259                if (N > 64 && frac_lsb == 0) {
260                    inc = p->frac_hi & 1 ? 0 : round_mask;
261                } else {
262                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
263                }
264                break;
265            default:
266                break;
267            }
268            flags |= float_flag_inexact;
269            frac_addi(p, p, inc);
270            p->frac_lo &= ~round_mask;
271        }
272
273        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
274        frac_shr(p, frac_shift);
275
276        if (is_tiny && (flags & float_flag_inexact)) {
277            flags |= float_flag_underflow;
278        }
279        if (exp == 0 && frac_eqz(p)) {
280            p->cls = float_class_zero;
281        }
282    }
283    p->exp = exp;
284    float_raise(flags, s);
285}
286
287static void partsN(uncanon)(FloatPartsN *p, float_status *s,
288                            const FloatFmt *fmt)
289{
290    if (likely(p->cls == float_class_normal)) {
291        parts_uncanon_normal(p, s, fmt);
292    } else {
293        switch (p->cls) {
294        case float_class_zero:
295            p->exp = 0;
296            frac_clear(p);
297            return;
298        case float_class_inf:
299            g_assert(!fmt->arm_althp);
300            p->exp = fmt->exp_max;
301            frac_clear(p);
302            return;
303        case float_class_qnan:
304        case float_class_snan:
305            g_assert(!fmt->arm_althp);
306            p->exp = fmt->exp_max;
307            frac_shr(p, fmt->frac_shift);
308            return;
309        default:
310            break;
311        }
312        g_assert_not_reached();
313    }
314}
315
316/*
317 * Returns the result of adding or subtracting the values of the
318 * floating-point values `a' and `b'. The operation is performed
319 * according to the IEC/IEEE Standard for Binary Floating-Point
320 * Arithmetic.
321 */
322static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
323                                   float_status *s, bool subtract)
324{
325    bool b_sign = b->sign ^ subtract;
326    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
327
328    if (a->sign != b_sign) {
329        /* Subtraction */
330        if (likely(ab_mask == float_cmask_normal)) {
331            if (parts_sub_normal(a, b)) {
332                return a;
333            }
334            /* Subtract was exact, fall through to set sign. */
335            ab_mask = float_cmask_zero;
336        }
337
338        if (ab_mask == float_cmask_zero) {
339            a->sign = s->float_rounding_mode == float_round_down;
340            return a;
341        }
342
343        if (unlikely(ab_mask & float_cmask_anynan)) {
344            goto p_nan;
345        }
346
347        if (ab_mask & float_cmask_inf) {
348            if (a->cls != float_class_inf) {
349                /* N - Inf */
350                goto return_b;
351            }
352            if (b->cls != float_class_inf) {
353                /* Inf - N */
354                return a;
355            }
356            /* Inf - Inf */
357            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
358            parts_default_nan(a, s);
359            return a;
360        }
361    } else {
362        /* Addition */
363        if (likely(ab_mask == float_cmask_normal)) {
364            parts_add_normal(a, b);
365            return a;
366        }
367
368        if (ab_mask == float_cmask_zero) {
369            return a;
370        }
371
372        if (unlikely(ab_mask & float_cmask_anynan)) {
373            goto p_nan;
374        }
375
376        if (ab_mask & float_cmask_inf) {
377            a->cls = float_class_inf;
378            return a;
379        }
380    }
381
382    if (b->cls == float_class_zero) {
383        g_assert(a->cls == float_class_normal);
384        return a;
385    }
386
387    g_assert(a->cls == float_class_zero);
388    g_assert(b->cls == float_class_normal);
389 return_b:
390    b->sign = b_sign;
391    return b;
392
393 p_nan:
394    return parts_pick_nan(a, b, s);
395}
396
397/*
398 * Returns the result of multiplying the floating-point values `a' and
399 * `b'. The operation is performed according to the IEC/IEEE Standard
400 * for Binary Floating-Point Arithmetic.
401 */
402static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
403                                float_status *s)
404{
405    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
406    bool sign = a->sign ^ b->sign;
407
408    if (likely(ab_mask == float_cmask_normal)) {
409        FloatPartsW tmp;
410
411        frac_mulw(&tmp, a, b);
412        frac_truncjam(a, &tmp);
413
414        a->exp += b->exp + 1;
415        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
416            frac_add(a, a, a);
417            a->exp -= 1;
418        }
419
420        a->sign = sign;
421        return a;
422    }
423
424    /* Inf * Zero == NaN */
425    if (unlikely(ab_mask == float_cmask_infzero)) {
426        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
427        parts_default_nan(a, s);
428        return a;
429    }
430
431    if (unlikely(ab_mask & float_cmask_anynan)) {
432        return parts_pick_nan(a, b, s);
433    }
434
435    /* Multiply by 0 or Inf */
436    if (ab_mask & float_cmask_inf) {
437        a->cls = float_class_inf;
438        a->sign = sign;
439        return a;
440    }
441
442    g_assert(ab_mask & float_cmask_zero);
443    a->cls = float_class_zero;
444    a->sign = sign;
445    return a;
446}
447
448/*
449 * Returns the result of multiplying the floating-point values `a' and
450 * `b' then adding 'c', with no intermediate rounding step after the
451 * multiplication. The operation is performed according to the
452 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
453 * The flags argument allows the caller to select negation of the
454 * addend, the intermediate product, or the final result. (The
455 * difference between this and having the caller do a separate
456 * negation is that negating externally will flip the sign bit on NaNs.)
457 *
458 * Requires A and C extracted into a double-sized structure to provide the
459 * extra space for the widening multiply.
460 */
461static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
462                                   FloatPartsN *c, int flags, float_status *s)
463{
464    int ab_mask, abc_mask;
465    FloatPartsW p_widen, c_widen;
466
467    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
468    abc_mask = float_cmask(c->cls) | ab_mask;
469
470    /*
471     * It is implementation-defined whether the cases of (0,inf,qnan)
472     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
473     * they return if they do), so we have to hand this information
474     * off to the target-specific pick-a-NaN routine.
475     */
476    if (unlikely(abc_mask & float_cmask_anynan)) {
477        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
478    }
479
480    if (flags & float_muladd_negate_c) {
481        c->sign ^= 1;
482    }
483
484    /* Compute the sign of the product into A. */
485    a->sign ^= b->sign;
486    if (flags & float_muladd_negate_product) {
487        a->sign ^= 1;
488    }
489
490    if (unlikely(ab_mask != float_cmask_normal)) {
491        if (unlikely(ab_mask == float_cmask_infzero)) {
492            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
493            goto d_nan;
494        }
495
496        if (ab_mask & float_cmask_inf) {
497            if (c->cls == float_class_inf && a->sign != c->sign) {
498                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
499                goto d_nan;
500            }
501            goto return_inf;
502        }
503
504        g_assert(ab_mask & float_cmask_zero);
505        if (c->cls == float_class_normal) {
506            *a = *c;
507            goto return_normal;
508        }
509        if (c->cls == float_class_zero) {
510            if (a->sign != c->sign) {
511                goto return_sub_zero;
512            }
513            goto return_zero;
514        }
515        g_assert(c->cls == float_class_inf);
516    }
517
518    if (unlikely(c->cls == float_class_inf)) {
519        a->sign = c->sign;
520        goto return_inf;
521    }
522
523    /* Perform the multiplication step. */
524    p_widen.sign = a->sign;
525    p_widen.exp = a->exp + b->exp + 1;
526    frac_mulw(&p_widen, a, b);
527    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
528        frac_add(&p_widen, &p_widen, &p_widen);
529        p_widen.exp -= 1;
530    }
531
532    /* Perform the addition step. */
533    if (c->cls != float_class_zero) {
534        /* Zero-extend C to less significant bits. */
535        frac_widen(&c_widen, c);
536        c_widen.exp = c->exp;
537
538        if (a->sign == c->sign) {
539            parts_add_normal(&p_widen, &c_widen);
540        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
541            goto return_sub_zero;
542        }
543    }
544
545    /* Narrow with sticky bit, for proper rounding later. */
546    frac_truncjam(a, &p_widen);
547    a->sign = p_widen.sign;
548    a->exp = p_widen.exp;
549
550 return_normal:
551    if (flags & float_muladd_halve_result) {
552        a->exp -= 1;
553    }
554 finish_sign:
555    if (flags & float_muladd_negate_result) {
556        a->sign ^= 1;
557    }
558    return a;
559
560 return_sub_zero:
561    a->sign = s->float_rounding_mode == float_round_down;
562 return_zero:
563    a->cls = float_class_zero;
564    goto finish_sign;
565
566 return_inf:
567    a->cls = float_class_inf;
568    goto finish_sign;
569
570 d_nan:
571    parts_default_nan(a, s);
572    return a;
573}
574
575/*
576 * Returns the result of dividing the floating-point value `a' by the
577 * corresponding value `b'. The operation is performed according to
578 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
579 */
580static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
581                                float_status *s)
582{
583    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
584    bool sign = a->sign ^ b->sign;
585
586    if (likely(ab_mask == float_cmask_normal)) {
587        a->sign = sign;
588        a->exp -= b->exp + frac_div(a, b);
589        return a;
590    }
591
592    /* 0/0 or Inf/Inf => NaN */
593    if (unlikely(ab_mask == float_cmask_zero)) {
594        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
595        goto d_nan;
596    }
597    if (unlikely(ab_mask == float_cmask_inf)) {
598        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
599        goto d_nan;
600    }
601
602    /* All the NaN cases */
603    if (unlikely(ab_mask & float_cmask_anynan)) {
604        return parts_pick_nan(a, b, s);
605    }
606
607    a->sign = sign;
608
609    /* Inf / X */
610    if (a->cls == float_class_inf) {
611        return a;
612    }
613
614    /* 0 / X */
615    if (a->cls == float_class_zero) {
616        return a;
617    }
618
619    /* X / Inf */
620    if (b->cls == float_class_inf) {
621        a->cls = float_class_zero;
622        return a;
623    }
624
625    /* X / 0 => Inf */
626    g_assert(b->cls == float_class_zero);
627    float_raise(float_flag_divbyzero, s);
628    a->cls = float_class_inf;
629    return a;
630
631 d_nan:
632    parts_default_nan(a, s);
633    return a;
634}
635
636/*
637 * Floating point remainder, per IEC/IEEE, or modulus.
638 */
639static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
640                                   uint64_t *mod_quot, float_status *s)
641{
642    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
643
644    if (likely(ab_mask == float_cmask_normal)) {
645        frac_modrem(a, b, mod_quot);
646        return a;
647    }
648
649    if (mod_quot) {
650        *mod_quot = 0;
651    }
652
653    /* All the NaN cases */
654    if (unlikely(ab_mask & float_cmask_anynan)) {
655        return parts_pick_nan(a, b, s);
656    }
657
658    /* Inf % N; N % 0 */
659    if (a->cls == float_class_inf || b->cls == float_class_zero) {
660        float_raise(float_flag_invalid, s);
661        parts_default_nan(a, s);
662        return a;
663    }
664
665    /* N % Inf; 0 % N */
666    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
667    return a;
668}
669
670/*
671 * Square Root
672 *
673 * The base algorithm is lifted from
674 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
675 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
676 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
677 * and is thus MIT licenced.
678 */
679static void partsN(sqrt)(FloatPartsN *a, float_status *status,
680                         const FloatFmt *fmt)
681{
682    const uint32_t three32 = 3u << 30;
683    const uint64_t three64 = 3ull << 62;
684    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
685    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
686    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
687    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
688    uint64_t discard;
689    bool exp_odd;
690    size_t index;
691
692    if (unlikely(a->cls != float_class_normal)) {
693        switch (a->cls) {
694        case float_class_snan:
695        case float_class_qnan:
696            parts_return_nan(a, status);
697            return;
698        case float_class_zero:
699            return;
700        case float_class_inf:
701            if (unlikely(a->sign)) {
702                goto d_nan;
703            }
704            return;
705        default:
706            g_assert_not_reached();
707        }
708    }
709
710    if (unlikely(a->sign)) {
711        goto d_nan;
712    }
713
714    /*
715     * Argument reduction.
716     * x = 4^e frac; with integer e, and frac in [1, 4)
717     * m = frac fixed point at bit 62, since we're in base 4.
718     * If base-2 exponent is odd, exchange that for multiply by 2,
719     * which results in no shift.
720     */
721    exp_odd = a->exp & 1;
722    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
723    if (!exp_odd) {
724        frac_shr(a, 1);
725    }
726
727    /*
728     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
729     *
730     * Initial estimate:
731     * 7-bit lookup table (1-bit exponent and 6-bit significand).
732     *
733     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
734     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
735     * a table lookup is faster and needs one less iteration.
736     * The 7-bit table gives |e| < 0x1.fdp-9.
737     *
738     * A Newton-Raphson iteration for r is
739     *   s = m*r
740     *   d = s*r
741     *   u = 3 - d
742     *   r = r*u/2
743     *
744     * Fixed point representations:
745     *   m, s, d, u, three are all 2.30; r is 0.32
746     */
747    m64 = a->frac_hi;
748    m32 = m64 >> 32;
749
750    r32 = rsqrt_tab[index] << 16;
751    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
752
753    s32 = ((uint64_t)m32 * r32) >> 32;
754    d32 = ((uint64_t)s32 * r32) >> 32;
755    u32 = three32 - d32;
756
757    if (N == 64) {
758        /* float64 or smaller */
759
760        r32 = ((uint64_t)r32 * u32) >> 31;
761        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
762
763        s32 = ((uint64_t)m32 * r32) >> 32;
764        d32 = ((uint64_t)s32 * r32) >> 32;
765        u32 = three32 - d32;
766
767        if (fmt->frac_size <= 23) {
768            /* float32 or smaller */
769
770            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
771            s32 = (s32 - 1) >> 6;               /* 9.23 */
772            /* s < sqrt(m) < s + 0x1.08p-23 */
773
774            /* compute nearest rounded result to 2.23 bits */
775            uint32_t d0 = (m32 << 16) - s32 * s32;
776            uint32_t d1 = s32 - d0;
777            uint32_t d2 = d1 + s32 + 1;
778            s32 += d1 >> 31;
779            a->frac_hi = (uint64_t)s32 << (64 - 25);
780
781            /* increment or decrement for inexact */
782            if (d2 != 0) {
783                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
784            }
785            goto done;
786        }
787
788        /* float64 */
789
790        r64 = (uint64_t)r32 * u32 * 2;
791        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
792        mul64To128(m64, r64, &s64, &discard);
793        mul64To128(s64, r64, &d64, &discard);
794        u64 = three64 - d64;
795
796        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
797        s64 = (s64 - 2) >> 9;                  /* 12.52 */
798
799        /* Compute nearest rounded result */
800        uint64_t d0 = (m64 << 42) - s64 * s64;
801        uint64_t d1 = s64 - d0;
802        uint64_t d2 = d1 + s64 + 1;
803        s64 += d1 >> 63;
804        a->frac_hi = s64 << (64 - 54);
805
806        /* increment or decrement for inexact */
807        if (d2 != 0) {
808            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
809        }
810        goto done;
811    }
812
813    r64 = (uint64_t)r32 * u32 * 2;
814    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
815
816    mul64To128(m64, r64, &s64, &discard);
817    mul64To128(s64, r64, &d64, &discard);
818    u64 = three64 - d64;
819    mul64To128(u64, r64, &r64, &discard);
820    r64 <<= 1;
821    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
822
823    mul64To128(m64, r64, &s64, &discard);
824    mul64To128(s64, r64, &d64, &discard);
825    u64 = three64 - d64;
826    mul64To128(u64, r64, &rh, &rl);
827    add128(rh, rl, rh, rl, &rh, &rl);
828    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
829
830    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
831    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
832    sub128(three64, 0, dh, dl, &uh, &ul);
833    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
834    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
835
836    sub128(sh, sl, 0, 4, &sh, &sl);
837    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
838    /* s < sqrt(m) < s + 1ulp */
839
840    /* Compute nearest rounded result */
841    mul64To128(sl, sl, &d0h, &d0l);
842    d0h += 2 * sh * sl;
843    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
844    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
845    add128(sh, sl, 0, 1, &d2h, &d2l);
846    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
847    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
848    shift128Left(sh, sl, 128 - 114, &sh, &sl);
849
850    /* increment or decrement for inexact */
851    if (d2h | d2l) {
852        if ((int64_t)(d1h ^ d2h) < 0) {
853            sub128(sh, sl, 0, 1, &sh, &sl);
854        } else {
855            add128(sh, sl, 0, 1, &sh, &sl);
856        }
857    }
858    a->frac_lo = sl;
859    a->frac_hi = sh;
860
861 done:
862    /* Convert back from base 4 to base 2. */
863    a->exp >>= 1;
864    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
865        frac_add(a, a, a);
866    } else {
867        a->exp += 1;
868    }
869    return;
870
871 d_nan:
872    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
873    parts_default_nan(a, status);
874}
875
876/*
877 * Rounds the floating-point value `a' to an integer, and returns the
878 * result as a floating-point value. The operation is performed
879 * according to the IEC/IEEE Standard for Binary Floating-Point
880 * Arithmetic.
881 *
882 * parts_round_to_int_normal is an internal helper function for
883 * normal numbers only, returning true for inexact but not directly
884 * raising float_flag_inexact.
885 */
886static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
887                                        int scale, int frac_size)
888{
889    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
890    int shift_adj;
891
892    scale = MIN(MAX(scale, -0x10000), 0x10000);
893    a->exp += scale;
894
895    if (a->exp < 0) {
896        bool one;
897
898        /* All fractional */
899        switch (rmode) {
900        case float_round_nearest_even:
901            one = false;
902            if (a->exp == -1) {
903                FloatPartsN tmp;
904                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
905                frac_add(&tmp, a, a);
906                /* Anything remaining means frac > 0.5. */
907                one = !frac_eqz(&tmp);
908            }
909            break;
910        case float_round_ties_away:
911            one = a->exp == -1;
912            break;
913        case float_round_to_zero:
914            one = false;
915            break;
916        case float_round_up:
917            one = !a->sign;
918            break;
919        case float_round_down:
920            one = a->sign;
921            break;
922        case float_round_to_odd:
923            one = true;
924            break;
925        default:
926            g_assert_not_reached();
927        }
928
929        frac_clear(a);
930        a->exp = 0;
931        if (one) {
932            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
933        } else {
934            a->cls = float_class_zero;
935        }
936        return true;
937    }
938
939    if (a->exp >= frac_size) {
940        /* All integral */
941        return false;
942    }
943
944    if (N > 64 && a->exp < N - 64) {
945        /*
946         * Rounding is not in the low word -- shift lsb to bit 2,
947         * which leaves room for sticky and rounding bit.
948         */
949        shift_adj = (N - 1) - (a->exp + 2);
950        frac_shrjam(a, shift_adj);
951        frac_lsb = 1 << 2;
952    } else {
953        shift_adj = 0;
954        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
955    }
956
957    frac_lsbm1 = frac_lsb >> 1;
958    rnd_mask = frac_lsb - 1;
959    rnd_even_mask = rnd_mask | frac_lsb;
960
961    if (!(a->frac_lo & rnd_mask)) {
962        /* Fractional bits already clear, undo the shift above. */
963        frac_shl(a, shift_adj);
964        return false;
965    }
966
967    switch (rmode) {
968    case float_round_nearest_even:
969        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
970        break;
971    case float_round_ties_away:
972        inc = frac_lsbm1;
973        break;
974    case float_round_to_zero:
975        inc = 0;
976        break;
977    case float_round_up:
978        inc = a->sign ? 0 : rnd_mask;
979        break;
980    case float_round_down:
981        inc = a->sign ? rnd_mask : 0;
982        break;
983    case float_round_to_odd:
984        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
985        break;
986    default:
987        g_assert_not_reached();
988    }
989
990    if (shift_adj == 0) {
991        if (frac_addi(a, a, inc)) {
992            frac_shr(a, 1);
993            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
994            a->exp++;
995        }
996        a->frac_lo &= ~rnd_mask;
997    } else {
998        frac_addi(a, a, inc);
999        a->frac_lo &= ~rnd_mask;
1000        /* Be careful shifting back, not to overflow */
1001        frac_shl(a, shift_adj - 1);
1002        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
1003            a->exp++;
1004        } else {
1005            frac_add(a, a, a);
1006        }
1007    }
1008    return true;
1009}
1010
1011static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1012                                 int scale, float_status *s,
1013                                 const FloatFmt *fmt)
1014{
1015    switch (a->cls) {
1016    case float_class_qnan:
1017    case float_class_snan:
1018        parts_return_nan(a, s);
1019        break;
1020    case float_class_zero:
1021    case float_class_inf:
1022        break;
1023    case float_class_normal:
1024        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1025            float_raise(float_flag_inexact, s);
1026        }
1027        break;
1028    default:
1029        g_assert_not_reached();
1030    }
1031}
1032
1033/*
1034 * Returns the result of converting the floating-point value `a' to
1035 * the two's complement integer format. The conversion is performed
1036 * according to the IEC/IEEE Standard for Binary Floating-Point
1037 * Arithmetic---which means in particular that the conversion is
1038 * rounded according to the current rounding mode. If `a' is a NaN,
1039 * the largest positive integer is returned. Otherwise, if the
1040 * conversion overflows, the largest integer with the same sign as `a'
1041 * is returned.
1042 */
1043static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1044                                     int scale, int64_t min, int64_t max,
1045                                     float_status *s)
1046{
1047    int flags = 0;
1048    uint64_t r;
1049
1050    switch (p->cls) {
1051    case float_class_snan:
1052        flags |= float_flag_invalid_snan;
1053        /* fall through */
1054    case float_class_qnan:
1055        flags |= float_flag_invalid;
1056        r = max;
1057        break;
1058
1059    case float_class_inf:
1060        flags = float_flag_invalid | float_flag_invalid_cvti;
1061        r = p->sign ? min : max;
1062        break;
1063
1064    case float_class_zero:
1065        return 0;
1066
1067    case float_class_normal:
1068        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1069        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1070            flags = float_flag_inexact;
1071        }
1072
1073        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1074            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1075        } else {
1076            r = UINT64_MAX;
1077        }
1078        if (p->sign) {
1079            if (r <= -(uint64_t)min) {
1080                r = -r;
1081            } else {
1082                flags = float_flag_invalid | float_flag_invalid_cvti;
1083                r = min;
1084            }
1085        } else if (r > max) {
1086            flags = float_flag_invalid | float_flag_invalid_cvti;
1087            r = max;
1088        }
1089        break;
1090
1091    default:
1092        g_assert_not_reached();
1093    }
1094
1095    float_raise(flags, s);
1096    return r;
1097}
1098
1099/*
1100 *  Returns the result of converting the floating-point value `a' to
1101 *  the unsigned integer format. The conversion is performed according
1102 *  to the IEC/IEEE Standard for Binary Floating-Point
1103 *  Arithmetic---which means in particular that the conversion is
1104 *  rounded according to the current rounding mode. If `a' is a NaN,
1105 *  the largest unsigned integer is returned. Otherwise, if the
1106 *  conversion overflows, the largest unsigned integer is returned. If
1107 *  the 'a' is negative, the result is rounded and zero is returned;
1108 *  values that do not round to zero will raise the inexact exception
1109 *  flag.
1110 */
1111static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1112                                      int scale, uint64_t max, float_status *s)
1113{
1114    int flags = 0;
1115    uint64_t r;
1116
1117    switch (p->cls) {
1118    case float_class_snan:
1119        flags |= float_flag_invalid_snan;
1120        /* fall through */
1121    case float_class_qnan:
1122        flags |= float_flag_invalid;
1123        r = max;
1124        break;
1125
1126    case float_class_inf:
1127        flags = float_flag_invalid | float_flag_invalid_cvti;
1128        r = p->sign ? 0 : max;
1129        break;
1130
1131    case float_class_zero:
1132        return 0;
1133
1134    case float_class_normal:
1135        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1136        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1137            flags = float_flag_inexact;
1138            if (p->cls == float_class_zero) {
1139                r = 0;
1140                break;
1141            }
1142        }
1143
1144        if (p->sign) {
1145            flags = float_flag_invalid | float_flag_invalid_cvti;
1146            r = 0;
1147        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1148            flags = float_flag_invalid | float_flag_invalid_cvti;
1149            r = max;
1150        } else {
1151            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1152            if (r > max) {
1153                flags = float_flag_invalid | float_flag_invalid_cvti;
1154                r = max;
1155            }
1156        }
1157        break;
1158
1159    default:
1160        g_assert_not_reached();
1161    }
1162
1163    float_raise(flags, s);
1164    return r;
1165}
1166
1167/*
1168 * Integer to float conversions
1169 *
1170 * Returns the result of converting the two's complement integer `a'
1171 * to the floating-point format. The conversion is performed according
1172 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1173 */
1174static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1175                                  int scale, float_status *s)
1176{
1177    uint64_t f = a;
1178    int shift;
1179
1180    memset(p, 0, sizeof(*p));
1181
1182    if (a == 0) {
1183        p->cls = float_class_zero;
1184        return;
1185    }
1186
1187    p->cls = float_class_normal;
1188    if (a < 0) {
1189        f = -f;
1190        p->sign = true;
1191    }
1192    shift = clz64(f);
1193    scale = MIN(MAX(scale, -0x10000), 0x10000);
1194
1195    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1196    p->frac_hi = f << shift;
1197}
1198
1199/*
1200 * Unsigned Integer to float conversions
1201 *
1202 * Returns the result of converting the unsigned integer `a' to the
1203 * floating-point format. The conversion is performed according to the
1204 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1205 */
1206static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1207                                  int scale, float_status *status)
1208{
1209    memset(p, 0, sizeof(*p));
1210
1211    if (a == 0) {
1212        p->cls = float_class_zero;
1213    } else {
1214        int shift = clz64(a);
1215        scale = MIN(MAX(scale, -0x10000), 0x10000);
1216        p->cls = float_class_normal;
1217        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1218        p->frac_hi = a << shift;
1219    }
1220}
1221
1222/*
1223 * Float min/max.
1224 */
1225static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1226                                   float_status *s, int flags)
1227{
1228    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1229    int a_exp, b_exp, cmp;
1230
1231    if (unlikely(ab_mask & float_cmask_anynan)) {
1232        /*
1233         * For minNum/maxNum (IEEE 754-2008)
1234         * or minimumNumber/maximumNumber (IEEE 754-2019),
1235         * if one operand is a QNaN, and the other
1236         * operand is numerical, then return numerical argument.
1237         */
1238        if ((flags & (minmax_isnum | minmax_isnumber))
1239            && !(ab_mask & float_cmask_snan)
1240            && (ab_mask & ~float_cmask_qnan)) {
1241            return is_nan(a->cls) ? b : a;
1242        }
1243
1244        /*
1245         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1246         * are removed and replaced with minimum, minimumNumber, maximum
1247         * and maximumNumber.
1248         * minimumNumber/maximumNumber behavior for SNaN is changed to:
1249         *   If both operands are NaNs, a QNaN is returned.
1250         *   If either operand is a SNaN,
1251         *   an invalid operation exception is signaled,
1252         *   but unless both operands are NaNs,
1253         *   the SNaN is otherwise ignored and not converted to a QNaN.
1254         */
1255        if ((flags & minmax_isnumber)
1256            && (ab_mask & float_cmask_snan)
1257            && (ab_mask & ~float_cmask_anynan)) {
1258            float_raise(float_flag_invalid, s);
1259            return is_nan(a->cls) ? b : a;
1260        }
1261
1262        return parts_pick_nan(a, b, s);
1263    }
1264
1265    a_exp = a->exp;
1266    b_exp = b->exp;
1267
1268    if (unlikely(ab_mask != float_cmask_normal)) {
1269        switch (a->cls) {
1270        case float_class_normal:
1271            break;
1272        case float_class_inf:
1273            a_exp = INT16_MAX;
1274            break;
1275        case float_class_zero:
1276            a_exp = INT16_MIN;
1277            break;
1278        default:
1279            g_assert_not_reached();
1280            break;
1281        }
1282        switch (b->cls) {
1283        case float_class_normal:
1284            break;
1285        case float_class_inf:
1286            b_exp = INT16_MAX;
1287            break;
1288        case float_class_zero:
1289            b_exp = INT16_MIN;
1290            break;
1291        default:
1292            g_assert_not_reached();
1293            break;
1294        }
1295    }
1296
1297    /* Compare magnitudes. */
1298    cmp = a_exp - b_exp;
1299    if (cmp == 0) {
1300        cmp = frac_cmp(a, b);
1301    }
1302
1303    /*
1304     * Take the sign into account.
1305     * For ismag, only do this if the magnitudes are equal.
1306     */
1307    if (!(flags & minmax_ismag) || cmp == 0) {
1308        if (a->sign != b->sign) {
1309            /* For differing signs, the negative operand is less. */
1310            cmp = a->sign ? -1 : 1;
1311        } else if (a->sign) {
1312            /* For two negative operands, invert the magnitude comparison. */
1313            cmp = -cmp;
1314        }
1315    }
1316
1317    if (flags & minmax_ismin) {
1318        cmp = -cmp;
1319    }
1320    return cmp < 0 ? b : a;
1321}
1322
1323/*
1324 * Floating point compare
1325 */
1326static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1327                                     float_status *s, bool is_quiet)
1328{
1329    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1330
1331    if (likely(ab_mask == float_cmask_normal)) {
1332        FloatRelation cmp;
1333
1334        if (a->sign != b->sign) {
1335            goto a_sign;
1336        }
1337        if (a->exp == b->exp) {
1338            cmp = frac_cmp(a, b);
1339        } else if (a->exp < b->exp) {
1340            cmp = float_relation_less;
1341        } else {
1342            cmp = float_relation_greater;
1343        }
1344        if (a->sign) {
1345            cmp = -cmp;
1346        }
1347        return cmp;
1348    }
1349
1350    if (unlikely(ab_mask & float_cmask_anynan)) {
1351        if (ab_mask & float_cmask_snan) {
1352            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1353        } else if (!is_quiet) {
1354            float_raise(float_flag_invalid, s);
1355        }
1356        return float_relation_unordered;
1357    }
1358
1359    if (ab_mask & float_cmask_zero) {
1360        if (ab_mask == float_cmask_zero) {
1361            return float_relation_equal;
1362        } else if (a->cls == float_class_zero) {
1363            goto b_sign;
1364        } else {
1365            goto a_sign;
1366        }
1367    }
1368
1369    if (ab_mask == float_cmask_inf) {
1370        if (a->sign == b->sign) {
1371            return float_relation_equal;
1372        }
1373    } else if (b->cls == float_class_inf) {
1374        goto b_sign;
1375    } else {
1376        g_assert(a->cls == float_class_inf);
1377    }
1378
1379 a_sign:
1380    return a->sign ? float_relation_less : float_relation_greater;
1381 b_sign:
1382    return b->sign ? float_relation_greater : float_relation_less;
1383}
1384
1385/*
1386 * Multiply A by 2 raised to the power N.
1387 */
1388static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1389{
1390    switch (a->cls) {
1391    case float_class_snan:
1392    case float_class_qnan:
1393        parts_return_nan(a, s);
1394        break;
1395    case float_class_zero:
1396    case float_class_inf:
1397        break;
1398    case float_class_normal:
1399        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1400        break;
1401    default:
1402        g_assert_not_reached();
1403    }
1404}
1405
1406/*
1407 * Return log2(A)
1408 */
1409static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1410{
1411    uint64_t a0, a1, r, t, ign;
1412    FloatPartsN f;
1413    int i, n, a_exp, f_exp;
1414
1415    if (unlikely(a->cls != float_class_normal)) {
1416        switch (a->cls) {
1417        case float_class_snan:
1418        case float_class_qnan:
1419            parts_return_nan(a, s);
1420            return;
1421        case float_class_zero:
1422            /* log2(0) = -inf */
1423            a->cls = float_class_inf;
1424            a->sign = 1;
1425            return;
1426        case float_class_inf:
1427            if (unlikely(a->sign)) {
1428                goto d_nan;
1429            }
1430            return;
1431        default:
1432            break;
1433        }
1434        g_assert_not_reached();
1435    }
1436    if (unlikely(a->sign)) {
1437        goto d_nan;
1438    }
1439
1440    /* TODO: This algorithm looses bits too quickly for float128. */
1441    g_assert(N == 64);
1442
1443    a_exp = a->exp;
1444    f_exp = -1;
1445
1446    r = 0;
1447    t = DECOMPOSED_IMPLICIT_BIT;
1448    a0 = a->frac_hi;
1449    a1 = 0;
1450
1451    n = fmt->frac_size + 2;
1452    if (unlikely(a_exp == -1)) {
1453        /*
1454         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1455         * When the value is very close to 1.0, there are lots of 1's in
1456         * the msb parts of the fraction.  At the end, when we subtract
1457         * this value from -1.0, we can see a catastrophic loss of precision,
1458         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1459         * bits of y in the final result.  To minimize this, compute as many
1460         * digits as we can.
1461         * ??? This case needs another algorithm to avoid this.
1462         */
1463        n = fmt->frac_size * 2 + 2;
1464        /* Don't compute a value overlapping the sticky bit */
1465        n = MIN(n, 62);
1466    }
1467
1468    for (i = 0; i < n; i++) {
1469        if (a1) {
1470            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1471        } else if (a0 & 0xffffffffull) {
1472            mul64To128(a0, a0, &a0, &a1);
1473        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1474            a0 >>= 32;
1475            a0 *= a0;
1476        } else {
1477            goto exact;
1478        }
1479
1480        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1481            if (unlikely(a_exp == 0 && r == 0)) {
1482                /*
1483                 * When a_exp == 0, we're computing the log2 of a value
1484                 * [1.0,2.0).  When the value is very close to 1.0, there
1485                 * are lots of 0's in the msb parts of the fraction.
1486                 * We need to compute more digits to produce a correct
1487                 * result -- restart at the top of the fraction.
1488                 * ??? This is likely to lose precision quickly, as for
1489                 * float128; we may need another method.
1490                 */
1491                f_exp -= i;
1492                t = r = DECOMPOSED_IMPLICIT_BIT;
1493                i = 0;
1494            } else {
1495                r |= t;
1496            }
1497        } else {
1498            add128(a0, a1, a0, a1, &a0, &a1);
1499        }
1500        t >>= 1;
1501    }
1502
1503    /* Set sticky for inexact. */
1504    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1505
1506 exact:
1507    parts_sint_to_float(a, a_exp, 0, s);
1508    if (r == 0) {
1509        return;
1510    }
1511
1512    memset(&f, 0, sizeof(f));
1513    f.cls = float_class_normal;
1514    f.frac_hi = r;
1515    f.exp = f_exp - frac_normalize(&f);
1516
1517    if (a_exp < 0) {
1518        parts_sub_normal(a, &f);
1519    } else if (a_exp > 0) {
1520        parts_add_normal(a, &f);
1521    } else {
1522        *a = f;
1523    }
1524    return;
1525
1526 d_nan:
1527    float_raise(float_flag_invalid, s);
1528    parts_default_nan(a, s);
1529}
1530