xref: /qemu/fpu/softfloat-parts.c.inc (revision 8da5f1db)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40                                     float_status *s)
41{
42    if (is_snan(a->cls) || is_snan(b->cls)) {
43        float_raise(float_flag_invalid, s);
44    }
45
46    if (s->default_nan_mode) {
47        parts_default_nan(a, s);
48    } else {
49        int cmp = frac_cmp(a, b);
50        if (cmp == 0) {
51            cmp = a->sign < b->sign;
52        }
53
54        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
55            a = b;
56        }
57        if (is_snan(a->cls)) {
58            parts_silence_nan(a, s);
59        }
60    }
61    return a;
62}
63
64static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65                                            FloatPartsN *c, float_status *s,
66                                            int ab_mask, int abc_mask)
67{
68    int which;
69
70    if (unlikely(abc_mask & float_cmask_snan)) {
71        float_raise(float_flag_invalid, s);
72    }
73
74    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75                          ab_mask == float_cmask_infzero, s);
76
77    if (s->default_nan_mode || which == 3) {
78        /*
79         * Note that this check is after pickNaNMulAdd so that function
80         * has an opportunity to set the Invalid flag for infzero.
81         */
82        parts_default_nan(a, s);
83        return a;
84    }
85
86    switch (which) {
87    case 0:
88        break;
89    case 1:
90        a = b;
91        break;
92    case 2:
93        a = c;
94        break;
95    default:
96        g_assert_not_reached();
97    }
98    if (is_snan(a->cls)) {
99        parts_silence_nan(a, s);
100    }
101    return a;
102}
103
104/*
105 * Canonicalize the FloatParts structure.  Determine the class,
106 * unbias the exponent, and normalize the fraction.
107 */
108static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
109                                 const FloatFmt *fmt)
110{
111    if (unlikely(p->exp == 0)) {
112        if (likely(frac_eqz(p))) {
113            p->cls = float_class_zero;
114        } else if (status->flush_inputs_to_zero) {
115            float_raise(float_flag_input_denormal, status);
116            p->cls = float_class_zero;
117            frac_clear(p);
118        } else {
119            int shift = frac_normalize(p);
120            p->cls = float_class_normal;
121            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
122        }
123    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
124        p->cls = float_class_normal;
125        p->exp -= fmt->exp_bias;
126        frac_shl(p, fmt->frac_shift);
127        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
128    } else if (likely(frac_eqz(p))) {
129        p->cls = float_class_inf;
130    } else {
131        frac_shl(p, fmt->frac_shift);
132        p->cls = (parts_is_snan_frac(p->frac_hi, status)
133                  ? float_class_snan : float_class_qnan);
134    }
135}
136
137/*
138 * Round and uncanonicalize a floating-point number by parts. There
139 * are FRAC_SHIFT bits that may require rounding at the bottom of the
140 * fraction; these bits will be removed. The exponent will be biased
141 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
142 */
143static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
144                                   const FloatFmt *fmt)
145{
146    const int exp_max = fmt->exp_max;
147    const int frac_shift = fmt->frac_shift;
148    const uint64_t round_mask = fmt->round_mask;
149    const uint64_t frac_lsb = round_mask + 1;
150    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
151    const uint64_t roundeven_mask = round_mask | frac_lsb;
152    uint64_t inc;
153    bool overflow_norm = false;
154    int exp, flags = 0;
155
156    switch (s->float_rounding_mode) {
157    case float_round_nearest_even:
158        inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
159        break;
160    case float_round_ties_away:
161        inc = frac_lsbm1;
162        break;
163    case float_round_to_zero:
164        overflow_norm = true;
165        inc = 0;
166        break;
167    case float_round_up:
168        inc = p->sign ? 0 : round_mask;
169        overflow_norm = p->sign;
170        break;
171    case float_round_down:
172        inc = p->sign ? round_mask : 0;
173        overflow_norm = !p->sign;
174        break;
175    case float_round_to_odd:
176        overflow_norm = true;
177        /* fall through */
178    case float_round_to_odd_inf:
179        inc = p->frac_lo & frac_lsb ? 0 : round_mask;
180        break;
181    default:
182        g_assert_not_reached();
183    }
184
185    exp = p->exp + fmt->exp_bias;
186    if (likely(exp > 0)) {
187        if (p->frac_lo & round_mask) {
188            flags |= float_flag_inexact;
189            if (frac_addi(p, p, inc)) {
190                frac_shr(p, 1);
191                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
192                exp++;
193            }
194        }
195        frac_shr(p, frac_shift);
196
197        if (fmt->arm_althp) {
198            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
199            if (unlikely(exp > exp_max)) {
200                /* Overflow.  Return the maximum normal.  */
201                flags = float_flag_invalid;
202                exp = exp_max;
203                frac_allones(p);
204            }
205        } else if (unlikely(exp >= exp_max)) {
206            flags |= float_flag_overflow | float_flag_inexact;
207            if (overflow_norm) {
208                exp = exp_max - 1;
209                frac_allones(p);
210            } else {
211                p->cls = float_class_inf;
212                exp = exp_max;
213                frac_clear(p);
214            }
215        }
216    } else if (s->flush_to_zero) {
217        flags |= float_flag_output_denormal;
218        p->cls = float_class_zero;
219        exp = 0;
220        frac_clear(p);
221    } else {
222        bool is_tiny = s->tininess_before_rounding || exp < 0;
223
224        if (!is_tiny) {
225            FloatPartsN discard;
226            is_tiny = !frac_addi(&discard, p, inc);
227        }
228
229        frac_shrjam(p, 1 - exp);
230
231        if (p->frac_lo & round_mask) {
232            /* Need to recompute round-to-even/round-to-odd. */
233            switch (s->float_rounding_mode) {
234            case float_round_nearest_even:
235                inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
236                       ? frac_lsbm1 : 0);
237                break;
238            case float_round_to_odd:
239            case float_round_to_odd_inf:
240                inc = p->frac_lo & frac_lsb ? 0 : round_mask;
241                break;
242            default:
243                break;
244            }
245            flags |= float_flag_inexact;
246            frac_addi(p, p, inc);
247        }
248
249        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
250        frac_shr(p, frac_shift);
251
252        if (is_tiny && (flags & float_flag_inexact)) {
253            flags |= float_flag_underflow;
254        }
255        if (exp == 0 && frac_eqz(p)) {
256            p->cls = float_class_zero;
257        }
258    }
259    p->exp = exp;
260    float_raise(flags, s);
261}
262
263static void partsN(uncanon)(FloatPartsN *p, float_status *s,
264                            const FloatFmt *fmt)
265{
266    if (likely(p->cls == float_class_normal)) {
267        parts_uncanon_normal(p, s, fmt);
268    } else {
269        switch (p->cls) {
270        case float_class_zero:
271            p->exp = 0;
272            frac_clear(p);
273            return;
274        case float_class_inf:
275            g_assert(!fmt->arm_althp);
276            p->exp = fmt->exp_max;
277            frac_clear(p);
278            return;
279        case float_class_qnan:
280        case float_class_snan:
281            g_assert(!fmt->arm_althp);
282            p->exp = fmt->exp_max;
283            frac_shr(p, fmt->frac_shift);
284            return;
285        default:
286            break;
287        }
288        g_assert_not_reached();
289    }
290}
291
292/*
293 * Returns the result of adding or subtracting the values of the
294 * floating-point values `a' and `b'. The operation is performed
295 * according to the IEC/IEEE Standard for Binary Floating-Point
296 * Arithmetic.
297 */
298static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
299                                   float_status *s, bool subtract)
300{
301    bool b_sign = b->sign ^ subtract;
302    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
303
304    if (a->sign != b_sign) {
305        /* Subtraction */
306        if (likely(ab_mask == float_cmask_normal)) {
307            if (parts_sub_normal(a, b)) {
308                return a;
309            }
310            /* Subtract was exact, fall through to set sign. */
311            ab_mask = float_cmask_zero;
312        }
313
314        if (ab_mask == float_cmask_zero) {
315            a->sign = s->float_rounding_mode == float_round_down;
316            return a;
317        }
318
319        if (unlikely(ab_mask & float_cmask_anynan)) {
320            goto p_nan;
321        }
322
323        if (ab_mask & float_cmask_inf) {
324            if (a->cls != float_class_inf) {
325                /* N - Inf */
326                goto return_b;
327            }
328            if (b->cls != float_class_inf) {
329                /* Inf - N */
330                return a;
331            }
332            /* Inf - Inf */
333            float_raise(float_flag_invalid, s);
334            parts_default_nan(a, s);
335            return a;
336        }
337    } else {
338        /* Addition */
339        if (likely(ab_mask == float_cmask_normal)) {
340            parts_add_normal(a, b);
341            return a;
342        }
343
344        if (ab_mask == float_cmask_zero) {
345            return a;
346        }
347
348        if (unlikely(ab_mask & float_cmask_anynan)) {
349            goto p_nan;
350        }
351
352        if (ab_mask & float_cmask_inf) {
353            a->cls = float_class_inf;
354            return a;
355        }
356    }
357
358    if (b->cls == float_class_zero) {
359        g_assert(a->cls == float_class_normal);
360        return a;
361    }
362
363    g_assert(a->cls == float_class_zero);
364    g_assert(b->cls == float_class_normal);
365 return_b:
366    b->sign = b_sign;
367    return b;
368
369 p_nan:
370    return parts_pick_nan(a, b, s);
371}
372
373/*
374 * Returns the result of multiplying the floating-point values `a' and
375 * `b'. The operation is performed according to the IEC/IEEE Standard
376 * for Binary Floating-Point Arithmetic.
377 */
378static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
379                                float_status *s)
380{
381    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
382    bool sign = a->sign ^ b->sign;
383
384    if (likely(ab_mask == float_cmask_normal)) {
385        FloatPartsW tmp;
386
387        frac_mulw(&tmp, a, b);
388        frac_truncjam(a, &tmp);
389
390        a->exp += b->exp + 1;
391        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
392            frac_add(a, a, a);
393            a->exp -= 1;
394        }
395
396        a->sign = sign;
397        return a;
398    }
399
400    /* Inf * Zero == NaN */
401    if (unlikely(ab_mask == float_cmask_infzero)) {
402        float_raise(float_flag_invalid, s);
403        parts_default_nan(a, s);
404        return a;
405    }
406
407    if (unlikely(ab_mask & float_cmask_anynan)) {
408        return parts_pick_nan(a, b, s);
409    }
410
411    /* Multiply by 0 or Inf */
412    if (ab_mask & float_cmask_inf) {
413        a->cls = float_class_inf;
414        a->sign = sign;
415        return a;
416    }
417
418    g_assert(ab_mask & float_cmask_zero);
419    a->cls = float_class_zero;
420    a->sign = sign;
421    return a;
422}
423
424/*
425 * Returns the result of multiplying the floating-point values `a' and
426 * `b' then adding 'c', with no intermediate rounding step after the
427 * multiplication. The operation is performed according to the
428 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
429 * The flags argument allows the caller to select negation of the
430 * addend, the intermediate product, or the final result. (The
431 * difference between this and having the caller do a separate
432 * negation is that negating externally will flip the sign bit on NaNs.)
433 *
434 * Requires A and C extracted into a double-sized structure to provide the
435 * extra space for the widening multiply.
436 */
437static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
438                                   FloatPartsN *c, int flags, float_status *s)
439{
440    int ab_mask, abc_mask;
441    FloatPartsW p_widen, c_widen;
442
443    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
444    abc_mask = float_cmask(c->cls) | ab_mask;
445
446    /*
447     * It is implementation-defined whether the cases of (0,inf,qnan)
448     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
449     * they return if they do), so we have to hand this information
450     * off to the target-specific pick-a-NaN routine.
451     */
452    if (unlikely(abc_mask & float_cmask_anynan)) {
453        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
454    }
455
456    if (flags & float_muladd_negate_c) {
457        c->sign ^= 1;
458    }
459
460    /* Compute the sign of the product into A. */
461    a->sign ^= b->sign;
462    if (flags & float_muladd_negate_product) {
463        a->sign ^= 1;
464    }
465
466    if (unlikely(ab_mask != float_cmask_normal)) {
467        if (unlikely(ab_mask == float_cmask_infzero)) {
468            goto d_nan;
469        }
470
471        if (ab_mask & float_cmask_inf) {
472            if (c->cls == float_class_inf && a->sign != c->sign) {
473                goto d_nan;
474            }
475            goto return_inf;
476        }
477
478        g_assert(ab_mask & float_cmask_zero);
479        if (c->cls == float_class_normal) {
480            *a = *c;
481            goto return_normal;
482        }
483        if (c->cls == float_class_zero) {
484            if (a->sign != c->sign) {
485                goto return_sub_zero;
486            }
487            goto return_zero;
488        }
489        g_assert(c->cls == float_class_inf);
490    }
491
492    if (unlikely(c->cls == float_class_inf)) {
493        a->sign = c->sign;
494        goto return_inf;
495    }
496
497    /* Perform the multiplication step. */
498    p_widen.sign = a->sign;
499    p_widen.exp = a->exp + b->exp + 1;
500    frac_mulw(&p_widen, a, b);
501    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
502        frac_add(&p_widen, &p_widen, &p_widen);
503        p_widen.exp -= 1;
504    }
505
506    /* Perform the addition step. */
507    if (c->cls != float_class_zero) {
508        /* Zero-extend C to less significant bits. */
509        frac_widen(&c_widen, c);
510        c_widen.exp = c->exp;
511
512        if (a->sign == c->sign) {
513            parts_add_normal(&p_widen, &c_widen);
514        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
515            goto return_sub_zero;
516        }
517    }
518
519    /* Narrow with sticky bit, for proper rounding later. */
520    frac_truncjam(a, &p_widen);
521    a->sign = p_widen.sign;
522    a->exp = p_widen.exp;
523
524 return_normal:
525    if (flags & float_muladd_halve_result) {
526        a->exp -= 1;
527    }
528 finish_sign:
529    if (flags & float_muladd_negate_result) {
530        a->sign ^= 1;
531    }
532    return a;
533
534 return_sub_zero:
535    a->sign = s->float_rounding_mode == float_round_down;
536 return_zero:
537    a->cls = float_class_zero;
538    goto finish_sign;
539
540 return_inf:
541    a->cls = float_class_inf;
542    goto finish_sign;
543
544 d_nan:
545    float_raise(float_flag_invalid, s);
546    parts_default_nan(a, s);
547    return a;
548}
549
550/*
551 * Returns the result of dividing the floating-point value `a' by the
552 * corresponding value `b'. The operation is performed according to
553 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
554 */
555static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
556                                float_status *s)
557{
558    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
559    bool sign = a->sign ^ b->sign;
560
561    if (likely(ab_mask == float_cmask_normal)) {
562        a->sign = sign;
563        a->exp -= b->exp + frac_div(a, b);
564        return a;
565    }
566
567    /* 0/0 or Inf/Inf => NaN */
568    if (unlikely(ab_mask == float_cmask_zero) ||
569        unlikely(ab_mask == float_cmask_inf)) {
570        float_raise(float_flag_invalid, s);
571        parts_default_nan(a, s);
572        return a;
573    }
574
575    /* All the NaN cases */
576    if (unlikely(ab_mask & float_cmask_anynan)) {
577        return parts_pick_nan(a, b, s);
578    }
579
580    a->sign = sign;
581
582    /* Inf / X */
583    if (a->cls == float_class_inf) {
584        return a;
585    }
586
587    /* 0 / X */
588    if (a->cls == float_class_zero) {
589        return a;
590    }
591
592    /* X / Inf */
593    if (b->cls == float_class_inf) {
594        a->cls = float_class_zero;
595        return a;
596    }
597
598    /* X / 0 => Inf */
599    g_assert(b->cls == float_class_zero);
600    float_raise(float_flag_divbyzero, s);
601    a->cls = float_class_inf;
602    return a;
603}
604
605/*
606 * Square Root
607 *
608 * The base algorithm is lifted from
609 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
610 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
611 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
612 * and is thus MIT licenced.
613 */
614static void partsN(sqrt)(FloatPartsN *a, float_status *status,
615                         const FloatFmt *fmt)
616{
617    const uint32_t three32 = 3u << 30;
618    const uint64_t three64 = 3ull << 62;
619    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
620    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
621    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
622    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
623    uint64_t discard;
624    bool exp_odd;
625    size_t index;
626
627    if (unlikely(a->cls != float_class_normal)) {
628        switch (a->cls) {
629        case float_class_snan:
630        case float_class_qnan:
631            parts_return_nan(a, status);
632            return;
633        case float_class_zero:
634            return;
635        case float_class_inf:
636            if (unlikely(a->sign)) {
637                goto d_nan;
638            }
639            return;
640        default:
641            g_assert_not_reached();
642        }
643    }
644
645    if (unlikely(a->sign)) {
646        goto d_nan;
647    }
648
649    /*
650     * Argument reduction.
651     * x = 4^e frac; with integer e, and frac in [1, 4)
652     * m = frac fixed point at bit 62, since we're in base 4.
653     * If base-2 exponent is odd, exchange that for multiply by 2,
654     * which results in no shift.
655     */
656    exp_odd = a->exp & 1;
657    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
658    if (!exp_odd) {
659        frac_shr(a, 1);
660    }
661
662    /*
663     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
664     *
665     * Initial estimate:
666     * 7-bit lookup table (1-bit exponent and 6-bit significand).
667     *
668     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
669     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
670     * a table lookup is faster and needs one less iteration.
671     * The 7-bit table gives |e| < 0x1.fdp-9.
672     *
673     * A Newton-Raphson iteration for r is
674     *   s = m*r
675     *   d = s*r
676     *   u = 3 - d
677     *   r = r*u/2
678     *
679     * Fixed point representations:
680     *   m, s, d, u, three are all 2.30; r is 0.32
681     */
682    m64 = a->frac_hi;
683    m32 = m64 >> 32;
684
685    r32 = rsqrt_tab[index] << 16;
686    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
687
688    s32 = ((uint64_t)m32 * r32) >> 32;
689    d32 = ((uint64_t)s32 * r32) >> 32;
690    u32 = three32 - d32;
691
692    if (N == 64) {
693        /* float64 or smaller */
694
695        r32 = ((uint64_t)r32 * u32) >> 31;
696        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
697
698        s32 = ((uint64_t)m32 * r32) >> 32;
699        d32 = ((uint64_t)s32 * r32) >> 32;
700        u32 = three32 - d32;
701
702        if (fmt->frac_size <= 23) {
703            /* float32 or smaller */
704
705            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
706            s32 = (s32 - 1) >> 6;               /* 9.23 */
707            /* s < sqrt(m) < s + 0x1.08p-23 */
708
709            /* compute nearest rounded result to 2.23 bits */
710            uint32_t d0 = (m32 << 16) - s32 * s32;
711            uint32_t d1 = s32 - d0;
712            uint32_t d2 = d1 + s32 + 1;
713            s32 += d1 >> 31;
714            a->frac_hi = (uint64_t)s32 << (64 - 25);
715
716            /* increment or decrement for inexact */
717            if (d2 != 0) {
718                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
719            }
720            goto done;
721        }
722
723        /* float64 */
724
725        r64 = (uint64_t)r32 * u32 * 2;
726        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
727        mul64To128(m64, r64, &s64, &discard);
728        mul64To128(s64, r64, &d64, &discard);
729        u64 = three64 - d64;
730
731        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
732        s64 = (s64 - 2) >> 9;                  /* 12.52 */
733
734        /* Compute nearest rounded result */
735        uint64_t d0 = (m64 << 42) - s64 * s64;
736        uint64_t d1 = s64 - d0;
737        uint64_t d2 = d1 + s64 + 1;
738        s64 += d1 >> 63;
739        a->frac_hi = s64 << (64 - 54);
740
741        /* increment or decrement for inexact */
742        if (d2 != 0) {
743            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
744        }
745        goto done;
746    }
747
748    r64 = (uint64_t)r32 * u32 * 2;
749    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
750
751    mul64To128(m64, r64, &s64, &discard);
752    mul64To128(s64, r64, &d64, &discard);
753    u64 = three64 - d64;
754    mul64To128(u64, r64, &r64, &discard);
755    r64 <<= 1;
756    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
757
758    mul64To128(m64, r64, &s64, &discard);
759    mul64To128(s64, r64, &d64, &discard);
760    u64 = three64 - d64;
761    mul64To128(u64, r64, &rh, &rl);
762    add128(rh, rl, rh, rl, &rh, &rl);
763    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
764
765    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
766    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
767    sub128(three64, 0, dh, dl, &uh, &ul);
768    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
769    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
770
771    sub128(sh, sl, 0, 4, &sh, &sl);
772    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
773    /* s < sqrt(m) < s + 1ulp */
774
775    /* Compute nearest rounded result */
776    mul64To128(sl, sl, &d0h, &d0l);
777    d0h += 2 * sh * sl;
778    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
779    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
780    add128(sh, sl, 0, 1, &d2h, &d2l);
781    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
782    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
783    shift128Left(sh, sl, 128 - 114, &sh, &sl);
784
785    /* increment or decrement for inexact */
786    if (d2h | d2l) {
787        if ((int64_t)(d1h ^ d2h) < 0) {
788            sub128(sh, sl, 0, 1, &sh, &sl);
789        } else {
790            add128(sh, sl, 0, 1, &sh, &sl);
791        }
792    }
793    a->frac_lo = sl;
794    a->frac_hi = sh;
795
796 done:
797    /* Convert back from base 4 to base 2. */
798    a->exp >>= 1;
799    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
800        frac_add(a, a, a);
801    } else {
802        a->exp += 1;
803    }
804    return;
805
806 d_nan:
807    float_raise(float_flag_invalid, status);
808    parts_default_nan(a, status);
809}
810
811/*
812 * Rounds the floating-point value `a' to an integer, and returns the
813 * result as a floating-point value. The operation is performed
814 * according to the IEC/IEEE Standard for Binary Floating-Point
815 * Arithmetic.
816 *
817 * parts_round_to_int_normal is an internal helper function for
818 * normal numbers only, returning true for inexact but not directly
819 * raising float_flag_inexact.
820 */
821static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
822                                        int scale, int frac_size)
823{
824    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
825    int shift_adj;
826
827    scale = MIN(MAX(scale, -0x10000), 0x10000);
828    a->exp += scale;
829
830    if (a->exp < 0) {
831        bool one;
832
833        /* All fractional */
834        switch (rmode) {
835        case float_round_nearest_even:
836            one = false;
837            if (a->exp == -1) {
838                FloatPartsN tmp;
839                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
840                frac_add(&tmp, a, a);
841                /* Anything remaining means frac > 0.5. */
842                one = !frac_eqz(&tmp);
843            }
844            break;
845        case float_round_ties_away:
846            one = a->exp == -1;
847            break;
848        case float_round_to_zero:
849            one = false;
850            break;
851        case float_round_up:
852            one = !a->sign;
853            break;
854        case float_round_down:
855            one = a->sign;
856            break;
857        case float_round_to_odd:
858            one = true;
859            break;
860        default:
861            g_assert_not_reached();
862        }
863
864        frac_clear(a);
865        a->exp = 0;
866        if (one) {
867            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
868        } else {
869            a->cls = float_class_zero;
870        }
871        return true;
872    }
873
874    if (a->exp >= frac_size) {
875        /* All integral */
876        return false;
877    }
878
879    if (N > 64 && a->exp < N - 64) {
880        /*
881         * Rounding is not in the low word -- shift lsb to bit 2,
882         * which leaves room for sticky and rounding bit.
883         */
884        shift_adj = (N - 1) - (a->exp + 2);
885        frac_shrjam(a, shift_adj);
886        frac_lsb = 1 << 2;
887    } else {
888        shift_adj = 0;
889        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
890    }
891
892    frac_lsbm1 = frac_lsb >> 1;
893    rnd_mask = frac_lsb - 1;
894    rnd_even_mask = rnd_mask | frac_lsb;
895
896    if (!(a->frac_lo & rnd_mask)) {
897        /* Fractional bits already clear, undo the shift above. */
898        frac_shl(a, shift_adj);
899        return false;
900    }
901
902    switch (rmode) {
903    case float_round_nearest_even:
904        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
905        break;
906    case float_round_ties_away:
907        inc = frac_lsbm1;
908        break;
909    case float_round_to_zero:
910        inc = 0;
911        break;
912    case float_round_up:
913        inc = a->sign ? 0 : rnd_mask;
914        break;
915    case float_round_down:
916        inc = a->sign ? rnd_mask : 0;
917        break;
918    case float_round_to_odd:
919        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
920        break;
921    default:
922        g_assert_not_reached();
923    }
924
925    if (shift_adj == 0) {
926        if (frac_addi(a, a, inc)) {
927            frac_shr(a, 1);
928            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
929            a->exp++;
930        }
931        a->frac_lo &= ~rnd_mask;
932    } else {
933        frac_addi(a, a, inc);
934        a->frac_lo &= ~rnd_mask;
935        /* Be careful shifting back, not to overflow */
936        frac_shl(a, shift_adj - 1);
937        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
938            a->exp++;
939        } else {
940            frac_add(a, a, a);
941        }
942    }
943    return true;
944}
945
946static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
947                                 int scale, float_status *s,
948                                 const FloatFmt *fmt)
949{
950    switch (a->cls) {
951    case float_class_qnan:
952    case float_class_snan:
953        parts_return_nan(a, s);
954        break;
955    case float_class_zero:
956    case float_class_inf:
957        break;
958    case float_class_normal:
959        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
960            float_raise(float_flag_inexact, s);
961        }
962        break;
963    default:
964        g_assert_not_reached();
965    }
966}
967
968/*
969 * Returns the result of converting the floating-point value `a' to
970 * the two's complement integer format. The conversion is performed
971 * according to the IEC/IEEE Standard for Binary Floating-Point
972 * Arithmetic---which means in particular that the conversion is
973 * rounded according to the current rounding mode. If `a' is a NaN,
974 * the largest positive integer is returned. Otherwise, if the
975 * conversion overflows, the largest integer with the same sign as `a'
976 * is returned.
977 */
978static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
979                                     int scale, int64_t min, int64_t max,
980                                     float_status *s)
981{
982    int flags = 0;
983    uint64_t r;
984
985    switch (p->cls) {
986    case float_class_snan:
987    case float_class_qnan:
988        flags = float_flag_invalid;
989        r = max;
990        break;
991
992    case float_class_inf:
993        flags = float_flag_invalid;
994        r = p->sign ? min : max;
995        break;
996
997    case float_class_zero:
998        return 0;
999
1000    case float_class_normal:
1001        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1002        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1003            flags = float_flag_inexact;
1004        }
1005
1006        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1007            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1008        } else {
1009            r = UINT64_MAX;
1010        }
1011        if (p->sign) {
1012            if (r <= -(uint64_t)min) {
1013                r = -r;
1014            } else {
1015                flags = float_flag_invalid;
1016                r = min;
1017            }
1018        } else if (r > max) {
1019            flags = float_flag_invalid;
1020            r = max;
1021        }
1022        break;
1023
1024    default:
1025        g_assert_not_reached();
1026    }
1027
1028    float_raise(flags, s);
1029    return r;
1030}
1031
1032/*
1033 *  Returns the result of converting the floating-point value `a' to
1034 *  the unsigned integer format. The conversion is performed according
1035 *  to the IEC/IEEE Standard for Binary Floating-Point
1036 *  Arithmetic---which means in particular that the conversion is
1037 *  rounded according to the current rounding mode. If `a' is a NaN,
1038 *  the largest unsigned integer is returned. Otherwise, if the
1039 *  conversion overflows, the largest unsigned integer is returned. If
1040 *  the 'a' is negative, the result is rounded and zero is returned;
1041 *  values that do not round to zero will raise the inexact exception
1042 *  flag.
1043 */
1044static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1045                                      int scale, uint64_t max, float_status *s)
1046{
1047    int flags = 0;
1048    uint64_t r;
1049
1050    switch (p->cls) {
1051    case float_class_snan:
1052    case float_class_qnan:
1053        flags = float_flag_invalid;
1054        r = max;
1055        break;
1056
1057    case float_class_inf:
1058        flags = float_flag_invalid;
1059        r = p->sign ? 0 : max;
1060        break;
1061
1062    case float_class_zero:
1063        return 0;
1064
1065    case float_class_normal:
1066        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1067        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1068            flags = float_flag_inexact;
1069            if (p->cls == float_class_zero) {
1070                r = 0;
1071                break;
1072            }
1073        }
1074
1075        if (p->sign) {
1076            flags = float_flag_invalid;
1077            r = 0;
1078        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1079            flags = float_flag_invalid;
1080            r = max;
1081        } else {
1082            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1083            if (r > max) {
1084                flags = float_flag_invalid;
1085                r = max;
1086            }
1087        }
1088        break;
1089
1090    default:
1091        g_assert_not_reached();
1092    }
1093
1094    float_raise(flags, s);
1095    return r;
1096}
1097
1098/*
1099 * Integer to float conversions
1100 *
1101 * Returns the result of converting the two's complement integer `a'
1102 * to the floating-point format. The conversion is performed according
1103 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1104 */
1105static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1106                                  int scale, float_status *s)
1107{
1108    uint64_t f = a;
1109    int shift;
1110
1111    memset(p, 0, sizeof(*p));
1112
1113    if (a == 0) {
1114        p->cls = float_class_zero;
1115        return;
1116    }
1117
1118    p->cls = float_class_normal;
1119    if (a < 0) {
1120        f = -f;
1121        p->sign = true;
1122    }
1123    shift = clz64(f);
1124    scale = MIN(MAX(scale, -0x10000), 0x10000);
1125
1126    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1127    p->frac_hi = f << shift;
1128}
1129
1130/*
1131 * Unsigned Integer to float conversions
1132 *
1133 * Returns the result of converting the unsigned integer `a' to the
1134 * floating-point format. The conversion is performed according to the
1135 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1136 */
1137static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1138                                  int scale, float_status *status)
1139{
1140    memset(p, 0, sizeof(*p));
1141
1142    if (a == 0) {
1143        p->cls = float_class_zero;
1144    } else {
1145        int shift = clz64(a);
1146        scale = MIN(MAX(scale, -0x10000), 0x10000);
1147        p->cls = float_class_normal;
1148        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1149        p->frac_hi = a << shift;
1150    }
1151}
1152
1153/*
1154 * Float min/max.
1155 */
1156static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1157                                   float_status *s, int flags)
1158{
1159    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1160    int a_exp, b_exp, cmp;
1161
1162    if (unlikely(ab_mask & float_cmask_anynan)) {
1163        /*
1164         * For minnum/maxnum, if one operand is a QNaN, and the other
1165         * operand is numerical, then return numerical argument.
1166         */
1167        if ((flags & minmax_isnum)
1168            && !(ab_mask & float_cmask_snan)
1169            && (ab_mask & ~float_cmask_qnan)) {
1170            return is_nan(a->cls) ? b : a;
1171        }
1172        return parts_pick_nan(a, b, s);
1173    }
1174
1175    a_exp = a->exp;
1176    b_exp = b->exp;
1177
1178    if (unlikely(ab_mask != float_cmask_normal)) {
1179        switch (a->cls) {
1180        case float_class_normal:
1181            break;
1182        case float_class_inf:
1183            a_exp = INT16_MAX;
1184            break;
1185        case float_class_zero:
1186            a_exp = INT16_MIN;
1187            break;
1188        default:
1189            g_assert_not_reached();
1190            break;
1191        }
1192        switch (b->cls) {
1193        case float_class_normal:
1194            break;
1195        case float_class_inf:
1196            b_exp = INT16_MAX;
1197            break;
1198        case float_class_zero:
1199            b_exp = INT16_MIN;
1200            break;
1201        default:
1202            g_assert_not_reached();
1203            break;
1204        }
1205    }
1206
1207    /* Compare magnitudes. */
1208    cmp = a_exp - b_exp;
1209    if (cmp == 0) {
1210        cmp = frac_cmp(a, b);
1211    }
1212
1213    /*
1214     * Take the sign into account.
1215     * For ismag, only do this if the magnitudes are equal.
1216     */
1217    if (!(flags & minmax_ismag) || cmp == 0) {
1218        if (a->sign != b->sign) {
1219            /* For differing signs, the negative operand is less. */
1220            cmp = a->sign ? -1 : 1;
1221        } else if (a->sign) {
1222            /* For two negative operands, invert the magnitude comparison. */
1223            cmp = -cmp;
1224        }
1225    }
1226
1227    if (flags & minmax_ismin) {
1228        cmp = -cmp;
1229    }
1230    return cmp < 0 ? b : a;
1231}
1232
1233/*
1234 * Floating point compare
1235 */
1236static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1237                                     float_status *s, bool is_quiet)
1238{
1239    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1240    int cmp;
1241
1242    if (likely(ab_mask == float_cmask_normal)) {
1243        if (a->sign != b->sign) {
1244            goto a_sign;
1245        }
1246        if (a->exp != b->exp) {
1247            cmp = a->exp < b->exp ? -1 : 1;
1248        } else {
1249            cmp = frac_cmp(a, b);
1250        }
1251        if (a->sign) {
1252            cmp = -cmp;
1253        }
1254        return cmp;
1255    }
1256
1257    if (unlikely(ab_mask & float_cmask_anynan)) {
1258        if (!is_quiet || (ab_mask & float_cmask_snan)) {
1259            float_raise(float_flag_invalid, s);
1260        }
1261        return float_relation_unordered;
1262    }
1263
1264    if (ab_mask & float_cmask_zero) {
1265        if (ab_mask == float_cmask_zero) {
1266            return float_relation_equal;
1267        } else if (a->cls == float_class_zero) {
1268            goto b_sign;
1269        } else {
1270            goto a_sign;
1271        }
1272    }
1273
1274    if (ab_mask == float_cmask_inf) {
1275        if (a->sign == b->sign) {
1276            return float_relation_equal;
1277        }
1278    } else if (b->cls == float_class_inf) {
1279        goto b_sign;
1280    } else {
1281        g_assert(a->cls == float_class_inf);
1282    }
1283
1284 a_sign:
1285    return a->sign ? float_relation_less : float_relation_greater;
1286 b_sign:
1287    return b->sign ? float_relation_greater : float_relation_less;
1288}
1289
1290/*
1291 * Multiply A by 2 raised to the power N.
1292 */
1293static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1294{
1295    switch (a->cls) {
1296    case float_class_snan:
1297    case float_class_qnan:
1298        parts_return_nan(a, s);
1299        break;
1300    case float_class_zero:
1301    case float_class_inf:
1302        break;
1303    case float_class_normal:
1304        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1305        break;
1306    default:
1307        g_assert_not_reached();
1308    }
1309}
1310