xref: /qemu/fpu/softfloat-parts.c.inc (revision 72246065)
17c45bad8SRichard Henderson/*
27c45bad8SRichard Henderson * QEMU float support
37c45bad8SRichard Henderson *
47c45bad8SRichard Henderson * The code in this source file is derived from release 2a of the SoftFloat
57c45bad8SRichard Henderson * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
67c45bad8SRichard Henderson * some later contributions) are provided under that license, as detailed below.
77c45bad8SRichard Henderson * It has subsequently been modified by contributors to the QEMU Project,
87c45bad8SRichard Henderson * so some portions are provided under:
97c45bad8SRichard Henderson *  the SoftFloat-2a license
107c45bad8SRichard Henderson *  the BSD license
117c45bad8SRichard Henderson *  GPL-v2-or-later
127c45bad8SRichard Henderson *
137c45bad8SRichard Henderson * Any future contributions to this file after December 1st 2014 will be
147c45bad8SRichard Henderson * taken to be licensed under the Softfloat-2a license unless specifically
157c45bad8SRichard Henderson * indicated otherwise.
167c45bad8SRichard Henderson */
177c45bad8SRichard Henderson
187c45bad8SRichard Hendersonstatic void partsN(return_nan)(FloatPartsN *a, float_status *s)
197c45bad8SRichard Henderson{
207c45bad8SRichard Henderson    switch (a->cls) {
217c45bad8SRichard Henderson    case float_class_snan:
22e706d445SRichard Henderson        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
237c45bad8SRichard Henderson        if (s->default_nan_mode) {
247c45bad8SRichard Henderson            parts_default_nan(a, s);
257c45bad8SRichard Henderson        } else {
267c45bad8SRichard Henderson            parts_silence_nan(a, s);
277c45bad8SRichard Henderson        }
287c45bad8SRichard Henderson        break;
297c45bad8SRichard Henderson    case float_class_qnan:
307c45bad8SRichard Henderson        if (s->default_nan_mode) {
317c45bad8SRichard Henderson            parts_default_nan(a, s);
327c45bad8SRichard Henderson        }
337c45bad8SRichard Henderson        break;
347c45bad8SRichard Henderson    default:
357c45bad8SRichard Henderson        g_assert_not_reached();
367c45bad8SRichard Henderson    }
377c45bad8SRichard Henderson}
3822c355f4SRichard Henderson
3922c355f4SRichard Hendersonstatic FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
4022c355f4SRichard Henderson                                     float_status *s)
4122c355f4SRichard Henderson{
4222c355f4SRichard Henderson    if (is_snan(a->cls) || is_snan(b->cls)) {
43e706d445SRichard Henderson        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
4422c355f4SRichard Henderson    }
4522c355f4SRichard Henderson
4622c355f4SRichard Henderson    if (s->default_nan_mode) {
4722c355f4SRichard Henderson        parts_default_nan(a, s);
4822c355f4SRichard Henderson    } else {
4922c355f4SRichard Henderson        int cmp = frac_cmp(a, b);
5022c355f4SRichard Henderson        if (cmp == 0) {
5122c355f4SRichard Henderson            cmp = a->sign < b->sign;
5222c355f4SRichard Henderson        }
5322c355f4SRichard Henderson
5422c355f4SRichard Henderson        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
5522c355f4SRichard Henderson            a = b;
5622c355f4SRichard Henderson        }
5722c355f4SRichard Henderson        if (is_snan(a->cls)) {
5822c355f4SRichard Henderson            parts_silence_nan(a, s);
5922c355f4SRichard Henderson        }
6022c355f4SRichard Henderson    }
6122c355f4SRichard Henderson    return a;
6222c355f4SRichard Henderson}
63979582d0SRichard Henderson
64979582d0SRichard Hendersonstatic FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65979582d0SRichard Henderson                                            FloatPartsN *c, float_status *s,
66979582d0SRichard Henderson                                            int ab_mask, int abc_mask)
67979582d0SRichard Henderson{
68979582d0SRichard Henderson    int which;
69979582d0SRichard Henderson
70979582d0SRichard Henderson    if (unlikely(abc_mask & float_cmask_snan)) {
71e706d445SRichard Henderson        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
72979582d0SRichard Henderson    }
73979582d0SRichard Henderson
74979582d0SRichard Henderson    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75979582d0SRichard Henderson                          ab_mask == float_cmask_infzero, s);
76979582d0SRichard Henderson
77979582d0SRichard Henderson    if (s->default_nan_mode || which == 3) {
78979582d0SRichard Henderson        /*
79979582d0SRichard Henderson         * Note that this check is after pickNaNMulAdd so that function
80979582d0SRichard Henderson         * has an opportunity to set the Invalid flag for infzero.
81979582d0SRichard Henderson         */
82979582d0SRichard Henderson        parts_default_nan(a, s);
83979582d0SRichard Henderson        return a;
84979582d0SRichard Henderson    }
85979582d0SRichard Henderson
86979582d0SRichard Henderson    switch (which) {
87979582d0SRichard Henderson    case 0:
88979582d0SRichard Henderson        break;
89979582d0SRichard Henderson    case 1:
90979582d0SRichard Henderson        a = b;
91979582d0SRichard Henderson        break;
92979582d0SRichard Henderson    case 2:
93979582d0SRichard Henderson        a = c;
94979582d0SRichard Henderson        break;
95979582d0SRichard Henderson    default:
96979582d0SRichard Henderson        g_assert_not_reached();
97979582d0SRichard Henderson    }
98979582d0SRichard Henderson    if (is_snan(a->cls)) {
99979582d0SRichard Henderson        parts_silence_nan(a, s);
100979582d0SRichard Henderson    }
101979582d0SRichard Henderson    return a;
102979582d0SRichard Henderson}
103d46975bcSRichard Henderson
104d46975bcSRichard Henderson/*
105d46975bcSRichard Henderson * Canonicalize the FloatParts structure.  Determine the class,
106d46975bcSRichard Henderson * unbias the exponent, and normalize the fraction.
107d46975bcSRichard Henderson */
108d46975bcSRichard Hendersonstatic void partsN(canonicalize)(FloatPartsN *p, float_status *status,
109d46975bcSRichard Henderson                                 const FloatFmt *fmt)
110d46975bcSRichard Henderson{
111d46975bcSRichard Henderson    if (unlikely(p->exp == 0)) {
112d46975bcSRichard Henderson        if (likely(frac_eqz(p))) {
113d46975bcSRichard Henderson            p->cls = float_class_zero;
114d46975bcSRichard Henderson        } else if (status->flush_inputs_to_zero) {
115d46975bcSRichard Henderson            float_raise(float_flag_input_denormal, status);
116d46975bcSRichard Henderson            p->cls = float_class_zero;
117d46975bcSRichard Henderson            frac_clear(p);
118d46975bcSRichard Henderson        } else {
119d46975bcSRichard Henderson            int shift = frac_normalize(p);
120d46975bcSRichard Henderson            p->cls = float_class_normal;
12172246065SRichard Henderson            p->exp = fmt->frac_shift - fmt->exp_bias
12272246065SRichard Henderson                   - shift + !fmt->m68k_denormal;
123d46975bcSRichard Henderson        }
124d46975bcSRichard Henderson    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
125d46975bcSRichard Henderson        p->cls = float_class_normal;
126d46975bcSRichard Henderson        p->exp -= fmt->exp_bias;
127d46975bcSRichard Henderson        frac_shl(p, fmt->frac_shift);
128d46975bcSRichard Henderson        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
129d46975bcSRichard Henderson    } else if (likely(frac_eqz(p))) {
130d46975bcSRichard Henderson        p->cls = float_class_inf;
131d46975bcSRichard Henderson    } else {
132d46975bcSRichard Henderson        frac_shl(p, fmt->frac_shift);
133d46975bcSRichard Henderson        p->cls = (parts_is_snan_frac(p->frac_hi, status)
134d46975bcSRichard Henderson                  ? float_class_snan : float_class_qnan);
135d46975bcSRichard Henderson    }
136d46975bcSRichard Henderson}
137ee6959f2SRichard Henderson
138ee6959f2SRichard Henderson/*
139ee6959f2SRichard Henderson * Round and uncanonicalize a floating-point number by parts. There
140ee6959f2SRichard Henderson * are FRAC_SHIFT bits that may require rounding at the bottom of the
141ee6959f2SRichard Henderson * fraction; these bits will be removed. The exponent will be biased
142ee6959f2SRichard Henderson * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
143ee6959f2SRichard Henderson */
14425fdedf0SRichard Hendersonstatic void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
145ee6959f2SRichard Henderson                                   const FloatFmt *fmt)
146ee6959f2SRichard Henderson{
147ee6959f2SRichard Henderson    const int exp_max = fmt->exp_max;
148ee6959f2SRichard Henderson    const int frac_shift = fmt->frac_shift;
149ee6959f2SRichard Henderson    const uint64_t round_mask = fmt->round_mask;
150d6e1f0cdSRichard Henderson    const uint64_t frac_lsb = round_mask + 1;
151d6e1f0cdSRichard Henderson    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
152d6e1f0cdSRichard Henderson    const uint64_t roundeven_mask = round_mask | frac_lsb;
153ee6959f2SRichard Henderson    uint64_t inc;
15425fdedf0SRichard Henderson    bool overflow_norm = false;
155ee6959f2SRichard Henderson    int exp, flags = 0;
156ee6959f2SRichard Henderson
157ee6959f2SRichard Henderson    switch (s->float_rounding_mode) {
158ee6959f2SRichard Henderson    case float_round_nearest_even:
15998b3cff7SRichard Henderson        if (N > 64 && frac_lsb == 0) {
16098b3cff7SRichard Henderson            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
16198b3cff7SRichard Henderson                   ? frac_lsbm1 : 0);
16298b3cff7SRichard Henderson        } else {
16398b3cff7SRichard Henderson            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
16498b3cff7SRichard Henderson                   ? frac_lsbm1 : 0);
16598b3cff7SRichard Henderson        }
166ee6959f2SRichard Henderson        break;
167ee6959f2SRichard Henderson    case float_round_ties_away:
168ee6959f2SRichard Henderson        inc = frac_lsbm1;
169ee6959f2SRichard Henderson        break;
170ee6959f2SRichard Henderson    case float_round_to_zero:
171ee6959f2SRichard Henderson        overflow_norm = true;
172ee6959f2SRichard Henderson        inc = 0;
173ee6959f2SRichard Henderson        break;
174ee6959f2SRichard Henderson    case float_round_up:
175ee6959f2SRichard Henderson        inc = p->sign ? 0 : round_mask;
176ee6959f2SRichard Henderson        overflow_norm = p->sign;
177ee6959f2SRichard Henderson        break;
178ee6959f2SRichard Henderson    case float_round_down:
179ee6959f2SRichard Henderson        inc = p->sign ? round_mask : 0;
180ee6959f2SRichard Henderson        overflow_norm = !p->sign;
181ee6959f2SRichard Henderson        break;
182ee6959f2SRichard Henderson    case float_round_to_odd:
183ee6959f2SRichard Henderson        overflow_norm = true;
18460c8f726SRichard Henderson        /* fall through */
18560c8f726SRichard Henderson    case float_round_to_odd_inf:
18698b3cff7SRichard Henderson        if (N > 64 && frac_lsb == 0) {
18798b3cff7SRichard Henderson            inc = p->frac_hi & 1 ? 0 : round_mask;
18898b3cff7SRichard Henderson        } else {
189ee6959f2SRichard Henderson            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
19098b3cff7SRichard Henderson        }
191ee6959f2SRichard Henderson        break;
192ee6959f2SRichard Henderson    default:
193ee6959f2SRichard Henderson        g_assert_not_reached();
194ee6959f2SRichard Henderson    }
195ee6959f2SRichard Henderson
196ee6959f2SRichard Henderson    exp = p->exp + fmt->exp_bias;
197ee6959f2SRichard Henderson    if (likely(exp > 0)) {
198ee6959f2SRichard Henderson        if (p->frac_lo & round_mask) {
199ee6959f2SRichard Henderson            flags |= float_flag_inexact;
200ee6959f2SRichard Henderson            if (frac_addi(p, p, inc)) {
201ee6959f2SRichard Henderson                frac_shr(p, 1);
202ee6959f2SRichard Henderson                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
203ee6959f2SRichard Henderson                exp++;
204ee6959f2SRichard Henderson            }
20598b3cff7SRichard Henderson            p->frac_lo &= ~round_mask;
206ee6959f2SRichard Henderson        }
207ee6959f2SRichard Henderson
208ee6959f2SRichard Henderson        if (fmt->arm_althp) {
209ee6959f2SRichard Henderson            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
210ee6959f2SRichard Henderson            if (unlikely(exp > exp_max)) {
211ee6959f2SRichard Henderson                /* Overflow.  Return the maximum normal.  */
212ee6959f2SRichard Henderson                flags = float_flag_invalid;
213ee6959f2SRichard Henderson                exp = exp_max;
214ee6959f2SRichard Henderson                frac_allones(p);
21598b3cff7SRichard Henderson                p->frac_lo &= ~round_mask;
216ee6959f2SRichard Henderson            }
217ee6959f2SRichard Henderson        } else if (unlikely(exp >= exp_max)) {
218c40da5c6SLucas Mateus Castro (alqotel)            flags |= float_flag_overflow;
219c40da5c6SLucas Mateus Castro (alqotel)            if (s->rebias_overflow) {
220c40da5c6SLucas Mateus Castro (alqotel)                exp -= fmt->exp_re_bias;
221c40da5c6SLucas Mateus Castro (alqotel)            } else if (overflow_norm) {
222c40da5c6SLucas Mateus Castro (alqotel)                flags |= float_flag_inexact;
223ee6959f2SRichard Henderson                exp = exp_max - 1;
224ee6959f2SRichard Henderson                frac_allones(p);
22598b3cff7SRichard Henderson                p->frac_lo &= ~round_mask;
226ee6959f2SRichard Henderson            } else {
227c40da5c6SLucas Mateus Castro (alqotel)                flags |= float_flag_inexact;
228ee6959f2SRichard Henderson                p->cls = float_class_inf;
229ee6959f2SRichard Henderson                exp = exp_max;
230ee6959f2SRichard Henderson                frac_clear(p);
231ee6959f2SRichard Henderson            }
232ee6959f2SRichard Henderson        }
23398b3cff7SRichard Henderson        frac_shr(p, frac_shift);
234c40da5c6SLucas Mateus Castro (alqotel)    } else if (unlikely(s->rebias_underflow)) {
235c40da5c6SLucas Mateus Castro (alqotel)        flags |= float_flag_underflow;
236c40da5c6SLucas Mateus Castro (alqotel)        exp += fmt->exp_re_bias;
237c40da5c6SLucas Mateus Castro (alqotel)        if (p->frac_lo & round_mask) {
238c40da5c6SLucas Mateus Castro (alqotel)            flags |= float_flag_inexact;
239c40da5c6SLucas Mateus Castro (alqotel)            if (frac_addi(p, p, inc)) {
240c40da5c6SLucas Mateus Castro (alqotel)                frac_shr(p, 1);
241c40da5c6SLucas Mateus Castro (alqotel)                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
242c40da5c6SLucas Mateus Castro (alqotel)                exp++;
243c40da5c6SLucas Mateus Castro (alqotel)            }
244c40da5c6SLucas Mateus Castro (alqotel)            p->frac_lo &= ~round_mask;
245c40da5c6SLucas Mateus Castro (alqotel)        }
246c40da5c6SLucas Mateus Castro (alqotel)        frac_shr(p, frac_shift);
247ee6959f2SRichard Henderson    } else if (s->flush_to_zero) {
248ee6959f2SRichard Henderson        flags |= float_flag_output_denormal;
249ee6959f2SRichard Henderson        p->cls = float_class_zero;
250ee6959f2SRichard Henderson        exp = 0;
251ee6959f2SRichard Henderson        frac_clear(p);
252ee6959f2SRichard Henderson    } else {
253ee6959f2SRichard Henderson        bool is_tiny = s->tininess_before_rounding || exp < 0;
254ee6959f2SRichard Henderson
255ee6959f2SRichard Henderson        if (!is_tiny) {
256ee6959f2SRichard Henderson            FloatPartsN discard;
257ee6959f2SRichard Henderson            is_tiny = !frac_addi(&discard, p, inc);
258ee6959f2SRichard Henderson        }
259ee6959f2SRichard Henderson
26072246065SRichard Henderson        frac_shrjam(p, !fmt->m68k_denormal - exp);
261ee6959f2SRichard Henderson
262ee6959f2SRichard Henderson        if (p->frac_lo & round_mask) {
263ee6959f2SRichard Henderson            /* Need to recompute round-to-even/round-to-odd. */
264ee6959f2SRichard Henderson            switch (s->float_rounding_mode) {
265ee6959f2SRichard Henderson            case float_round_nearest_even:
26698b3cff7SRichard Henderson                if (N > 64 && frac_lsb == 0) {
26798b3cff7SRichard Henderson                    inc = ((p->frac_hi & 1) ||
26898b3cff7SRichard Henderson                           (p->frac_lo & round_mask) != frac_lsbm1
26998b3cff7SRichard Henderson                           ? frac_lsbm1 : 0);
27098b3cff7SRichard Henderson                } else {
271ee6959f2SRichard Henderson                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
272ee6959f2SRichard Henderson                           ? frac_lsbm1 : 0);
27398b3cff7SRichard Henderson                }
274ee6959f2SRichard Henderson                break;
275ee6959f2SRichard Henderson            case float_round_to_odd:
27660c8f726SRichard Henderson            case float_round_to_odd_inf:
27798b3cff7SRichard Henderson                if (N > 64 && frac_lsb == 0) {
27898b3cff7SRichard Henderson                    inc = p->frac_hi & 1 ? 0 : round_mask;
27998b3cff7SRichard Henderson                } else {
280ee6959f2SRichard Henderson                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
28198b3cff7SRichard Henderson                }
282ee6959f2SRichard Henderson                break;
283ee6959f2SRichard Henderson            default:
284ee6959f2SRichard Henderson                break;
285ee6959f2SRichard Henderson            }
286ee6959f2SRichard Henderson            flags |= float_flag_inexact;
287ee6959f2SRichard Henderson            frac_addi(p, p, inc);
28898b3cff7SRichard Henderson            p->frac_lo &= ~round_mask;
289ee6959f2SRichard Henderson        }
290ee6959f2SRichard Henderson
29172246065SRichard Henderson        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
292ee6959f2SRichard Henderson        frac_shr(p, frac_shift);
293ee6959f2SRichard Henderson
294ee6959f2SRichard Henderson        if (is_tiny && (flags & float_flag_inexact)) {
295ee6959f2SRichard Henderson            flags |= float_flag_underflow;
296ee6959f2SRichard Henderson        }
297ee6959f2SRichard Henderson        if (exp == 0 && frac_eqz(p)) {
298ee6959f2SRichard Henderson            p->cls = float_class_zero;
299ee6959f2SRichard Henderson        }
300ee6959f2SRichard Henderson    }
301ee6959f2SRichard Henderson    p->exp = exp;
302ee6959f2SRichard Henderson    float_raise(flags, s);
303ee6959f2SRichard Henderson}
304da10a907SRichard Henderson
30525fdedf0SRichard Hendersonstatic void partsN(uncanon)(FloatPartsN *p, float_status *s,
30625fdedf0SRichard Henderson                            const FloatFmt *fmt)
30725fdedf0SRichard Henderson{
30825fdedf0SRichard Henderson    if (likely(p->cls == float_class_normal)) {
30925fdedf0SRichard Henderson        parts_uncanon_normal(p, s, fmt);
31025fdedf0SRichard Henderson    } else {
31125fdedf0SRichard Henderson        switch (p->cls) {
31225fdedf0SRichard Henderson        case float_class_zero:
31325fdedf0SRichard Henderson            p->exp = 0;
31425fdedf0SRichard Henderson            frac_clear(p);
31525fdedf0SRichard Henderson            return;
31625fdedf0SRichard Henderson        case float_class_inf:
31725fdedf0SRichard Henderson            g_assert(!fmt->arm_althp);
31825fdedf0SRichard Henderson            p->exp = fmt->exp_max;
31925fdedf0SRichard Henderson            frac_clear(p);
32025fdedf0SRichard Henderson            return;
32125fdedf0SRichard Henderson        case float_class_qnan:
32225fdedf0SRichard Henderson        case float_class_snan:
32325fdedf0SRichard Henderson            g_assert(!fmt->arm_althp);
32425fdedf0SRichard Henderson            p->exp = fmt->exp_max;
32525fdedf0SRichard Henderson            frac_shr(p, fmt->frac_shift);
32625fdedf0SRichard Henderson            return;
32725fdedf0SRichard Henderson        default:
32825fdedf0SRichard Henderson            break;
32925fdedf0SRichard Henderson        }
33025fdedf0SRichard Henderson        g_assert_not_reached();
33125fdedf0SRichard Henderson    }
33225fdedf0SRichard Henderson}
33325fdedf0SRichard Henderson
334da10a907SRichard Henderson/*
335da10a907SRichard Henderson * Returns the result of adding or subtracting the values of the
336da10a907SRichard Henderson * floating-point values `a' and `b'. The operation is performed
337da10a907SRichard Henderson * according to the IEC/IEEE Standard for Binary Floating-Point
338da10a907SRichard Henderson * Arithmetic.
339da10a907SRichard Henderson */
340da10a907SRichard Hendersonstatic FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
341da10a907SRichard Henderson                                   float_status *s, bool subtract)
342da10a907SRichard Henderson{
343da10a907SRichard Henderson    bool b_sign = b->sign ^ subtract;
344da10a907SRichard Henderson    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
345da10a907SRichard Henderson
346da10a907SRichard Henderson    if (a->sign != b_sign) {
347da10a907SRichard Henderson        /* Subtraction */
348da10a907SRichard Henderson        if (likely(ab_mask == float_cmask_normal)) {
349da10a907SRichard Henderson            if (parts_sub_normal(a, b)) {
350da10a907SRichard Henderson                return a;
351da10a907SRichard Henderson            }
352da10a907SRichard Henderson            /* Subtract was exact, fall through to set sign. */
353da10a907SRichard Henderson            ab_mask = float_cmask_zero;
354da10a907SRichard Henderson        }
355da10a907SRichard Henderson
356da10a907SRichard Henderson        if (ab_mask == float_cmask_zero) {
357da10a907SRichard Henderson            a->sign = s->float_rounding_mode == float_round_down;
358da10a907SRichard Henderson            return a;
359da10a907SRichard Henderson        }
360da10a907SRichard Henderson
361da10a907SRichard Henderson        if (unlikely(ab_mask & float_cmask_anynan)) {
362da10a907SRichard Henderson            goto p_nan;
363da10a907SRichard Henderson        }
364da10a907SRichard Henderson
365da10a907SRichard Henderson        if (ab_mask & float_cmask_inf) {
366da10a907SRichard Henderson            if (a->cls != float_class_inf) {
367da10a907SRichard Henderson                /* N - Inf */
368da10a907SRichard Henderson                goto return_b;
369da10a907SRichard Henderson            }
370da10a907SRichard Henderson            if (b->cls != float_class_inf) {
371da10a907SRichard Henderson                /* Inf - N */
372da10a907SRichard Henderson                return a;
373da10a907SRichard Henderson            }
374da10a907SRichard Henderson            /* Inf - Inf */
375ba11446cSRichard Henderson            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
376da10a907SRichard Henderson            parts_default_nan(a, s);
377da10a907SRichard Henderson            return a;
378da10a907SRichard Henderson        }
379da10a907SRichard Henderson    } else {
380da10a907SRichard Henderson        /* Addition */
381da10a907SRichard Henderson        if (likely(ab_mask == float_cmask_normal)) {
382da10a907SRichard Henderson            parts_add_normal(a, b);
383da10a907SRichard Henderson            return a;
384da10a907SRichard Henderson        }
385da10a907SRichard Henderson
386da10a907SRichard Henderson        if (ab_mask == float_cmask_zero) {
387da10a907SRichard Henderson            return a;
388da10a907SRichard Henderson        }
389da10a907SRichard Henderson
390da10a907SRichard Henderson        if (unlikely(ab_mask & float_cmask_anynan)) {
391da10a907SRichard Henderson            goto p_nan;
392da10a907SRichard Henderson        }
393da10a907SRichard Henderson
394da10a907SRichard Henderson        if (ab_mask & float_cmask_inf) {
395da10a907SRichard Henderson            a->cls = float_class_inf;
396da10a907SRichard Henderson            return a;
397da10a907SRichard Henderson        }
398da10a907SRichard Henderson    }
399da10a907SRichard Henderson
400da10a907SRichard Henderson    if (b->cls == float_class_zero) {
401da10a907SRichard Henderson        g_assert(a->cls == float_class_normal);
402da10a907SRichard Henderson        return a;
403da10a907SRichard Henderson    }
404da10a907SRichard Henderson
405da10a907SRichard Henderson    g_assert(a->cls == float_class_zero);
406da10a907SRichard Henderson    g_assert(b->cls == float_class_normal);
407da10a907SRichard Henderson return_b:
408da10a907SRichard Henderson    b->sign = b_sign;
409da10a907SRichard Henderson    return b;
410da10a907SRichard Henderson
411da10a907SRichard Henderson p_nan:
412da10a907SRichard Henderson    return parts_pick_nan(a, b, s);
413da10a907SRichard Henderson}
414aca84527SRichard Henderson
415aca84527SRichard Henderson/*
416aca84527SRichard Henderson * Returns the result of multiplying the floating-point values `a' and
417aca84527SRichard Henderson * `b'. The operation is performed according to the IEC/IEEE Standard
418aca84527SRichard Henderson * for Binary Floating-Point Arithmetic.
419aca84527SRichard Henderson */
420aca84527SRichard Hendersonstatic FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
421aca84527SRichard Henderson                                float_status *s)
422aca84527SRichard Henderson{
423aca84527SRichard Henderson    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
424aca84527SRichard Henderson    bool sign = a->sign ^ b->sign;
425aca84527SRichard Henderson
426aca84527SRichard Henderson    if (likely(ab_mask == float_cmask_normal)) {
427aca84527SRichard Henderson        FloatPartsW tmp;
428aca84527SRichard Henderson
429aca84527SRichard Henderson        frac_mulw(&tmp, a, b);
430aca84527SRichard Henderson        frac_truncjam(a, &tmp);
431aca84527SRichard Henderson
432aca84527SRichard Henderson        a->exp += b->exp + 1;
433aca84527SRichard Henderson        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
434aca84527SRichard Henderson            frac_add(a, a, a);
435aca84527SRichard Henderson            a->exp -= 1;
436aca84527SRichard Henderson        }
437aca84527SRichard Henderson
438aca84527SRichard Henderson        a->sign = sign;
439aca84527SRichard Henderson        return a;
440aca84527SRichard Henderson    }
441aca84527SRichard Henderson
442aca84527SRichard Henderson    /* Inf * Zero == NaN */
443aca84527SRichard Henderson    if (unlikely(ab_mask == float_cmask_infzero)) {
444bead3c9bSRichard Henderson        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
445aca84527SRichard Henderson        parts_default_nan(a, s);
446aca84527SRichard Henderson        return a;
447aca84527SRichard Henderson    }
448aca84527SRichard Henderson
449aca84527SRichard Henderson    if (unlikely(ab_mask & float_cmask_anynan)) {
450aca84527SRichard Henderson        return parts_pick_nan(a, b, s);
451aca84527SRichard Henderson    }
452aca84527SRichard Henderson
453aca84527SRichard Henderson    /* Multiply by 0 or Inf */
454aca84527SRichard Henderson    if (ab_mask & float_cmask_inf) {
455aca84527SRichard Henderson        a->cls = float_class_inf;
456aca84527SRichard Henderson        a->sign = sign;
457aca84527SRichard Henderson        return a;
458aca84527SRichard Henderson    }
459aca84527SRichard Henderson
460aca84527SRichard Henderson    g_assert(ab_mask & float_cmask_zero);
461aca84527SRichard Henderson    a->cls = float_class_zero;
462aca84527SRichard Henderson    a->sign = sign;
463aca84527SRichard Henderson    return a;
464aca84527SRichard Henderson}
465dedd123cSRichard Henderson
466dedd123cSRichard Henderson/*
467dedd123cSRichard Henderson * Returns the result of multiplying the floating-point values `a' and
468dedd123cSRichard Henderson * `b' then adding 'c', with no intermediate rounding step after the
469dedd123cSRichard Henderson * multiplication. The operation is performed according to the
470dedd123cSRichard Henderson * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
471dedd123cSRichard Henderson * The flags argument allows the caller to select negation of the
472dedd123cSRichard Henderson * addend, the intermediate product, or the final result. (The
473dedd123cSRichard Henderson * difference between this and having the caller do a separate
474dedd123cSRichard Henderson * negation is that negating externally will flip the sign bit on NaNs.)
475dedd123cSRichard Henderson *
476dedd123cSRichard Henderson * Requires A and C extracted into a double-sized structure to provide the
477dedd123cSRichard Henderson * extra space for the widening multiply.
478dedd123cSRichard Henderson */
479dedd123cSRichard Hendersonstatic FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
480dedd123cSRichard Henderson                                   FloatPartsN *c, int flags, float_status *s)
481dedd123cSRichard Henderson{
482dedd123cSRichard Henderson    int ab_mask, abc_mask;
483dedd123cSRichard Henderson    FloatPartsW p_widen, c_widen;
484dedd123cSRichard Henderson
485dedd123cSRichard Henderson    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
486dedd123cSRichard Henderson    abc_mask = float_cmask(c->cls) | ab_mask;
487dedd123cSRichard Henderson
488dedd123cSRichard Henderson    /*
489dedd123cSRichard Henderson     * It is implementation-defined whether the cases of (0,inf,qnan)
490dedd123cSRichard Henderson     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
491dedd123cSRichard Henderson     * they return if they do), so we have to hand this information
492dedd123cSRichard Henderson     * off to the target-specific pick-a-NaN routine.
493dedd123cSRichard Henderson     */
494dedd123cSRichard Henderson    if (unlikely(abc_mask & float_cmask_anynan)) {
495dedd123cSRichard Henderson        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
496dedd123cSRichard Henderson    }
497dedd123cSRichard Henderson
498dedd123cSRichard Henderson    if (flags & float_muladd_negate_c) {
499dedd123cSRichard Henderson        c->sign ^= 1;
500dedd123cSRichard Henderson    }
501dedd123cSRichard Henderson
502dedd123cSRichard Henderson    /* Compute the sign of the product into A. */
503dedd123cSRichard Henderson    a->sign ^= b->sign;
504dedd123cSRichard Henderson    if (flags & float_muladd_negate_product) {
505dedd123cSRichard Henderson        a->sign ^= 1;
506dedd123cSRichard Henderson    }
507dedd123cSRichard Henderson
508dedd123cSRichard Henderson    if (unlikely(ab_mask != float_cmask_normal)) {
509dedd123cSRichard Henderson        if (unlikely(ab_mask == float_cmask_infzero)) {
510bead3c9bSRichard Henderson            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
511dedd123cSRichard Henderson            goto d_nan;
512dedd123cSRichard Henderson        }
513dedd123cSRichard Henderson
514dedd123cSRichard Henderson        if (ab_mask & float_cmask_inf) {
515dedd123cSRichard Henderson            if (c->cls == float_class_inf && a->sign != c->sign) {
516ba11446cSRichard Henderson                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
517dedd123cSRichard Henderson                goto d_nan;
518dedd123cSRichard Henderson            }
519dedd123cSRichard Henderson            goto return_inf;
520dedd123cSRichard Henderson        }
521dedd123cSRichard Henderson
522dedd123cSRichard Henderson        g_assert(ab_mask & float_cmask_zero);
523dedd123cSRichard Henderson        if (c->cls == float_class_normal) {
524dedd123cSRichard Henderson            *a = *c;
525dedd123cSRichard Henderson            goto return_normal;
526dedd123cSRichard Henderson        }
527dedd123cSRichard Henderson        if (c->cls == float_class_zero) {
528dedd123cSRichard Henderson            if (a->sign != c->sign) {
529dedd123cSRichard Henderson                goto return_sub_zero;
530dedd123cSRichard Henderson            }
531dedd123cSRichard Henderson            goto return_zero;
532dedd123cSRichard Henderson        }
533dedd123cSRichard Henderson        g_assert(c->cls == float_class_inf);
534dedd123cSRichard Henderson    }
535dedd123cSRichard Henderson
536dedd123cSRichard Henderson    if (unlikely(c->cls == float_class_inf)) {
537dedd123cSRichard Henderson        a->sign = c->sign;
538dedd123cSRichard Henderson        goto return_inf;
539dedd123cSRichard Henderson    }
540dedd123cSRichard Henderson
541dedd123cSRichard Henderson    /* Perform the multiplication step. */
542dedd123cSRichard Henderson    p_widen.sign = a->sign;
543dedd123cSRichard Henderson    p_widen.exp = a->exp + b->exp + 1;
544dedd123cSRichard Henderson    frac_mulw(&p_widen, a, b);
545dedd123cSRichard Henderson    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
546dedd123cSRichard Henderson        frac_add(&p_widen, &p_widen, &p_widen);
547dedd123cSRichard Henderson        p_widen.exp -= 1;
548dedd123cSRichard Henderson    }
549dedd123cSRichard Henderson
550dedd123cSRichard Henderson    /* Perform the addition step. */
551dedd123cSRichard Henderson    if (c->cls != float_class_zero) {
552dedd123cSRichard Henderson        /* Zero-extend C to less significant bits. */
553dedd123cSRichard Henderson        frac_widen(&c_widen, c);
554dedd123cSRichard Henderson        c_widen.exp = c->exp;
555dedd123cSRichard Henderson
556dedd123cSRichard Henderson        if (a->sign == c->sign) {
557dedd123cSRichard Henderson            parts_add_normal(&p_widen, &c_widen);
558dedd123cSRichard Henderson        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
559dedd123cSRichard Henderson            goto return_sub_zero;
560dedd123cSRichard Henderson        }
561dedd123cSRichard Henderson    }
562dedd123cSRichard Henderson
563dedd123cSRichard Henderson    /* Narrow with sticky bit, for proper rounding later. */
564dedd123cSRichard Henderson    frac_truncjam(a, &p_widen);
565dedd123cSRichard Henderson    a->sign = p_widen.sign;
566dedd123cSRichard Henderson    a->exp = p_widen.exp;
567dedd123cSRichard Henderson
568dedd123cSRichard Henderson return_normal:
569dedd123cSRichard Henderson    if (flags & float_muladd_halve_result) {
570dedd123cSRichard Henderson        a->exp -= 1;
571dedd123cSRichard Henderson    }
572dedd123cSRichard Henderson finish_sign:
573dedd123cSRichard Henderson    if (flags & float_muladd_negate_result) {
574dedd123cSRichard Henderson        a->sign ^= 1;
575dedd123cSRichard Henderson    }
576dedd123cSRichard Henderson    return a;
577dedd123cSRichard Henderson
578dedd123cSRichard Henderson return_sub_zero:
579dedd123cSRichard Henderson    a->sign = s->float_rounding_mode == float_round_down;
580dedd123cSRichard Henderson return_zero:
581dedd123cSRichard Henderson    a->cls = float_class_zero;
582dedd123cSRichard Henderson    goto finish_sign;
583dedd123cSRichard Henderson
584dedd123cSRichard Henderson return_inf:
585dedd123cSRichard Henderson    a->cls = float_class_inf;
586dedd123cSRichard Henderson    goto finish_sign;
587dedd123cSRichard Henderson
588dedd123cSRichard Henderson d_nan:
589dedd123cSRichard Henderson    parts_default_nan(a, s);
590dedd123cSRichard Henderson    return a;
591dedd123cSRichard Henderson}
592ec961b81SRichard Henderson
593ec961b81SRichard Henderson/*
594ec961b81SRichard Henderson * Returns the result of dividing the floating-point value `a' by the
595ec961b81SRichard Henderson * corresponding value `b'. The operation is performed according to
596ec961b81SRichard Henderson * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
597ec961b81SRichard Henderson */
598ec961b81SRichard Hendersonstatic FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
599ec961b81SRichard Henderson                                float_status *s)
600ec961b81SRichard Henderson{
601ec961b81SRichard Henderson    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
602ec961b81SRichard Henderson    bool sign = a->sign ^ b->sign;
603ec961b81SRichard Henderson
604ec961b81SRichard Henderson    if (likely(ab_mask == float_cmask_normal)) {
605ec961b81SRichard Henderson        a->sign = sign;
606ec961b81SRichard Henderson        a->exp -= b->exp + frac_div(a, b);
607ec961b81SRichard Henderson        return a;
608ec961b81SRichard Henderson    }
609ec961b81SRichard Henderson
610ec961b81SRichard Henderson    /* 0/0 or Inf/Inf => NaN */
61110cc9640SRichard Henderson    if (unlikely(ab_mask == float_cmask_zero)) {
61210cc9640SRichard Henderson        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
61310cc9640SRichard Henderson        goto d_nan;
61410cc9640SRichard Henderson    }
61510cc9640SRichard Henderson    if (unlikely(ab_mask == float_cmask_inf)) {
61610cc9640SRichard Henderson        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
61710cc9640SRichard Henderson        goto d_nan;
618ec961b81SRichard Henderson    }
619ec961b81SRichard Henderson
620ec961b81SRichard Henderson    /* All the NaN cases */
621ec961b81SRichard Henderson    if (unlikely(ab_mask & float_cmask_anynan)) {
622ec961b81SRichard Henderson        return parts_pick_nan(a, b, s);
623ec961b81SRichard Henderson    }
624ec961b81SRichard Henderson
625ec961b81SRichard Henderson    a->sign = sign;
626ec961b81SRichard Henderson
627ec961b81SRichard Henderson    /* Inf / X */
628ec961b81SRichard Henderson    if (a->cls == float_class_inf) {
629ec961b81SRichard Henderson        return a;
630ec961b81SRichard Henderson    }
631ec961b81SRichard Henderson
632ec961b81SRichard Henderson    /* 0 / X */
633ec961b81SRichard Henderson    if (a->cls == float_class_zero) {
634ec961b81SRichard Henderson        return a;
635ec961b81SRichard Henderson    }
636ec961b81SRichard Henderson
637ec961b81SRichard Henderson    /* X / Inf */
638ec961b81SRichard Henderson    if (b->cls == float_class_inf) {
639ec961b81SRichard Henderson        a->cls = float_class_zero;
640ec961b81SRichard Henderson        return a;
641ec961b81SRichard Henderson    }
642ec961b81SRichard Henderson
643ec961b81SRichard Henderson    /* X / 0 => Inf */
644ec961b81SRichard Henderson    g_assert(b->cls == float_class_zero);
645ec961b81SRichard Henderson    float_raise(float_flag_divbyzero, s);
646ec961b81SRichard Henderson    a->cls = float_class_inf;
647ec961b81SRichard Henderson    return a;
64810cc9640SRichard Henderson
64910cc9640SRichard Henderson d_nan:
65010cc9640SRichard Henderson    parts_default_nan(a, s);
65110cc9640SRichard Henderson    return a;
652ec961b81SRichard Henderson}
653afc34931SRichard Henderson
654afc34931SRichard Henderson/*
655feaf2e9cSRichard Henderson * Floating point remainder, per IEC/IEEE, or modulus.
656feaf2e9cSRichard Henderson */
657feaf2e9cSRichard Hendersonstatic FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
658feaf2e9cSRichard Henderson                                   uint64_t *mod_quot, float_status *s)
659feaf2e9cSRichard Henderson{
660feaf2e9cSRichard Henderson    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
661feaf2e9cSRichard Henderson
662feaf2e9cSRichard Henderson    if (likely(ab_mask == float_cmask_normal)) {
663feaf2e9cSRichard Henderson        frac_modrem(a, b, mod_quot);
664feaf2e9cSRichard Henderson        return a;
665feaf2e9cSRichard Henderson    }
666feaf2e9cSRichard Henderson
667feaf2e9cSRichard Henderson    if (mod_quot) {
668feaf2e9cSRichard Henderson        *mod_quot = 0;
669feaf2e9cSRichard Henderson    }
670feaf2e9cSRichard Henderson
671feaf2e9cSRichard Henderson    /* All the NaN cases */
672feaf2e9cSRichard Henderson    if (unlikely(ab_mask & float_cmask_anynan)) {
673feaf2e9cSRichard Henderson        return parts_pick_nan(a, b, s);
674feaf2e9cSRichard Henderson    }
675feaf2e9cSRichard Henderson
676feaf2e9cSRichard Henderson    /* Inf % N; N % 0 */
677feaf2e9cSRichard Henderson    if (a->cls == float_class_inf || b->cls == float_class_zero) {
678feaf2e9cSRichard Henderson        float_raise(float_flag_invalid, s);
679feaf2e9cSRichard Henderson        parts_default_nan(a, s);
680feaf2e9cSRichard Henderson        return a;
681feaf2e9cSRichard Henderson    }
682feaf2e9cSRichard Henderson
683feaf2e9cSRichard Henderson    /* N % Inf; 0 % N */
684feaf2e9cSRichard Henderson    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
685feaf2e9cSRichard Henderson    return a;
686feaf2e9cSRichard Henderson}
687feaf2e9cSRichard Henderson
688feaf2e9cSRichard Henderson/*
6899261b245SRichard Henderson * Square Root
6909261b245SRichard Henderson *
6919261b245SRichard Henderson * The base algorithm is lifted from
6929261b245SRichard Henderson * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
6939261b245SRichard Henderson * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
6949261b245SRichard Henderson * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
6959261b245SRichard Henderson * and is thus MIT licenced.
6969261b245SRichard Henderson */
6979261b245SRichard Hendersonstatic void partsN(sqrt)(FloatPartsN *a, float_status *status,
6989261b245SRichard Henderson                         const FloatFmt *fmt)
6999261b245SRichard Henderson{
7009261b245SRichard Henderson    const uint32_t three32 = 3u << 30;
7019261b245SRichard Henderson    const uint64_t three64 = 3ull << 62;
7029261b245SRichard Henderson    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
7039261b245SRichard Henderson    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
7049261b245SRichard Henderson    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
7059261b245SRichard Henderson    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
7069261b245SRichard Henderson    uint64_t discard;
7079261b245SRichard Henderson    bool exp_odd;
7089261b245SRichard Henderson    size_t index;
7099261b245SRichard Henderson
7109261b245SRichard Henderson    if (unlikely(a->cls != float_class_normal)) {
7119261b245SRichard Henderson        switch (a->cls) {
7129261b245SRichard Henderson        case float_class_snan:
7139261b245SRichard Henderson        case float_class_qnan:
7149261b245SRichard Henderson            parts_return_nan(a, status);
7159261b245SRichard Henderson            return;
7169261b245SRichard Henderson        case float_class_zero:
7179261b245SRichard Henderson            return;
7189261b245SRichard Henderson        case float_class_inf:
7199261b245SRichard Henderson            if (unlikely(a->sign)) {
7209261b245SRichard Henderson                goto d_nan;
7219261b245SRichard Henderson            }
7229261b245SRichard Henderson            return;
7239261b245SRichard Henderson        default:
7249261b245SRichard Henderson            g_assert_not_reached();
7259261b245SRichard Henderson        }
7269261b245SRichard Henderson    }
7279261b245SRichard Henderson
7289261b245SRichard Henderson    if (unlikely(a->sign)) {
7299261b245SRichard Henderson        goto d_nan;
7309261b245SRichard Henderson    }
7319261b245SRichard Henderson
7329261b245SRichard Henderson    /*
7339261b245SRichard Henderson     * Argument reduction.
7349261b245SRichard Henderson     * x = 4^e frac; with integer e, and frac in [1, 4)
7359261b245SRichard Henderson     * m = frac fixed point at bit 62, since we're in base 4.
7369261b245SRichard Henderson     * If base-2 exponent is odd, exchange that for multiply by 2,
7379261b245SRichard Henderson     * which results in no shift.
7389261b245SRichard Henderson     */
7399261b245SRichard Henderson    exp_odd = a->exp & 1;
7409261b245SRichard Henderson    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
7419261b245SRichard Henderson    if (!exp_odd) {
7429261b245SRichard Henderson        frac_shr(a, 1);
7439261b245SRichard Henderson    }
7449261b245SRichard Henderson
7459261b245SRichard Henderson    /*
7469261b245SRichard Henderson     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
7479261b245SRichard Henderson     *
7489261b245SRichard Henderson     * Initial estimate:
7499261b245SRichard Henderson     * 7-bit lookup table (1-bit exponent and 6-bit significand).
7509261b245SRichard Henderson     *
7519261b245SRichard Henderson     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
7529261b245SRichard Henderson     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
7539261b245SRichard Henderson     * a table lookup is faster and needs one less iteration.
7549261b245SRichard Henderson     * The 7-bit table gives |e| < 0x1.fdp-9.
7559261b245SRichard Henderson     *
7569261b245SRichard Henderson     * A Newton-Raphson iteration for r is
7579261b245SRichard Henderson     *   s = m*r
7589261b245SRichard Henderson     *   d = s*r
7599261b245SRichard Henderson     *   u = 3 - d
7609261b245SRichard Henderson     *   r = r*u/2
7619261b245SRichard Henderson     *
7629261b245SRichard Henderson     * Fixed point representations:
7639261b245SRichard Henderson     *   m, s, d, u, three are all 2.30; r is 0.32
7649261b245SRichard Henderson     */
7659261b245SRichard Henderson    m64 = a->frac_hi;
7669261b245SRichard Henderson    m32 = m64 >> 32;
7679261b245SRichard Henderson
7689261b245SRichard Henderson    r32 = rsqrt_tab[index] << 16;
7699261b245SRichard Henderson    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
7709261b245SRichard Henderson
7719261b245SRichard Henderson    s32 = ((uint64_t)m32 * r32) >> 32;
7729261b245SRichard Henderson    d32 = ((uint64_t)s32 * r32) >> 32;
7739261b245SRichard Henderson    u32 = three32 - d32;
7749261b245SRichard Henderson
7759261b245SRichard Henderson    if (N == 64) {
7769261b245SRichard Henderson        /* float64 or smaller */
7779261b245SRichard Henderson
7789261b245SRichard Henderson        r32 = ((uint64_t)r32 * u32) >> 31;
7799261b245SRichard Henderson        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
7809261b245SRichard Henderson
7819261b245SRichard Henderson        s32 = ((uint64_t)m32 * r32) >> 32;
7829261b245SRichard Henderson        d32 = ((uint64_t)s32 * r32) >> 32;
7839261b245SRichard Henderson        u32 = three32 - d32;
7849261b245SRichard Henderson
7859261b245SRichard Henderson        if (fmt->frac_size <= 23) {
7869261b245SRichard Henderson            /* float32 or smaller */
7879261b245SRichard Henderson
7889261b245SRichard Henderson            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
7899261b245SRichard Henderson            s32 = (s32 - 1) >> 6;               /* 9.23 */
7909261b245SRichard Henderson            /* s < sqrt(m) < s + 0x1.08p-23 */
7919261b245SRichard Henderson
7929261b245SRichard Henderson            /* compute nearest rounded result to 2.23 bits */
7939261b245SRichard Henderson            uint32_t d0 = (m32 << 16) - s32 * s32;
7949261b245SRichard Henderson            uint32_t d1 = s32 - d0;
7959261b245SRichard Henderson            uint32_t d2 = d1 + s32 + 1;
7969261b245SRichard Henderson            s32 += d1 >> 31;
7979261b245SRichard Henderson            a->frac_hi = (uint64_t)s32 << (64 - 25);
7989261b245SRichard Henderson
7999261b245SRichard Henderson            /* increment or decrement for inexact */
8009261b245SRichard Henderson            if (d2 != 0) {
8019261b245SRichard Henderson                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
8029261b245SRichard Henderson            }
8039261b245SRichard Henderson            goto done;
8049261b245SRichard Henderson        }
8059261b245SRichard Henderson
8069261b245SRichard Henderson        /* float64 */
8079261b245SRichard Henderson
8089261b245SRichard Henderson        r64 = (uint64_t)r32 * u32 * 2;
8099261b245SRichard Henderson        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
8109261b245SRichard Henderson        mul64To128(m64, r64, &s64, &discard);
8119261b245SRichard Henderson        mul64To128(s64, r64, &d64, &discard);
8129261b245SRichard Henderson        u64 = three64 - d64;
8139261b245SRichard Henderson
8149261b245SRichard Henderson        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
8159261b245SRichard Henderson        s64 = (s64 - 2) >> 9;                  /* 12.52 */
8169261b245SRichard Henderson
8179261b245SRichard Henderson        /* Compute nearest rounded result */
8189261b245SRichard Henderson        uint64_t d0 = (m64 << 42) - s64 * s64;
8199261b245SRichard Henderson        uint64_t d1 = s64 - d0;
8209261b245SRichard Henderson        uint64_t d2 = d1 + s64 + 1;
8219261b245SRichard Henderson        s64 += d1 >> 63;
8229261b245SRichard Henderson        a->frac_hi = s64 << (64 - 54);
8239261b245SRichard Henderson
8249261b245SRichard Henderson        /* increment or decrement for inexact */
8259261b245SRichard Henderson        if (d2 != 0) {
8269261b245SRichard Henderson            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
8279261b245SRichard Henderson        }
8289261b245SRichard Henderson        goto done;
8299261b245SRichard Henderson    }
8309261b245SRichard Henderson
8319261b245SRichard Henderson    r64 = (uint64_t)r32 * u32 * 2;
8329261b245SRichard Henderson    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
8339261b245SRichard Henderson
8349261b245SRichard Henderson    mul64To128(m64, r64, &s64, &discard);
8359261b245SRichard Henderson    mul64To128(s64, r64, &d64, &discard);
8369261b245SRichard Henderson    u64 = three64 - d64;
8379261b245SRichard Henderson    mul64To128(u64, r64, &r64, &discard);
8389261b245SRichard Henderson    r64 <<= 1;
8399261b245SRichard Henderson    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
8409261b245SRichard Henderson
8419261b245SRichard Henderson    mul64To128(m64, r64, &s64, &discard);
8429261b245SRichard Henderson    mul64To128(s64, r64, &d64, &discard);
8439261b245SRichard Henderson    u64 = three64 - d64;
8449261b245SRichard Henderson    mul64To128(u64, r64, &rh, &rl);
8459261b245SRichard Henderson    add128(rh, rl, rh, rl, &rh, &rl);
8469261b245SRichard Henderson    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
8479261b245SRichard Henderson
8489261b245SRichard Henderson    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
8499261b245SRichard Henderson    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
8509261b245SRichard Henderson    sub128(three64, 0, dh, dl, &uh, &ul);
8519261b245SRichard Henderson    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
8529261b245SRichard Henderson    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
8539261b245SRichard Henderson
8549261b245SRichard Henderson    sub128(sh, sl, 0, 4, &sh, &sl);
8559261b245SRichard Henderson    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
8569261b245SRichard Henderson    /* s < sqrt(m) < s + 1ulp */
8579261b245SRichard Henderson
8589261b245SRichard Henderson    /* Compute nearest rounded result */
8599261b245SRichard Henderson    mul64To128(sl, sl, &d0h, &d0l);
8609261b245SRichard Henderson    d0h += 2 * sh * sl;
8619261b245SRichard Henderson    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
8629261b245SRichard Henderson    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
8639261b245SRichard Henderson    add128(sh, sl, 0, 1, &d2h, &d2l);
8649261b245SRichard Henderson    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
8659261b245SRichard Henderson    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
8669261b245SRichard Henderson    shift128Left(sh, sl, 128 - 114, &sh, &sl);
8679261b245SRichard Henderson
8689261b245SRichard Henderson    /* increment or decrement for inexact */
8699261b245SRichard Henderson    if (d2h | d2l) {
8709261b245SRichard Henderson        if ((int64_t)(d1h ^ d2h) < 0) {
8719261b245SRichard Henderson            sub128(sh, sl, 0, 1, &sh, &sl);
8729261b245SRichard Henderson        } else {
8739261b245SRichard Henderson            add128(sh, sl, 0, 1, &sh, &sl);
8749261b245SRichard Henderson        }
8759261b245SRichard Henderson    }
8769261b245SRichard Henderson    a->frac_lo = sl;
8779261b245SRichard Henderson    a->frac_hi = sh;
8789261b245SRichard Henderson
8799261b245SRichard Henderson done:
8809261b245SRichard Henderson    /* Convert back from base 4 to base 2. */
8819261b245SRichard Henderson    a->exp >>= 1;
8829261b245SRichard Henderson    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
8839261b245SRichard Henderson        frac_add(a, a, a);
8849261b245SRichard Henderson    } else {
8859261b245SRichard Henderson        a->exp += 1;
8869261b245SRichard Henderson    }
8879261b245SRichard Henderson    return;
8889261b245SRichard Henderson
8899261b245SRichard Henderson d_nan:
890f8718aabSRichard Henderson    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
8919261b245SRichard Henderson    parts_default_nan(a, status);
8929261b245SRichard Henderson}
8939261b245SRichard Henderson
8949261b245SRichard Henderson/*
895afc34931SRichard Henderson * Rounds the floating-point value `a' to an integer, and returns the
896afc34931SRichard Henderson * result as a floating-point value. The operation is performed
897afc34931SRichard Henderson * according to the IEC/IEEE Standard for Binary Floating-Point
898afc34931SRichard Henderson * Arithmetic.
899afc34931SRichard Henderson *
900afc34931SRichard Henderson * parts_round_to_int_normal is an internal helper function for
901afc34931SRichard Henderson * normal numbers only, returning true for inexact but not directly
902afc34931SRichard Henderson * raising float_flag_inexact.
903afc34931SRichard Henderson */
904afc34931SRichard Hendersonstatic bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
905afc34931SRichard Henderson                                        int scale, int frac_size)
906afc34931SRichard Henderson{
907afc34931SRichard Henderson    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
908afc34931SRichard Henderson    int shift_adj;
909afc34931SRichard Henderson
910afc34931SRichard Henderson    scale = MIN(MAX(scale, -0x10000), 0x10000);
911afc34931SRichard Henderson    a->exp += scale;
912afc34931SRichard Henderson
913afc34931SRichard Henderson    if (a->exp < 0) {
914afc34931SRichard Henderson        bool one;
915afc34931SRichard Henderson
916afc34931SRichard Henderson        /* All fractional */
917afc34931SRichard Henderson        switch (rmode) {
918afc34931SRichard Henderson        case float_round_nearest_even:
919afc34931SRichard Henderson            one = false;
920afc34931SRichard Henderson            if (a->exp == -1) {
921afc34931SRichard Henderson                FloatPartsN tmp;
922afc34931SRichard Henderson                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
923afc34931SRichard Henderson                frac_add(&tmp, a, a);
924afc34931SRichard Henderson                /* Anything remaining means frac > 0.5. */
925afc34931SRichard Henderson                one = !frac_eqz(&tmp);
926afc34931SRichard Henderson            }
927afc34931SRichard Henderson            break;
928afc34931SRichard Henderson        case float_round_ties_away:
929afc34931SRichard Henderson            one = a->exp == -1;
930afc34931SRichard Henderson            break;
931afc34931SRichard Henderson        case float_round_to_zero:
932afc34931SRichard Henderson            one = false;
933afc34931SRichard Henderson            break;
934afc34931SRichard Henderson        case float_round_up:
935afc34931SRichard Henderson            one = !a->sign;
936afc34931SRichard Henderson            break;
937afc34931SRichard Henderson        case float_round_down:
938afc34931SRichard Henderson            one = a->sign;
939afc34931SRichard Henderson            break;
940afc34931SRichard Henderson        case float_round_to_odd:
941afc34931SRichard Henderson            one = true;
942afc34931SRichard Henderson            break;
943afc34931SRichard Henderson        default:
944afc34931SRichard Henderson            g_assert_not_reached();
945afc34931SRichard Henderson        }
946afc34931SRichard Henderson
947afc34931SRichard Henderson        frac_clear(a);
948afc34931SRichard Henderson        a->exp = 0;
949afc34931SRichard Henderson        if (one) {
950afc34931SRichard Henderson            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
951afc34931SRichard Henderson        } else {
952afc34931SRichard Henderson            a->cls = float_class_zero;
953afc34931SRichard Henderson        }
954afc34931SRichard Henderson        return true;
955afc34931SRichard Henderson    }
956afc34931SRichard Henderson
957afc34931SRichard Henderson    if (a->exp >= frac_size) {
958afc34931SRichard Henderson        /* All integral */
959afc34931SRichard Henderson        return false;
960afc34931SRichard Henderson    }
961afc34931SRichard Henderson
962afc34931SRichard Henderson    if (N > 64 && a->exp < N - 64) {
963afc34931SRichard Henderson        /*
964afc34931SRichard Henderson         * Rounding is not in the low word -- shift lsb to bit 2,
965afc34931SRichard Henderson         * which leaves room for sticky and rounding bit.
966afc34931SRichard Henderson         */
967afc34931SRichard Henderson        shift_adj = (N - 1) - (a->exp + 2);
968afc34931SRichard Henderson        frac_shrjam(a, shift_adj);
969afc34931SRichard Henderson        frac_lsb = 1 << 2;
970afc34931SRichard Henderson    } else {
971afc34931SRichard Henderson        shift_adj = 0;
972afc34931SRichard Henderson        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
973afc34931SRichard Henderson    }
974afc34931SRichard Henderson
975afc34931SRichard Henderson    frac_lsbm1 = frac_lsb >> 1;
976afc34931SRichard Henderson    rnd_mask = frac_lsb - 1;
977afc34931SRichard Henderson    rnd_even_mask = rnd_mask | frac_lsb;
978afc34931SRichard Henderson
979afc34931SRichard Henderson    if (!(a->frac_lo & rnd_mask)) {
980afc34931SRichard Henderson        /* Fractional bits already clear, undo the shift above. */
981afc34931SRichard Henderson        frac_shl(a, shift_adj);
982afc34931SRichard Henderson        return false;
983afc34931SRichard Henderson    }
984afc34931SRichard Henderson
985afc34931SRichard Henderson    switch (rmode) {
986afc34931SRichard Henderson    case float_round_nearest_even:
987afc34931SRichard Henderson        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
988afc34931SRichard Henderson        break;
989afc34931SRichard Henderson    case float_round_ties_away:
990afc34931SRichard Henderson        inc = frac_lsbm1;
991afc34931SRichard Henderson        break;
992afc34931SRichard Henderson    case float_round_to_zero:
993afc34931SRichard Henderson        inc = 0;
994afc34931SRichard Henderson        break;
995afc34931SRichard Henderson    case float_round_up:
996afc34931SRichard Henderson        inc = a->sign ? 0 : rnd_mask;
997afc34931SRichard Henderson        break;
998afc34931SRichard Henderson    case float_round_down:
999afc34931SRichard Henderson        inc = a->sign ? rnd_mask : 0;
1000afc34931SRichard Henderson        break;
1001afc34931SRichard Henderson    case float_round_to_odd:
1002afc34931SRichard Henderson        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
1003afc34931SRichard Henderson        break;
1004afc34931SRichard Henderson    default:
1005afc34931SRichard Henderson        g_assert_not_reached();
1006afc34931SRichard Henderson    }
1007afc34931SRichard Henderson
1008afc34931SRichard Henderson    if (shift_adj == 0) {
1009afc34931SRichard Henderson        if (frac_addi(a, a, inc)) {
1010afc34931SRichard Henderson            frac_shr(a, 1);
1011afc34931SRichard Henderson            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
1012afc34931SRichard Henderson            a->exp++;
1013afc34931SRichard Henderson        }
1014afc34931SRichard Henderson        a->frac_lo &= ~rnd_mask;
1015afc34931SRichard Henderson    } else {
1016afc34931SRichard Henderson        frac_addi(a, a, inc);
1017afc34931SRichard Henderson        a->frac_lo &= ~rnd_mask;
1018afc34931SRichard Henderson        /* Be careful shifting back, not to overflow */
1019afc34931SRichard Henderson        frac_shl(a, shift_adj - 1);
1020afc34931SRichard Henderson        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
1021afc34931SRichard Henderson            a->exp++;
1022afc34931SRichard Henderson        } else {
1023afc34931SRichard Henderson            frac_add(a, a, a);
1024afc34931SRichard Henderson        }
1025afc34931SRichard Henderson    }
1026afc34931SRichard Henderson    return true;
1027afc34931SRichard Henderson}
1028afc34931SRichard Henderson
1029afc34931SRichard Hendersonstatic void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1030afc34931SRichard Henderson                                 int scale, float_status *s,
1031afc34931SRichard Henderson                                 const FloatFmt *fmt)
1032afc34931SRichard Henderson{
1033afc34931SRichard Henderson    switch (a->cls) {
1034afc34931SRichard Henderson    case float_class_qnan:
1035afc34931SRichard Henderson    case float_class_snan:
1036afc34931SRichard Henderson        parts_return_nan(a, s);
1037afc34931SRichard Henderson        break;
1038afc34931SRichard Henderson    case float_class_zero:
1039afc34931SRichard Henderson    case float_class_inf:
1040afc34931SRichard Henderson        break;
1041afc34931SRichard Henderson    case float_class_normal:
1042afc34931SRichard Henderson        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1043afc34931SRichard Henderson            float_raise(float_flag_inexact, s);
1044afc34931SRichard Henderson        }
1045afc34931SRichard Henderson        break;
1046afc34931SRichard Henderson    default:
1047afc34931SRichard Henderson        g_assert_not_reached();
1048afc34931SRichard Henderson    }
1049afc34931SRichard Henderson}
1050463b3f0dSRichard Henderson
1051463b3f0dSRichard Henderson/*
1052463b3f0dSRichard Henderson * Returns the result of converting the floating-point value `a' to
1053463b3f0dSRichard Henderson * the two's complement integer format. The conversion is performed
1054463b3f0dSRichard Henderson * according to the IEC/IEEE Standard for Binary Floating-Point
1055463b3f0dSRichard Henderson * Arithmetic---which means in particular that the conversion is
1056463b3f0dSRichard Henderson * rounded according to the current rounding mode. If `a' is a NaN,
1057463b3f0dSRichard Henderson * the largest positive integer is returned. Otherwise, if the
1058463b3f0dSRichard Henderson * conversion overflows, the largest integer with the same sign as `a'
1059463b3f0dSRichard Henderson * is returned.
1060463b3f0dSRichard Henderson */
1061463b3f0dSRichard Hendersonstatic int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1062463b3f0dSRichard Henderson                                     int scale, int64_t min, int64_t max,
1063463b3f0dSRichard Henderson                                     float_status *s)
1064463b3f0dSRichard Henderson{
1065463b3f0dSRichard Henderson    int flags = 0;
1066463b3f0dSRichard Henderson    uint64_t r;
1067463b3f0dSRichard Henderson
1068463b3f0dSRichard Henderson    switch (p->cls) {
1069463b3f0dSRichard Henderson    case float_class_snan:
1070e706d445SRichard Henderson        flags |= float_flag_invalid_snan;
1071e706d445SRichard Henderson        /* fall through */
1072463b3f0dSRichard Henderson    case float_class_qnan:
1073e706d445SRichard Henderson        flags |= float_flag_invalid;
1074463b3f0dSRichard Henderson        r = max;
1075463b3f0dSRichard Henderson        break;
1076463b3f0dSRichard Henderson
1077463b3f0dSRichard Henderson    case float_class_inf:
107881254b02SRichard Henderson        flags = float_flag_invalid | float_flag_invalid_cvti;
1079463b3f0dSRichard Henderson        r = p->sign ? min : max;
1080463b3f0dSRichard Henderson        break;
1081463b3f0dSRichard Henderson
1082463b3f0dSRichard Henderson    case float_class_zero:
1083463b3f0dSRichard Henderson        return 0;
1084463b3f0dSRichard Henderson
1085463b3f0dSRichard Henderson    case float_class_normal:
1086463b3f0dSRichard Henderson        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1087463b3f0dSRichard Henderson        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1088463b3f0dSRichard Henderson            flags = float_flag_inexact;
1089463b3f0dSRichard Henderson        }
1090463b3f0dSRichard Henderson
1091463b3f0dSRichard Henderson        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1092463b3f0dSRichard Henderson            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1093463b3f0dSRichard Henderson        } else {
1094463b3f0dSRichard Henderson            r = UINT64_MAX;
1095463b3f0dSRichard Henderson        }
1096463b3f0dSRichard Henderson        if (p->sign) {
1097463b3f0dSRichard Henderson            if (r <= -(uint64_t)min) {
1098463b3f0dSRichard Henderson                r = -r;
1099463b3f0dSRichard Henderson            } else {
110081254b02SRichard Henderson                flags = float_flag_invalid | float_flag_invalid_cvti;
1101463b3f0dSRichard Henderson                r = min;
1102463b3f0dSRichard Henderson            }
1103463b3f0dSRichard Henderson        } else if (r > max) {
110481254b02SRichard Henderson            flags = float_flag_invalid | float_flag_invalid_cvti;
1105463b3f0dSRichard Henderson            r = max;
11064ab4aef0SRichard Henderson        }
11074ab4aef0SRichard Henderson        break;
11084ab4aef0SRichard Henderson
11094ab4aef0SRichard Henderson    default:
11104ab4aef0SRichard Henderson        g_assert_not_reached();
11114ab4aef0SRichard Henderson    }
11124ab4aef0SRichard Henderson
11134ab4aef0SRichard Henderson    float_raise(flags, s);
11144ab4aef0SRichard Henderson    return r;
11154ab4aef0SRichard Henderson}
11164ab4aef0SRichard Henderson
11174ab4aef0SRichard Henderson/*
11184ab4aef0SRichard Henderson *  Returns the result of converting the floating-point value `a' to
11194ab4aef0SRichard Henderson *  the unsigned integer format. The conversion is performed according
11204ab4aef0SRichard Henderson *  to the IEC/IEEE Standard for Binary Floating-Point
11214ab4aef0SRichard Henderson *  Arithmetic---which means in particular that the conversion is
11224ab4aef0SRichard Henderson *  rounded according to the current rounding mode. If `a' is a NaN,
11234ab4aef0SRichard Henderson *  the largest unsigned integer is returned. Otherwise, if the
11244ab4aef0SRichard Henderson *  conversion overflows, the largest unsigned integer is returned. If
11254ab4aef0SRichard Henderson *  the 'a' is negative, the result is rounded and zero is returned;
11264ab4aef0SRichard Henderson *  values that do not round to zero will raise the inexact exception
11274ab4aef0SRichard Henderson *  flag.
11284ab4aef0SRichard Henderson */
11294ab4aef0SRichard Hendersonstatic uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
11304ab4aef0SRichard Henderson                                      int scale, uint64_t max, float_status *s)
11314ab4aef0SRichard Henderson{
11324ab4aef0SRichard Henderson    int flags = 0;
11334ab4aef0SRichard Henderson    uint64_t r;
11344ab4aef0SRichard Henderson
11354ab4aef0SRichard Henderson    switch (p->cls) {
11364ab4aef0SRichard Henderson    case float_class_snan:
1137e706d445SRichard Henderson        flags |= float_flag_invalid_snan;
1138e706d445SRichard Henderson        /* fall through */
11394ab4aef0SRichard Henderson    case float_class_qnan:
1140e706d445SRichard Henderson        flags |= float_flag_invalid;
11414ab4aef0SRichard Henderson        r = max;
11424ab4aef0SRichard Henderson        break;
11434ab4aef0SRichard Henderson
11444ab4aef0SRichard Henderson    case float_class_inf:
114581254b02SRichard Henderson        flags = float_flag_invalid | float_flag_invalid_cvti;
11464ab4aef0SRichard Henderson        r = p->sign ? 0 : max;
11474ab4aef0SRichard Henderson        break;
11484ab4aef0SRichard Henderson
11494ab4aef0SRichard Henderson    case float_class_zero:
11504ab4aef0SRichard Henderson        return 0;
11514ab4aef0SRichard Henderson
11524ab4aef0SRichard Henderson    case float_class_normal:
11534ab4aef0SRichard Henderson        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
11544ab4aef0SRichard Henderson        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
11554ab4aef0SRichard Henderson            flags = float_flag_inexact;
11564ab4aef0SRichard Henderson            if (p->cls == float_class_zero) {
11574ab4aef0SRichard Henderson                r = 0;
11584ab4aef0SRichard Henderson                break;
11594ab4aef0SRichard Henderson            }
11604ab4aef0SRichard Henderson        }
11614ab4aef0SRichard Henderson
11624ab4aef0SRichard Henderson        if (p->sign) {
116381254b02SRichard Henderson            flags = float_flag_invalid | float_flag_invalid_cvti;
11644ab4aef0SRichard Henderson            r = 0;
11654ab4aef0SRichard Henderson        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
116681254b02SRichard Henderson            flags = float_flag_invalid | float_flag_invalid_cvti;
11674ab4aef0SRichard Henderson            r = max;
11684ab4aef0SRichard Henderson        } else {
11694ab4aef0SRichard Henderson            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
11704ab4aef0SRichard Henderson            if (r > max) {
117181254b02SRichard Henderson                flags = float_flag_invalid | float_flag_invalid_cvti;
11724ab4aef0SRichard Henderson                r = max;
11734ab4aef0SRichard Henderson            }
1174463b3f0dSRichard Henderson        }
1175463b3f0dSRichard Henderson        break;
1176463b3f0dSRichard Henderson
1177463b3f0dSRichard Henderson    default:
1178463b3f0dSRichard Henderson        g_assert_not_reached();
1179463b3f0dSRichard Henderson    }
1180463b3f0dSRichard Henderson
1181463b3f0dSRichard Henderson    float_raise(flags, s);
1182463b3f0dSRichard Henderson    return r;
1183463b3f0dSRichard Henderson}
1184e3689519SRichard Henderson
1185e3689519SRichard Henderson/*
1186e2041f4dSRichard Henderson * Like partsN(float_to_sint), except do not saturate the result.
1187e2041f4dSRichard Henderson * Instead, return the rounded unbounded precision two's compliment result,
1188e2041f4dSRichard Henderson * modulo 2**(bitsm1 + 1).
1189e2041f4dSRichard Henderson */
1190e2041f4dSRichard Hendersonstatic int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
1191e2041f4dSRichard Henderson                                            FloatRoundMode rmode,
1192e2041f4dSRichard Henderson                                            int bitsm1, float_status *s)
1193e2041f4dSRichard Henderson{
1194e2041f4dSRichard Henderson    int flags = 0;
1195e2041f4dSRichard Henderson    uint64_t r;
1196e2041f4dSRichard Henderson    bool overflow = false;
1197e2041f4dSRichard Henderson
1198e2041f4dSRichard Henderson    switch (p->cls) {
1199e2041f4dSRichard Henderson    case float_class_snan:
1200e2041f4dSRichard Henderson        flags |= float_flag_invalid_snan;
1201e2041f4dSRichard Henderson        /* fall through */
1202e2041f4dSRichard Henderson    case float_class_qnan:
1203e2041f4dSRichard Henderson        flags |= float_flag_invalid;
1204e2041f4dSRichard Henderson        r = 0;
1205e2041f4dSRichard Henderson        break;
1206e2041f4dSRichard Henderson
1207e2041f4dSRichard Henderson    case float_class_inf:
1208e2041f4dSRichard Henderson        overflow = true;
1209e2041f4dSRichard Henderson        r = 0;
1210e2041f4dSRichard Henderson        break;
1211e2041f4dSRichard Henderson
1212e2041f4dSRichard Henderson    case float_class_zero:
1213e2041f4dSRichard Henderson        return 0;
1214e2041f4dSRichard Henderson
1215e2041f4dSRichard Henderson    case float_class_normal:
1216e2041f4dSRichard Henderson        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1217e2041f4dSRichard Henderson        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
1218e2041f4dSRichard Henderson            flags = float_flag_inexact;
1219e2041f4dSRichard Henderson        }
1220e2041f4dSRichard Henderson
1221e2041f4dSRichard Henderson        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1222e2041f4dSRichard Henderson            /*
1223e2041f4dSRichard Henderson             * Because we rounded to integral, and exp < 64,
1224e2041f4dSRichard Henderson             * we know frac_low is zero.
1225e2041f4dSRichard Henderson             */
1226e2041f4dSRichard Henderson            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1227e2041f4dSRichard Henderson            if (p->exp < bitsm1) {
1228e2041f4dSRichard Henderson                /* Result in range. */
1229e2041f4dSRichard Henderson            } else if (p->exp == bitsm1) {
1230e2041f4dSRichard Henderson                /* The only in-range value is INT_MIN. */
1231e2041f4dSRichard Henderson                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
1232e2041f4dSRichard Henderson            } else {
1233e2041f4dSRichard Henderson                overflow = true;
1234e2041f4dSRichard Henderson            }
1235e2041f4dSRichard Henderson        } else {
1236e2041f4dSRichard Henderson            /* Overflow, but there might still be bits to return. */
1237e2041f4dSRichard Henderson            int shl = p->exp - DECOMPOSED_BINARY_POINT;
1238e2041f4dSRichard Henderson            if (shl < N) {
1239e2041f4dSRichard Henderson                frac_shl(p, shl);
1240e2041f4dSRichard Henderson                r = p->frac_hi;
1241e2041f4dSRichard Henderson            } else {
1242e2041f4dSRichard Henderson                r = 0;
1243e2041f4dSRichard Henderson            }
1244e2041f4dSRichard Henderson            overflow = true;
1245e2041f4dSRichard Henderson        }
1246e2041f4dSRichard Henderson
1247e2041f4dSRichard Henderson        if (p->sign) {
1248e2041f4dSRichard Henderson            r = -r;
1249e2041f4dSRichard Henderson        }
1250e2041f4dSRichard Henderson        break;
1251e2041f4dSRichard Henderson
1252e2041f4dSRichard Henderson    default:
1253e2041f4dSRichard Henderson        g_assert_not_reached();
1254e2041f4dSRichard Henderson    }
1255e2041f4dSRichard Henderson
1256e2041f4dSRichard Henderson    if (overflow) {
1257e2041f4dSRichard Henderson        flags = float_flag_invalid | float_flag_invalid_cvti;
1258e2041f4dSRichard Henderson    }
1259e2041f4dSRichard Henderson    float_raise(flags, s);
1260e2041f4dSRichard Henderson    return r;
1261e2041f4dSRichard Henderson}
1262e2041f4dSRichard Henderson
1263e2041f4dSRichard Henderson/*
1264e3689519SRichard Henderson * Integer to float conversions
1265e3689519SRichard Henderson *
1266e3689519SRichard Henderson * Returns the result of converting the two's complement integer `a'
1267e3689519SRichard Henderson * to the floating-point format. The conversion is performed according
1268e3689519SRichard Henderson * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1269e3689519SRichard Henderson */
1270e3689519SRichard Hendersonstatic void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1271e3689519SRichard Henderson                                  int scale, float_status *s)
1272e3689519SRichard Henderson{
1273e3689519SRichard Henderson    uint64_t f = a;
1274e3689519SRichard Henderson    int shift;
1275e3689519SRichard Henderson
1276e3689519SRichard Henderson    memset(p, 0, sizeof(*p));
1277e3689519SRichard Henderson
1278e3689519SRichard Henderson    if (a == 0) {
1279e3689519SRichard Henderson        p->cls = float_class_zero;
1280e3689519SRichard Henderson        return;
1281e3689519SRichard Henderson    }
1282e3689519SRichard Henderson
1283e3689519SRichard Henderson    p->cls = float_class_normal;
1284e3689519SRichard Henderson    if (a < 0) {
1285e3689519SRichard Henderson        f = -f;
1286e3689519SRichard Henderson        p->sign = true;
1287e3689519SRichard Henderson    }
1288e3689519SRichard Henderson    shift = clz64(f);
1289e3689519SRichard Henderson    scale = MIN(MAX(scale, -0x10000), 0x10000);
1290e3689519SRichard Henderson
1291e3689519SRichard Henderson    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1292e3689519SRichard Henderson    p->frac_hi = f << shift;
1293e3689519SRichard Henderson}
129437c954a1SRichard Henderson
129537c954a1SRichard Henderson/*
129637c954a1SRichard Henderson * Unsigned Integer to float conversions
129737c954a1SRichard Henderson *
129837c954a1SRichard Henderson * Returns the result of converting the unsigned integer `a' to the
129937c954a1SRichard Henderson * floating-point format. The conversion is performed according to the
130037c954a1SRichard Henderson * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
130137c954a1SRichard Henderson */
130237c954a1SRichard Hendersonstatic void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
130337c954a1SRichard Henderson                                  int scale, float_status *status)
130437c954a1SRichard Henderson{
130537c954a1SRichard Henderson    memset(p, 0, sizeof(*p));
130637c954a1SRichard Henderson
130737c954a1SRichard Henderson    if (a == 0) {
130837c954a1SRichard Henderson        p->cls = float_class_zero;
130937c954a1SRichard Henderson    } else {
131037c954a1SRichard Henderson        int shift = clz64(a);
131137c954a1SRichard Henderson        scale = MIN(MAX(scale, -0x10000), 0x10000);
131237c954a1SRichard Henderson        p->cls = float_class_normal;
131337c954a1SRichard Henderson        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
131437c954a1SRichard Henderson        p->frac_hi = a << shift;
131537c954a1SRichard Henderson    }
131637c954a1SRichard Henderson}
1317e1c4667aSRichard Henderson
1318e1c4667aSRichard Henderson/*
1319e1c4667aSRichard Henderson * Float min/max.
1320e1c4667aSRichard Henderson */
1321e1c4667aSRichard Hendersonstatic FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1322e1c4667aSRichard Henderson                                   float_status *s, int flags)
1323e1c4667aSRichard Henderson{
1324e1c4667aSRichard Henderson    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1325e1c4667aSRichard Henderson    int a_exp, b_exp, cmp;
1326e1c4667aSRichard Henderson
1327e1c4667aSRichard Henderson    if (unlikely(ab_mask & float_cmask_anynan)) {
1328e1c4667aSRichard Henderson        /*
13290e903037SChih-Min Chao         * For minNum/maxNum (IEEE 754-2008)
13300e903037SChih-Min Chao         * or minimumNumber/maximumNumber (IEEE 754-2019),
13310e903037SChih-Min Chao         * if one operand is a QNaN, and the other
1332e1c4667aSRichard Henderson         * operand is numerical, then return numerical argument.
1333e1c4667aSRichard Henderson         */
13340e903037SChih-Min Chao        if ((flags & (minmax_isnum | minmax_isnumber))
1335e1c4667aSRichard Henderson            && !(ab_mask & float_cmask_snan)
1336e1c4667aSRichard Henderson            && (ab_mask & ~float_cmask_qnan)) {
1337e1c4667aSRichard Henderson            return is_nan(a->cls) ? b : a;
1338e1c4667aSRichard Henderson        }
13390e903037SChih-Min Chao
13400e903037SChih-Min Chao        /*
13410e903037SChih-Min Chao         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
13420e903037SChih-Min Chao         * are removed and replaced with minimum, minimumNumber, maximum
13430e903037SChih-Min Chao         * and maximumNumber.
13440e903037SChih-Min Chao         * minimumNumber/maximumNumber behavior for SNaN is changed to:
13450e903037SChih-Min Chao         *   If both operands are NaNs, a QNaN is returned.
13460e903037SChih-Min Chao         *   If either operand is a SNaN,
13470e903037SChih-Min Chao         *   an invalid operation exception is signaled,
13480e903037SChih-Min Chao         *   but unless both operands are NaNs,
13490e903037SChih-Min Chao         *   the SNaN is otherwise ignored and not converted to a QNaN.
13500e903037SChih-Min Chao         */
13510e903037SChih-Min Chao        if ((flags & minmax_isnumber)
13520e903037SChih-Min Chao            && (ab_mask & float_cmask_snan)
13530e903037SChih-Min Chao            && (ab_mask & ~float_cmask_anynan)) {
13540e903037SChih-Min Chao            float_raise(float_flag_invalid, s);
13550e903037SChih-Min Chao            return is_nan(a->cls) ? b : a;
13560e903037SChih-Min Chao        }
13570e903037SChih-Min Chao
1358e1c4667aSRichard Henderson        return parts_pick_nan(a, b, s);
1359e1c4667aSRichard Henderson    }
1360e1c4667aSRichard Henderson
1361e1c4667aSRichard Henderson    a_exp = a->exp;
1362e1c4667aSRichard Henderson    b_exp = b->exp;
1363e1c4667aSRichard Henderson
1364e1c4667aSRichard Henderson    if (unlikely(ab_mask != float_cmask_normal)) {
1365e1c4667aSRichard Henderson        switch (a->cls) {
1366e1c4667aSRichard Henderson        case float_class_normal:
1367e1c4667aSRichard Henderson            break;
1368e1c4667aSRichard Henderson        case float_class_inf:
1369e1c4667aSRichard Henderson            a_exp = INT16_MAX;
1370e1c4667aSRichard Henderson            break;
1371e1c4667aSRichard Henderson        case float_class_zero:
1372e1c4667aSRichard Henderson            a_exp = INT16_MIN;
1373e1c4667aSRichard Henderson            break;
1374e1c4667aSRichard Henderson        default:
1375e1c4667aSRichard Henderson            g_assert_not_reached();
1376e1c4667aSRichard Henderson            break;
1377e1c4667aSRichard Henderson        }
1378e1c4667aSRichard Henderson        switch (b->cls) {
1379e1c4667aSRichard Henderson        case float_class_normal:
1380e1c4667aSRichard Henderson            break;
1381e1c4667aSRichard Henderson        case float_class_inf:
1382e1c4667aSRichard Henderson            b_exp = INT16_MAX;
1383e1c4667aSRichard Henderson            break;
1384e1c4667aSRichard Henderson        case float_class_zero:
1385e1c4667aSRichard Henderson            b_exp = INT16_MIN;
1386e1c4667aSRichard Henderson            break;
1387e1c4667aSRichard Henderson        default:
1388e1c4667aSRichard Henderson            g_assert_not_reached();
1389e1c4667aSRichard Henderson            break;
1390e1c4667aSRichard Henderson        }
1391e1c4667aSRichard Henderson    }
1392e1c4667aSRichard Henderson
1393e1c4667aSRichard Henderson    /* Compare magnitudes. */
1394e1c4667aSRichard Henderson    cmp = a_exp - b_exp;
1395e1c4667aSRichard Henderson    if (cmp == 0) {
1396e1c4667aSRichard Henderson        cmp = frac_cmp(a, b);
1397e1c4667aSRichard Henderson    }
1398e1c4667aSRichard Henderson
1399e1c4667aSRichard Henderson    /*
1400e1c4667aSRichard Henderson     * Take the sign into account.
1401e1c4667aSRichard Henderson     * For ismag, only do this if the magnitudes are equal.
1402e1c4667aSRichard Henderson     */
1403e1c4667aSRichard Henderson    if (!(flags & minmax_ismag) || cmp == 0) {
1404e1c4667aSRichard Henderson        if (a->sign != b->sign) {
1405e1c4667aSRichard Henderson            /* For differing signs, the negative operand is less. */
1406e1c4667aSRichard Henderson            cmp = a->sign ? -1 : 1;
1407e1c4667aSRichard Henderson        } else if (a->sign) {
1408e1c4667aSRichard Henderson            /* For two negative operands, invert the magnitude comparison. */
1409e1c4667aSRichard Henderson            cmp = -cmp;
1410e1c4667aSRichard Henderson        }
1411e1c4667aSRichard Henderson    }
1412e1c4667aSRichard Henderson
1413e1c4667aSRichard Henderson    if (flags & minmax_ismin) {
1414e1c4667aSRichard Henderson        cmp = -cmp;
1415e1c4667aSRichard Henderson    }
1416e1c4667aSRichard Henderson    return cmp < 0 ? b : a;
1417e1c4667aSRichard Henderson}
14186eb169b8SRichard Henderson
14196eb169b8SRichard Henderson/*
14206eb169b8SRichard Henderson * Floating point compare
14216eb169b8SRichard Henderson */
14226eb169b8SRichard Hendersonstatic FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
14236eb169b8SRichard Henderson                                     float_status *s, bool is_quiet)
14246eb169b8SRichard Henderson{
14256eb169b8SRichard Henderson    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
14266eb169b8SRichard Henderson
14276eb169b8SRichard Henderson    if (likely(ab_mask == float_cmask_normal)) {
14289343c884SRichard Henderson        FloatRelation cmp;
14299343c884SRichard Henderson
14306eb169b8SRichard Henderson        if (a->sign != b->sign) {
14316eb169b8SRichard Henderson            goto a_sign;
14326eb169b8SRichard Henderson        }
14339343c884SRichard Henderson        if (a->exp == b->exp) {
14346eb169b8SRichard Henderson            cmp = frac_cmp(a, b);
14359343c884SRichard Henderson        } else if (a->exp < b->exp) {
14369343c884SRichard Henderson            cmp = float_relation_less;
14379343c884SRichard Henderson        } else {
14389343c884SRichard Henderson            cmp = float_relation_greater;
14396eb169b8SRichard Henderson        }
14406eb169b8SRichard Henderson        if (a->sign) {
14416eb169b8SRichard Henderson            cmp = -cmp;
14426eb169b8SRichard Henderson        }
14436eb169b8SRichard Henderson        return cmp;
14446eb169b8SRichard Henderson    }
14456eb169b8SRichard Henderson
14466eb169b8SRichard Henderson    if (unlikely(ab_mask & float_cmask_anynan)) {
1447e706d445SRichard Henderson        if (ab_mask & float_cmask_snan) {
1448e706d445SRichard Henderson            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1449e706d445SRichard Henderson        } else if (!is_quiet) {
14506eb169b8SRichard Henderson            float_raise(float_flag_invalid, s);
14516eb169b8SRichard Henderson        }
14526eb169b8SRichard Henderson        return float_relation_unordered;
14536eb169b8SRichard Henderson    }
14546eb169b8SRichard Henderson
14556eb169b8SRichard Henderson    if (ab_mask & float_cmask_zero) {
14566eb169b8SRichard Henderson        if (ab_mask == float_cmask_zero) {
14576eb169b8SRichard Henderson            return float_relation_equal;
14586eb169b8SRichard Henderson        } else if (a->cls == float_class_zero) {
14596eb169b8SRichard Henderson            goto b_sign;
14606eb169b8SRichard Henderson        } else {
14616eb169b8SRichard Henderson            goto a_sign;
14626eb169b8SRichard Henderson        }
14636eb169b8SRichard Henderson    }
14646eb169b8SRichard Henderson
14656eb169b8SRichard Henderson    if (ab_mask == float_cmask_inf) {
14666eb169b8SRichard Henderson        if (a->sign == b->sign) {
14676eb169b8SRichard Henderson            return float_relation_equal;
14686eb169b8SRichard Henderson        }
14696eb169b8SRichard Henderson    } else if (b->cls == float_class_inf) {
14706eb169b8SRichard Henderson        goto b_sign;
14716eb169b8SRichard Henderson    } else {
14726eb169b8SRichard Henderson        g_assert(a->cls == float_class_inf);
14736eb169b8SRichard Henderson    }
14746eb169b8SRichard Henderson
14756eb169b8SRichard Henderson a_sign:
14766eb169b8SRichard Henderson    return a->sign ? float_relation_less : float_relation_greater;
14776eb169b8SRichard Henderson b_sign:
14786eb169b8SRichard Henderson    return b->sign ? float_relation_greater : float_relation_less;
14796eb169b8SRichard Henderson}
148039626b0cSRichard Henderson
148139626b0cSRichard Henderson/*
148239626b0cSRichard Henderson * Multiply A by 2 raised to the power N.
148339626b0cSRichard Henderson */
148439626b0cSRichard Hendersonstatic void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
148539626b0cSRichard Henderson{
148639626b0cSRichard Henderson    switch (a->cls) {
148739626b0cSRichard Henderson    case float_class_snan:
148839626b0cSRichard Henderson    case float_class_qnan:
148939626b0cSRichard Henderson        parts_return_nan(a, s);
149039626b0cSRichard Henderson        break;
149139626b0cSRichard Henderson    case float_class_zero:
149239626b0cSRichard Henderson    case float_class_inf:
149339626b0cSRichard Henderson        break;
149439626b0cSRichard Henderson    case float_class_normal:
149539626b0cSRichard Henderson        a->exp += MIN(MAX(n, -0x10000), 0x10000);
149639626b0cSRichard Henderson        break;
149739626b0cSRichard Henderson    default:
149839626b0cSRichard Henderson        g_assert_not_reached();
149939626b0cSRichard Henderson    }
150039626b0cSRichard Henderson}
15012fa3546cSRichard Henderson
15022fa3546cSRichard Henderson/*
15032fa3546cSRichard Henderson * Return log2(A)
15042fa3546cSRichard Henderson */
15052fa3546cSRichard Hendersonstatic void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
15062fa3546cSRichard Henderson{
15072fa3546cSRichard Henderson    uint64_t a0, a1, r, t, ign;
15082fa3546cSRichard Henderson    FloatPartsN f;
15092fa3546cSRichard Henderson    int i, n, a_exp, f_exp;
15102fa3546cSRichard Henderson
15112fa3546cSRichard Henderson    if (unlikely(a->cls != float_class_normal)) {
15122fa3546cSRichard Henderson        switch (a->cls) {
15132fa3546cSRichard Henderson        case float_class_snan:
15142fa3546cSRichard Henderson        case float_class_qnan:
15152fa3546cSRichard Henderson            parts_return_nan(a, s);
15162fa3546cSRichard Henderson            return;
15172fa3546cSRichard Henderson        case float_class_zero:
15183cf71969SSong Gao            float_raise(float_flag_divbyzero, s);
15192fa3546cSRichard Henderson            /* log2(0) = -inf */
15202fa3546cSRichard Henderson            a->cls = float_class_inf;
15212fa3546cSRichard Henderson            a->sign = 1;
15222fa3546cSRichard Henderson            return;
15232fa3546cSRichard Henderson        case float_class_inf:
15242fa3546cSRichard Henderson            if (unlikely(a->sign)) {
15252fa3546cSRichard Henderson                goto d_nan;
15262fa3546cSRichard Henderson            }
15272fa3546cSRichard Henderson            return;
15282fa3546cSRichard Henderson        default:
15292fa3546cSRichard Henderson            break;
15302fa3546cSRichard Henderson        }
15312fa3546cSRichard Henderson        g_assert_not_reached();
15322fa3546cSRichard Henderson    }
15332fa3546cSRichard Henderson    if (unlikely(a->sign)) {
15342fa3546cSRichard Henderson        goto d_nan;
15352fa3546cSRichard Henderson    }
15362fa3546cSRichard Henderson
15372fa3546cSRichard Henderson    /* TODO: This algorithm looses bits too quickly for float128. */
15382fa3546cSRichard Henderson    g_assert(N == 64);
15392fa3546cSRichard Henderson
15402fa3546cSRichard Henderson    a_exp = a->exp;
15412fa3546cSRichard Henderson    f_exp = -1;
15422fa3546cSRichard Henderson
15432fa3546cSRichard Henderson    r = 0;
15442fa3546cSRichard Henderson    t = DECOMPOSED_IMPLICIT_BIT;
15452fa3546cSRichard Henderson    a0 = a->frac_hi;
15462fa3546cSRichard Henderson    a1 = 0;
15472fa3546cSRichard Henderson
15482fa3546cSRichard Henderson    n = fmt->frac_size + 2;
15492fa3546cSRichard Henderson    if (unlikely(a_exp == -1)) {
15502fa3546cSRichard Henderson        /*
15512fa3546cSRichard Henderson         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
15522fa3546cSRichard Henderson         * When the value is very close to 1.0, there are lots of 1's in
15532fa3546cSRichard Henderson         * the msb parts of the fraction.  At the end, when we subtract
15542fa3546cSRichard Henderson         * this value from -1.0, we can see a catastrophic loss of precision,
15552fa3546cSRichard Henderson         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
15562fa3546cSRichard Henderson         * bits of y in the final result.  To minimize this, compute as many
15572fa3546cSRichard Henderson         * digits as we can.
15582fa3546cSRichard Henderson         * ??? This case needs another algorithm to avoid this.
15592fa3546cSRichard Henderson         */
15602fa3546cSRichard Henderson        n = fmt->frac_size * 2 + 2;
15612fa3546cSRichard Henderson        /* Don't compute a value overlapping the sticky bit */
15622fa3546cSRichard Henderson        n = MIN(n, 62);
15632fa3546cSRichard Henderson    }
15642fa3546cSRichard Henderson
15652fa3546cSRichard Henderson    for (i = 0; i < n; i++) {
15662fa3546cSRichard Henderson        if (a1) {
15672fa3546cSRichard Henderson            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
15682fa3546cSRichard Henderson        } else if (a0 & 0xffffffffull) {
15692fa3546cSRichard Henderson            mul64To128(a0, a0, &a0, &a1);
15702fa3546cSRichard Henderson        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
15712fa3546cSRichard Henderson            a0 >>= 32;
15722fa3546cSRichard Henderson            a0 *= a0;
15732fa3546cSRichard Henderson        } else {
15742fa3546cSRichard Henderson            goto exact;
15752fa3546cSRichard Henderson        }
15762fa3546cSRichard Henderson
15772fa3546cSRichard Henderson        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
15782fa3546cSRichard Henderson            if (unlikely(a_exp == 0 && r == 0)) {
15792fa3546cSRichard Henderson                /*
15802fa3546cSRichard Henderson                 * When a_exp == 0, we're computing the log2 of a value
15812fa3546cSRichard Henderson                 * [1.0,2.0).  When the value is very close to 1.0, there
15822fa3546cSRichard Henderson                 * are lots of 0's in the msb parts of the fraction.
15832fa3546cSRichard Henderson                 * We need to compute more digits to produce a correct
15842fa3546cSRichard Henderson                 * result -- restart at the top of the fraction.
15852fa3546cSRichard Henderson                 * ??? This is likely to lose precision quickly, as for
15862fa3546cSRichard Henderson                 * float128; we may need another method.
15872fa3546cSRichard Henderson                 */
15882fa3546cSRichard Henderson                f_exp -= i;
15892fa3546cSRichard Henderson                t = r = DECOMPOSED_IMPLICIT_BIT;
15902fa3546cSRichard Henderson                i = 0;
15912fa3546cSRichard Henderson            } else {
15922fa3546cSRichard Henderson                r |= t;
15932fa3546cSRichard Henderson            }
15942fa3546cSRichard Henderson        } else {
15952fa3546cSRichard Henderson            add128(a0, a1, a0, a1, &a0, &a1);
15962fa3546cSRichard Henderson        }
15972fa3546cSRichard Henderson        t >>= 1;
15982fa3546cSRichard Henderson    }
15992fa3546cSRichard Henderson
16002fa3546cSRichard Henderson    /* Set sticky for inexact. */
16012fa3546cSRichard Henderson    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
16022fa3546cSRichard Henderson
16032fa3546cSRichard Henderson exact:
16042fa3546cSRichard Henderson    parts_sint_to_float(a, a_exp, 0, s);
16052fa3546cSRichard Henderson    if (r == 0) {
16062fa3546cSRichard Henderson        return;
16072fa3546cSRichard Henderson    }
16082fa3546cSRichard Henderson
16092fa3546cSRichard Henderson    memset(&f, 0, sizeof(f));
16102fa3546cSRichard Henderson    f.cls = float_class_normal;
16112fa3546cSRichard Henderson    f.frac_hi = r;
16122fa3546cSRichard Henderson    f.exp = f_exp - frac_normalize(&f);
16132fa3546cSRichard Henderson
16142fa3546cSRichard Henderson    if (a_exp < 0) {
16152fa3546cSRichard Henderson        parts_sub_normal(a, &f);
16162fa3546cSRichard Henderson    } else if (a_exp > 0) {
16172fa3546cSRichard Henderson        parts_add_normal(a, &f);
16182fa3546cSRichard Henderson    } else {
16192fa3546cSRichard Henderson        *a = f;
16202fa3546cSRichard Henderson    }
16212fa3546cSRichard Henderson    return;
16222fa3546cSRichard Henderson
16232fa3546cSRichard Henderson d_nan:
16242fa3546cSRichard Henderson    float_raise(float_flag_invalid, s);
16252fa3546cSRichard Henderson    parts_default_nan(a, s);
16262fa3546cSRichard Henderson}
1627