1 /*
2  *  x86 FPREM test - executes the FPREM and FPREM1 instructions with corner case
3  *  operands and prints the operands, result and FPU status word.
4  *
5  *  Run this on real hardware, then under QEMU, and diff the outputs, to compare
6  *  QEMU's implementation to your hardware. The 'run-test-i386-fprem' make
7  *  target does this.
8  *
9  *  Copyright (c) 2003 Fabrice Bellard
10  *  Copyright (c) 2012 Catalin Patulea
11  *
12  *  This program is free software; you can redistribute it and/or modify
13  *  it under the terms of the GNU General Public License as published by
14  *  the Free Software Foundation; either version 2 of the License, or
15  *  (at your option) any later version.
16  *
17  *  This program is distributed in the hope that it will be useful,
18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *  GNU General Public License for more details.
21  *
22  *  You should have received a copy of the GNU General Public License
23  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
24  */
25 
26 #include <stdio.h>
27 #include <stdint.h>
28 
29 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
30 
31 /*
32  * Inspired by <ieee754.h>'s union ieee854_long_double, but with single
33  * long long mantissa fields and assuming little-endianness for simplicity.
34  */
35 union float80u {
36     long double d;
37 
38     /* This is the IEEE 854 double-extended-precision format.  */
39     struct {
40         unsigned long long mantissa:63;
41         unsigned int one:1;
42         unsigned int exponent:15;
43         unsigned int negative:1;
44         unsigned int empty:16;
45     } __attribute__((packed)) ieee;
46 
47     /* This is for NaNs in the IEEE 854 double-extended-precision format.  */
48     struct {
49         unsigned long long mantissa:62;
50         unsigned int quiet_nan:1;
51         unsigned int one:1;
52         unsigned int exponent:15;
53         unsigned int negative:1;
54         unsigned int empty:16;
55     } __attribute__((packed)) ieee_nan;
56 };
57 
58 #define IEEE854_LONG_DOUBLE_BIAS 0x3fff
59 
60 static const union float80u q_nan = {
61     .ieee_nan.negative = 0,  /* X */
62     .ieee_nan.exponent = 0x7fff,
63     .ieee_nan.one = 1,
64     .ieee_nan.quiet_nan = 1,
65     .ieee_nan.mantissa = 0,
66 };
67 
68 static const union float80u s_nan = {
69     .ieee_nan.negative = 0,  /* X */
70     .ieee_nan.exponent = 0x7fff,
71     .ieee_nan.one = 1,
72     .ieee_nan.quiet_nan = 0,
73     .ieee_nan.mantissa = 1,  /* nonzero */
74 };
75 
76 static const union float80u pos_inf = {
77     .ieee.negative = 0,
78     .ieee.exponent = 0x7fff,
79     .ieee.one = 1,
80     .ieee.mantissa = 0,
81 };
82 
83 static const union float80u pseudo_pos_inf = {  /* "unsupported" */
84     .ieee.negative = 0,
85     .ieee.exponent = 0x7fff,
86     .ieee.one = 0,
87     .ieee.mantissa = 0,
88 };
89 
90 static const union float80u pos_denorm = {
91     .ieee.negative = 0,
92     .ieee.exponent = 0,
93     .ieee.one = 0,
94     .ieee.mantissa = 1,
95 };
96 
97 static const union float80u smallest_positive_norm = {
98     .ieee.negative = 0,
99     .ieee.exponent = 1,
100     .ieee.one = 1,
101     .ieee.mantissa = 0,
102 };
103 
fninit()104 static void fninit()
105 {
106     asm volatile ("fninit\n");
107 }
108 
fprem(long double a,long double b,uint16_t * sw)109 static long double fprem(long double a, long double b, uint16_t *sw)
110 {
111     long double result;
112     asm volatile ("fprem\n"
113                   "fnstsw %1\n"
114                   : "=t" (result), "=m" (*sw)
115                   : "0" (a), "u" (b)
116                   : "st(1)");
117     return result;
118 }
119 
fprem1(long double a,long double b,uint16_t * sw)120 static long double fprem1(long double a, long double b, uint16_t *sw)
121 {
122     long double result;
123     asm volatile ("fprem1\n"
124                   "fnstsw %1\n"
125                   : "=t" (result), "=m" (*sw)
126                   : "0" (a), "u" (b)
127                   : "st(1)");
128     return result;
129 }
130 
131 #define FPUS_IE (1 << 0)
132 #define FPUS_DE (1 << 1)
133 #define FPUS_ZE (1 << 2)
134 #define FPUS_OE (1 << 3)
135 #define FPUS_UE (1 << 4)
136 #define FPUS_PE (1 << 5)
137 #define FPUS_SF (1 << 6)
138 #define FPUS_SE (1 << 7)
139 #define FPUS_C0 (1 << 8)
140 #define FPUS_C1 (1 << 9)
141 #define FPUS_C2 (1 << 10)
142 #define FPUS_TOP 0x3800
143 #define FPUS_C3 (1 << 14)
144 #define FPUS_B  (1 << 15)
145 
146 #define FPUS_EMASK 0x007f
147 
148 #define FPUC_EM 0x3f
149 
psw(uint16_t sw)150 static void psw(uint16_t sw)
151 {
152     printf("SW:  C3 TopC2C1C0\n");
153     printf("SW: %c %d %3d %d %d %d %c %c %c %c %c %c %c %c\n",
154            sw & FPUS_B ? 'B' : 'b',
155            !!(sw & FPUS_C3),
156            (sw & FPUS_TOP) >> 11,
157            !!(sw & FPUS_C2),
158            !!(sw & FPUS_C1),
159            !!(sw & FPUS_C0),
160            (sw & FPUS_SE) ? 'S' : 's',
161            (sw & FPUS_SF) ? 'F' : 'f',
162            (sw & FPUS_PE) ? 'P' : 'p',
163            (sw & FPUS_UE) ? 'U' : 'u',
164            (sw & FPUS_OE) ? 'O' : 'o',
165            (sw & FPUS_ZE) ? 'Z' : 'z',
166            (sw & FPUS_DE) ? 'D' : 'd',
167            (sw & FPUS_IE) ? 'I' : 'i');
168 }
169 
do_fprem(long double a,long double b)170 static void do_fprem(long double a, long double b)
171 {
172     const union float80u au = {.d = a};
173     const union float80u bu = {.d = b};
174     union float80u ru;
175     uint16_t sw;
176 
177     printf("A: S=%d Exp=%04x Int=%d (QNaN=%d) Sig=%016llx (%.06Le)\n",
178            au.ieee.negative, au.ieee.exponent, au.ieee.one,
179            au.ieee_nan.quiet_nan, (unsigned long long)au.ieee.mantissa,
180            a);
181     printf("B: S=%d Exp=%04x Int=%d (QNaN=%d) Sig=%016llx (%.06Le)\n",
182            bu.ieee.negative, bu.ieee.exponent, bu.ieee.one,
183            bu.ieee_nan.quiet_nan, (unsigned long long)bu.ieee.mantissa,
184            b);
185     fflush(stdout);
186 
187     fninit();
188     ru.d = fprem(a, b, &sw);
189     psw(sw);
190 
191     printf("R : S=%d Exp=%04x Int=%d (QNaN=%d) Sig=%016llx (%.06Le)\n",
192            ru.ieee.negative, ru.ieee.exponent, ru.ieee.one,
193            ru.ieee_nan.quiet_nan, (unsigned long long)ru.ieee.mantissa,
194            ru.d);
195 
196     fninit();
197     ru.d = fprem1(a, b, &sw);
198     psw(sw);
199 
200     printf("R1: S=%d Exp=%04x Int=%d (QNaN=%d) Sig=%016llx (%.06Le)\n",
201            ru.ieee.negative, ru.ieee.exponent, ru.ieee.one,
202            ru.ieee_nan.quiet_nan, (unsigned long long)ru.ieee.mantissa,
203            ru.d);
204 
205     printf("\n");
206 }
207 
do_fprem_stack_underflow(void)208 static void do_fprem_stack_underflow(void)
209 {
210     const long double a = 1.0;
211     union float80u ru;
212     uint16_t sw;
213 
214     fninit();
215     asm volatile ("fprem\n"
216                   "fnstsw %1\n"
217                   : "=t" (ru.d), "=m" (sw)
218                   : "0" (a)
219                   : "st(1)");
220     psw(sw);
221 
222     printf("R: S=%d Exp=%04x Int=%d (QNaN=%d) Sig=%016llx (%.06Le)\n",
223            ru.ieee.negative, ru.ieee.exponent, ru.ieee.one,
224            ru.ieee_nan.quiet_nan, (unsigned long long)ru.ieee.mantissa,
225            ru.d);
226     printf("\n");
227 }
228 
test_fprem_cases(void)229 static void test_fprem_cases(void)
230 {
231     printf("= stack underflow =\n");
232     do_fprem_stack_underflow();
233 
234     printf("= invalid operation =\n");
235     do_fprem(q_nan.d, 1.0);
236     do_fprem(s_nan.d, 1.0);
237     do_fprem(1.0, 0.0);
238     do_fprem(pos_inf.d, 1.0);
239     do_fprem(pseudo_pos_inf.d, 1.0);
240 
241     printf("= denormal =\n");
242     do_fprem(pos_denorm.d, 1.0);
243     do_fprem(1.0, pos_denorm.d);
244 
245     do_fprem(smallest_positive_norm.d, smallest_positive_norm.d);
246 
247     /* printf("= underflow =\n"); */
248     /* TODO: Is there a case where FPREM raises underflow? */
249 }
250 
test_fprem_pairs(void)251 static void test_fprem_pairs(void)
252 {
253     unsigned long long count;
254 
255     unsigned int negative_index_a = 0;
256     unsigned int negative_index_b = 0;
257     static const unsigned int negative_values[] = {
258         0,
259         1,
260     };
261 
262     unsigned int exponent_index_a = 0;
263     unsigned int exponent_index_b = 0;
264     static const unsigned int exponent_values[] = {
265         0,
266         1,
267         2,
268         IEEE854_LONG_DOUBLE_BIAS - 1,
269         IEEE854_LONG_DOUBLE_BIAS,
270         IEEE854_LONG_DOUBLE_BIAS + 1,
271         0x7ffd,
272         0x7ffe,
273         0x7fff,
274     };
275 
276     unsigned int one_index_a = 0;
277     unsigned int one_index_b = 0;
278     static const unsigned int one_values[] = {
279         0,
280         1,
281     };
282 
283     unsigned int quiet_nan_index_a = 0;
284     unsigned int quiet_nan_index_b = 0;
285     static const unsigned int quiet_nan_values[] = {
286         0,
287         1,
288     };
289 
290     unsigned int mantissa_index_a = 0;
291     unsigned int mantissa_index_b = 0;
292     static const unsigned long long mantissa_values[] = {
293         0,
294         1,
295         2,
296         0x3ffffffffffffffdULL,
297         0x3ffffffffffffffeULL,
298         0x3fffffffffffffffULL,
299     };
300 
301     for (count = 0; ; ++count) {
302 #define INIT_FIELD(var, field) \
303             .ieee_nan.field = field##_values[field##_index_##var]
304         const union float80u a = {
305             INIT_FIELD(a, negative),
306             INIT_FIELD(a, exponent),
307             INIT_FIELD(a, one),
308             INIT_FIELD(a, quiet_nan),
309             INIT_FIELD(a, mantissa),
310         };
311         const union float80u b = {
312             INIT_FIELD(b, negative),
313             INIT_FIELD(b, exponent),
314             INIT_FIELD(b, one),
315             INIT_FIELD(b, quiet_nan),
316             INIT_FIELD(b, mantissa),
317         };
318 #undef INIT_FIELD
319 
320         do_fprem(a.d, b.d);
321 
322         int carry = 1;
323 #define CARRY_INTO(var, field) do { \
324             if (carry) { \
325                 if (++field##_index_##var == ARRAY_SIZE(field##_values)) { \
326                     field##_index_##var = 0; \
327                 } else { \
328                     carry = 0; \
329                 } \
330             } \
331         } while (0)
332         CARRY_INTO(b, mantissa);
333         CARRY_INTO(b, quiet_nan);
334         CARRY_INTO(b, one);
335         CARRY_INTO(b, exponent);
336         CARRY_INTO(b, negative);
337         CARRY_INTO(a, mantissa);
338         CARRY_INTO(a, quiet_nan);
339         CARRY_INTO(a, one);
340         CARRY_INTO(a, exponent);
341         CARRY_INTO(a, negative);
342 #undef CARRY_INTO
343 
344         if (carry) {
345             break;
346         }
347     }
348 
349     fprintf(stderr, "test-i386-fprem: tested %llu cases\n", count);
350 }
351 
main(int argc,char ** argv)352 int main(int argc, char **argv)
353 {
354     test_fprem_cases();
355     test_fprem_pairs();
356     return 0;
357 }
358