1 /* $OpenBSD: fp_complete.c,v 1.13 2023/01/31 15:18:51 deraadt Exp $ */
2 /* $NetBSD: fp_complete.c,v 1.5 2002/01/18 22:15:56 ross Exp $ */
3
4 /*-
5 * Copyright (c) 2001 Ross Harvey
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the NetBSD
19 * Foundation, Inc. and its contributors.
20 * 4. Neither the name of The NetBSD Foundation nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40
41 #include <machine/cpu.h>
42 #include <machine/fpu.h>
43 #include <machine/reg.h>
44 #include <alpha/alpha/db_instruction.h>
45
46 #include <lib/libkern/softfloat.h>
47
#define	TSWINSIZE 4	/* size of trap shadow window in u_int32_t units */

/*	Set Name		Opcodes			AARM C.* Symbols */

#define	CPUREG_CLASS		(0xfUL << 0x10)		/* INT[ALSM] */
#define	FPUREG_CLASS		(0xfUL << 0x14)		/* ITFP, FLT[ILV] */
#define	CHECKFUNCTIONCODE	(1UL << 0x18)		/* MISC */
/*
 * Opcodes that terminate a trap shadow scan: PALcode entries, jumps,
 * every branch opcode (0x30-0x3f), and the MISC group -- which needs a
 * further look at its function code for TRAPB/EXCB (CHECKFUNCTIONCODE).
 */
#define	TRAPSHADOWBOUNDARY	(1UL << 0x00 |		/* PAL */\
				 1UL << 0x19 |		/* \PAL\ */\
				 1UL << 0x1a |		/* JSR */\
				 1UL << 0x1b |		/* \PAL\ */\
				 1UL << 0x1d |		/* \PAL\ */\
				 1UL << 0x1e |		/* \PAL\ */\
				 1UL << 0x1f |		/* \PAL\ */\
				 0xffffUL << 0x30 |	/* branch ops */\
				 CHECKFUNCTIONCODE)

/*
 * Assemble an IEEE-format constant of the given bit width from its
 * sign, biased exponent, fraction MSB and remaining fraction fields.
 */
#define	MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
	(u_int ## width ## _t)(sign) << ((width) - 1)			|\
	(u_int ## width ## _t)(exp) << ((width) - 1 - (expwidth))	|\
	(u_int ## width ## _t)(msb) << ((width) - 1 - (expwidth) - 1)	|\
	(u_int ## width ## _t)(rest_of_frac)

/* Canonical quiet NaNs (exponent all ones, fraction MSB set). */
#define	FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
#define	FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)

/* True for denormals; zero (frac == 0) is not subnormal. */
#define	IS_SUBNORMAL(v)	((v)->exp == 0 && (v)->frac != 0)

/* Flush subnormal *inputs* to zero when the process asked for DMZ. */
#define	PREFILTER_SUBNORMAL(p,v) \
do { \
	if ((p)->p_md.md_flags & IEEE_MAP_DMZ && IS_SUBNORMAL(v))	\
		(v)->frac = 0; \
} while (0)

/* Flush subnormal *results* to zero when the process asked for UMZ. */
#define	POSTFILTER_SUBNORMAL(p,v) \
do { \
	if ((p)->p_md.md_flags & IEEE_MAP_UMZ && IS_SUBNORMAL(v))	\
		(v)->frac = 0; \
} while (0)

/* Alpha returns 2.0 for true, all zeroes for false. */

#define	CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)

/* Move bits from sw fp_c to hw fpcr. */
/* NOTE(review): the expression keeps sw bits outside m and merges hw
 * bits (shifted down by offs) inside m -- i.e. it imports fpcr bits
 * into the fp_c word; confirm direction against callers. */

#define	CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))
95
96 /*
97 * Temporary trap shadow instrumentation. The [un]resolved counters
98 * could be kept permanently, as they provide information on whether
99 * user code has met AARM trap shadow generation requirements.
100 */
101
struct alpha_shadow {
	u_int64_t resolved;	/* cases trigger pc found */
	u_int64_t unresolved;	/* cases it wasn't, code problems? */
	u_int64_t scans;	/* trap shadow scans */
	u_int64_t len;		/* number of instructions examined */
	u_int64_t uop;		/* bit mask of unexpected opcodes */
	u_int64_t sqrts;	/* ev6+ square root single count */
	u_int64_t sqrtt;	/* ev6+ square root double count */
	u_int32_t ufunc;	/* bit mask of unexpected functions */
	u_int32_t max;		/* max trap shadow scan */
	u_int32_t nilswop;	/* unexpected op codes */
	u_int32_t nilswfunc;	/* unexpected function codes */
	u_int32_t nilanyop;	/* this "cannot happen" */
	u_int32_t vax;		/* sigs from vax fp opcodes */
} alpha_shadow, alpha_shadow_zero;
/* NOTE(review): alpha_shadow_zero is an all-zero instance, presumably
 * for resetting the counters by struct assignment; no user visible here. */
117
/* Forward declarations for the SoftFloat-backed helpers below. */
static float64 float64_unk(float64, float64);
static float64 compare_un(float64, float64);
static float64 compare_eq(float64, float64);
static float64 compare_lt(float64, float64);
static float64 compare_le(float64, float64);
static void cvt_qs_ts_st_gf_qf(u_int32_t, struct proc *);
static void cvt_gd(u_int32_t, struct proc *);
static void cvt_qt_dg_qg(u_int32_t, struct proc *);
static void cvt_tq_gq(u_int32_t, struct proc *);

/* Single-precision arithmetic, indexed by float_detail.opclass 0-3. */
static float32 (*swfp_s[])(float32, float32) = {
	float32_add, float32_sub, float32_mul, float32_div,
};

/*
 * Double-precision arithmetic and compares, indexed by opclass 0-11.
 * Slots 8-11 (reserved/sqrt encodings; sqrt is dispatched separately
 * in alpha_fp_interpret) fall through to float64_unk.
 */
static float64 (*swfp_t[])(float64, float64) = {
	float64_add, float64_sub, float64_mul, float64_div,
	compare_un, compare_eq, compare_lt, compare_le,
	float64_unk, float64_unk, float64_unk, float64_unk
};

/* Conversion handlers, indexed by opclass - 12 (opclasses 12-15). */
static void (*swfp_cvt[])(u_int32_t, struct proc *) = {
	cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
};
141
142 static void
this_cannot_happen(int what_cannot_happen,int64_t bits)143 this_cannot_happen(int what_cannot_happen, int64_t bits)
144 {
145 static int total;
146 alpha_instruction inst;
147 static u_int64_t reported;
148
149 inst.bits = bits;
150 ++alpha_shadow.nilswfunc;
151 if (bits != -1)
152 alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
153 if (1UL << what_cannot_happen & reported)
154 return;
155 reported |= 1UL << what_cannot_happen;
156 if (total >= 1000)
157 return; /* right now, this return "cannot happen" */
158 ++total;
159 if (bits)
160 printf("FP instruction %x\n", (unsigned int)bits);
161 printf("FP event %d/%lx/%lx\n", what_cannot_happen,
162 (unsigned long)reported, (unsigned long)alpha_shadow.uop);
163 }
164
165 static __inline void
sts(unsigned int rn,s_float * v,struct proc * p)166 sts(unsigned int rn, s_float *v, struct proc *p)
167 {
168 alpha_sts(rn, v);
169 PREFILTER_SUBNORMAL(p, v);
170 }
171
172 static __inline void
stt(unsigned int rn,t_float * v,struct proc * p)173 stt(unsigned int rn, t_float *v, struct proc *p)
174 {
175 alpha_stt(rn, v);
176 PREFILTER_SUBNORMAL(p, v);
177 }
178
179 static __inline void
lds(unsigned int rn,s_float * v,struct proc * p)180 lds(unsigned int rn, s_float *v, struct proc *p)
181 {
182 POSTFILTER_SUBNORMAL(p, v);
183 alpha_lds(rn, v);
184 }
185
186 static __inline void
ldt(unsigned int rn,t_float * v,struct proc * p)187 ldt(unsigned int rn, t_float *v, struct proc *p)
188 {
189 POSTFILTER_SUBNORMAL(p, v);
190 alpha_ldt(rn, v);
191 }
192
193 static float64
compare_lt(float64 a,float64 b)194 compare_lt(float64 a, float64 b)
195 {
196 return CMP_RESULT(float64_lt(a, b));
197 }
198
199 static float64
compare_le(float64 a,float64 b)200 compare_le(float64 a, float64 b)
201 {
202 return CMP_RESULT(float64_le(a, b));
203 }
204
205 static float64
compare_un(float64 a,float64 b)206 compare_un(float64 a, float64 b)
207 {
208 if (float64_is_nan(a) | float64_is_nan(b)) {
209 if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
210 float_set_invalid();
211 return CMP_RESULT(1);
212 }
213 return CMP_RESULT(0);
214 }
215
216 static float64
compare_eq(float64 a,float64 b)217 compare_eq(float64 a, float64 b)
218 {
219 return CMP_RESULT(float64_eq(a, b));
220 }
221 /*
222 * A note regarding the VAX FP ops.
223 *
224 * The AARM gives us complete leeway to set or not set status flags on VAX
225 * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
226 * flags by IEEE rules. Many ops are common to d/f/g and s/t source types.
227 * For the purely vax ones, it's hard to imagine ever running them.
228 * (Generated VAX fp ops with completion flags? Hmm.) We are careful never
229 * to panic, assert, or print unlimited output based on a path through the
230 * decoder, so weird cases don't become security issues.
231 */
/*
 * Software completion for the cvtXs/f function group (cvt[qt]s,
 * cvtst(!), cvt[gq]f).  Only CVTST (widen S->T) and the T-source
 * narrowing (CVTTS, T->S) are actually interpreted here; the remaining
 * source encodings fall through to a quiet-NaN result via
 * this_cannot_happen().
 */
static void
cvt_qs_ts_st_gf_qf(u_int32_t inst_bits, struct proc *p)
{
	t_float tfb, tfc;
	s_float sfb, sfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	/*
	 * cvtst and cvtts have the same opcode, function, and source.  The
	 * distinction for cvtst is hidden in the illegal modifier combinations.
	 * We decode even the non-/s modifier, so that the fix-up-always mode
	 * works on ev6 and later.  The rounding bits are unused and fixed for
	 * cvtst, so we check those too.
	 */
	switch(inst.float_format.function) {
	case op_cvtst:
	case op_cvtst_u:
		/* CVTST: widen single to double (always exact). */
		sts(inst.float_detail.fb, &sfb, p);
		tfc.i = float32_to_float64(sfb.i);
		ldt(inst.float_detail.fc, &tfc, p);
		return;
	}
	if(inst.float_detail.src == 2) {
		/* T source: CVTTS, narrow double to single. */
		stt(inst.float_detail.fb, &tfb, p);
		sfc.i = float64_to_float32(tfb.i);
		lds(inst.float_detail.fc, &sfc, p);
		return;
	}
	/* 0: S/F */
	/* 1: /D */
	/* 3: Q/Q */
	/* Unexpected source encoding: log it and write a quiet NaN. */
	this_cannot_happen(5, inst.generic_format.opcode);
	tfc.i = FLOAT64QNAN;
	ldt(inst.float_detail.fc, &tfc, p);
	return;
}
269
/*
 * Software completion for CVTGD (VAX G -> D).  Per the VAX note above,
 * VAX ops get IEEE-style flag treatment rather than faithful VAX
 * semantics.
 */
static void
cvt_gd(u_int32_t inst_bits, struct proc *p)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	stt(inst.float_detail.fb, &tfb, p);
	/* Narrowing conversion run only for its exception-flag side
	 * effects; the converted value itself is discarded. */
	(void) float64_to_float32(tfb.i);
	/* Clear the inexact flag the probe above may have set --
	 * NOTE(review): presumably because D-format carries more
	 * fraction bits than the float32 probe suggests; confirm. */
	p->p_md.md_flags &= ~OPENBSD_FLAG_TO_FP_C(FP_X_IMP);
	/* Pass the operand through the adder unchanged (x + 0). */
	tfc.i = float64_add(tfb.i, (float64)0);
	ldt(inst.float_detail.fc, &tfc, p);
}
283
/*
 * Software completion for the cvtXt/g function group (cvtqt,
 * cvt[dq]g).  Only the quadword source (Q -> T/G) is genuinely
 * converted; the D source yields zero (UNPREDICTABLE per the
 * architecture), and the other encodings are logged as "cannot
 * happen" before also producing zero.
 */
static void
cvt_qt_dg_qg(u_int32_t inst_bits, struct proc *p)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	switch(inst.float_detail.src) {
	case 0: /* S/F */
		this_cannot_happen(3, inst.bits);
		/* fall thru */
	case 1: /* D */
		/* VAX dirty 0's and reserved ops => UNPREDICTABLE */
		/* We've done what's important by just not trapping */
		tfc.i = 0;
		break;
	case 2: /* T/G */
		this_cannot_happen(4, inst.bits);
		tfc.i = 0;
		break;
	case 3: /* Q/Q */
		/* CVTQT/CVTQG: signed 64-bit integer to floating point. */
		stt(inst.float_detail.fb, &tfb, p);
		tfc.i = int64_to_float64(tfb.i);
		break;
	}
	/* Raw store -- bypasses the subnormal postfilter that ldt() would
	 * apply. */
	alpha_ldt(inst.float_detail.fc, &tfc);
}
311 /*
312 * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
313 * unfortunate habit of always returning the nontrapping result.
314 * XXX: there are several apparent AARM/AAH disagreements, as well as
315 * the issue of trap handler pc and trapping results.
316 * XXX: this function will work for signed and unsigned 64-bit integers.
317 * rounding will happen per IEEE 754. invalid exception will be
318 * raised if argument is infinity, not-a-number or if it
319 * overflows/underflows. zero will be returned, in this case.
320 */
321 static void
cvt_tq_gq(u_int32_t inst_bits,struct proc * p)322 cvt_tq_gq(u_int32_t inst_bits, struct proc *p)
323 {
324 t_float tfb, tfc;
325 alpha_instruction inst;
326
327 inst.bits = inst_bits;
328 stt(inst.float_detail.fb, &tfb, p);
329 tfc.i = float64_to_int64_no_overflow(tfb.i);
330 alpha_ldt(inst.float_detail.fc, &tfc); /* yes, ldt */
331 }
332
/*
 * Build an FPCR image from the software FP_C word.  Only the dynamic
 * rounding mode survives from the incoming (hardware) fpcr; the
 * trap-disable bits, mirrored status bits and denormal-mapping bits
 * are all regenerated from fp_c.
 */
static u_int64_t
fp_c_to_fpcr_1(u_int64_t fpcr, u_int64_t fp_c)
{
	u_int64_t disables;

	/*
	 * It's hard to arrange for conforming bit fields, because the FP_C
	 * and the FPCR are both architected, with specified (and relatively
	 * scrambled) bit numbers.  Defining an internal unscrambled FP_C
	 * wouldn't help much, because every user exception requires the
	 * architected bit order in the sigcontext.
	 *
	 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
	 * will lose, because those bits can be and usually are subsetted;
	 * the official home is in the fp_c.  Furthermore, the kernel puts
	 * phony enables (it lies :-) in the fpcr in order to get control when
	 * it is necessary to initially set a sticky bit.
	 */

	fpcr &= FPCR_DYN(3);

	/*
	 * enable traps = case where flag bit is clear OR program wants a trap
	 * enables = ~flags | mask
	 * disables = ~(~flags | mask)
	 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
	 */
	disables = FP_C_TO_OPENBSD_FLAG(fp_c) & ~FP_C_TO_OPENBSD_MASK(fp_c);

	/* Scatter the disable bits to their architected FPCR positions. */
	fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
	fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);

	/* Compile-time proof that the shift distances above line up. */
# if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 &&		\
	FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 &&		\
	FP_X_UFL << (61 - 3) == FPCR_UNFD &&				\
	FP_X_IMP << (61 - 3) == FPCR_INED &&				\
	FP_X_OFL << (49 - 0) == FPCR_OVFD)
# error "Assertion failed"
	/*
	 * We don't care about the other built-in bit numbers because they
	 * have been architecturally specified.
	 */
# endif

	fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
	fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
	if (fp_c & FP_C_MIRRORED)
		fpcr |= FPCR_SUM;
	if (fp_c & IEEE_MAP_UMZ)
		fpcr |= FPCR_UNDZ | FPCR_UNFD;
	fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
	return fpcr;
}
386
387 static void
fp_c_to_fpcr(struct proc * p)388 fp_c_to_fpcr(struct proc *p)
389 {
390 alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), p->p_md.md_flags));
391 }
392
393 void
alpha_write_fp_c(struct proc * p,u_int64_t fp_c)394 alpha_write_fp_c(struct proc *p, u_int64_t fp_c)
395 {
396 u_int64_t md_flags;
397
398 fp_c &= MDP_FP_C;
399 md_flags = p->p_md.md_flags;
400 if ((md_flags & MDP_FP_C) == fp_c)
401 return;
402 p->p_md.md_flags = (md_flags & ~MDP_FP_C) | fp_c;
403 alpha_enable_fp(p, 1);
404 alpha_pal_wrfen(1);
405 fp_c_to_fpcr(p);
406 alpha_pal_wrfen(0);
407 }
408
409 u_int64_t
alpha_read_fp_c(struct proc * p)410 alpha_read_fp_c(struct proc *p)
411 {
412 /*
413 * A possibly desirable EV6-specific optimization would deviate from
414 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
415 * but in a transparent way. Some of the code for that would need to
416 * go right here.
417 */
418 return p->p_md.md_flags & MDP_FP_C;
419 }
420
421 static float64
float64_unk(float64 a,float64 b)422 float64_unk(float64 a, float64 b)
423 {
424 return 0;
425 }
426
427 /*
428 * The real function field encodings for IEEE and VAX FP instructions.
429 *
430 * Since there is only one operand type field, the cvtXX instructions
431 * require a variety of special cases, and these have to be analyzed as
432 * they don't always fit into the field descriptions in AARM section I.
433 *
434 * Lots of staring at bits in the appendix shows what's really going on.
435 *
436 * | |
437 * 15 14 13|12 11 10 09|08 07 06 05
438 * --------======------============
439 * TRAP : RND : SRC : FUNCTION :
440 * 0 0 0:. . .:. . . . . . . . . . . . Imprecise
441 * 0 0 1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
 *					|	/V overflow enable (if int output)
443 * 0 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
444 * 0 1 1|. . .:. . . . . . . . . . . . Unsupported
445 * 1 0 0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
446 * 1 0 1|. . .:. . . . . . . . . . . ./SU
447 * | /SV
448 * 1 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
449 * 1 1 1|. . .:. . . . . . . . . . . ./SUI (if FP output) (IEEE only)
450 * | /SVI (if int output) (IEEE only)
451 * S I UV: In other words: bits 15:13 are S:I:UV, except that _usually_
452 * | not all combinations are valid.
453 * | |
454 * 15 14 13|12 11 10 09|08 07 06 05
455 * --------======------============
456 * TRAP : RND : SRC : FUNCTION :
457 * | 0 0 . . . . . . . . . . . ./C Chopped
458 * : 0 1 . . . . . . . . . . . ./M Minus Infinity
459 * | 1 0 . . . . . . . . . . . . Normal
460 * : 1 1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
461 * | |
462 * 15 14 13|12 11 10 09|08 07 06 05
463 * --------======------============
464 * TRAP : RND : SRC : FUNCTION :
465 * 0 0. . . . . . . . . . S/F
466 * 0 1. . . . . . . . . . -/D
467 * 1 0. . . . . . . . . . T/G
468 * 1 1. . . . . . . . . . Q/Q
469 * | |
470 * 15 14 13|12 11 10 09|08 07 06 05
471 * --------======------============
472 * TRAP : RND : SRC : FUNCTION :
473 * 0 0 0 0 . . . addX
474 * 0 0 0 1 . . . subX
475 * 0 0 1 0 . . . mulX
476 * 0 0 1 1 . . . divX
477 * 0 1 0 0 . . . cmpXun
478 * 0 1 0 1 . . . cmpXeq
479 * 0 1 1 0 . . . cmpXlt
480 * 0 1 1 1 . . . cmpXle
481 * 1 0 0 0 . . . reserved
482 * 1 0 0 1 . . . reserved
483 * 1 0 1 0 . . . sqrt[fg] (op_fix, not exactly "vax")
484 * 1 0 1 1 . . . sqrt[st] (op_fix, not exactly "ieee")
485 * 1 1 0 0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
486 * 1 1 0 1 . . . cvtXd (vax only)
487 * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
488 * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq)
489 * | |
490 * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone
491 * --------======------============
492 * TRAP : RND : SRC : FUNCTION :
493 * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7
494 * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S
495 * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac
496 * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T)
497 */
498
/*
 * Interpret one software-completion FP instruction with SoftFloat,
 * reading operands from and writing results to the hardware FP
 * registers (callers have already enabled FP via wrfen).  Exception
 * status accumulates in the process's FP_C through the softfloat glue.
 */
static void
alpha_fp_interpret(struct proc *p, u_int64_t bits)
{
	s_float sfa, sfb, sfc;
	t_float tfa, tfb, tfc;
	alpha_instruction inst;

	inst.bits = bits;
	switch(inst.generic_format.opcode) {
	default:
		/* this "cannot happen" */
		this_cannot_happen(2, inst.bits);
		return;
	case op_any_float:
		if (inst.float_format.function == op_cvtql_sv ||
		    inst.float_format.function == op_cvtql_v) {
			/* CVTQL/V: out-of-range quad -> longword; write a
			 * saturated value and raise invalid. */
			alpha_stt(inst.float_detail.fb, &tfb);
			sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
			alpha_lds(inst.float_detail.fc, &sfc);
			float_raise(FP_X_INV);
		} else {
			++alpha_shadow.nilanyop;
			this_cannot_happen(3, inst.bits);
		}
		break;
	case op_vax_float:
		++alpha_shadow.vax;	/* fall thru */
	case op_ieee_float:
	case op_fix_float:
		switch(inst.float_detail.src) {
		case op_src_sf:
			/* Single precision (IEEE S / VAX F). */
			sts(inst.float_detail.fb, &sfb, p);
			if (inst.float_detail.opclass == 10)
				/* sqrt uses only the fb operand. */
				sfc.i = float32_sqrt(sfb.i);
			else if (inst.float_detail.opclass & ~3) {
				/* Only opclasses 0-3 valid for S; QNaN out. */
				this_cannot_happen(1, inst.bits);
				sfc.i = FLOAT32QNAN;
			} else {
				sts(inst.float_detail.fa, &sfa, p);
				sfc.i = (*swfp_s[inst.float_detail.opclass])(
				    sfa.i, sfb.i);
			}
			lds(inst.float_detail.fc, &sfc, p);
			break;
		case op_src_xd:
		case op_src_tg:
			/* Double precision (IEEE T / VAX D,G). */
			if (inst.float_detail.opclass >= 12)
				/* Conversions have their own handlers. */
				(*swfp_cvt[inst.float_detail.opclass - 12])(
				    inst.bits, p);
			else {
				stt(inst.float_detail.fb, &tfb, p);
				if (inst.float_detail.opclass == 10)
					tfc.i = float64_sqrt(tfb.i);
				else {
					stt(inst.float_detail.fa, &tfa, p);
					tfc.i = (*swfp_t[inst.float_detail
					    .opclass])(tfa.i, tfb.i);
				}
				ldt(inst.float_detail.fc, &tfc, p);
			}
			break;
		case op_src_qq:
			/* Integer source: nothing to compute, just flag
			 * imprecise. */
			float_raise(FP_X_IMP);
			break;
		}
	}
}
566
/*
 * Fetch and software-complete the FP instruction at trigger_pc.
 * Returns SIGSEGV if the instruction cannot be read, SIGFPE (with
 * *ucode set to the newly-raised, unmasked exception bits) if the
 * process has the corresponding trap enabled, else 0.
 */
int
alpha_fp_complete_at(u_long trigger_pc, struct proc *p, u_int64_t *ucode)
{
	int needsig;
	alpha_instruction inst;
	u_int64_t rm, fpcr, orig_fpcr;
	u_int64_t orig_flags, new_flags, changed_flags, md_flags;

	if (__predict_false(copyinsn(NULL, (u_int32_t *)trigger_pc,
	    (u_int32_t *)&inst))) {
		this_cannot_happen(6, -1);
		return SIGSEGV;
	}
	alpha_enable_fp(p, 1);
	alpha_pal_wrfen(1);
	/*
	 * If necessary, lie about the dynamic rounding mode so emulation
	 * software need go to only one place for it, and so we don't have to
	 * lock any memory locations or pass a third parameter to every
	 * SoftFloat entry point.
	 */
	orig_fpcr = fpcr = alpha_read_fpcr();
	rm = inst.float_detail.rnd;
	if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
		fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
		alpha_write_fpcr(fpcr);
	}
	/* Snapshot the sticky flags so new ones can be identified below. */
	orig_flags = FP_C_TO_OPENBSD_FLAG(p->p_md.md_flags);

	alpha_fp_interpret(p, inst.bits);

	md_flags = p->p_md.md_flags;

	/* Flags only ever accumulate; a cleared bit would be a bug. */
	new_flags = FP_C_TO_OPENBSD_FLAG(md_flags);
	changed_flags = orig_flags ^ new_flags;
	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
	/* Signal only for newly-raised flags the program left unmasked. */
	needsig = changed_flags & FP_C_TO_OPENBSD_MASK(md_flags);
	alpha_pal_wrfen(0);
	if (__predict_false(needsig)) {
		*ucode = needsig;
		return SIGFPE;
	}
	return 0;
}
612
/*
 * Main entry for FP software-completion traps.  a0 is the exception
 * summary (bit 0 = software completion requested), a1 the mask of
 * registers written by the trapping instruction(s).  On CPUs with
 * precise arithmetic traps (ALPHA_AMASK_PAT) the trigger is simply
 * pc - 4; otherwise the trap shadow is scanned backwards for it.
 * Returns 0 when the trap was completed transparently, otherwise the
 * signal to deliver (SIGFPE or SIGSEGV), with *ucode set and the saved
 * PC moved to just past the trigger instruction.
 */
int
alpha_fp_complete(u_long a0, u_long a1, struct proc *p, u_int64_t *ucode)
{
	int t;
	int sig;
	u_int64_t op_class;
	alpha_instruction inst;
	/* "trigger_pc" is Compaq's term for the earliest faulting op */
	u_long trigger_pc, usertrap_pc;
	alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];

	sig = SIGFPE;
	pc = (alpha_instruction *)p->p_md.md_tf->tf_regs[FRAME_PC];
	trigger_pc = (u_long)pc - 4;	/* for ALPHA_AMASK_PAT case */
	if (cpu_amask & ALPHA_AMASK_PAT) {
		if (a0 & 1 || alpha_fp_sync_complete) {
			sig = alpha_fp_complete_at(trigger_pc, p, ucode);
			goto done;
		}
	}
	*ucode = a0;
	if (!(a0 & 1))
		return sig;
/*
 * At this point we are somewhere in the trap shadow of one or more instruc-
 * tions that have trapped with software completion specified.  We have a mask
 * of the registers written by trapping instructions.
 *
 * Now step backwards through the trap shadow, clearing bits in the
 * destination write mask until the trigger instruction is found, and
 * interpret this one instruction in SW. If a SIGFPE is not required, back up
 * the PC until just after this instruction and restart. This will execute all
 * trap shadow instructions between the trigger pc and the trap pc twice.
 */
	trigger_pc = 0;
	win_begin = pc;
	++alpha_shadow.scans;
	t = alpha_shadow.len;
	for (--pc; a1; --pc) {
		++alpha_shadow.len;
		if (pc < win_begin) {
			/* Refill the TSWINSIZE-instruction copyin window. */
			win_begin = pc - TSWINSIZE + 1;
			if (_copyin(win_begin, tsw, sizeof tsw)) {
				/* sigh, try to get just one */
				win_begin = pc;
				if (_copyin(win_begin, tsw, 4))
					return SIGSEGV;
			}
		}
		inst = tsw[pc - win_begin];
		op_class = 1UL << inst.generic_format.opcode;
		if (op_class & FPUREG_CLASS) {
			/* FP writer: its dest is bit rc+32 of the mask. */
			a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
			trigger_pc = (u_long)pc;
		} else if (op_class & CPUREG_CLASS) {
			/* Integer writer: its dest is bit rc of the mask. */
			a1 &= ~(1UL << inst.operate_generic_format.rc);
			trigger_pc = (u_long)pc;
		} else if (op_class & TRAPSHADOWBOUNDARY) {
			if (op_class & CHECKFUNCTIONCODE) {
				if (inst.mem_format.displacement == op_trapb ||
				    inst.mem_format.displacement == op_excb)
					break;	/* code breaks AARM rules */
			} else
				break;	/* code breaks AARM rules */
		}
		/* Some shadow-safe op, probably load, store, or FPTI class */
	}
	/* Track the longest scan seen, for the instrumentation counters. */
	t = alpha_shadow.len - t;
	if (t > alpha_shadow.max)
		alpha_shadow.max = t;
	if (__predict_true(trigger_pc != 0 && a1 == 0)) {
		++alpha_shadow.resolved;
		sig = alpha_fp_complete_at(trigger_pc, p, ucode);
	} else {
		++alpha_shadow.unresolved;
		return sig;
	}
done:
	if (sig) {
		/* Deliver the signal with PC just past the trigger op. */
		usertrap_pc = trigger_pc + 4;
		p->p_md.md_tf->tf_regs[FRAME_PC] = usertrap_pc;
		return sig;
	}
	return 0;
}
698