1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <ucontext.h>
31 #ifdef __DragonFly__
32 #include <machine/npx.h>
33 #endif
34 #ifdef __FreeBSD__
35 #include <stdint.h>
36 #include <machine/fpu.h>
37 #endif
38 #include <fenv.h>
39 #if defined(__SUNPRO_C)
40 #include <sunmath.h>
41 #else
42 #include <sys/ieeefp.h>
43 #endif
44 #include "fex_handler.h"
45 #include "fenv_inlines.h"
46 
47 /* Hardcoded for amd64 */
48 
49 #if defined __DragonFly__ || defined __FreeBSD__
50 #define REG_PC	mc_rip
51 #define REG_PS	mc_rflags
52 # ifdef __FreeBSD__
53 #define FPU_STATE	mc_fpstate
54 #define FPU_STRUCTURE	savefpu
55 # endif
56 # ifdef __DragonFly__
57 #define FPU_STATE	mc_fpregs
58 #define FPU_STRUCTURE	savexmm64
59 # endif
60 #else
61 #error SSE instructions not supported on this platform
62 #endif
63 
64 /*
65  * Support for SSE instructions
66  */
67 
68 /*
69  * Decode an SSE instruction.  Fill in *inst and return the length of the
70  * instruction in bytes.  Return 0 if the instruction is not recognized.
71  */
72 int
__fex_parse_sse(ucontext_t * uap,sseinst_t * inst)73 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
74 {
75 	unsigned char	*ip;
76 	char		*addr;
77 	int		i, dbl, simd, rex, modrm, sib, r;
78 	struct FPU_STRUCTURE	*fpstate;
79 
80 	i = 0;
81 	ip = (unsigned char *)uap->uc_mcontext.REG_PC;
82 
83 	/* look for pseudo-prefixes */
84 	dbl = 0;
85 	simd = SIMD;
86 	if (ip[i] == 0xF3) {
87 		simd = 0;
88 		i++;
89 	} else if (ip[i] == 0x66) {
90 		dbl = DOUBLE;
91 		i++;
92 	} else if (ip[i] == 0xF2) {
93 		dbl = DOUBLE;
94 		simd = 0;
95 		i++;
96 	}
97 
98 	/* look for AMD64 REX prefix */
99 	rex = 0;
100 	if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
101 		rex = ip[i];
102 		i++;
103 	}
104 
105 	/* parse opcode */
106 	if (ip[i++] != 0x0F)
107 		return 0;
108 	switch (ip[i++]) {
109 	case 0x2A:
110 		inst->op = (int)cvtsi2ss + simd + dbl;
111 		if (!simd)
112 			inst->op = (int)inst->op + (rex & 8);
113 		break;
114 
115 	case 0x2C:
116 		inst->op = (int)cvttss2si + simd + dbl;
117 		if (!simd)
118 			inst->op = (int)inst->op + (rex & 8);
119 		break;
120 
121 	case 0x2D:
122 		inst->op = (int)cvtss2si + simd + dbl;
123 		if (!simd)
124 			inst->op = (int)inst->op + (rex & 8);
125 		break;
126 
127 	case 0x2E:
128 		/* oddball: scalar instruction in a SIMD opcode group */
129 		if (!simd)
130 			return 0;
131 		inst->op = (int)ucomiss + dbl;
132 		break;
133 
134 	case 0x2F:
135 		/* oddball: scalar instruction in a SIMD opcode group */
136 		if (!simd)
137 			return 0;
138 		inst->op = (int)comiss + dbl;
139 		break;
140 
141 	case 0x51:
142 		inst->op = (int)sqrtss + simd + dbl;
143 		break;
144 
145 	case 0x58:
146 		inst->op = (int)addss + simd + dbl;
147 		break;
148 
149 	case 0x59:
150 		inst->op = (int)mulss + simd + dbl;
151 		break;
152 
153 	case 0x5A:
154 		inst->op = (int)cvtss2sd + simd + dbl;
155 		break;
156 
157 	case 0x5B:
158 		if (dbl) {
159 			if (simd)
160 				inst->op = cvtps2dq;
161 			else
162 				return 0;
163 		} else {
164 			inst->op = (simd)? cvtdq2ps : cvttps2dq;
165 		}
166 		break;
167 
168 	case 0x5C:
169 		inst->op = (int)subss + simd + dbl;
170 		break;
171 
172 	case 0x5D:
173 		inst->op = (int)minss + simd + dbl;
174 		break;
175 
176 	case 0x5E:
177 		inst->op = (int)divss + simd + dbl;
178 		break;
179 
180 	case 0x5F:
181 		inst->op = (int)maxss + simd + dbl;
182 		break;
183 
184 	case 0xC2:
185 		inst->op = (int)cmpss + simd + dbl;
186 		break;
187 
188 	case 0xE6:
189 		if (simd) {
190 			if (dbl)
191 				inst->op = cvttpd2dq;
192 			else
193 				return 0;
194 		} else {
195 			inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
196 		}
197 		break;
198 
199 	default:
200 		return 0;
201 	}
202 
203 	/* locate operands */
204 	modrm = ip[i++];
205 
206 	if (inst->op == cvtss2si || inst->op == cvttss2si ||
207 	    inst->op == cvtsd2si || inst->op == cvttsd2si ||
208 	    inst->op == cvtss2siq || inst->op == cvttss2siq ||
209 	    inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
210 		/* op1 is a gp register */
211 		r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
212 		switch (r) {
213 		case  0: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rax; break;
214 		case  1: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rcx; break;
215 		case  2: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rdx; break;
216 		case  3: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rbx; break;
217 		case  4: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rsp; break;
218 		case  5: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rbp; break;
219 		case  6: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rsi; break;
220 		case  7: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rdi; break;
221 		case  8: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r8;  break;
222 		case  9: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r9;  break;
223 		case 10: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r10; break;
224 		case 11: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r11; break;
225 		case 12: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r12; break;
226 		case 13: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r13; break;
227 		case 14: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r14; break;
228 		default: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r15; break;
229 		}
230 	} else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
231 	    inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
232 		/* op1 is a mmx register */
233 		fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
234 		inst->op1 = (sseoperand_t *)&fpstate->sv_fp[(modrm >> 3) & 7];
235 	} else {
236 		/* op1 is a xmm register */
237 		r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
238 		fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
239 		inst->op1 = (sseoperand_t *)&fpstate->sv_xmm[r];
240 	}
241 
242 	if ((modrm >> 6) == 3) {
243 		if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
244 		    inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
245 			/* op2 is a gp register */
246 			r = ((rex & 1) << 3) | (modrm & 7);
247 			switch (r) {
248 			case  0: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rax; break;
249 			case  1: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rcx; break;
250 			case  2: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rdx; break;
251 			case  3: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rbx; break;
252 			case  4: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rsp; break;
253 			case  5: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rbp; break;
254 			case  6: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rsi; break;
255 			case  7: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rdi; break;
256 			case  8: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r8;  break;
257 			case  9: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r9;  break;
258 			case 10: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r10; break;
259 			case 11: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r11; break;
260 			case 12: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r12; break;
261 			case 13: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r13; break;
262 			case 14: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r14; break;
263 			default: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r15; break;
264 			}
265 		} else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
266 			/* op2 is a mmx register */
267 			fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
268 			inst->op2 = (sseoperand_t *)&fpstate->sv_fp[modrm & 7];
269 		} else {
270 			/* op2 is a xmm register */
271 			r = ((rex & 1) << 3) | (modrm & 7);
272 			fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
273 			inst->op2 = (sseoperand_t *)&fpstate->sv_xmm[r];
274 		}
275 	} else if ((modrm & 0xc7) == 0x05) {
276 		/* address of next instruction + offset */
277 		r = i + 4;
278 		if (inst->op == cmpss || inst->op == cmpps ||
279 		    inst->op == cmpsd || inst->op == cmppd)
280 			r++;
281 		inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
282 		i += 4;
283 	} else {
284 		/* complex address */
285 		if ((modrm & 7) == 4) {
286 			/* parse sib byte */
287 			sib = ip[i++];
288 			if ((sib & 7) == 5 && (modrm >> 6) == 0) {
289 				/* start with absolute address */
290 				addr = (char *)(uintptr_t)(*(int *)(ip + i));
291 				i += 4;
292 			} else {
293 				/* start with base */
294 				r = ((rex & 1) << 3) | (sib & 7);
295 				switch (r) {
296 				case  0: addr = (char *)&uap->uc_mcontext.mc_rax; break;
297 				case  1: addr = (char *)&uap->uc_mcontext.mc_rcx; break;
298 				case  2: addr = (char *)&uap->uc_mcontext.mc_rdx; break;
299 				case  3: addr = (char *)&uap->uc_mcontext.mc_rbx; break;
300 				case  4: addr = (char *)&uap->uc_mcontext.mc_rsp; break;
301 				case  5: addr = (char *)&uap->uc_mcontext.mc_rbp; break;
302 				case  6: addr = (char *)&uap->uc_mcontext.mc_rsi; break;
303 				case  7: addr = (char *)&uap->uc_mcontext.mc_rdi; break;
304 				case  8: addr = (char *)&uap->uc_mcontext.mc_r8;  break;
305 				case  9: addr = (char *)&uap->uc_mcontext.mc_r9;  break;
306 				case 10: addr = (char *)&uap->uc_mcontext.mc_r10; break;
307 				case 11: addr = (char *)&uap->uc_mcontext.mc_r11; break;
308 				case 12: addr = (char *)&uap->uc_mcontext.mc_r12; break;
309 				case 13: addr = (char *)&uap->uc_mcontext.mc_r13; break;
310 				case 14: addr = (char *)&uap->uc_mcontext.mc_r14; break;
311 				default: addr = (char *)&uap->uc_mcontext.mc_r15; break;
312 				}
313 			}
314 			r = ((rex & 2) << 2) | ((sib >> 3) & 7);
315 			if (r != 4) {
316 				/* add scaled index */
317 				switch (r) {
318 				case  0: addr += uap->uc_mcontext.mc_rax << (sib >> 6); break;
319 				case  1: addr += uap->uc_mcontext.mc_rcx << (sib >> 6); break;
320 				case  2: addr += uap->uc_mcontext.mc_rdx << (sib >> 6); break;
321 				case  3: addr += uap->uc_mcontext.mc_rbx << (sib >> 6); break;
322 				case  4: addr += uap->uc_mcontext.mc_rsp << (sib >> 6); break;
323 				case  5: addr += uap->uc_mcontext.mc_rbp << (sib >> 6); break;
324 				case  6: addr += uap->uc_mcontext.mc_rsi << (sib >> 6); break;
325 				case  7: addr += uap->uc_mcontext.mc_rdi << (sib >> 6); break;
326 				case  8: addr += uap->uc_mcontext.mc_r8  << (sib >> 6); break;
327 				case  9: addr += uap->uc_mcontext.mc_r9  << (sib >> 6); break;
328 				case 10: addr += uap->uc_mcontext.mc_r10 << (sib >> 6); break;
329 				case 11: addr += uap->uc_mcontext.mc_r11 << (sib >> 6); break;
330 				case 12: addr += uap->uc_mcontext.mc_r12 << (sib >> 6); break;
331 				case 13: addr += uap->uc_mcontext.mc_r13 << (sib >> 6); break;
332 				case 14: addr += uap->uc_mcontext.mc_r14 << (sib >> 6); break;
333 				default: addr += uap->uc_mcontext.mc_r15 << (sib >> 6); break;
334 				}
335 			}
336 		} else {
337 			r = ((rex & 1) << 3) | (modrm & 7);
338 			switch (r) {
339 			case  0: addr = (char *)uap->uc_mcontext.mc_rax; break;
340 			case  1: addr = (char *)uap->uc_mcontext.mc_rcx; break;
341 			case  2: addr = (char *)uap->uc_mcontext.mc_rdx; break;
342 			case  3: addr = (char *)uap->uc_mcontext.mc_rbx; break;
343 			case  4: addr = (char *)uap->uc_mcontext.mc_rsp; break;
344 			case  5: addr = (char *)uap->uc_mcontext.mc_rbp; break;
345 			case  6: addr = (char *)uap->uc_mcontext.mc_rsi; break;
346 			case  7: addr = (char *)uap->uc_mcontext.mc_rdi; break;
347 			case  8: addr = (char *)uap->uc_mcontext.mc_r8;  break;
348 			case  9: addr = (char *)uap->uc_mcontext.mc_r9;  break;
349 			case 10: addr = (char *)uap->uc_mcontext.mc_r10; break;
350 			case 11: addr = (char *)uap->uc_mcontext.mc_r11; break;
351 			case 12: addr = (char *)uap->uc_mcontext.mc_r12; break;
352 			case 13: addr = (char *)uap->uc_mcontext.mc_r13; break;
353 			case 14: addr = (char *)uap->uc_mcontext.mc_r14; break;
354 			default: addr = (char *)uap->uc_mcontext.mc_r15; break;
355 			}
356 		}
357 
358 		/* add displacement, if any */
359 		if ((modrm >> 6) == 1) {
360 			addr += (char)ip[i++];
361 		} else if ((modrm >> 6) == 2) {
362 			addr += *(int *)(ip + i);
363 			i += 4;
364 		}
365 		inst->op2 = (sseoperand_t *)addr;
366 	}
367 
368 	if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
369 	    inst->op == cmppd) {
370 		/* get the immediate operand */
371 		inst->imm = ip[i++];
372 	}
373 
374 	return i;
375 }
376 
377 static enum fp_class_type
my_fp_classf(float * x)378 my_fp_classf(float *x)
379 {
380 	int	i = *(int *)x & ~0x80000000;
381 
382 	if (i < 0x7f800000) {
383 		if (i < 0x00800000)
384 			return ((i == 0)? fp_zero : fp_subnormal);
385 		return fp_normal;
386 	}
387 	else if (i == 0x7f800000)
388 		return fp_infinity;
389 	else if (i & 0x400000)
390 		return fp_quiet;
391 	else
392 		return fp_signaling;
393 }
394 
395 static enum fp_class_type
my_fp_class(double * x)396 my_fp_class(double *x)
397 {
398 	int	i = *(1+(int *)x) & ~0x80000000;
399 
400 	if (i < 0x7ff00000) {
401 		if (i < 0x00100000)
402 			return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
403 		return fp_normal;
404 	}
405 	else if (i == 0x7ff00000 && *(int *)x == 0)
406 		return fp_infinity;
407 	else if (i & 0x80000)
408 		return fp_quiet;
409 	else
410 		return fp_signaling;
411 }
412 
413 /*
414  * Inspect a scalar SSE instruction that incurred an invalid operation
415  * exception to determine which type of exception it was.
416  */
417 static enum fex_exception
__fex_get_sse_invalid_type(sseinst_t * inst)418 __fex_get_sse_invalid_type(sseinst_t *inst)
419 {
420 	enum fp_class_type	t1, t2;
421 
422 	/* check op2 for signaling nan */
423 	t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
424 	    my_fp_classf(&inst->op2->f[0]);
425 	if (t2 == fp_signaling)
426 		return fex_inv_snan;
427 
428 	/* eliminate all single-operand instructions */
429 	switch (inst->op) {
430 	case cvtsd2ss:
431 	case cvtss2sd:
432 		/* hmm, this shouldn't have happened */
433 		return (enum fex_exception) -1;
434 
435 	case sqrtss:
436 	case sqrtsd:
437 		return fex_inv_sqrt;
438 
439 	case cvtss2si:
440 	case cvtsd2si:
441 	case cvttss2si:
442 	case cvttsd2si:
443 	case cvtss2siq:
444 	case cvtsd2siq:
445 	case cvttss2siq:
446 	case cvttsd2siq:
447 		return fex_inv_int;
448 	default:
449 		break;
450 	}
451 
452 	/* check op1 for signaling nan */
453 	t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
454 	    my_fp_classf(&inst->op1->f[0]);
455 	if (t1 == fp_signaling)
456 		return fex_inv_snan;
457 
458 	/* check two-operand instructions for other cases */
459 	switch (inst->op) {
460 	case cmpss:
461 	case cmpsd:
462 	case minss:
463 	case minsd:
464 	case maxss:
465 	case maxsd:
466 	case comiss:
467 	case comisd:
468 		return fex_inv_cmp;
469 
470 	case addss:
471 	case addsd:
472 	case subss:
473 	case subsd:
474 		if (t1 == fp_infinity && t2 == fp_infinity)
475 			return fex_inv_isi;
476 		break;
477 
478 	case mulss:
479 	case mulsd:
480 		if ((t1 == fp_zero && t2 == fp_infinity) ||
481 		    (t2 == fp_zero && t1 == fp_infinity))
482 			return fex_inv_zmi;
483 		break;
484 
485 	case divss:
486 	case divsd:
487 		if (t1 == fp_zero && t2 == fp_zero)
488 			return fex_inv_zdz;
489 		if (t1 == fp_infinity && t2 == fp_infinity)
490 			return fex_inv_idi;
491 	default:
492 		break;
493 	}
494 
495 	return (enum fex_exception)-1;
496 }
497 
/*
 * inline templates
 *
 * These routines are defined outside this file.  The parameter names
 * reflect how each routine is called here: operands first, then the
 * location that receives the result (ucomis* and comis* take two
 * operands and no result location).
 */

/* single precision */
extern void sse_cmpeqss(float *op1, float *op2, int *res);
extern void sse_cmpltss(float *op1, float *op2, int *res);
extern void sse_cmpless(float *op1, float *op2, int *res);
extern void sse_cmpunordss(float *op1, float *op2, int *res);
extern void sse_minss(float *op1, float *op2, float *res);
extern void sse_maxss(float *op1, float *op2, float *res);
extern void sse_addss(float *op1, float *op2, float *res);
extern void sse_subss(float *op1, float *op2, float *res);
extern void sse_mulss(float *op1, float *op2, float *res);
extern void sse_divss(float *op1, float *op2, float *res);
extern void sse_sqrtss(float *op, float *res);
extern void sse_ucomiss(float *op1, float *op2);
extern void sse_comiss(float *op1, float *op2);
extern void sse_cvtss2sd(float *op, double *res);
extern void sse_cvtsi2ss(int *op, float *res);
extern void sse_cvttss2si(float *op, int *res);
extern void sse_cvtss2si(float *op, int *res);
extern void sse_cvtsi2ssq(long long *op, float *res);
extern void sse_cvttss2siq(float *op, long long *res);
extern void sse_cvtss2siq(float *op, long long *res);

/* double precision */
extern void sse_cmpeqsd(double *op1, double *op2, long long *res);
extern void sse_cmpltsd(double *op1, double *op2, long long *res);
extern void sse_cmplesd(double *op1, double *op2, long long *res);
extern void sse_cmpunordsd(double *op1, double *op2, long long *res);
extern void sse_minsd(double *op1, double *op2, double *res);
extern void sse_maxsd(double *op1, double *op2, double *res);
extern void sse_addsd(double *op1, double *op2, double *res);
extern void sse_subsd(double *op1, double *op2, double *res);
extern void sse_mulsd(double *op1, double *op2, double *res);
extern void sse_divsd(double *op1, double *op2, double *res);
extern void sse_sqrtsd(double *op, double *res);
extern void sse_ucomisd(double *op1, double *op2);
extern void sse_comisd(double *op1, double *op2);
extern void sse_cvtsd2ss(double *op, float *res);
extern void sse_cvtsi2sd(int *op, double *res);
extern void sse_cvttsd2si(double *op, int *res);
extern void sse_cvtsd2si(double *op, int *res);
extern void sse_cvtsi2sdq(long long *op, double *res);
extern void sse_cvttsd2siq(double *op, long long *res);
extern void sse_cvtsd2siq(double *op, long long *res);
539 
540 /*
541  * Fill in *info with the operands, default untrapped result, and
542  * flags produced by a scalar SSE instruction, and return the type
543  * of trapped exception (if any).  On entry, the mxcsr must have
544  * all exceptions masked and all flags clear.  The same conditions
545  * will hold on exit.
546  *
547  * This routine does not work if the instruction specified by *inst
548  * is not a scalar instruction.
549  */
550 enum fex_exception
__fex_get_sse_op(ucontext_t * uap,sseinst_t * inst,fex_info_t * info)551 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
552 {
553 	unsigned int	e, te, mxcsr, oldmxcsr, subnorm;
554 	struct FPU_STRUCTURE	*fpstate;
555 
556 	/*
557 	 * Perform the operation with traps disabled and check the
558 	 * exception flags.  If the underflow trap was enabled, also
559 	 * check for an exact subnormal result.
560 	 */
561 	__fenv_getmxcsr(&oldmxcsr);
562 	subnorm = 0;
563 	if ((int)inst->op & DOUBLE) {
564 		if (inst->op == cvtsi2sd) {
565 			info->op1.type = fex_int;
566 			info->op1.val.i = inst->op2->i[0];
567 			info->op2.type = fex_nodata;
568 		} else if (inst->op == cvtsi2sdq) {
569 			info->op1.type = fex_llong;
570 			info->op1.val.l = inst->op2->l[0];
571 			info->op2.type = fex_nodata;
572 		} else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
573 		    inst->op == cvttsd2si || inst->op == cvtsd2si ||
574 		    inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
575 			info->op1.type = fex_double;
576 			info->op1.val.d = inst->op2->d[0];
577 			info->op2.type = fex_nodata;
578 		} else {
579 			info->op1.type = fex_double;
580 			info->op1.val.d = inst->op1->d[0];
581 			info->op2.type = fex_double;
582 			info->op2.val.d = inst->op2->d[0];
583 		}
584 		info->res.type = fex_double;
585 		switch (inst->op) {
586 		case cmpsd:
587 			info->op = fex_cmp;
588 			info->res.type = fex_llong;
589 			switch (inst->imm & 3) {
590 			case 0:
591 				sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
592 				    &info->res.val.l);
593 				break;
594 
595 			case 1:
596 				sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
597 				    &info->res.val.l);
598 				break;
599 
600 			case 2:
601 				sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
602 				    &info->res.val.l);
603 				break;
604 
605 			case 3:
606 				sse_cmpunordsd(&info->op1.val.d,
607 				    &info->op2.val.d, &info->res.val.l);
608 			}
609 			if (inst->imm & 4)
610 				info->res.val.l ^= 0xffffffffffffffffull;
611 			break;
612 
613 		case minsd:
614 			info->op = fex_other;
615 			sse_minsd(&info->op1.val.d, &info->op2.val.d,
616 			    &info->res.val.d);
617 			break;
618 
619 		case maxsd:
620 			info->op = fex_other;
621 			sse_maxsd(&info->op1.val.d, &info->op2.val.d,
622 			    &info->res.val.d);
623 			break;
624 
625 		case addsd:
626 			info->op = fex_add;
627 			sse_addsd(&info->op1.val.d, &info->op2.val.d,
628 			    &info->res.val.d);
629 			if (my_fp_class(&info->res.val.d) == fp_subnormal)
630 				subnorm = 1;
631 			break;
632 
633 		case subsd:
634 			info->op = fex_sub;
635 			sse_subsd(&info->op1.val.d, &info->op2.val.d,
636 			    &info->res.val.d);
637 			if (my_fp_class(&info->res.val.d) == fp_subnormal)
638 				subnorm = 1;
639 			break;
640 
641 		case mulsd:
642 			info->op = fex_mul;
643 			sse_mulsd(&info->op1.val.d, &info->op2.val.d,
644 			    &info->res.val.d);
645 			if (my_fp_class(&info->res.val.d) == fp_subnormal)
646 				subnorm = 1;
647 			break;
648 
649 		case divsd:
650 			info->op = fex_div;
651 			sse_divsd(&info->op1.val.d, &info->op2.val.d,
652 			    &info->res.val.d);
653 			if (my_fp_class(&info->res.val.d) == fp_subnormal)
654 				subnorm = 1;
655 			break;
656 
657 		case sqrtsd:
658 			info->op = fex_sqrt;
659 			sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
660 			break;
661 
662 		case cvtsd2ss:
663 			info->op = fex_cnvt;
664 			info->res.type = fex_float;
665 			sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
666 			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
667 				subnorm = 1;
668 			break;
669 
670 		case cvtsi2sd:
671 			info->op = fex_cnvt;
672 			sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
673 			break;
674 
675 		case cvttsd2si:
676 			info->op = fex_cnvt;
677 			info->res.type = fex_int;
678 			sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
679 			break;
680 
681 		case cvtsd2si:
682 			info->op = fex_cnvt;
683 			info->res.type = fex_int;
684 			sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
685 			break;
686 
687 		case cvtsi2sdq:
688 			info->op = fex_cnvt;
689 			sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
690 			break;
691 
692 		case cvttsd2siq:
693 			info->op = fex_cnvt;
694 			info->res.type = fex_llong;
695 			sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
696 			break;
697 
698 		case cvtsd2siq:
699 			info->op = fex_cnvt;
700 			info->res.type = fex_llong;
701 			sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
702 			break;
703 
704 		case ucomisd:
705 			info->op = fex_cmp;
706 			info->res.type = fex_nodata;
707 			sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
708 			break;
709 
710 		case comisd:
711 			info->op = fex_cmp;
712 			info->res.type = fex_nodata;
713 			sse_comisd(&info->op1.val.d, &info->op2.val.d);
714 			break;
715 		default:
716 			break;
717 		}
718 	} else {
719 		if (inst->op == cvtsi2ss) {
720 			info->op1.type = fex_int;
721 			info->op1.val.i = inst->op2->i[0];
722 			info->op2.type = fex_nodata;
723 		} else if (inst->op == cvtsi2ssq) {
724 			info->op1.type = fex_llong;
725 			info->op1.val.l = inst->op2->l[0];
726 			info->op2.type = fex_nodata;
727 		} else if (inst->op == sqrtss || inst->op == cvtss2sd ||
728 		    inst->op == cvttss2si || inst->op == cvtss2si ||
729 		    inst->op == cvttss2siq || inst->op == cvtss2siq) {
730 			info->op1.type = fex_float;
731 			info->op1.val.f = inst->op2->f[0];
732 			info->op2.type = fex_nodata;
733 		} else {
734 			info->op1.type = fex_float;
735 			info->op1.val.f = inst->op1->f[0];
736 			info->op2.type = fex_float;
737 			info->op2.val.f = inst->op2->f[0];
738 		}
739 		info->res.type = fex_float;
740 		switch (inst->op) {
741 		case cmpss:
742 			info->op = fex_cmp;
743 			info->res.type = fex_int;
744 			switch (inst->imm & 3) {
745 			case 0:
746 				sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
747 				    &info->res.val.i);
748 				break;
749 
750 			case 1:
751 				sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
752 				    &info->res.val.i);
753 				break;
754 
755 			case 2:
756 				sse_cmpless(&info->op1.val.f, &info->op2.val.f,
757 				    &info->res.val.i);
758 				break;
759 
760 			case 3:
761 				sse_cmpunordss(&info->op1.val.f,
762 				    &info->op2.val.f, &info->res.val.i);
763 			}
764 			if (inst->imm & 4)
765 				info->res.val.i ^= 0xffffffffu;
766 			break;
767 
768 		case minss:
769 			info->op = fex_other;
770 			sse_minss(&info->op1.val.f, &info->op2.val.f,
771 			    &info->res.val.f);
772 			break;
773 
774 		case maxss:
775 			info->op = fex_other;
776 			sse_maxss(&info->op1.val.f, &info->op2.val.f,
777 			    &info->res.val.f);
778 			break;
779 
780 		case addss:
781 			info->op = fex_add;
782 			sse_addss(&info->op1.val.f, &info->op2.val.f,
783 			    &info->res.val.f);
784 			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
785 				subnorm = 1;
786 			break;
787 
788 		case subss:
789 			info->op = fex_sub;
790 			sse_subss(&info->op1.val.f, &info->op2.val.f,
791 			    &info->res.val.f);
792 			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
793 				subnorm = 1;
794 			break;
795 
796 		case mulss:
797 			info->op = fex_mul;
798 			sse_mulss(&info->op1.val.f, &info->op2.val.f,
799 			    &info->res.val.f);
800 			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
801 				subnorm = 1;
802 			break;
803 
804 		case divss:
805 			info->op = fex_div;
806 			sse_divss(&info->op1.val.f, &info->op2.val.f,
807 			    &info->res.val.f);
808 			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
809 				subnorm = 1;
810 			break;
811 
812 		case sqrtss:
813 			info->op = fex_sqrt;
814 			sse_sqrtss(&info->op1.val.f, &info->res.val.f);
815 			break;
816 
817 		case cvtss2sd:
818 			info->op = fex_cnvt;
819 			info->res.type = fex_double;
820 			sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
821 			break;
822 
823 		case cvtsi2ss:
824 			info->op = fex_cnvt;
825 			sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
826 			break;
827 
828 		case cvttss2si:
829 			info->op = fex_cnvt;
830 			info->res.type = fex_int;
831 			sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
832 			break;
833 
834 		case cvtss2si:
835 			info->op = fex_cnvt;
836 			info->res.type = fex_int;
837 			sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
838 			break;
839 
840 		case cvtsi2ssq:
841 			info->op = fex_cnvt;
842 			sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
843 			break;
844 
845 		case cvttss2siq:
846 			info->op = fex_cnvt;
847 			info->res.type = fex_llong;
848 			sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
849 			break;
850 
851 		case cvtss2siq:
852 			info->op = fex_cnvt;
853 			info->res.type = fex_llong;
854 			sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
855 			break;
856 
857 		case ucomiss:
858 			info->op = fex_cmp;
859 			info->res.type = fex_nodata;
860 			sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
861 			break;
862 
863 		case comiss:
864 			info->op = fex_cmp;
865 			info->res.type = fex_nodata;
866 			sse_comiss(&info->op1.val.f, &info->op2.val.f);
867 			break;
868 		default:
869 			break;
870 		}
871 	}
872 	__fenv_getmxcsr(&mxcsr);
873 	info->flags = mxcsr & 0x3d;
874 	__fenv_setmxcsr(&oldmxcsr);
875 
876 	/* determine which exception would have been trapped */
877 	fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
878 	te = ~(fpstate->sv_env.en_mxcsr >> 7) & 0x3d;
879 	e = mxcsr & te;
880 	if (e & FE_INVALID)
881 		return __fex_get_sse_invalid_type(inst);
882 	if (e & FE_DIVBYZERO)
883 		return fex_division;
884 	if (e & FE_OVERFLOW)
885 		return fex_overflow;
886 	if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
887 		return fex_underflow;
888 	if (e & FE_INEXACT)
889 		return fex_inexact;
890 	return (enum fex_exception)-1;
891 }
892 
893 /*
894  * Emulate a SIMD SSE instruction to determine which exceptions occur
895  * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
896  * trapped exception that would occur if the i-th part of the SIMD
897  * instruction were executed in isolation; set e[i] to -1 if no
898  * trapped exception would occur in this part.  Also fill in info[i]
899  * with the corresponding operands, default untrapped result, and
900  * flags.
901  *
902  * This routine does not work if the instruction specified by *inst
903  * is not a SIMD instruction.
904  */
905 void
__fex_get_simd_op(ucontext_t * uap,sseinst_t * inst,enum fex_exception * e,fex_info_t * info)906 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
907     fex_info_t *info)
908 {
909 	sseinst_t	dummy;
910 	int		i;
911 
912 	e[0] = e[1] = e[2] = e[3] = -1;
913 
914 	/* perform each part of the SIMD operation */
915 	switch (inst->op) {
916 	case cmpps:
917 		dummy.op = cmpss;
918 		dummy.imm = inst->imm;
919 		for (i = 0; i < 4; i++) {
920 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
921 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
922 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
923 		}
924 		break;
925 
926 	case minps:
927 		dummy.op = minss;
928 		for (i = 0; i < 4; i++) {
929 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
930 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
931 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
932 		}
933 		break;
934 
935 	case maxps:
936 		dummy.op = maxss;
937 		for (i = 0; i < 4; i++) {
938 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
939 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
940 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
941 		}
942 		break;
943 
944 	case addps:
945 		dummy.op = addss;
946 		for (i = 0; i < 4; i++) {
947 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
948 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
949 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
950 		}
951 		break;
952 
953 	case subps:
954 		dummy.op = subss;
955 		for (i = 0; i < 4; i++) {
956 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
957 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
958 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
959 		}
960 		break;
961 
962 	case mulps:
963 		dummy.op = mulss;
964 		for (i = 0; i < 4; i++) {
965 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
966 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
967 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
968 		}
969 		break;
970 
971 	case divps:
972 		dummy.op = divss;
973 		for (i = 0; i < 4; i++) {
974 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
975 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
976 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
977 		}
978 		break;
979 
980 	case sqrtps:
981 		dummy.op = sqrtss;
982 		for (i = 0; i < 4; i++) {
983 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
984 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
985 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
986 		}
987 		break;
988 
989 	case cvtdq2ps:
990 		dummy.op = cvtsi2ss;
991 		for (i = 0; i < 4; i++) {
992 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
993 			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
994 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
995 		}
996 		break;
997 
998 	case cvttps2dq:
999 		dummy.op = cvttss2si;
1000 		for (i = 0; i < 4; i++) {
1001 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1002 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1003 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1004 		}
1005 		break;
1006 
1007 	case cvtps2dq:
1008 		dummy.op = cvtss2si;
1009 		for (i = 0; i < 4; i++) {
1010 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1011 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1012 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1013 		}
1014 		break;
1015 
1016 	case cvtpi2ps:
1017 		dummy.op = cvtsi2ss;
1018 		for (i = 0; i < 2; i++) {
1019 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1020 			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1021 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1022 		}
1023 		break;
1024 
1025 	case cvttps2pi:
1026 		dummy.op = cvttss2si;
1027 		for (i = 0; i < 2; i++) {
1028 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1029 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1030 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1031 		}
1032 		break;
1033 
1034 	case cvtps2pi:
1035 		dummy.op = cvtss2si;
1036 		for (i = 0; i < 2; i++) {
1037 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1038 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1039 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1040 		}
1041 		break;
1042 
1043 	case cmppd:
1044 		dummy.op = cmpsd;
1045 		dummy.imm = inst->imm;
1046 		for (i = 0; i < 2; i++) {
1047 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1048 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1049 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1050 		}
1051 		break;
1052 
1053 	case minpd:
1054 		dummy.op = minsd;
1055 		for (i = 0; i < 2; i++) {
1056 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1057 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1058 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1059 		}
1060 		break;
1061 
1062 	case maxpd:
1063 		dummy.op = maxsd;
1064 		for (i = 0; i < 2; i++) {
1065 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1066 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1067 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1068 		}
1069 		break;
1070 
1071 	case addpd:
1072 		dummy.op = addsd;
1073 		for (i = 0; i < 2; i++) {
1074 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1075 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1076 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1077 		}
1078 		break;
1079 
1080 	case subpd:
1081 		dummy.op = subsd;
1082 		for (i = 0; i < 2; i++) {
1083 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1084 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1085 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1086 		}
1087 		break;
1088 
1089 	case mulpd:
1090 		dummy.op = mulsd;
1091 		for (i = 0; i < 2; i++) {
1092 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1093 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1094 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1095 		}
1096 		break;
1097 
1098 	case divpd:
1099 		dummy.op = divsd;
1100 		for (i = 0; i < 2; i++) {
1101 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1102 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1103 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1104 		}
1105 		break;
1106 
1107 	case sqrtpd:
1108 		dummy.op = sqrtsd;
1109 		for (i = 0; i < 2; i++) {
1110 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1111 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1112 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1113 		}
1114 		break;
1115 
1116 	case cvtpi2pd:
1117 	case cvtdq2pd:
1118 		dummy.op = cvtsi2sd;
1119 		for (i = 0; i < 2; i++) {
1120 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1121 			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1122 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1123 		}
1124 		break;
1125 
1126 	case cvttpd2pi:
1127 	case cvttpd2dq:
1128 		dummy.op = cvttsd2si;
1129 		for (i = 0; i < 2; i++) {
1130 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1131 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1132 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1133 		}
1134 		break;
1135 
1136 	case cvtpd2pi:
1137 	case cvtpd2dq:
1138 		dummy.op = cvtsd2si;
1139 		for (i = 0; i < 2; i++) {
1140 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1141 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1142 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1143 		}
1144 		break;
1145 
1146 	case cvtps2pd:
1147 		dummy.op = cvtss2sd;
1148 		for (i = 0; i < 2; i++) {
1149 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1150 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1151 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1152 		}
1153 		break;
1154 
1155 	case cvtpd2ps:
1156 		dummy.op = cvtsd2ss;
1157 		for (i = 0; i < 2; i++) {
1158 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1159 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1160 			e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1161 		}
1162 	default:
1163 		break;
1164 	}
1165 }
1166 
1167 /*
1168  * Store the result value from *info in the destination of the scalar
1169  * SSE instruction specified by *inst.  If no result is given but the
1170  * exception is underflow or overflow, supply the default trapped result.
1171  *
1172  * This routine does not work if the instruction specified by *inst
1173  * is not a scalar instruction.
1174  */
1175 void
__fex_st_sse_result(ucontext_t * uap,sseinst_t * inst,enum fex_exception e,fex_info_t * info)1176 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1177     fex_info_t *info)
1178 {
1179 	int		i = 0;
1180 	long long	l = 0L;;
1181 	float		f = 0.0, fscl;
1182 	double		d = 0.0L, dscl;
1183 
1184 	/* for compares that write eflags, just set the flags
1185 	   to indicate "unordered" */
1186 	if (inst->op == ucomiss || inst->op == comiss ||
1187 	    inst->op == ucomisd || inst->op == comisd) {
1188 		uap->uc_mcontext.REG_PS |= 0x45;
1189 		return;
1190 	}
1191 
1192 	/* if info doesn't specify a result value, try to generate
1193 	   the default trapped result */
1194 	if (info->res.type == fex_nodata) {
1195 		/* set scale factors for exponent wrapping */
1196 		switch (e) {
1197 		case fex_overflow:
1198 			fscl = 1.262177448e-29f; /* 2^-96 */
1199 			dscl = 6.441148769597133308e-232; /* 2^-768 */
1200 			break;
1201 
1202 		case fex_underflow:
1203 			fscl = 7.922816251e+28f; /* 2^96 */
1204 			dscl = 1.552518092300708935e+231; /* 2^768 */
1205 			break;
1206 
1207 		default:
1208 			(void) __fex_get_sse_op(uap, inst, info);
1209 			if (info->res.type == fex_nodata)
1210 				return;
1211 			goto stuff;
1212 		}
1213 
1214 		/* generate the wrapped result */
1215 		if (inst->op == cvtsd2ss) {
1216 			info->op1.type = fex_double;
1217 			info->op1.val.d = inst->op2->d[0];
1218 			info->op2.type = fex_nodata;
1219 			info->res.type = fex_float;
1220 			info->res.val.f = (float)(fscl * (fscl *
1221 			    info->op1.val.d));
1222 		} else if ((int)inst->op & DOUBLE) {
1223 			info->op1.type = fex_double;
1224 			info->op1.val.d = inst->op1->d[0];
1225 			info->op2.type = fex_double;
1226 			info->op2.val.d = inst->op2->d[0];
1227 			info->res.type = fex_double;
1228 			switch (inst->op) {
1229 			case addsd:
1230 				info->res.val.d = dscl * (dscl *
1231 				    info->op1.val.d + dscl * info->op2.val.d);
1232 				break;
1233 
1234 			case subsd:
1235 				info->res.val.d = dscl * (dscl *
1236 				    info->op1.val.d - dscl * info->op2.val.d);
1237 				break;
1238 
1239 			case mulsd:
1240 				info->res.val.d = (dscl * info->op1.val.d) *
1241 				    (dscl * info->op2.val.d);
1242 				break;
1243 
1244 			case divsd:
1245 				info->res.val.d = (dscl * info->op1.val.d) /
1246 				    (info->op2.val.d / dscl);
1247 				break;
1248 
1249 			default:
1250 				return;
1251 			}
1252 		} else {
1253 			info->op1.type = fex_float;
1254 			info->op1.val.f = inst->op1->f[0];
1255 			info->op2.type = fex_float;
1256 			info->op2.val.f = inst->op2->f[0];
1257 			info->res.type = fex_float;
1258 			switch (inst->op) {
1259 			case addss:
1260 				info->res.val.f = fscl * (fscl *
1261 				    info->op1.val.f + fscl * info->op2.val.f);
1262 				break;
1263 
1264 			case subss:
1265 				info->res.val.f = fscl * (fscl *
1266 				    info->op1.val.f - fscl * info->op2.val.f);
1267 				break;
1268 
1269 			case mulss:
1270 				info->res.val.f = (fscl * info->op1.val.f) *
1271 				    (fscl * info->op2.val.f);
1272 				break;
1273 
1274 			case divss:
1275 				info->res.val.f = (fscl * info->op1.val.f) /
1276 				    (info->op2.val.f / fscl);
1277 				break;
1278 
1279 			default:
1280 				return;
1281 			}
1282 		}
1283 	}
1284 
1285 	/* put the result in the destination */
1286 stuff:
1287 	if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1288 	    || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1289 		switch (info->res.type) {
1290 		case fex_int:
1291 			i = info->res.val.i;
1292 			break;
1293 
1294 		case fex_llong:
1295 			i = info->res.val.l;
1296 			break;
1297 
1298 		case fex_float:
1299 			i = info->res.val.f;
1300 			break;
1301 
1302 		case fex_double:
1303 			i = info->res.val.d;
1304 			break;
1305 
1306 		case fex_ldouble:
1307 			i = info->res.val.q;
1308 			break;
1309 
1310 		default:
1311 			break;
1312 		}
1313 		inst->op1->i[0] = i;
1314 	} else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1315 	    inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1316 	    inst->op == cvtsd2siq) {
1317 		switch (info->res.type) {
1318 		case fex_int:
1319 			l = info->res.val.i;
1320 			break;
1321 
1322 		case fex_llong:
1323 			l = info->res.val.l;
1324 			break;
1325 
1326 		case fex_float:
1327 			l = info->res.val.f;
1328 			break;
1329 
1330 		case fex_double:
1331 			l = info->res.val.d;
1332 			break;
1333 
1334 		case fex_ldouble:
1335 			l = info->res.val.q;
1336 			break;
1337 
1338 		default:
1339 			break;
1340 		}
1341 		inst->op1->l[0] = l;
1342 	} else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1343 	    inst->op == cvtss2sd) {
1344 		switch (info->res.type) {
1345 		case fex_int:
1346 			d = info->res.val.i;
1347 			break;
1348 
1349 		case fex_llong:
1350 			d = info->res.val.l;
1351 			break;
1352 
1353 		case fex_float:
1354 			d = info->res.val.f;
1355 			break;
1356 
1357 		case fex_double:
1358 			d = info->res.val.d;
1359 			break;
1360 
1361 		case fex_ldouble:
1362 			d = info->res.val.q;
1363 			break;
1364 
1365 		default:
1366 			break;
1367 		}
1368 		inst->op1->d[0] = d;
1369 	} else {
1370 		switch (info->res.type) {
1371 		case fex_int:
1372 			f = info->res.val.i;
1373 			break;
1374 
1375 		case fex_llong:
1376 			f = info->res.val.l;
1377 			break;
1378 
1379 		case fex_float:
1380 			f = info->res.val.f;
1381 			break;
1382 
1383 		case fex_double:
1384 			f = info->res.val.d;
1385 			break;
1386 
1387 		case fex_ldouble:
1388 			f = info->res.val.q;
1389 			break;
1390 
1391 		default:
1392 			break;
1393 		}
1394 		inst->op1->f[0] = f;
1395 	}
1396 }
1397 
1398 /*
1399  * Store the results from a SIMD instruction.  For each i, store
1400  * the result value from info[i] in the i-th part of the destination
1401  * of the SIMD SSE instruction specified by *inst.  If no result
1402  * is given but the exception indicated by e[i] is underflow or
1403  * overflow, supply the default trapped result.
1404  *
1405  * This routine does not work if the instruction specified by *inst
1406  * is not a SIMD instruction.
1407  */
1408 void
__fex_st_simd_result(ucontext_t * uap,sseinst_t * inst,enum fex_exception * e,fex_info_t * info)1409 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1410     fex_info_t *info)
1411 {
1412 	sseinst_t	dummy;
1413 	int		i;
1414 
1415 	/* store each part */
1416 	switch (inst->op) {
1417 	case cmpps:
1418 		dummy.op = cmpss;
1419 		dummy.imm = inst->imm;
1420 		for (i = 0; i < 4; i++) {
1421 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1422 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1423 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1424 		}
1425 		break;
1426 
1427 	case minps:
1428 		dummy.op = minss;
1429 		for (i = 0; i < 4; i++) {
1430 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1431 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1432 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1433 		}
1434 		break;
1435 
1436 	case maxps:
1437 		dummy.op = maxss;
1438 		for (i = 0; i < 4; i++) {
1439 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1440 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1441 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1442 		}
1443 		break;
1444 
1445 	case addps:
1446 		dummy.op = addss;
1447 		for (i = 0; i < 4; i++) {
1448 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1449 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1450 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1451 		}
1452 		break;
1453 
1454 	case subps:
1455 		dummy.op = subss;
1456 		for (i = 0; i < 4; i++) {
1457 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1458 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1459 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1460 		}
1461 		break;
1462 
1463 	case mulps:
1464 		dummy.op = mulss;
1465 		for (i = 0; i < 4; i++) {
1466 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1467 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1468 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1469 		}
1470 		break;
1471 
1472 	case divps:
1473 		dummy.op = divss;
1474 		for (i = 0; i < 4; i++) {
1475 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1476 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1477 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1478 		}
1479 		break;
1480 
1481 	case sqrtps:
1482 		dummy.op = sqrtss;
1483 		for (i = 0; i < 4; i++) {
1484 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1485 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1486 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1487 		}
1488 		break;
1489 
1490 	case cvtdq2ps:
1491 		dummy.op = cvtsi2ss;
1492 		for (i = 0; i < 4; i++) {
1493 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1494 			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1495 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1496 		}
1497 		break;
1498 
1499 	case cvttps2dq:
1500 		dummy.op = cvttss2si;
1501 		for (i = 0; i < 4; i++) {
1502 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1503 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1504 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1505 		}
1506 		break;
1507 
1508 	case cvtps2dq:
1509 		dummy.op = cvtss2si;
1510 		for (i = 0; i < 4; i++) {
1511 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1512 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1513 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1514 		}
1515 		break;
1516 
1517 	case cvtpi2ps:
1518 		dummy.op = cvtsi2ss;
1519 		for (i = 0; i < 2; i++) {
1520 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1521 			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1522 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1523 		}
1524 		break;
1525 
1526 	case cvttps2pi:
1527 		dummy.op = cvttss2si;
1528 		for (i = 0; i < 2; i++) {
1529 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1530 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1531 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1532 		}
1533 		break;
1534 
1535 	case cvtps2pi:
1536 		dummy.op = cvtss2si;
1537 		for (i = 0; i < 2; i++) {
1538 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1539 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1540 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1541 		}
1542 		break;
1543 
1544 	case cmppd:
1545 		dummy.op = cmpsd;
1546 		dummy.imm = inst->imm;
1547 		for (i = 0; i < 2; i++) {
1548 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1549 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1550 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1551 		}
1552 		break;
1553 
1554 	case minpd:
1555 		dummy.op = minsd;
1556 		for (i = 0; i < 2; i++) {
1557 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1558 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1559 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1560 		}
1561 		break;
1562 
1563 	case maxpd:
1564 		dummy.op = maxsd;
1565 		for (i = 0; i < 2; i++) {
1566 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1567 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1568 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1569 		}
1570 		break;
1571 
1572 	case addpd:
1573 		dummy.op = addsd;
1574 		for (i = 0; i < 2; i++) {
1575 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1576 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 		}
1579 		break;
1580 
1581 	case subpd:
1582 		dummy.op = subsd;
1583 		for (i = 0; i < 2; i++) {
1584 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1585 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1586 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1587 		}
1588 		break;
1589 
1590 	case mulpd:
1591 		dummy.op = mulsd;
1592 		for (i = 0; i < 2; i++) {
1593 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1594 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1595 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1596 		}
1597 		break;
1598 
1599 	case divpd:
1600 		dummy.op = divsd;
1601 		for (i = 0; i < 2; i++) {
1602 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1603 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1604 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1605 		}
1606 		break;
1607 
1608 	case sqrtpd:
1609 		dummy.op = sqrtsd;
1610 		for (i = 0; i < 2; i++) {
1611 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1612 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1613 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1614 		}
1615 		break;
1616 
1617 	case cvtpi2pd:
1618 	case cvtdq2pd:
1619 		dummy.op = cvtsi2sd;
1620 		for (i = 0; i < 2; i++) {
1621 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1622 			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1623 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1624 		}
1625 		break;
1626 
1627 	case cvttpd2pi:
1628 	case cvttpd2dq:
1629 		dummy.op = cvttsd2si;
1630 		for (i = 0; i < 2; i++) {
1631 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1632 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1633 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1634 		}
1635 		/* for cvttpd2dq, zero the high 64 bits of the destination */
1636 		if (inst->op == cvttpd2dq)
1637 			inst->op1->l[1] = 0ll;
1638 		break;
1639 
1640 	case cvtpd2pi:
1641 	case cvtpd2dq:
1642 		dummy.op = cvtsd2si;
1643 		for (i = 0; i < 2; i++) {
1644 			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1645 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1646 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1647 		}
1648 		/* for cvtpd2dq, zero the high 64 bits of the destination */
1649 		if (inst->op == cvtpd2dq)
1650 			inst->op1->l[1] = 0ll;
1651 		break;
1652 
1653 	case cvtps2pd:
1654 		dummy.op = cvtss2sd;
1655 		for (i = 0; i < 2; i++) {
1656 			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1657 			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1658 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1659 		}
1660 		break;
1661 
1662 	case cvtpd2ps:
1663 		dummy.op = cvtsd2ss;
1664 		for (i = 0; i < 2; i++) {
1665 			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1666 			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1667 			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1668 		}
1669 		/* zero the high 64 bits of the destination */
1670 		inst->op1->l[1] = 0ll;
1671 
1672 	default:
1673 		break;
1674 	}
1675 }
1676