1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <ucontext.h>
31 #ifdef __DragonFly__
32 #include <machine/npx.h>
33 #endif
34 #ifdef __FreeBSD__
35 #include <stdint.h>
36 #include <machine/fpu.h>
37 #endif
38 #include <fenv.h>
39 #if defined(__SUNPRO_C)
40 #include <sunmath.h>
41 #else
42 #include <sys/ieeefp.h>
43 #endif
44 #include "fex_handler.h"
45 #include "fenv_inlines.h"
46
47 /* Hardcoded for amd64 */
48
49 #if defined __DragonFly__ || defined __FreeBSD__
50 #define REG_PC mc_rip
51 #define REG_PS mc_rflags
52 # ifdef __FreeBSD__
53 #define FPU_STATE mc_fpstate
54 #define FPU_STRUCTURE savefpu
55 # endif
56 # ifdef __DragonFly__
57 #define FPU_STATE mc_fpregs
58 #define FPU_STRUCTURE savexmm64
59 # endif
60 #else
61 #error SSE instructions not supported on this platform
62 #endif
63
64 /*
65 * Support for SSE instructions
66 */
67
68 /*
69 * Decode an SSE instruction. Fill in *inst and return the length of the
70 * instruction in bytes. Return 0 if the instruction is not recognized.
71 */
72 int
__fex_parse_sse(ucontext_t * uap,sseinst_t * inst)73 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
74 {
75 unsigned char *ip;
76 char *addr;
77 int i, dbl, simd, rex, modrm, sib, r;
78 struct FPU_STRUCTURE *fpstate;
79
80 i = 0;
81 ip = (unsigned char *)uap->uc_mcontext.REG_PC;
82
83 /* look for pseudo-prefixes */
84 dbl = 0;
85 simd = SIMD;
86 if (ip[i] == 0xF3) {
87 simd = 0;
88 i++;
89 } else if (ip[i] == 0x66) {
90 dbl = DOUBLE;
91 i++;
92 } else if (ip[i] == 0xF2) {
93 dbl = DOUBLE;
94 simd = 0;
95 i++;
96 }
97
98 /* look for AMD64 REX prefix */
99 rex = 0;
100 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
101 rex = ip[i];
102 i++;
103 }
104
105 /* parse opcode */
106 if (ip[i++] != 0x0F)
107 return 0;
108 switch (ip[i++]) {
109 case 0x2A:
110 inst->op = (int)cvtsi2ss + simd + dbl;
111 if (!simd)
112 inst->op = (int)inst->op + (rex & 8);
113 break;
114
115 case 0x2C:
116 inst->op = (int)cvttss2si + simd + dbl;
117 if (!simd)
118 inst->op = (int)inst->op + (rex & 8);
119 break;
120
121 case 0x2D:
122 inst->op = (int)cvtss2si + simd + dbl;
123 if (!simd)
124 inst->op = (int)inst->op + (rex & 8);
125 break;
126
127 case 0x2E:
128 /* oddball: scalar instruction in a SIMD opcode group */
129 if (!simd)
130 return 0;
131 inst->op = (int)ucomiss + dbl;
132 break;
133
134 case 0x2F:
135 /* oddball: scalar instruction in a SIMD opcode group */
136 if (!simd)
137 return 0;
138 inst->op = (int)comiss + dbl;
139 break;
140
141 case 0x51:
142 inst->op = (int)sqrtss + simd + dbl;
143 break;
144
145 case 0x58:
146 inst->op = (int)addss + simd + dbl;
147 break;
148
149 case 0x59:
150 inst->op = (int)mulss + simd + dbl;
151 break;
152
153 case 0x5A:
154 inst->op = (int)cvtss2sd + simd + dbl;
155 break;
156
157 case 0x5B:
158 if (dbl) {
159 if (simd)
160 inst->op = cvtps2dq;
161 else
162 return 0;
163 } else {
164 inst->op = (simd)? cvtdq2ps : cvttps2dq;
165 }
166 break;
167
168 case 0x5C:
169 inst->op = (int)subss + simd + dbl;
170 break;
171
172 case 0x5D:
173 inst->op = (int)minss + simd + dbl;
174 break;
175
176 case 0x5E:
177 inst->op = (int)divss + simd + dbl;
178 break;
179
180 case 0x5F:
181 inst->op = (int)maxss + simd + dbl;
182 break;
183
184 case 0xC2:
185 inst->op = (int)cmpss + simd + dbl;
186 break;
187
188 case 0xE6:
189 if (simd) {
190 if (dbl)
191 inst->op = cvttpd2dq;
192 else
193 return 0;
194 } else {
195 inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
196 }
197 break;
198
199 default:
200 return 0;
201 }
202
203 /* locate operands */
204 modrm = ip[i++];
205
206 if (inst->op == cvtss2si || inst->op == cvttss2si ||
207 inst->op == cvtsd2si || inst->op == cvttsd2si ||
208 inst->op == cvtss2siq || inst->op == cvttss2siq ||
209 inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
210 /* op1 is a gp register */
211 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
212 switch (r) {
213 case 0: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rax; break;
214 case 1: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rcx; break;
215 case 2: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rdx; break;
216 case 3: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rbx; break;
217 case 4: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rsp; break;
218 case 5: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rbp; break;
219 case 6: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rsi; break;
220 case 7: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_rdi; break;
221 case 8: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r8; break;
222 case 9: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r9; break;
223 case 10: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r10; break;
224 case 11: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r11; break;
225 case 12: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r12; break;
226 case 13: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r13; break;
227 case 14: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r14; break;
228 default: inst->op1 = (sseoperand_t *)&uap->uc_mcontext.mc_r15; break;
229 }
230 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
231 inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
232 /* op1 is a mmx register */
233 fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
234 inst->op1 = (sseoperand_t *)&fpstate->sv_fp[(modrm >> 3) & 7];
235 } else {
236 /* op1 is a xmm register */
237 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
238 fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
239 inst->op1 = (sseoperand_t *)&fpstate->sv_xmm[r];
240 }
241
242 if ((modrm >> 6) == 3) {
243 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
244 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
245 /* op2 is a gp register */
246 r = ((rex & 1) << 3) | (modrm & 7);
247 switch (r) {
248 case 0: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rax; break;
249 case 1: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rcx; break;
250 case 2: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rdx; break;
251 case 3: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rbx; break;
252 case 4: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rsp; break;
253 case 5: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rbp; break;
254 case 6: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rsi; break;
255 case 7: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_rdi; break;
256 case 8: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r8; break;
257 case 9: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r9; break;
258 case 10: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r10; break;
259 case 11: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r11; break;
260 case 12: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r12; break;
261 case 13: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r13; break;
262 case 14: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r14; break;
263 default: inst->op2 = (sseoperand_t *)&uap->uc_mcontext.mc_r15; break;
264 }
265 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
266 /* op2 is a mmx register */
267 fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
268 inst->op2 = (sseoperand_t *)&fpstate->sv_fp[modrm & 7];
269 } else {
270 /* op2 is a xmm register */
271 r = ((rex & 1) << 3) | (modrm & 7);
272 fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
273 inst->op2 = (sseoperand_t *)&fpstate->sv_xmm[r];
274 }
275 } else if ((modrm & 0xc7) == 0x05) {
276 /* address of next instruction + offset */
277 r = i + 4;
278 if (inst->op == cmpss || inst->op == cmpps ||
279 inst->op == cmpsd || inst->op == cmppd)
280 r++;
281 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
282 i += 4;
283 } else {
284 /* complex address */
285 if ((modrm & 7) == 4) {
286 /* parse sib byte */
287 sib = ip[i++];
288 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
289 /* start with absolute address */
290 addr = (char *)(uintptr_t)(*(int *)(ip + i));
291 i += 4;
292 } else {
293 /* start with base */
294 r = ((rex & 1) << 3) | (sib & 7);
295 switch (r) {
296 case 0: addr = (char *)&uap->uc_mcontext.mc_rax; break;
297 case 1: addr = (char *)&uap->uc_mcontext.mc_rcx; break;
298 case 2: addr = (char *)&uap->uc_mcontext.mc_rdx; break;
299 case 3: addr = (char *)&uap->uc_mcontext.mc_rbx; break;
300 case 4: addr = (char *)&uap->uc_mcontext.mc_rsp; break;
301 case 5: addr = (char *)&uap->uc_mcontext.mc_rbp; break;
302 case 6: addr = (char *)&uap->uc_mcontext.mc_rsi; break;
303 case 7: addr = (char *)&uap->uc_mcontext.mc_rdi; break;
304 case 8: addr = (char *)&uap->uc_mcontext.mc_r8; break;
305 case 9: addr = (char *)&uap->uc_mcontext.mc_r9; break;
306 case 10: addr = (char *)&uap->uc_mcontext.mc_r10; break;
307 case 11: addr = (char *)&uap->uc_mcontext.mc_r11; break;
308 case 12: addr = (char *)&uap->uc_mcontext.mc_r12; break;
309 case 13: addr = (char *)&uap->uc_mcontext.mc_r13; break;
310 case 14: addr = (char *)&uap->uc_mcontext.mc_r14; break;
311 default: addr = (char *)&uap->uc_mcontext.mc_r15; break;
312 }
313 }
314 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
315 if (r != 4) {
316 /* add scaled index */
317 switch (r) {
318 case 0: addr += uap->uc_mcontext.mc_rax << (sib >> 6); break;
319 case 1: addr += uap->uc_mcontext.mc_rcx << (sib >> 6); break;
320 case 2: addr += uap->uc_mcontext.mc_rdx << (sib >> 6); break;
321 case 3: addr += uap->uc_mcontext.mc_rbx << (sib >> 6); break;
322 case 4: addr += uap->uc_mcontext.mc_rsp << (sib >> 6); break;
323 case 5: addr += uap->uc_mcontext.mc_rbp << (sib >> 6); break;
324 case 6: addr += uap->uc_mcontext.mc_rsi << (sib >> 6); break;
325 case 7: addr += uap->uc_mcontext.mc_rdi << (sib >> 6); break;
326 case 8: addr += uap->uc_mcontext.mc_r8 << (sib >> 6); break;
327 case 9: addr += uap->uc_mcontext.mc_r9 << (sib >> 6); break;
328 case 10: addr += uap->uc_mcontext.mc_r10 << (sib >> 6); break;
329 case 11: addr += uap->uc_mcontext.mc_r11 << (sib >> 6); break;
330 case 12: addr += uap->uc_mcontext.mc_r12 << (sib >> 6); break;
331 case 13: addr += uap->uc_mcontext.mc_r13 << (sib >> 6); break;
332 case 14: addr += uap->uc_mcontext.mc_r14 << (sib >> 6); break;
333 default: addr += uap->uc_mcontext.mc_r15 << (sib >> 6); break;
334 }
335 }
336 } else {
337 r = ((rex & 1) << 3) | (modrm & 7);
338 switch (r) {
339 case 0: addr = (char *)uap->uc_mcontext.mc_rax; break;
340 case 1: addr = (char *)uap->uc_mcontext.mc_rcx; break;
341 case 2: addr = (char *)uap->uc_mcontext.mc_rdx; break;
342 case 3: addr = (char *)uap->uc_mcontext.mc_rbx; break;
343 case 4: addr = (char *)uap->uc_mcontext.mc_rsp; break;
344 case 5: addr = (char *)uap->uc_mcontext.mc_rbp; break;
345 case 6: addr = (char *)uap->uc_mcontext.mc_rsi; break;
346 case 7: addr = (char *)uap->uc_mcontext.mc_rdi; break;
347 case 8: addr = (char *)uap->uc_mcontext.mc_r8; break;
348 case 9: addr = (char *)uap->uc_mcontext.mc_r9; break;
349 case 10: addr = (char *)uap->uc_mcontext.mc_r10; break;
350 case 11: addr = (char *)uap->uc_mcontext.mc_r11; break;
351 case 12: addr = (char *)uap->uc_mcontext.mc_r12; break;
352 case 13: addr = (char *)uap->uc_mcontext.mc_r13; break;
353 case 14: addr = (char *)uap->uc_mcontext.mc_r14; break;
354 default: addr = (char *)uap->uc_mcontext.mc_r15; break;
355 }
356 }
357
358 /* add displacement, if any */
359 if ((modrm >> 6) == 1) {
360 addr += (char)ip[i++];
361 } else if ((modrm >> 6) == 2) {
362 addr += *(int *)(ip + i);
363 i += 4;
364 }
365 inst->op2 = (sseoperand_t *)addr;
366 }
367
368 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
369 inst->op == cmppd) {
370 /* get the immediate operand */
371 inst->imm = ip[i++];
372 }
373
374 return i;
375 }
376
377 static enum fp_class_type
my_fp_classf(float * x)378 my_fp_classf(float *x)
379 {
380 int i = *(int *)x & ~0x80000000;
381
382 if (i < 0x7f800000) {
383 if (i < 0x00800000)
384 return ((i == 0)? fp_zero : fp_subnormal);
385 return fp_normal;
386 }
387 else if (i == 0x7f800000)
388 return fp_infinity;
389 else if (i & 0x400000)
390 return fp_quiet;
391 else
392 return fp_signaling;
393 }
394
395 static enum fp_class_type
my_fp_class(double * x)396 my_fp_class(double *x)
397 {
398 int i = *(1+(int *)x) & ~0x80000000;
399
400 if (i < 0x7ff00000) {
401 if (i < 0x00100000)
402 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
403 return fp_normal;
404 }
405 else if (i == 0x7ff00000 && *(int *)x == 0)
406 return fp_infinity;
407 else if (i & 0x80000)
408 return fp_quiet;
409 else
410 return fp_signaling;
411 }
412
413 /*
414 * Inspect a scalar SSE instruction that incurred an invalid operation
415 * exception to determine which type of exception it was.
416 */
417 static enum fex_exception
__fex_get_sse_invalid_type(sseinst_t * inst)418 __fex_get_sse_invalid_type(sseinst_t *inst)
419 {
420 enum fp_class_type t1, t2;
421
422 /* check op2 for signaling nan */
423 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
424 my_fp_classf(&inst->op2->f[0]);
425 if (t2 == fp_signaling)
426 return fex_inv_snan;
427
428 /* eliminate all single-operand instructions */
429 switch (inst->op) {
430 case cvtsd2ss:
431 case cvtss2sd:
432 /* hmm, this shouldn't have happened */
433 return (enum fex_exception) -1;
434
435 case sqrtss:
436 case sqrtsd:
437 return fex_inv_sqrt;
438
439 case cvtss2si:
440 case cvtsd2si:
441 case cvttss2si:
442 case cvttsd2si:
443 case cvtss2siq:
444 case cvtsd2siq:
445 case cvttss2siq:
446 case cvttsd2siq:
447 return fex_inv_int;
448 default:
449 break;
450 }
451
452 /* check op1 for signaling nan */
453 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
454 my_fp_classf(&inst->op1->f[0]);
455 if (t1 == fp_signaling)
456 return fex_inv_snan;
457
458 /* check two-operand instructions for other cases */
459 switch (inst->op) {
460 case cmpss:
461 case cmpsd:
462 case minss:
463 case minsd:
464 case maxss:
465 case maxsd:
466 case comiss:
467 case comisd:
468 return fex_inv_cmp;
469
470 case addss:
471 case addsd:
472 case subss:
473 case subsd:
474 if (t1 == fp_infinity && t2 == fp_infinity)
475 return fex_inv_isi;
476 break;
477
478 case mulss:
479 case mulsd:
480 if ((t1 == fp_zero && t2 == fp_infinity) ||
481 (t2 == fp_zero && t1 == fp_infinity))
482 return fex_inv_zmi;
483 break;
484
485 case divss:
486 case divsd:
487 if (t1 == fp_zero && t2 == fp_zero)
488 return fex_inv_zdz;
489 if (t1 == fp_infinity && t2 == fp_infinity)
490 return fex_inv_idi;
491 default:
492 break;
493 }
494
495 return (enum fex_exception)-1;
496 }
497
/*
 * inline templates
 *
 * Each routine executes the SSE instruction it is named after on the
 * values its arguments point to.  Parameter names reflect how the
 * routines are called from __fex_get_sse_op: source operand(s) first,
 * then the location that receives the result (the comi/ucomi forms take
 * no result pointer).
 */

/* single precision */
extern void sse_cmpeqss(float *a, float *b, int *res);
extern void sse_cmpltss(float *a, float *b, int *res);
extern void sse_cmpless(float *a, float *b, int *res);
extern void sse_cmpunordss(float *a, float *b, int *res);
extern void sse_minss(float *a, float *b, float *res);
extern void sse_maxss(float *a, float *b, float *res);
extern void sse_addss(float *a, float *b, float *res);
extern void sse_subss(float *a, float *b, float *res);
extern void sse_mulss(float *a, float *b, float *res);
extern void sse_divss(float *a, float *b, float *res);
extern void sse_sqrtss(float *a, float *res);
extern void sse_ucomiss(float *a, float *b);
extern void sse_comiss(float *a, float *b);
extern void sse_cvtss2sd(float *a, double *res);
extern void sse_cvtsi2ss(int *a, float *res);
extern void sse_cvttss2si(float *a, int *res);
extern void sse_cvtss2si(float *a, int *res);
extern void sse_cvtsi2ssq(long long *a, float *res);
extern void sse_cvttss2siq(float *a, long long *res);
extern void sse_cvtss2siq(float *a, long long *res);

/* double precision */
extern void sse_cmpeqsd(double *a, double *b, long long *res);
extern void sse_cmpltsd(double *a, double *b, long long *res);
extern void sse_cmplesd(double *a, double *b, long long *res);
extern void sse_cmpunordsd(double *a, double *b, long long *res);
extern void sse_minsd(double *a, double *b, double *res);
extern void sse_maxsd(double *a, double *b, double *res);
extern void sse_addsd(double *a, double *b, double *res);
extern void sse_subsd(double *a, double *b, double *res);
extern void sse_mulsd(double *a, double *b, double *res);
extern void sse_divsd(double *a, double *b, double *res);
extern void sse_sqrtsd(double *a, double *res);
extern void sse_ucomisd(double *a, double *b);
extern void sse_comisd(double *a, double *b);
extern void sse_cvtsd2ss(double *a, float *res);
extern void sse_cvtsi2sd(int *a, double *res);
extern void sse_cvttsd2si(double *a, int *res);
extern void sse_cvtsd2si(double *a, int *res);
extern void sse_cvtsi2sdq(long long *a, double *res);
extern void sse_cvttsd2siq(double *a, long long *res);
extern void sse_cvtsd2siq(double *a, long long *res);
539
540 /*
541 * Fill in *info with the operands, default untrapped result, and
542 * flags produced by a scalar SSE instruction, and return the type
543 * of trapped exception (if any). On entry, the mxcsr must have
544 * all exceptions masked and all flags clear. The same conditions
545 * will hold on exit.
546 *
547 * This routine does not work if the instruction specified by *inst
548 * is not a scalar instruction.
549 */
550 enum fex_exception
__fex_get_sse_op(ucontext_t * uap,sseinst_t * inst,fex_info_t * info)551 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
552 {
553 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
554 struct FPU_STRUCTURE *fpstate;
555
556 /*
557 * Perform the operation with traps disabled and check the
558 * exception flags. If the underflow trap was enabled, also
559 * check for an exact subnormal result.
560 */
561 __fenv_getmxcsr(&oldmxcsr);
562 subnorm = 0;
563 if ((int)inst->op & DOUBLE) {
564 if (inst->op == cvtsi2sd) {
565 info->op1.type = fex_int;
566 info->op1.val.i = inst->op2->i[0];
567 info->op2.type = fex_nodata;
568 } else if (inst->op == cvtsi2sdq) {
569 info->op1.type = fex_llong;
570 info->op1.val.l = inst->op2->l[0];
571 info->op2.type = fex_nodata;
572 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
573 inst->op == cvttsd2si || inst->op == cvtsd2si ||
574 inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
575 info->op1.type = fex_double;
576 info->op1.val.d = inst->op2->d[0];
577 info->op2.type = fex_nodata;
578 } else {
579 info->op1.type = fex_double;
580 info->op1.val.d = inst->op1->d[0];
581 info->op2.type = fex_double;
582 info->op2.val.d = inst->op2->d[0];
583 }
584 info->res.type = fex_double;
585 switch (inst->op) {
586 case cmpsd:
587 info->op = fex_cmp;
588 info->res.type = fex_llong;
589 switch (inst->imm & 3) {
590 case 0:
591 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
592 &info->res.val.l);
593 break;
594
595 case 1:
596 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
597 &info->res.val.l);
598 break;
599
600 case 2:
601 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
602 &info->res.val.l);
603 break;
604
605 case 3:
606 sse_cmpunordsd(&info->op1.val.d,
607 &info->op2.val.d, &info->res.val.l);
608 }
609 if (inst->imm & 4)
610 info->res.val.l ^= 0xffffffffffffffffull;
611 break;
612
613 case minsd:
614 info->op = fex_other;
615 sse_minsd(&info->op1.val.d, &info->op2.val.d,
616 &info->res.val.d);
617 break;
618
619 case maxsd:
620 info->op = fex_other;
621 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
622 &info->res.val.d);
623 break;
624
625 case addsd:
626 info->op = fex_add;
627 sse_addsd(&info->op1.val.d, &info->op2.val.d,
628 &info->res.val.d);
629 if (my_fp_class(&info->res.val.d) == fp_subnormal)
630 subnorm = 1;
631 break;
632
633 case subsd:
634 info->op = fex_sub;
635 sse_subsd(&info->op1.val.d, &info->op2.val.d,
636 &info->res.val.d);
637 if (my_fp_class(&info->res.val.d) == fp_subnormal)
638 subnorm = 1;
639 break;
640
641 case mulsd:
642 info->op = fex_mul;
643 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
644 &info->res.val.d);
645 if (my_fp_class(&info->res.val.d) == fp_subnormal)
646 subnorm = 1;
647 break;
648
649 case divsd:
650 info->op = fex_div;
651 sse_divsd(&info->op1.val.d, &info->op2.val.d,
652 &info->res.val.d);
653 if (my_fp_class(&info->res.val.d) == fp_subnormal)
654 subnorm = 1;
655 break;
656
657 case sqrtsd:
658 info->op = fex_sqrt;
659 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
660 break;
661
662 case cvtsd2ss:
663 info->op = fex_cnvt;
664 info->res.type = fex_float;
665 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
666 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
667 subnorm = 1;
668 break;
669
670 case cvtsi2sd:
671 info->op = fex_cnvt;
672 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
673 break;
674
675 case cvttsd2si:
676 info->op = fex_cnvt;
677 info->res.type = fex_int;
678 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
679 break;
680
681 case cvtsd2si:
682 info->op = fex_cnvt;
683 info->res.type = fex_int;
684 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
685 break;
686
687 case cvtsi2sdq:
688 info->op = fex_cnvt;
689 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
690 break;
691
692 case cvttsd2siq:
693 info->op = fex_cnvt;
694 info->res.type = fex_llong;
695 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
696 break;
697
698 case cvtsd2siq:
699 info->op = fex_cnvt;
700 info->res.type = fex_llong;
701 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
702 break;
703
704 case ucomisd:
705 info->op = fex_cmp;
706 info->res.type = fex_nodata;
707 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
708 break;
709
710 case comisd:
711 info->op = fex_cmp;
712 info->res.type = fex_nodata;
713 sse_comisd(&info->op1.val.d, &info->op2.val.d);
714 break;
715 default:
716 break;
717 }
718 } else {
719 if (inst->op == cvtsi2ss) {
720 info->op1.type = fex_int;
721 info->op1.val.i = inst->op2->i[0];
722 info->op2.type = fex_nodata;
723 } else if (inst->op == cvtsi2ssq) {
724 info->op1.type = fex_llong;
725 info->op1.val.l = inst->op2->l[0];
726 info->op2.type = fex_nodata;
727 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
728 inst->op == cvttss2si || inst->op == cvtss2si ||
729 inst->op == cvttss2siq || inst->op == cvtss2siq) {
730 info->op1.type = fex_float;
731 info->op1.val.f = inst->op2->f[0];
732 info->op2.type = fex_nodata;
733 } else {
734 info->op1.type = fex_float;
735 info->op1.val.f = inst->op1->f[0];
736 info->op2.type = fex_float;
737 info->op2.val.f = inst->op2->f[0];
738 }
739 info->res.type = fex_float;
740 switch (inst->op) {
741 case cmpss:
742 info->op = fex_cmp;
743 info->res.type = fex_int;
744 switch (inst->imm & 3) {
745 case 0:
746 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
747 &info->res.val.i);
748 break;
749
750 case 1:
751 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
752 &info->res.val.i);
753 break;
754
755 case 2:
756 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
757 &info->res.val.i);
758 break;
759
760 case 3:
761 sse_cmpunordss(&info->op1.val.f,
762 &info->op2.val.f, &info->res.val.i);
763 }
764 if (inst->imm & 4)
765 info->res.val.i ^= 0xffffffffu;
766 break;
767
768 case minss:
769 info->op = fex_other;
770 sse_minss(&info->op1.val.f, &info->op2.val.f,
771 &info->res.val.f);
772 break;
773
774 case maxss:
775 info->op = fex_other;
776 sse_maxss(&info->op1.val.f, &info->op2.val.f,
777 &info->res.val.f);
778 break;
779
780 case addss:
781 info->op = fex_add;
782 sse_addss(&info->op1.val.f, &info->op2.val.f,
783 &info->res.val.f);
784 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
785 subnorm = 1;
786 break;
787
788 case subss:
789 info->op = fex_sub;
790 sse_subss(&info->op1.val.f, &info->op2.val.f,
791 &info->res.val.f);
792 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
793 subnorm = 1;
794 break;
795
796 case mulss:
797 info->op = fex_mul;
798 sse_mulss(&info->op1.val.f, &info->op2.val.f,
799 &info->res.val.f);
800 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
801 subnorm = 1;
802 break;
803
804 case divss:
805 info->op = fex_div;
806 sse_divss(&info->op1.val.f, &info->op2.val.f,
807 &info->res.val.f);
808 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
809 subnorm = 1;
810 break;
811
812 case sqrtss:
813 info->op = fex_sqrt;
814 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
815 break;
816
817 case cvtss2sd:
818 info->op = fex_cnvt;
819 info->res.type = fex_double;
820 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
821 break;
822
823 case cvtsi2ss:
824 info->op = fex_cnvt;
825 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
826 break;
827
828 case cvttss2si:
829 info->op = fex_cnvt;
830 info->res.type = fex_int;
831 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
832 break;
833
834 case cvtss2si:
835 info->op = fex_cnvt;
836 info->res.type = fex_int;
837 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
838 break;
839
840 case cvtsi2ssq:
841 info->op = fex_cnvt;
842 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
843 break;
844
845 case cvttss2siq:
846 info->op = fex_cnvt;
847 info->res.type = fex_llong;
848 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
849 break;
850
851 case cvtss2siq:
852 info->op = fex_cnvt;
853 info->res.type = fex_llong;
854 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
855 break;
856
857 case ucomiss:
858 info->op = fex_cmp;
859 info->res.type = fex_nodata;
860 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
861 break;
862
863 case comiss:
864 info->op = fex_cmp;
865 info->res.type = fex_nodata;
866 sse_comiss(&info->op1.val.f, &info->op2.val.f);
867 break;
868 default:
869 break;
870 }
871 }
872 __fenv_getmxcsr(&mxcsr);
873 info->flags = mxcsr & 0x3d;
874 __fenv_setmxcsr(&oldmxcsr);
875
876 /* determine which exception would have been trapped */
877 fpstate = (struct FPU_STRUCTURE *)&uap->uc_mcontext.FPU_STATE;
878 te = ~(fpstate->sv_env.en_mxcsr >> 7) & 0x3d;
879 e = mxcsr & te;
880 if (e & FE_INVALID)
881 return __fex_get_sse_invalid_type(inst);
882 if (e & FE_DIVBYZERO)
883 return fex_division;
884 if (e & FE_OVERFLOW)
885 return fex_overflow;
886 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
887 return fex_underflow;
888 if (e & FE_INEXACT)
889 return fex_inexact;
890 return (enum fex_exception)-1;
891 }
892
893 /*
894 * Emulate a SIMD SSE instruction to determine which exceptions occur
895 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
896 * trapped exception that would occur if the i-th part of the SIMD
897 * instruction were executed in isolation; set e[i] to -1 if no
898 * trapped exception would occur in this part. Also fill in info[i]
899 * with the corresponding operands, default untrapped result, and
900 * flags.
901 *
902 * This routine does not work if the instruction specified by *inst
903 * is not a SIMD instruction.
904 */
905 void
__fex_get_simd_op(ucontext_t * uap,sseinst_t * inst,enum fex_exception * e,fex_info_t * info)906 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
907 fex_info_t *info)
908 {
909 sseinst_t dummy;
910 int i;
911
912 e[0] = e[1] = e[2] = e[3] = -1;
913
914 /* perform each part of the SIMD operation */
915 switch (inst->op) {
916 case cmpps:
917 dummy.op = cmpss;
918 dummy.imm = inst->imm;
919 for (i = 0; i < 4; i++) {
920 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
921 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
922 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
923 }
924 break;
925
926 case minps:
927 dummy.op = minss;
928 for (i = 0; i < 4; i++) {
929 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
930 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
931 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
932 }
933 break;
934
935 case maxps:
936 dummy.op = maxss;
937 for (i = 0; i < 4; i++) {
938 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
939 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
940 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
941 }
942 break;
943
944 case addps:
945 dummy.op = addss;
946 for (i = 0; i < 4; i++) {
947 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
948 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
949 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
950 }
951 break;
952
953 case subps:
954 dummy.op = subss;
955 for (i = 0; i < 4; i++) {
956 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
957 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
958 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
959 }
960 break;
961
962 case mulps:
963 dummy.op = mulss;
964 for (i = 0; i < 4; i++) {
965 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
966 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
967 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
968 }
969 break;
970
971 case divps:
972 dummy.op = divss;
973 for (i = 0; i < 4; i++) {
974 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
975 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
976 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
977 }
978 break;
979
980 case sqrtps:
981 dummy.op = sqrtss;
982 for (i = 0; i < 4; i++) {
983 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
984 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
985 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
986 }
987 break;
988
989 case cvtdq2ps:
990 dummy.op = cvtsi2ss;
991 for (i = 0; i < 4; i++) {
992 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
993 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
994 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
995 }
996 break;
997
998 case cvttps2dq:
999 dummy.op = cvttss2si;
1000 for (i = 0; i < 4; i++) {
1001 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1002 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1003 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1004 }
1005 break;
1006
1007 case cvtps2dq:
1008 dummy.op = cvtss2si;
1009 for (i = 0; i < 4; i++) {
1010 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1011 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1012 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1013 }
1014 break;
1015
1016 case cvtpi2ps:
1017 dummy.op = cvtsi2ss;
1018 for (i = 0; i < 2; i++) {
1019 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1020 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1021 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1022 }
1023 break;
1024
1025 case cvttps2pi:
1026 dummy.op = cvttss2si;
1027 for (i = 0; i < 2; i++) {
1028 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1029 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1030 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1031 }
1032 break;
1033
1034 case cvtps2pi:
1035 dummy.op = cvtss2si;
1036 for (i = 0; i < 2; i++) {
1037 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1038 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1039 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1040 }
1041 break;
1042
1043 case cmppd:
1044 dummy.op = cmpsd;
1045 dummy.imm = inst->imm;
1046 for (i = 0; i < 2; i++) {
1047 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1048 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1049 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1050 }
1051 break;
1052
1053 case minpd:
1054 dummy.op = minsd;
1055 for (i = 0; i < 2; i++) {
1056 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1057 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1058 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1059 }
1060 break;
1061
1062 case maxpd:
1063 dummy.op = maxsd;
1064 for (i = 0; i < 2; i++) {
1065 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1066 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1067 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1068 }
1069 break;
1070
1071 case addpd:
1072 dummy.op = addsd;
1073 for (i = 0; i < 2; i++) {
1074 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1075 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1076 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1077 }
1078 break;
1079
1080 case subpd:
1081 dummy.op = subsd;
1082 for (i = 0; i < 2; i++) {
1083 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1084 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1085 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1086 }
1087 break;
1088
1089 case mulpd:
1090 dummy.op = mulsd;
1091 for (i = 0; i < 2; i++) {
1092 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1093 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1094 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1095 }
1096 break;
1097
1098 case divpd:
1099 dummy.op = divsd;
1100 for (i = 0; i < 2; i++) {
1101 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1102 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1103 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1104 }
1105 break;
1106
1107 case sqrtpd:
1108 dummy.op = sqrtsd;
1109 for (i = 0; i < 2; i++) {
1110 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1111 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1112 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1113 }
1114 break;
1115
1116 case cvtpi2pd:
1117 case cvtdq2pd:
1118 dummy.op = cvtsi2sd;
1119 for (i = 0; i < 2; i++) {
1120 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1121 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1122 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1123 }
1124 break;
1125
1126 case cvttpd2pi:
1127 case cvttpd2dq:
1128 dummy.op = cvttsd2si;
1129 for (i = 0; i < 2; i++) {
1130 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1131 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1132 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1133 }
1134 break;
1135
1136 case cvtpd2pi:
1137 case cvtpd2dq:
1138 dummy.op = cvtsd2si;
1139 for (i = 0; i < 2; i++) {
1140 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1141 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1142 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1143 }
1144 break;
1145
1146 case cvtps2pd:
1147 dummy.op = cvtss2sd;
1148 for (i = 0; i < 2; i++) {
1149 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1150 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1151 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1152 }
1153 break;
1154
1155 case cvtpd2ps:
1156 dummy.op = cvtsd2ss;
1157 for (i = 0; i < 2; i++) {
1158 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1159 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1160 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1161 }
1162 default:
1163 break;
1164 }
1165 }
1166
1167 /*
1168 * Store the result value from *info in the destination of the scalar
1169 * SSE instruction specified by *inst. If no result is given but the
1170 * exception is underflow or overflow, supply the default trapped result.
1171 *
1172 * This routine does not work if the instruction specified by *inst
1173 * is not a scalar instruction.
1174 */
1175 void
__fex_st_sse_result(ucontext_t * uap,sseinst_t * inst,enum fex_exception e,fex_info_t * info)1176 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1177 fex_info_t *info)
1178 {
1179 int i = 0;
1180 long long l = 0L;;
1181 float f = 0.0, fscl;
1182 double d = 0.0L, dscl;
1183
1184 /* for compares that write eflags, just set the flags
1185 to indicate "unordered" */
1186 if (inst->op == ucomiss || inst->op == comiss ||
1187 inst->op == ucomisd || inst->op == comisd) {
1188 uap->uc_mcontext.REG_PS |= 0x45;
1189 return;
1190 }
1191
1192 /* if info doesn't specify a result value, try to generate
1193 the default trapped result */
1194 if (info->res.type == fex_nodata) {
1195 /* set scale factors for exponent wrapping */
1196 switch (e) {
1197 case fex_overflow:
1198 fscl = 1.262177448e-29f; /* 2^-96 */
1199 dscl = 6.441148769597133308e-232; /* 2^-768 */
1200 break;
1201
1202 case fex_underflow:
1203 fscl = 7.922816251e+28f; /* 2^96 */
1204 dscl = 1.552518092300708935e+231; /* 2^768 */
1205 break;
1206
1207 default:
1208 (void) __fex_get_sse_op(uap, inst, info);
1209 if (info->res.type == fex_nodata)
1210 return;
1211 goto stuff;
1212 }
1213
1214 /* generate the wrapped result */
1215 if (inst->op == cvtsd2ss) {
1216 info->op1.type = fex_double;
1217 info->op1.val.d = inst->op2->d[0];
1218 info->op2.type = fex_nodata;
1219 info->res.type = fex_float;
1220 info->res.val.f = (float)(fscl * (fscl *
1221 info->op1.val.d));
1222 } else if ((int)inst->op & DOUBLE) {
1223 info->op1.type = fex_double;
1224 info->op1.val.d = inst->op1->d[0];
1225 info->op2.type = fex_double;
1226 info->op2.val.d = inst->op2->d[0];
1227 info->res.type = fex_double;
1228 switch (inst->op) {
1229 case addsd:
1230 info->res.val.d = dscl * (dscl *
1231 info->op1.val.d + dscl * info->op2.val.d);
1232 break;
1233
1234 case subsd:
1235 info->res.val.d = dscl * (dscl *
1236 info->op1.val.d - dscl * info->op2.val.d);
1237 break;
1238
1239 case mulsd:
1240 info->res.val.d = (dscl * info->op1.val.d) *
1241 (dscl * info->op2.val.d);
1242 break;
1243
1244 case divsd:
1245 info->res.val.d = (dscl * info->op1.val.d) /
1246 (info->op2.val.d / dscl);
1247 break;
1248
1249 default:
1250 return;
1251 }
1252 } else {
1253 info->op1.type = fex_float;
1254 info->op1.val.f = inst->op1->f[0];
1255 info->op2.type = fex_float;
1256 info->op2.val.f = inst->op2->f[0];
1257 info->res.type = fex_float;
1258 switch (inst->op) {
1259 case addss:
1260 info->res.val.f = fscl * (fscl *
1261 info->op1.val.f + fscl * info->op2.val.f);
1262 break;
1263
1264 case subss:
1265 info->res.val.f = fscl * (fscl *
1266 info->op1.val.f - fscl * info->op2.val.f);
1267 break;
1268
1269 case mulss:
1270 info->res.val.f = (fscl * info->op1.val.f) *
1271 (fscl * info->op2.val.f);
1272 break;
1273
1274 case divss:
1275 info->res.val.f = (fscl * info->op1.val.f) /
1276 (info->op2.val.f / fscl);
1277 break;
1278
1279 default:
1280 return;
1281 }
1282 }
1283 }
1284
1285 /* put the result in the destination */
1286 stuff:
1287 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1288 || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1289 switch (info->res.type) {
1290 case fex_int:
1291 i = info->res.val.i;
1292 break;
1293
1294 case fex_llong:
1295 i = info->res.val.l;
1296 break;
1297
1298 case fex_float:
1299 i = info->res.val.f;
1300 break;
1301
1302 case fex_double:
1303 i = info->res.val.d;
1304 break;
1305
1306 case fex_ldouble:
1307 i = info->res.val.q;
1308 break;
1309
1310 default:
1311 break;
1312 }
1313 inst->op1->i[0] = i;
1314 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1315 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1316 inst->op == cvtsd2siq) {
1317 switch (info->res.type) {
1318 case fex_int:
1319 l = info->res.val.i;
1320 break;
1321
1322 case fex_llong:
1323 l = info->res.val.l;
1324 break;
1325
1326 case fex_float:
1327 l = info->res.val.f;
1328 break;
1329
1330 case fex_double:
1331 l = info->res.val.d;
1332 break;
1333
1334 case fex_ldouble:
1335 l = info->res.val.q;
1336 break;
1337
1338 default:
1339 break;
1340 }
1341 inst->op1->l[0] = l;
1342 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1343 inst->op == cvtss2sd) {
1344 switch (info->res.type) {
1345 case fex_int:
1346 d = info->res.val.i;
1347 break;
1348
1349 case fex_llong:
1350 d = info->res.val.l;
1351 break;
1352
1353 case fex_float:
1354 d = info->res.val.f;
1355 break;
1356
1357 case fex_double:
1358 d = info->res.val.d;
1359 break;
1360
1361 case fex_ldouble:
1362 d = info->res.val.q;
1363 break;
1364
1365 default:
1366 break;
1367 }
1368 inst->op1->d[0] = d;
1369 } else {
1370 switch (info->res.type) {
1371 case fex_int:
1372 f = info->res.val.i;
1373 break;
1374
1375 case fex_llong:
1376 f = info->res.val.l;
1377 break;
1378
1379 case fex_float:
1380 f = info->res.val.f;
1381 break;
1382
1383 case fex_double:
1384 f = info->res.val.d;
1385 break;
1386
1387 case fex_ldouble:
1388 f = info->res.val.q;
1389 break;
1390
1391 default:
1392 break;
1393 }
1394 inst->op1->f[0] = f;
1395 }
1396 }
1397
1398 /*
1399 * Store the results from a SIMD instruction. For each i, store
1400 * the result value from info[i] in the i-th part of the destination
1401 * of the SIMD SSE instruction specified by *inst. If no result
1402 * is given but the exception indicated by e[i] is underflow or
1403 * overflow, supply the default trapped result.
1404 *
1405 * This routine does not work if the instruction specified by *inst
1406 * is not a SIMD instruction.
1407 */
1408 void
__fex_st_simd_result(ucontext_t * uap,sseinst_t * inst,enum fex_exception * e,fex_info_t * info)1409 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1410 fex_info_t *info)
1411 {
1412 sseinst_t dummy;
1413 int i;
1414
1415 /* store each part */
1416 switch (inst->op) {
1417 case cmpps:
1418 dummy.op = cmpss;
1419 dummy.imm = inst->imm;
1420 for (i = 0; i < 4; i++) {
1421 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1422 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1423 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1424 }
1425 break;
1426
1427 case minps:
1428 dummy.op = minss;
1429 for (i = 0; i < 4; i++) {
1430 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1431 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1432 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1433 }
1434 break;
1435
1436 case maxps:
1437 dummy.op = maxss;
1438 for (i = 0; i < 4; i++) {
1439 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1440 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1441 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1442 }
1443 break;
1444
1445 case addps:
1446 dummy.op = addss;
1447 for (i = 0; i < 4; i++) {
1448 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1449 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1450 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1451 }
1452 break;
1453
1454 case subps:
1455 dummy.op = subss;
1456 for (i = 0; i < 4; i++) {
1457 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1458 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1459 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1460 }
1461 break;
1462
1463 case mulps:
1464 dummy.op = mulss;
1465 for (i = 0; i < 4; i++) {
1466 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1467 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1468 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1469 }
1470 break;
1471
1472 case divps:
1473 dummy.op = divss;
1474 for (i = 0; i < 4; i++) {
1475 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1476 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1477 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1478 }
1479 break;
1480
1481 case sqrtps:
1482 dummy.op = sqrtss;
1483 for (i = 0; i < 4; i++) {
1484 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1485 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1486 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1487 }
1488 break;
1489
1490 case cvtdq2ps:
1491 dummy.op = cvtsi2ss;
1492 for (i = 0; i < 4; i++) {
1493 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1494 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1495 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1496 }
1497 break;
1498
1499 case cvttps2dq:
1500 dummy.op = cvttss2si;
1501 for (i = 0; i < 4; i++) {
1502 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1503 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1504 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1505 }
1506 break;
1507
1508 case cvtps2dq:
1509 dummy.op = cvtss2si;
1510 for (i = 0; i < 4; i++) {
1511 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1512 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1513 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1514 }
1515 break;
1516
1517 case cvtpi2ps:
1518 dummy.op = cvtsi2ss;
1519 for (i = 0; i < 2; i++) {
1520 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1521 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1522 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1523 }
1524 break;
1525
1526 case cvttps2pi:
1527 dummy.op = cvttss2si;
1528 for (i = 0; i < 2; i++) {
1529 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1530 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1531 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1532 }
1533 break;
1534
1535 case cvtps2pi:
1536 dummy.op = cvtss2si;
1537 for (i = 0; i < 2; i++) {
1538 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1539 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1540 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1541 }
1542 break;
1543
1544 case cmppd:
1545 dummy.op = cmpsd;
1546 dummy.imm = inst->imm;
1547 for (i = 0; i < 2; i++) {
1548 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1549 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1550 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1551 }
1552 break;
1553
1554 case minpd:
1555 dummy.op = minsd;
1556 for (i = 0; i < 2; i++) {
1557 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1558 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1559 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1560 }
1561 break;
1562
1563 case maxpd:
1564 dummy.op = maxsd;
1565 for (i = 0; i < 2; i++) {
1566 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1567 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1568 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1569 }
1570 break;
1571
1572 case addpd:
1573 dummy.op = addsd;
1574 for (i = 0; i < 2; i++) {
1575 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 }
1579 break;
1580
1581 case subpd:
1582 dummy.op = subsd;
1583 for (i = 0; i < 2; i++) {
1584 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1585 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1586 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1587 }
1588 break;
1589
1590 case mulpd:
1591 dummy.op = mulsd;
1592 for (i = 0; i < 2; i++) {
1593 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1594 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1595 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1596 }
1597 break;
1598
1599 case divpd:
1600 dummy.op = divsd;
1601 for (i = 0; i < 2; i++) {
1602 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1603 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1604 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1605 }
1606 break;
1607
1608 case sqrtpd:
1609 dummy.op = sqrtsd;
1610 for (i = 0; i < 2; i++) {
1611 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1612 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1613 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1614 }
1615 break;
1616
1617 case cvtpi2pd:
1618 case cvtdq2pd:
1619 dummy.op = cvtsi2sd;
1620 for (i = 0; i < 2; i++) {
1621 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1622 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1623 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1624 }
1625 break;
1626
1627 case cvttpd2pi:
1628 case cvttpd2dq:
1629 dummy.op = cvttsd2si;
1630 for (i = 0; i < 2; i++) {
1631 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1632 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1633 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1634 }
1635 /* for cvttpd2dq, zero the high 64 bits of the destination */
1636 if (inst->op == cvttpd2dq)
1637 inst->op1->l[1] = 0ll;
1638 break;
1639
1640 case cvtpd2pi:
1641 case cvtpd2dq:
1642 dummy.op = cvtsd2si;
1643 for (i = 0; i < 2; i++) {
1644 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1645 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1646 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1647 }
1648 /* for cvtpd2dq, zero the high 64 bits of the destination */
1649 if (inst->op == cvtpd2dq)
1650 inst->op1->l[1] = 0ll;
1651 break;
1652
1653 case cvtps2pd:
1654 dummy.op = cvtss2sd;
1655 for (i = 0; i < 2; i++) {
1656 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1657 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1658 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1659 }
1660 break;
1661
1662 case cvtpd2ps:
1663 dummy.op = cvtsd2ss;
1664 for (i = 0; i < 2; i++) {
1665 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1666 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1667 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1668 }
1669 /* zero the high 64 bits of the destination */
1670 inst->op1->l[1] = 0ll;
1671
1672 default:
1673 break;
1674 }
1675 }
1676