1
2 /*---------------------------------------------------------------*/
3 /*--- begin guest_x86_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_x86.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_x86_defs.h"
46 #include "guest_generic_x87.h"
47
48
49 /* This file contains helper functions for x86 guest code.
50 Calls to these functions are generated by the back end.
51 These calls are of course in the host machine code and
52 this file will be compiled to host machine code, so that
53 all makes sense.
54
55 Only change the signatures of these helper functions very
56 carefully. If you change the signature here, you'll have to change
57 the parameters passed to it in the IR calls constructed by
58 guest-x86/toIR.c.
59
60 The convention used is that all functions called from generated
61 code are named x86g_<something>, and any function whose name lacks
62 that prefix is not called from generated code. Note that some
63 LibVEX_* functions can however be called by VEX's client, but that
64 is not the same as calling them from VEX-generated code.
65 */
66
67
68 /* Set to 1 to get detailed profiling info about use of the flag
69 machinery. */
70 #define PROFILE_EFLAGS 0
71
72
73 /*---------------------------------------------------------------*/
74 /*--- %eflags run-time helpers. ---*/
75 /*---------------------------------------------------------------*/
76
77 static const UChar parity_table[256] = {
78 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
79 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
80 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
81 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
82 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
83 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
84 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
85 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
86 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
87 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
88 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
89 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
90 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
91 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
92 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
93 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
94 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
95 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
96 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
97 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
98 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
99 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
100 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
101 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
102 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
103 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
104 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
105 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
106 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
107 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
108 X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
109 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
110 };
111
112 /* generalised left-shifter */
lshift(Int x,Int n)113 inline static Int lshift ( Int x, Int n )
114 {
115 if (n >= 0)
116 return (UInt)x << n;
117 else
118 return x >> (-n);
119 }
120
121 /* identity on ULong */
idULong(ULong x)122 static inline ULong idULong ( ULong x )
123 {
124 return x;
125 }
126
127
128 #define PREAMBLE(__data_bits) \
129 /* const */ UInt DATA_MASK \
130 = __data_bits==8 ? 0xFF \
131 : (__data_bits==16 ? 0xFFFF \
132 : 0xFFFFFFFF); \
133 /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1); \
134 /* const */ UInt CC_DEP1 = cc_dep1_formal; \
135 /* const */ UInt CC_DEP2 = cc_dep2_formal; \
136 /* const */ UInt CC_NDEP = cc_ndep_formal; \
137 /* Four bogus assignments, which hopefully gcc can */ \
138 /* optimise away, and which stop it complaining about */ \
139 /* unused variables. */ \
140 SIGN_MASK = SIGN_MASK; \
141 DATA_MASK = DATA_MASK; \
142 CC_DEP2 = CC_DEP2; \
143 CC_NDEP = CC_NDEP;
144
145
146 /*-------------------------------------------------------------*/
147
148 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
149 { \
150 PREAMBLE(DATA_BITS); \
151 { UInt cf, pf, af, zf, sf, of; \
152 UInt argL, argR, res; \
153 argL = CC_DEP1; \
154 argR = CC_DEP2; \
155 res = argL + argR; \
156 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
157 pf = parity_table[(UChar)res]; \
158 af = (res ^ argL ^ argR) & 0x10; \
159 zf = ((DATA_UTYPE)res == 0) << 6; \
160 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
161 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
162 12 - DATA_BITS) & X86G_CC_MASK_O; \
163 return cf | pf | af | zf | sf | of; \
164 } \
165 }
166
167 /*-------------------------------------------------------------*/
168
169 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
170 { \
171 PREAMBLE(DATA_BITS); \
172 { UInt cf, pf, af, zf, sf, of; \
173 UInt argL, argR, res; \
174 argL = CC_DEP1; \
175 argR = CC_DEP2; \
176 res = argL - argR; \
177 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
178 pf = parity_table[(UChar)res]; \
179 af = (res ^ argL ^ argR) & 0x10; \
180 zf = ((DATA_UTYPE)res == 0) << 6; \
181 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
182 of = lshift((argL ^ argR) & (argL ^ res), \
183 12 - DATA_BITS) & X86G_CC_MASK_O; \
184 return cf | pf | af | zf | sf | of; \
185 } \
186 }
187
188 /*-------------------------------------------------------------*/
189
190 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
191 { \
192 PREAMBLE(DATA_BITS); \
193 { UInt cf, pf, af, zf, sf, of; \
194 UInt argL, argR, oldC, res; \
195 oldC = CC_NDEP & X86G_CC_MASK_C; \
196 argL = CC_DEP1; \
197 argR = CC_DEP2 ^ oldC; \
198 res = (argL + argR) + oldC; \
199 if (oldC) \
200 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
201 else \
202 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
203 pf = parity_table[(UChar)res]; \
204 af = (res ^ argL ^ argR) & 0x10; \
205 zf = ((DATA_UTYPE)res == 0) << 6; \
206 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
207 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
208 12 - DATA_BITS) & X86G_CC_MASK_O; \
209 return cf | pf | af | zf | sf | of; \
210 } \
211 }
212
213 /*-------------------------------------------------------------*/
214
215 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
216 { \
217 PREAMBLE(DATA_BITS); \
218 { UInt cf, pf, af, zf, sf, of; \
219 UInt argL, argR, oldC, res; \
220 oldC = CC_NDEP & X86G_CC_MASK_C; \
221 argL = CC_DEP1; \
222 argR = CC_DEP2 ^ oldC; \
223 res = (argL - argR) - oldC; \
224 if (oldC) \
225 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
226 else \
227 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
228 pf = parity_table[(UChar)res]; \
229 af = (res ^ argL ^ argR) & 0x10; \
230 zf = ((DATA_UTYPE)res == 0) << 6; \
231 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
232 of = lshift((argL ^ argR) & (argL ^ res), \
233 12 - DATA_BITS) & X86G_CC_MASK_O; \
234 return cf | pf | af | zf | sf | of; \
235 } \
236 }
237
238 /*-------------------------------------------------------------*/
239
240 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
241 { \
242 PREAMBLE(DATA_BITS); \
243 { UInt cf, pf, af, zf, sf, of; \
244 cf = 0; \
245 pf = parity_table[(UChar)CC_DEP1]; \
246 af = 0; \
247 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
248 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
249 of = 0; \
250 return cf | pf | af | zf | sf | of; \
251 } \
252 }
253
254 /*-------------------------------------------------------------*/
255
256 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
257 { \
258 PREAMBLE(DATA_BITS); \
259 { UInt cf, pf, af, zf, sf, of; \
260 UInt argL, argR, res; \
261 res = CC_DEP1; \
262 argL = res - 1; \
263 argR = 1; \
264 cf = CC_NDEP & X86G_CC_MASK_C; \
265 pf = parity_table[(UChar)res]; \
266 af = (res ^ argL ^ argR) & 0x10; \
267 zf = ((DATA_UTYPE)res == 0) << 6; \
268 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
269 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
270 return cf | pf | af | zf | sf | of; \
271 } \
272 }
273
274 /*-------------------------------------------------------------*/
275
276 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
277 { \
278 PREAMBLE(DATA_BITS); \
279 { UInt cf, pf, af, zf, sf, of; \
280 UInt argL, argR, res; \
281 res = CC_DEP1; \
282 argL = res + 1; \
283 argR = 1; \
284 cf = CC_NDEP & X86G_CC_MASK_C; \
285 pf = parity_table[(UChar)res]; \
286 af = (res ^ argL ^ argR) & 0x10; \
287 zf = ((DATA_UTYPE)res == 0) << 6; \
288 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
289 of = ((res & DATA_MASK) \
290 == ((UInt)SIGN_MASK - 1)) << 11; \
291 return cf | pf | af | zf | sf | of; \
292 } \
293 }
294
295 /*-------------------------------------------------------------*/
296
297 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
298 { \
299 PREAMBLE(DATA_BITS); \
300 { UInt cf, pf, af, zf, sf, of; \
301 cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C; \
302 pf = parity_table[(UChar)CC_DEP1]; \
303 af = 0; /* undefined */ \
304 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
305 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
306 /* of is defined if shift count == 1 */ \
307 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
308 & X86G_CC_MASK_O; \
309 return cf | pf | af | zf | sf | of; \
310 } \
311 }
312
313 /*-------------------------------------------------------------*/
314
315 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
316 { \
317 PREAMBLE(DATA_BITS); \
318 { UInt cf, pf, af, zf, sf, of; \
319 cf = CC_DEP2 & 1; \
320 pf = parity_table[(UChar)CC_DEP1]; \
321 af = 0; /* undefined */ \
322 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
323 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
324 /* of is defined if shift count == 1 */ \
325 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
326 & X86G_CC_MASK_O; \
327 return cf | pf | af | zf | sf | of; \
328 } \
329 }
330
331 /*-------------------------------------------------------------*/
332
333 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
334 /* DEP1 = result, NDEP = old flags */
335 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
336 { \
337 PREAMBLE(DATA_BITS); \
338 { UInt fl \
339 = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \
340 | (X86G_CC_MASK_C & CC_DEP1) \
341 | (X86G_CC_MASK_O & (lshift(CC_DEP1, \
342 11-(DATA_BITS-1)) \
343 ^ lshift(CC_DEP1, 11))); \
344 return fl; \
345 } \
346 }
347
348 /*-------------------------------------------------------------*/
349
350 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
351 /* DEP1 = result, NDEP = old flags */
352 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
353 { \
354 PREAMBLE(DATA_BITS); \
355 { UInt fl \
356 = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \
357 | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
358 | (X86G_CC_MASK_O & (lshift(CC_DEP1, \
359 11-(DATA_BITS-1)) \
360 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
361 return fl; \
362 } \
363 }
364
365 /*-------------------------------------------------------------*/
366
367 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
368 DATA_U2TYPE, NARROWto2U) \
369 { \
370 PREAMBLE(DATA_BITS); \
371 { UInt cf, pf, af, zf, sf, of; \
372 DATA_UTYPE hi; \
373 DATA_UTYPE lo \
374 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
375 * ((DATA_UTYPE)CC_DEP2) ); \
376 DATA_U2TYPE rr \
377 = NARROWto2U( \
378 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
379 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
380 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
381 cf = (hi != 0); \
382 pf = parity_table[(UChar)lo]; \
383 af = 0; /* undefined */ \
384 zf = (lo == 0) << 6; \
385 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
386 of = cf << 11; \
387 return cf | pf | af | zf | sf | of; \
388 } \
389 }
390
391 /*-------------------------------------------------------------*/
392
393 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
394 DATA_S2TYPE, NARROWto2S) \
395 { \
396 PREAMBLE(DATA_BITS); \
397 { UInt cf, pf, af, zf, sf, of; \
398 DATA_STYPE hi; \
399 DATA_STYPE lo \
400 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
401 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
402 DATA_S2TYPE rr \
403 = NARROWto2S( \
404 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
405 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
406 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
407 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
408 pf = parity_table[(UChar)lo]; \
409 af = 0; /* undefined */ \
410 zf = (lo == 0) << 6; \
411 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
412 of = cf << 11; \
413 return cf | pf | af | zf | sf | of; \
414 } \
415 }
416
417
418 #if PROFILE_EFLAGS
419
420 static Bool initted = False;
421
422 /* C flag, fast route */
423 static UInt tabc_fast[X86G_CC_OP_NUMBER];
424 /* C flag, slow route */
425 static UInt tabc_slow[X86G_CC_OP_NUMBER];
426 /* table for calculate_cond */
427 static UInt tab_cond[X86G_CC_OP_NUMBER][16];
428 /* total entry counts for calc_all, calc_c, calc_cond. */
429 static UInt n_calc_all = 0;
430 static UInt n_calc_c = 0;
431 static UInt n_calc_cond = 0;
432
433 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
434
435
showCounts(void)436 static void showCounts ( void )
437 {
438 Int op, co;
439 HChar ch;
440 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
441 n_calc_all, n_calc_cond, n_calc_c);
442
443 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
444 " S NS P NP L NL LE NLE\n");
445 vex_printf(" -----------------------------------------------------"
446 "----------------------------------------\n");
447 for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
448
449 ch = ' ';
450 if (op > 0 && (op-1) % 3 == 0)
451 ch = 'B';
452 if (op > 0 && (op-1) % 3 == 1)
453 ch = 'W';
454 if (op > 0 && (op-1) % 3 == 2)
455 ch = 'L';
456
457 vex_printf("%2d%c: ", op, ch);
458 vex_printf("%6u ", tabc_slow[op]);
459 vex_printf("%6u ", tabc_fast[op]);
460 for (co = 0; co < 16; co++) {
461 Int n = tab_cond[op][co];
462 if (n >= 1000) {
463 vex_printf(" %3dK", n / 1000);
464 } else
465 if (n >= 0) {
466 vex_printf(" %3d ", n );
467 } else {
468 vex_printf(" ");
469 }
470 }
471 vex_printf("\n");
472 }
473 vex_printf("\n");
474 }
475
initCounts(void)476 static void initCounts ( void )
477 {
478 Int op, co;
479 initted = True;
480 for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
481 tabc_fast[op] = tabc_slow[op] = 0;
482 for (co = 0; co < 16; co++)
483 tab_cond[op][co] = 0;
484 }
485 }
486
487 #endif /* PROFILE_EFLAGS */
488
489
490 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
491 /* Calculate all the 6 flags from the supplied thunk parameters.
492 Worker function, not directly called from generated code. */
493 static
x86g_calculate_eflags_all_WRK(UInt cc_op,UInt cc_dep1_formal,UInt cc_dep2_formal,UInt cc_ndep_formal)494 UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
495 UInt cc_dep1_formal,
496 UInt cc_dep2_formal,
497 UInt cc_ndep_formal )
498 {
499 switch (cc_op) {
500 case X86G_CC_OP_COPY:
501 return cc_dep1_formal
502 & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
503 | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);
504
505 case X86G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
506 case X86G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
507 case X86G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
508
509 case X86G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
510 case X86G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
511 case X86G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
512
513 case X86G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
514 case X86G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
515 case X86G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
516
517 case X86G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
518 case X86G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
519 case X86G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
520
521 case X86G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
522 case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
523 case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
524
525 case X86G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
526 case X86G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
527 case X86G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
528
529 case X86G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
530 case X86G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
531 case X86G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
532
533 case X86G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
534 case X86G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
535 case X86G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
536
537 case X86G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
538 case X86G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
539 case X86G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
540
541 case X86G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
542 case X86G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
543 case X86G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
544
545 case X86G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
546 case X86G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
547 case X86G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
548
549 case X86G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
550 UShort, toUShort );
551 case X86G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
552 UInt, toUInt );
553 case X86G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
554 ULong, idULong );
555
556 case X86G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
557 Short, toUShort );
558 case X86G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
559 Int, toUInt );
560 case X86G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
561 Long, idULong );
562
563 default:
564 /* shouldn't really make these calls from generated code */
565 vex_printf("x86g_calculate_eflags_all_WRK(X86)"
566 "( %u, 0x%x, 0x%x, 0x%x )\n",
567 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
568 vpanic("x86g_calculate_eflags_all_WRK(X86)");
569 }
570 }
571
572
573 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
574 /* Calculate all the 6 flags from the supplied thunk parameters. */
x86g_calculate_eflags_all(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)575 UInt x86g_calculate_eflags_all ( UInt cc_op,
576 UInt cc_dep1,
577 UInt cc_dep2,
578 UInt cc_ndep )
579 {
580 # if PROFILE_EFLAGS
581 if (!initted) initCounts();
582 n_calc_all++;
583 if (SHOW_COUNTS_NOW) showCounts();
584 # endif
585 return
586 x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
587 }
588
589
590 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
591 /* Calculate just the carry flag from the supplied thunk parameters. */
592 VEX_REGPARM(3)
x86g_calculate_eflags_c(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)593 UInt x86g_calculate_eflags_c ( UInt cc_op,
594 UInt cc_dep1,
595 UInt cc_dep2,
596 UInt cc_ndep )
597 {
598 # if PROFILE_EFLAGS
599 if (!initted) initCounts();
600 n_calc_c++;
601 tabc_fast[cc_op]++;
602 if (SHOW_COUNTS_NOW) showCounts();
603 # endif
604
605 /* Fast-case some common ones. */
606 switch (cc_op) {
607 case X86G_CC_OP_LOGICL:
608 case X86G_CC_OP_LOGICW:
609 case X86G_CC_OP_LOGICB:
610 return 0;
611 case X86G_CC_OP_SUBL:
612 return ((UInt)cc_dep1) < ((UInt)cc_dep2)
613 ? X86G_CC_MASK_C : 0;
614 case X86G_CC_OP_SUBW:
615 return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
616 ? X86G_CC_MASK_C : 0;
617 case X86G_CC_OP_SUBB:
618 return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
619 ? X86G_CC_MASK_C : 0;
620 case X86G_CC_OP_INCL:
621 case X86G_CC_OP_DECL:
622 return cc_ndep & X86G_CC_MASK_C;
623 default:
624 break;
625 }
626
627 # if PROFILE_EFLAGS
628 tabc_fast[cc_op]--;
629 tabc_slow[cc_op]++;
630 # endif
631
632 return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
633 & X86G_CC_MASK_C;
634 }
635
636
637 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
638 /* returns 1 or 0 */
x86g_calculate_condition(UInt cond,UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)639 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
640 UInt cc_op,
641 UInt cc_dep1,
642 UInt cc_dep2,
643 UInt cc_ndep )
644 {
645 UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
646 cc_dep2, cc_ndep);
647 UInt of,sf,zf,cf,pf;
648 UInt inv = cond & 1;
649
650 # if PROFILE_EFLAGS
651 if (!initted) initCounts();
652 tab_cond[cc_op][cond]++;
653 n_calc_cond++;
654 if (SHOW_COUNTS_NOW) showCounts();
655 # endif
656
657 switch (cond) {
658 case X86CondNO:
659 case X86CondO: /* OF == 1 */
660 of = eflags >> X86G_CC_SHIFT_O;
661 return 1 & (inv ^ of);
662
663 case X86CondNZ:
664 case X86CondZ: /* ZF == 1 */
665 zf = eflags >> X86G_CC_SHIFT_Z;
666 return 1 & (inv ^ zf);
667
668 case X86CondNB:
669 case X86CondB: /* CF == 1 */
670 cf = eflags >> X86G_CC_SHIFT_C;
671 return 1 & (inv ^ cf);
672 break;
673
674 case X86CondNBE:
675 case X86CondBE: /* (CF or ZF) == 1 */
676 cf = eflags >> X86G_CC_SHIFT_C;
677 zf = eflags >> X86G_CC_SHIFT_Z;
678 return 1 & (inv ^ (cf | zf));
679 break;
680
681 case X86CondNS:
682 case X86CondS: /* SF == 1 */
683 sf = eflags >> X86G_CC_SHIFT_S;
684 return 1 & (inv ^ sf);
685
686 case X86CondNP:
687 case X86CondP: /* PF == 1 */
688 pf = eflags >> X86G_CC_SHIFT_P;
689 return 1 & (inv ^ pf);
690
691 case X86CondNL:
692 case X86CondL: /* (SF xor OF) == 1 */
693 sf = eflags >> X86G_CC_SHIFT_S;
694 of = eflags >> X86G_CC_SHIFT_O;
695 return 1 & (inv ^ (sf ^ of));
696 break;
697
698 case X86CondNLE:
699 case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
700 sf = eflags >> X86G_CC_SHIFT_S;
701 of = eflags >> X86G_CC_SHIFT_O;
702 zf = eflags >> X86G_CC_SHIFT_Z;
703 return 1 & (inv ^ ((sf ^ of) | zf));
704 break;
705
706 default:
707 /* shouldn't really make these calls from generated code */
708 vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
709 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
710 vpanic("x86g_calculate_condition");
711 }
712 }
713
714
715 /* VISIBLE TO LIBVEX CLIENT */
LibVEX_GuestX86_get_eflags(const VexGuestX86State * vex_state)716 UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
717 {
718 UInt eflags = x86g_calculate_eflags_all_WRK(
719 vex_state->guest_CC_OP,
720 vex_state->guest_CC_DEP1,
721 vex_state->guest_CC_DEP2,
722 vex_state->guest_CC_NDEP
723 );
724 UInt dflag = vex_state->guest_DFLAG;
725 vassert(dflag == 1 || dflag == 0xFFFFFFFF);
726 if (dflag == 0xFFFFFFFF)
727 eflags |= X86G_CC_MASK_D;
728 if (vex_state->guest_IDFLAG == 1)
729 eflags |= X86G_CC_MASK_ID;
730 if (vex_state->guest_ACFLAG == 1)
731 eflags |= X86G_CC_MASK_AC;
732
733 return eflags;
734 }
735
736 /* VISIBLE TO LIBVEX CLIENT */
737 void
LibVEX_GuestX86_put_eflags(UInt eflags,VexGuestX86State * vex_state)738 LibVEX_GuestX86_put_eflags ( UInt eflags,
739 /*MOD*/VexGuestX86State* vex_state )
740 {
741 /* D flag */
742 if (eflags & X86G_CC_MASK_D) {
743 vex_state->guest_DFLAG = 0xFFFFFFFF;
744 eflags &= ~X86G_CC_MASK_D;
745 }
746 else
747 vex_state->guest_DFLAG = 1;
748
749 /* ID flag */
750 if (eflags & X86G_CC_MASK_ID) {
751 vex_state->guest_IDFLAG = 1;
752 eflags &= ~X86G_CC_MASK_ID;
753 }
754 else
755 vex_state->guest_IDFLAG = 0;
756
757 /* AC flag */
758 if (eflags & X86G_CC_MASK_AC) {
759 vex_state->guest_ACFLAG = 1;
760 eflags &= ~X86G_CC_MASK_AC;
761 }
762 else
763 vex_state->guest_ACFLAG = 0;
764
765 UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
766 X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
767 vex_state->guest_CC_OP = X86G_CC_OP_COPY;
768 vex_state->guest_CC_DEP1 = eflags & cc_mask;
769 vex_state->guest_CC_DEP2 = 0;
770 vex_state->guest_CC_NDEP = 0;
771 }
772
773 /* VISIBLE TO LIBVEX CLIENT */
774 void
LibVEX_GuestX86_put_eflag_c(UInt new_carry_flag,VexGuestX86State * vex_state)775 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
776 /*MOD*/VexGuestX86State* vex_state )
777 {
778 UInt oszacp = x86g_calculate_eflags_all_WRK(
779 vex_state->guest_CC_OP,
780 vex_state->guest_CC_DEP1,
781 vex_state->guest_CC_DEP2,
782 vex_state->guest_CC_NDEP
783 );
784 if (new_carry_flag & 1) {
785 oszacp |= X86G_CC_MASK_C;
786 } else {
787 oszacp &= ~X86G_CC_MASK_C;
788 }
789 vex_state->guest_CC_OP = X86G_CC_OP_COPY;
790 vex_state->guest_CC_DEP1 = oszacp;
791 vex_state->guest_CC_DEP2 = 0;
792 vex_state->guest_CC_NDEP = 0;
793 }
794
795
796 /*---------------------------------------------------------------*/
797 /*--- %eflags translation-time function specialisers. ---*/
798 /*--- These help iropt specialise calls the above run-time ---*/
799 /*--- %eflags functions. ---*/
800 /*---------------------------------------------------------------*/
801
802 /* Used by the optimiser to try specialisations. Returns an
803 equivalent expression, or NULL if none. */
804
isU32(IRExpr * e,UInt n)805 static inline Bool isU32 ( IRExpr* e, UInt n )
806 {
807 return
808 toBool( e->tag == Iex_Const
809 && e->Iex.Const.con->tag == Ico_U32
810 && e->Iex.Const.con->Ico.U32 == n );
811 }
812
guest_x86_spechelper(const HChar * function_name,IRExpr ** args,IRStmt ** precedingStmts,Int n_precedingStmts)813 IRExpr* guest_x86_spechelper ( const HChar* function_name,
814 IRExpr** args,
815 IRStmt** precedingStmts,
816 Int n_precedingStmts )
817 {
818 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
819 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
820 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
821 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
822
823 Int i, arity = 0;
824 for (i = 0; args[i]; i++)
825 arity++;
826 # if 0
827 vex_printf("spec request:\n");
828 vex_printf(" %s ", function_name);
829 for (i = 0; i < arity; i++) {
830 vex_printf(" ");
831 ppIRExpr(args[i]);
832 }
833 vex_printf("\n");
834 # endif
835
836 /* --------- specialising "x86g_calculate_condition" --------- */
837
838 if (vex_streq(function_name, "x86g_calculate_condition")) {
839 /* specialise calls to above "calculate condition" function */
840 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
841 vassert(arity == 5);
842 cond = args[0];
843 cc_op = args[1];
844 cc_dep1 = args[2];
845 cc_dep2 = args[3];
846
847 /*---------------- ADDL ----------------*/
848
849 if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
850 /* long add, then Z --> test (dst+src == 0) */
851 return unop(Iop_1Uto32,
852 binop(Iop_CmpEQ32,
853 binop(Iop_Add32, cc_dep1, cc_dep2),
854 mkU32(0)));
855 }
856
857 /*---------------- SUBL ----------------*/
858
859 /* 4, 5 */
860 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
861 /* long sub/cmp, then Z --> test dst==src */
862 return unop(Iop_1Uto32,
863 binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
864 }
865 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
866 /* long sub/cmp, then NZ --> test dst!=src */
867 return unop(Iop_1Uto32,
868 binop(Iop_CmpNE32, cc_dep1, cc_dep2));
869 }
870
871 /* 12, 13 */
872 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
873 /* long sub/cmp, then L (signed less than)
874 --> test dst <s src */
875 return unop(Iop_1Uto32,
876 binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
877 }
878 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
879 /* long sub/cmp, then NL (signed greater than or equal)
880 --> test !(dst <s src) */
881 return binop(Iop_Xor32,
882 unop(Iop_1Uto32,
883 binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
884 mkU32(1));
885 }
886
887 /* 14, 15 */
888 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
889 /* long sub/cmp, then LE (signed less than or equal)
890 --> test dst <=s src */
891 return unop(Iop_1Uto32,
892 binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
893 }
894 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
895 /* long sub/cmp, then NLE (signed not less than or equal)
896 --> test dst >s src
897 --> test !(dst <=s src) */
898 return binop(Iop_Xor32,
899 unop(Iop_1Uto32,
900 binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
901 mkU32(1));
902 }
903
904 /* 6, 7 */
905 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
906 /* long sub/cmp, then BE (unsigned less than or equal)
907 --> test dst <=u src */
908 return unop(Iop_1Uto32,
909 binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
910 }
911 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
912 /* long sub/cmp, then BE (unsigned greater than)
913 --> test !(dst <=u src) */
914 return binop(Iop_Xor32,
915 unop(Iop_1Uto32,
916 binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
917 mkU32(1));
918 }
919
920 /* 2, 3 */
921 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
922 /* long sub/cmp, then B (unsigned less than)
923 --> test dst <u src */
924 return unop(Iop_1Uto32,
925 binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
926 }
927 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
928 /* long sub/cmp, then NB (unsigned greater than or equal)
929 --> test !(dst <u src) */
930 return binop(Iop_Xor32,
931 unop(Iop_1Uto32,
932 binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
933 mkU32(1));
934 }
935
936 /* 8, 9 */
937 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)
938 && isU32(cc_dep2, 0)) {
939 /* long sub/cmp of zero, then S --> test (dst-0 <s 0)
940 --> test dst <s 0
941 --> (UInt)dst[31] */
942 return binop(Iop_And32,
943 binop(Iop_Shr32,cc_dep1,mkU8(31)),
944 mkU32(1));
945 }
946 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)
947 && isU32(cc_dep2, 0)) {
948 /* long sub/cmp of zero, then NS --> test !(dst-0 <s 0)
949 --> test !(dst <s 0)
950 --> (UInt) !dst[31] */
951 return binop(Iop_Xor32,
952 binop(Iop_And32,
953 binop(Iop_Shr32,cc_dep1,mkU8(31)),
954 mkU32(1)),
955 mkU32(1));
956 }
957
958 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
959 /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
960 return unop(Iop_1Uto32,
961 binop(Iop_CmpLT32S,
962 binop(Iop_Sub32, cc_dep1, cc_dep2),
963 mkU32(0)));
964 }
965 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
966 /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
967 return binop(Iop_Xor32,
968 unop(Iop_1Uto32,
969 binop(Iop_CmpLT32S,
970 binop(Iop_Sub32, cc_dep1, cc_dep2),
971 mkU32(0))),
972 mkU32(1));
973 }
974
975 /*---------------- SUBW ----------------*/
976
977 if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
978 /* word sub/cmp, then Z --> test dst==src */
979 return unop(Iop_1Uto32,
980 binop(Iop_CmpEQ16,
981 unop(Iop_32to16,cc_dep1),
982 unop(Iop_32to16,cc_dep2)));
983 }
984 if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
985 /* word sub/cmp, then NZ --> test dst!=src */
986 return unop(Iop_1Uto32,
987 binop(Iop_CmpNE16,
988 unop(Iop_32to16,cc_dep1),
989 unop(Iop_32to16,cc_dep2)));
990 }
991
992 /*---------------- SUBB ----------------*/
993
994 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
995 /* byte sub/cmp, then Z --> test dst==src */
996 return unop(Iop_1Uto32,
997 binop(Iop_CmpEQ8,
998 unop(Iop_32to8,cc_dep1),
999 unop(Iop_32to8,cc_dep2)));
1000 }
1001 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
1002 /* byte sub/cmp, then NZ --> test dst!=src */
1003 return unop(Iop_1Uto32,
1004 binop(Iop_CmpNE8,
1005 unop(Iop_32to8,cc_dep1),
1006 unop(Iop_32to8,cc_dep2)));
1007 }
1008
1009 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
1010 /* byte sub/cmp, then NBE (unsigned greater than)
1011 --> test src <u dst */
1012 /* Note, args are opposite way round from the usual */
1013 return unop(Iop_1Uto32,
1014 binop(Iop_CmpLT32U,
1015 binop(Iop_And32,cc_dep2,mkU32(0xFF)),
1016 binop(Iop_And32,cc_dep1,mkU32(0xFF))));
1017 }
1018
1019 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
1020 && isU32(cc_dep2, 0)) {
1021 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1022 --> test dst <s 0
1023 --> (UInt)dst[7]
1024 This is yet another scheme by which gcc figures out if the
1025 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1026 /* Note: isU32(cc_dep2, 0) is correct, even though this is
1027 for an 8-bit comparison, since the args to the helper
1028 function are always U32s. */
1029 return binop(Iop_And32,
1030 binop(Iop_Shr32,cc_dep1,mkU8(7)),
1031 mkU32(1));
1032 }
1033 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
1034 && isU32(cc_dep2, 0)) {
1035 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1036 --> test !(dst <s 0)
1037 --> (UInt) !dst[7]
1038 */
1039 return binop(Iop_Xor32,
1040 binop(Iop_And32,
1041 binop(Iop_Shr32,cc_dep1,mkU8(7)),
1042 mkU32(1)),
1043 mkU32(1));
1044 }
1045
1046 /*---------------- LOGICL ----------------*/
1047
1048 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
1049 /* long and/or/xor, then Z --> test dst==0 */
1050 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1051 }
1052 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
1053 /* long and/or/xor, then NZ --> test dst!=0 */
1054 return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
1055 }
1056
1057 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
1058 /* long and/or/xor, then LE
1059 This is pretty subtle. LOGIC sets SF and ZF according to the
1060 result and makes OF be zero. LE computes (SZ ^ OF) | ZF, but
1061 OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
1062 the result is <=signed 0. Hence ...
1063 */
1064 return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
1065 }
1066
1067 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
1068 /* long and/or/xor, then BE
1069 LOGIC sets ZF according to the result and makes CF be zero.
1070 BE computes (CF | ZF), but CF is zero, so this reduces ZF
1071 -- which will be 1 iff the result is zero. Hence ...
1072 */
1073 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1074 }
1075
1076 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
1077 /* see comment below for (LOGICB, CondS) */
1078 /* long and/or/xor, then S --> (UInt)result[31] */
1079 return binop(Iop_And32,
1080 binop(Iop_Shr32,cc_dep1,mkU8(31)),
1081 mkU32(1));
1082 }
1083 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
1084 /* see comment below for (LOGICB, CondNS) */
1085 /* long and/or/xor, then S --> (UInt) ~ result[31] */
1086 return binop(Iop_Xor32,
1087 binop(Iop_And32,
1088 binop(Iop_Shr32,cc_dep1,mkU8(31)),
1089 mkU32(1)),
1090 mkU32(1));
1091 }
1092
1093 /*---------------- LOGICW ----------------*/
1094
1095 if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
1096 /* word and/or/xor, then Z --> test dst==0 */
1097 return unop(Iop_1Uto32,
1098 binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
1099 mkU32(0)));
1100 }
1101
1102 if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
1103 /* see comment below for (LOGICB, CondS) */
1104 /* word and/or/xor, then S --> (UInt)result[15] */
1105 return binop(Iop_And32,
1106 binop(Iop_Shr32,cc_dep1,mkU8(15)),
1107 mkU32(1));
1108 }
1109
1110 /*---------------- LOGICB ----------------*/
1111
1112 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
1113 /* byte and/or/xor, then Z --> test dst==0 */
1114 return unop(Iop_1Uto32,
1115 binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
1116 mkU32(0)));
1117 }
1118 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
1119 /* byte and/or/xor, then Z --> test dst!=0 */
1120 /* b9ac9: 84 c0 test %al,%al
1121 b9acb: 75 0d jne b9ada */
1122 return unop(Iop_1Uto32,
1123 binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
1124 mkU32(0)));
1125 }
1126
1127 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
1128 /* this is an idiom gcc sometimes uses to find out if the top
1129 bit of a byte register is set: eg testb %al,%al; js ..
1130 Since it just depends on the top bit of the byte, extract
1131 that bit and explicitly get rid of all the rest. This
1132 helps memcheck avoid false positives in the case where any
1133 of the other bits in the byte are undefined. */
1134 /* byte and/or/xor, then S --> (UInt)result[7] */
1135 return binop(Iop_And32,
1136 binop(Iop_Shr32,cc_dep1,mkU8(7)),
1137 mkU32(1));
1138 }
1139 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
1140 /* ditto, for negation-of-S. */
1141 /* byte and/or/xor, then S --> (UInt) ~ result[7] */
1142 return binop(Iop_Xor32,
1143 binop(Iop_And32,
1144 binop(Iop_Shr32,cc_dep1,mkU8(7)),
1145 mkU32(1)),
1146 mkU32(1));
1147 }
1148
1149 /*---------------- DECL ----------------*/
1150
1151 if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
1152 /* dec L, then Z --> test dst == 0 */
1153 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1154 }
1155
1156 if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
1157 /* dec L, then S --> compare DST <s 0 */
1158 return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
1159 }
1160
1161 /*---------------- DECW ----------------*/
1162
1163 if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
1164 /* dec W, then Z --> test dst == 0 */
1165 return unop(Iop_1Uto32,
1166 binop(Iop_CmpEQ32,
1167 binop(Iop_Shl32,cc_dep1,mkU8(16)),
1168 mkU32(0)));
1169 }
1170
1171 /*---------------- INCW ----------------*/
1172
1173 if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
1174 /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
1175 /* inc W, then Z --> test dst == 0 */
1176 return unop(Iop_1Uto32,
1177 binop(Iop_CmpEQ32,
1178 binop(Iop_Shl32,cc_dep1,mkU8(16)),
1179 mkU32(0)));
1180 }
1181
1182 /*---------------- SHRL ----------------*/
1183
1184 if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
1185 /* SHRL, then Z --> test dep1 == 0 */
1186 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1187 }
1188
1189 /*---------------- COPY ----------------*/
1190 /* This can happen, as a result of x87 FP compares: "fcom ... ;
1191 fnstsw %ax ; sahf ; jbe" for example. */
1192
1193 if (isU32(cc_op, X86G_CC_OP_COPY) &&
1194 (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
1195 /* COPY, then BE --> extract C and Z from dep1, and test
1196 (C or Z) == 1. */
1197 /* COPY, then NBE --> extract C and Z from dep1, and test
1198 (C or Z) == 0. */
1199 UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
1200 return
1201 unop(
1202 Iop_1Uto32,
1203 binop(
1204 Iop_CmpEQ32,
1205 binop(
1206 Iop_And32,
1207 binop(
1208 Iop_Or32,
1209 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1210 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
1211 ),
1212 mkU32(1)
1213 ),
1214 mkU32(nnn)
1215 )
1216 );
1217 }
1218
1219 if (isU32(cc_op, X86G_CC_OP_COPY)
1220 && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
1221 /* COPY, then B --> extract C from dep1, and test (C == 1). */
1222 /* COPY, then NB --> extract C from dep1, and test (C == 0). */
1223 UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
1224 return
1225 unop(
1226 Iop_1Uto32,
1227 binop(
1228 Iop_CmpEQ32,
1229 binop(
1230 Iop_And32,
1231 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1232 mkU32(1)
1233 ),
1234 mkU32(nnn)
1235 )
1236 );
1237 }
1238
1239 if (isU32(cc_op, X86G_CC_OP_COPY)
1240 && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
1241 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1242 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1243 UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
1244 return
1245 unop(
1246 Iop_1Uto32,
1247 binop(
1248 Iop_CmpEQ32,
1249 binop(
1250 Iop_And32,
1251 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
1252 mkU32(1)
1253 ),
1254 mkU32(nnn)
1255 )
1256 );
1257 }
1258
1259 if (isU32(cc_op, X86G_CC_OP_COPY)
1260 && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
1261 /* COPY, then P --> extract P from dep1, and test (P == 1). */
1262 /* COPY, then NP --> extract P from dep1, and test (P == 0). */
1263 UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
1264 return
1265 unop(
1266 Iop_1Uto32,
1267 binop(
1268 Iop_CmpEQ32,
1269 binop(
1270 Iop_And32,
1271 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
1272 mkU32(1)
1273 ),
1274 mkU32(nnn)
1275 )
1276 );
1277 }
1278
1279 return NULL;
1280 }
1281
1282 /* --------- specialising "x86g_calculate_eflags_c" --------- */
1283
1284 if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
1285 /* specialise calls to above "calculate_eflags_c" function */
1286 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1287 vassert(arity == 4);
1288 cc_op = args[0];
1289 cc_dep1 = args[1];
1290 cc_dep2 = args[2];
1291 cc_ndep = args[3];
1292
1293 if (isU32(cc_op, X86G_CC_OP_SUBL)) {
1294 /* C after sub denotes unsigned less than */
1295 return unop(Iop_1Uto32,
1296 binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
1297 }
1298 if (isU32(cc_op, X86G_CC_OP_SUBB)) {
1299 /* C after sub denotes unsigned less than */
1300 return unop(Iop_1Uto32,
1301 binop(Iop_CmpLT32U,
1302 binop(Iop_And32,cc_dep1,mkU32(0xFF)),
1303 binop(Iop_And32,cc_dep2,mkU32(0xFF))));
1304 }
1305 if (isU32(cc_op, X86G_CC_OP_LOGICL)
1306 || isU32(cc_op, X86G_CC_OP_LOGICW)
1307 || isU32(cc_op, X86G_CC_OP_LOGICB)) {
1308 /* cflag after logic is zero */
1309 return mkU32(0);
1310 }
1311 if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
1312 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1313 return cc_ndep;
1314 }
1315 if (isU32(cc_op, X86G_CC_OP_COPY)) {
1316 /* cflag after COPY is stored in DEP1. */
1317 return
1318 binop(
1319 Iop_And32,
1320 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1321 mkU32(1)
1322 );
1323 }
1324 if (isU32(cc_op, X86G_CC_OP_ADDL)) {
1325 /* C after add denotes sum <u either arg */
1326 return unop(Iop_1Uto32,
1327 binop(Iop_CmpLT32U,
1328 binop(Iop_Add32, cc_dep1, cc_dep2),
1329 cc_dep1));
1330 }
1331 // ATC, requires verification, no test case known
1332 //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
1333 // /* C after signed widening multiply denotes the case where
1334 // the top half of the result isn't simply the sign extension
1335 // of the bottom half (iow the result doesn't fit completely
1336 // in the bottom half). Hence:
1337 // C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
1338 // where 'x' denotes signed widening multiply.*/
1339 // return
1340 // unop(Iop_1Uto32,
1341 // binop(Iop_CmpNE32,
1342 // unop(Iop_64HIto32,
1343 // binop(Iop_MullS32, cc_dep1, cc_dep2)),
1344 // binop(Iop_Sar32,
1345 // binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
1346 //}
1347 # if 0
1348 if (cc_op->tag == Iex_Const) {
1349 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1350 }
1351 # endif
1352
1353 return NULL;
1354 }
1355
1356 /* --------- specialising "x86g_calculate_eflags_all" --------- */
1357
1358 if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
1359 /* specialise calls to above "calculate_eflags_all" function */
1360 IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
1361 vassert(arity == 4);
1362 cc_op = args[0];
1363 cc_dep1 = args[1];
1364 /* cc_dep2 = args[2]; */
1365 /* cc_ndep = args[3]; */
1366
1367 if (isU32(cc_op, X86G_CC_OP_COPY)) {
1368 /* eflags after COPY are stored in DEP1. */
1369 return
1370 binop(
1371 Iop_And32,
1372 cc_dep1,
1373 mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
1374 | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
1375 );
1376 }
1377 return NULL;
1378 }
1379
1380 # undef unop
1381 # undef binop
1382 # undef mkU32
1383 # undef mkU8
1384
1385 return NULL;
1386 }
1387
1388
1389 /*---------------------------------------------------------------*/
1390 /*--- Supporting functions for x87 FPU activities. ---*/
1391 /*---------------------------------------------------------------*/
1392
host_is_little_endian(void)1393 static inline Bool host_is_little_endian ( void )
1394 {
1395 UInt x = 0x76543210;
1396 UChar* p = (UChar*)(&x);
1397 return toBool(*p == 0x10);
1398 }
1399
1400 /* 80 and 64-bit floating point formats:
1401
1402 80-bit:
1403
1404 S 0 0-------0 zero
1405 S 0 0X------X denormals
1406 S 1-7FFE 1X------X normals (all normals have leading 1)
1407 S 7FFF 10------0 infinity
1408 S 7FFF 10X-----X snan
1409 S 7FFF 11X-----X qnan
1410
1411 S is the sign bit. For runs X----X, at least one of the Xs must be
1412 nonzero. Exponent is 15 bits, fractional part is 63 bits, and
1413 there is an explicitly represented leading 1, and a sign bit,
1414 giving 80 in total.
1415
1416 64-bit avoids the confusion of an explicitly represented leading 1
1417 and so is simpler:
1418
1419 S 0 0------0 zero
1420 S 0 X------X denormals
1421 S 1-7FE any normals
1422 S 7FF 0------0 infinity
1423 S 7FF 0X-----X snan
1424 S 7FF 1X-----X qnan
1425
1426 Exponent is 11 bits, fractional part is 52 bits, and there is a
1427 sign bit, giving 64 in total.
1428 */
1429
1430 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1431 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_FXAM(UInt tag,ULong dbl)1432 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
1433 {
1434 Bool mantissaIsZero;
1435 Int bexp;
1436 UChar sign;
1437 UChar* f64;
1438
1439 vassert(host_is_little_endian());
1440
1441 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1442
1443 f64 = (UChar*)(&dbl);
1444 sign = toUChar( (f64[7] >> 7) & 1 );
1445
1446 /* First off, if the tag indicates the register was empty,
1447 return 1,0,sign,1 */
1448 if (tag == 0) {
1449 /* vex_printf("Empty\n"); */
1450 return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
1451 | X86G_FC_MASK_C0;
1452 }
1453
1454 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1455 bexp &= 0x7FF;
1456
1457 mantissaIsZero
1458 = toBool(
1459 (f64[6] & 0x0F) == 0
1460 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1461 );
1462
1463 /* If both exponent and mantissa are zero, the value is zero.
1464 Return 1,0,sign,0. */
1465 if (bexp == 0 && mantissaIsZero) {
1466 /* vex_printf("Zero\n"); */
1467 return X86G_FC_MASK_C3 | 0
1468 | (sign << X86G_FC_SHIFT_C1) | 0;
1469 }
1470
1471 /* If exponent is zero but mantissa isn't, it's a denormal.
1472 Return 1,1,sign,0. */
1473 if (bexp == 0 && !mantissaIsZero) {
1474 /* vex_printf("Denormal\n"); */
1475 return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
1476 | (sign << X86G_FC_SHIFT_C1) | 0;
1477 }
1478
1479 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1480 Return 0,1,sign,1. */
1481 if (bexp == 0x7FF && mantissaIsZero) {
1482 /* vex_printf("Inf\n"); */
1483 return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
1484 | X86G_FC_MASK_C0;
1485 }
1486
1487 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1488 Return 0,0,sign,1. */
1489 if (bexp == 0x7FF && !mantissaIsZero) {
1490 /* vex_printf("NaN\n"); */
1491 return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
1492 }
1493
1494 /* Uh, ok, we give up. It must be a normal finite number.
1495 Return 0,1,sign,0.
1496 */
1497 /* vex_printf("normal\n"); */
1498 return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
1499 }
1500
1501
1502 /* CALLED FROM GENERATED CODE */
1503 /* DIRTY HELPER (reads guest memory) */
x86g_dirtyhelper_loadF80le(Addr addrU)1504 ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
1505 {
1506 ULong f64;
1507 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
1508 return f64;
1509 }
1510
1511 /* CALLED FROM GENERATED CODE */
1512 /* DIRTY HELPER (writes guest memory) */
x86g_dirtyhelper_storeF80le(Addr addrU,ULong f64)1513 void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
1514 {
1515 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
1516 }
1517
1518
1519 /*----------------------------------------------*/
1520 /*--- The exported fns .. ---*/
1521 /*----------------------------------------------*/
1522
1523 /* Layout of the real x87 state. */
1524 /* 13 June 05: Fpu_State and auxiliary constants was moved to
1525 g_generic_x87.h */
1526
1527
1528 /* CLEAN HELPER */
1529 /* fpucw[15:0] contains a x87 native format FPU control word.
1530 Extract from it the required FPROUND value and any resulting
1531 emulation warning, and return (warn << 32) | fpround value.
1532 */
x86g_check_fldcw(UInt fpucw)1533 ULong x86g_check_fldcw ( UInt fpucw )
1534 {
1535 /* Decide on a rounding mode. fpucw[11:10] holds it. */
1536 /* NOTE, encoded exactly as per enum IRRoundingMode. */
1537 UInt rmode = (fpucw >> 10) & 3;
1538
1539 /* Detect any required emulation warnings. */
1540 VexEmNote ew = EmNote_NONE;
1541
1542 if ((fpucw & 0x3F) != 0x3F) {
1543 /* unmasked exceptions! */
1544 ew = EmWarn_X86_x87exns;
1545 }
1546 else
1547 if (((fpucw >> 8) & 3) != 3) {
1548 /* unsupported precision */
1549 ew = EmWarn_X86_x87precision;
1550 }
1551
1552 return (((ULong)ew) << 32) | ((ULong)rmode);
1553 }
1554
1555 /* CLEAN HELPER */
1556 /* Given fpround as an IRRoundingMode value, create a suitable x87
1557 native format FPU control word. */
x86g_create_fpucw(UInt fpround)1558 UInt x86g_create_fpucw ( UInt fpround )
1559 {
1560 fpround &= 3;
1561 return 0x037F | (fpround << 10);
1562 }
1563
1564
1565 /* CLEAN HELPER */
1566 /* mxcsr[15:0] contains a SSE native format MXCSR value.
1567 Extract from it the required SSEROUND value and any resulting
1568 emulation warning, and return (warn << 32) | sseround value.
1569 */
x86g_check_ldmxcsr(UInt mxcsr)1570 ULong x86g_check_ldmxcsr ( UInt mxcsr )
1571 {
1572 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
1573 /* NOTE, encoded exactly as per enum IRRoundingMode. */
1574 UInt rmode = (mxcsr >> 13) & 3;
1575
1576 /* Detect any required emulation warnings. */
1577 VexEmNote ew = EmNote_NONE;
1578
1579 if ((mxcsr & 0x1F80) != 0x1F80) {
1580 /* unmasked exceptions! */
1581 ew = EmWarn_X86_sseExns;
1582 }
1583 else
1584 if (mxcsr & (1<<15)) {
1585 /* FZ is set */
1586 ew = EmWarn_X86_fz;
1587 }
1588 else
1589 if (mxcsr & (1<<6)) {
1590 /* DAZ is set */
1591 ew = EmWarn_X86_daz;
1592 }
1593
1594 return (((ULong)ew) << 32) | ((ULong)rmode);
1595 }
1596
1597
1598 /* CLEAN HELPER */
1599 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1600 native format MXCSR value. */
x86g_create_mxcsr(UInt sseround)1601 UInt x86g_create_mxcsr ( UInt sseround )
1602 {
1603 sseround &= 3;
1604 return 0x1F80 | (sseround << 13);
1605 }
1606
1607
1608 /* CALLED FROM GENERATED CODE */
1609 /* DIRTY HELPER (writes guest state) */
1610 /* Initialise the x87 FPU state as per 'finit'. */
x86g_dirtyhelper_FINIT(VexGuestX86State * gst)1611 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1612 {
1613 Int i;
1614 gst->guest_FTOP = 0;
1615 for (i = 0; i < 8; i++) {
1616 gst->guest_FPTAG[i] = 0; /* empty */
1617 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1618 }
1619 gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1620 gst->guest_FC3210 = 0;
1621 }
1622
1623
1624 /* This is used to implement both 'frstor' and 'fldenv'. The latter
1625 appears to differ from the former only in that the 8 FP registers
1626 themselves are not transferred into the guest state. */
1627 static
do_put_x87(Bool moveRegs,Fpu_State * x87_state,VexGuestX86State * vex_state)1628 VexEmNote do_put_x87 ( Bool moveRegs,
1629 /*IN*/Fpu_State* x87_state,
1630 /*OUT*/VexGuestX86State* vex_state )
1631 {
1632 Int stno, preg;
1633 UInt tag;
1634 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1635 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1636 UInt ftop = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
1637 UInt tagw = x87_state->env[FP_ENV_TAG];
1638 UInt fpucw = x87_state->env[FP_ENV_CTRL];
1639 UInt c3210 = x87_state->env[FP_ENV_STAT] & 0x4700;
1640 VexEmNote ew;
1641 UInt fpround;
1642 ULong pair;
1643
1644 /* Copy registers and tags */
1645 for (stno = 0; stno < 8; stno++) {
1646 preg = (stno + ftop) & 7;
1647 tag = (tagw >> (2*preg)) & 3;
1648 if (tag == 3) {
1649 /* register is empty */
1650 /* hmm, if it's empty, does it still get written? Probably
1651 safer to say it does. If we don't, memcheck could get out
1652 of sync, in that it thinks all FP registers are defined by
1653 this helper, but in reality some have not been updated. */
1654 if (moveRegs)
1655 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1656 vexTags[preg] = 0;
1657 } else {
1658 /* register is non-empty */
1659 if (moveRegs)
1660 convert_f80le_to_f64le( &x87_state->reg[10*stno],
1661 (UChar*)&vexRegs[preg] );
1662 vexTags[preg] = 1;
1663 }
1664 }
1665
1666 /* stack pointer */
1667 vex_state->guest_FTOP = ftop;
1668
1669 /* status word */
1670 vex_state->guest_FC3210 = c3210;
1671
1672 /* handle the control word, setting FPROUND and detecting any
1673 emulation warnings. */
1674 pair = x86g_check_fldcw ( (UInt)fpucw );
1675 fpround = (UInt)pair;
1676 ew = (VexEmNote)(pair >> 32);
1677
1678 vex_state->guest_FPROUND = fpround & 3;
1679
1680 /* emulation warnings --> caller */
1681 return ew;
1682 }
1683
1684
1685 /* Create an x87 FPU state from the guest state, as close as
1686 we can approximate it. */
1687 static
do_get_x87(VexGuestX86State * vex_state,Fpu_State * x87_state)1688 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1689 /*OUT*/Fpu_State* x87_state )
1690 {
1691 Int i, stno, preg;
1692 UInt tagw;
1693 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1694 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1695 UInt ftop = vex_state->guest_FTOP;
1696 UInt c3210 = vex_state->guest_FC3210;
1697
1698 for (i = 0; i < 14; i++)
1699 x87_state->env[i] = 0;
1700
1701 x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
1702 = x87_state->env[13] = 0xFFFF;
1703 x87_state->env[FP_ENV_STAT]
1704 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1705 x87_state->env[FP_ENV_CTRL]
1706 = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1707
1708 /* Dump the register stack in ST order. */
1709 tagw = 0;
1710 for (stno = 0; stno < 8; stno++) {
1711 preg = (stno + ftop) & 7;
1712 if (vexTags[preg] == 0) {
1713 /* register is empty */
1714 tagw |= (3 << (2*preg));
1715 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1716 &x87_state->reg[10*stno] );
1717 } else {
1718 /* register is full. */
1719 tagw |= (0 << (2*preg));
1720 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1721 &x87_state->reg[10*stno] );
1722 }
1723 }
1724 x87_state->env[FP_ENV_TAG] = toUShort(tagw);
1725 }
1726
1727
1728 /* CALLED FROM GENERATED CODE */
1729 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FXSAVE(VexGuestX86State * gst,HWord addr)1730 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
1731 {
1732 /* Somewhat roundabout, but at least it's simple. */
1733 Fpu_State tmp;
1734 UShort* addrS = (UShort*)addr;
1735 UChar* addrC = (UChar*)addr;
1736 U128* xmm = (U128*)(addr + 160);
1737 UInt mxcsr;
1738 UShort fp_tags;
1739 UInt summary_tags;
1740 Int r, stno;
1741 UShort *srcS, *dstS;
1742
1743 do_get_x87( gst, &tmp );
1744 mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
1745
1746 /* Now build the proper fxsave image from the x87 image we just
1747 made. */
1748
1749 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1750 addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */
1751
1752 /* set addrS[2] in an endian-independent way */
1753 summary_tags = 0;
1754 fp_tags = tmp.env[FP_ENV_TAG];
1755 for (r = 0; r < 8; r++) {
1756 if ( ((fp_tags >> (2*r)) & 3) != 3 )
1757 summary_tags |= (1 << r);
1758 }
1759 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
1760 addrC[5] = 0; /* pad */
1761
1762 addrS[3] = 0; /* FOP: fpu opcode (bogus) */
1763 addrS[4] = 0;
1764 addrS[5] = 0; /* FPU IP (bogus) */
1765 addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we
1766 could conceivably dump %CS here) */
1767
1768 addrS[7] = 0; /* Intel reserved */
1769
1770 addrS[8] = 0; /* FPU DP (operand pointer) (bogus) */
1771 addrS[9] = 0; /* FPU DP (operand pointer) (bogus) */
1772 addrS[10] = 0; /* segment selector for above operand pointer; %DS
1773 perhaps? */
1774 addrS[11] = 0; /* Intel reserved */
1775
1776 addrS[12] = toUShort(mxcsr); /* MXCSR */
1777 addrS[13] = toUShort(mxcsr >> 16);
1778
1779 addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
1780 addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
1781
1782 /* Copy in the FP registers, in ST order. */
1783 for (stno = 0; stno < 8; stno++) {
1784 srcS = (UShort*)(&tmp.reg[10*stno]);
1785 dstS = (UShort*)(&addrS[16 + 8*stno]);
1786 dstS[0] = srcS[0];
1787 dstS[1] = srcS[1];
1788 dstS[2] = srcS[2];
1789 dstS[3] = srcS[3];
1790 dstS[4] = srcS[4];
1791 dstS[5] = 0;
1792 dstS[6] = 0;
1793 dstS[7] = 0;
1794 }
1795
1796 /* That's the first 160 bytes of the image done. Now only %xmm0
1797 .. %xmm7 remain to be copied. If the host is big-endian, these
1798 need to be byte-swapped. */
1799 vassert(host_is_little_endian());
1800
1801 # define COPY_U128(_dst,_src) \
1802 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
1803 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
1804 while (0)
1805
1806 COPY_U128( xmm[0], gst->guest_XMM0 );
1807 COPY_U128( xmm[1], gst->guest_XMM1 );
1808 COPY_U128( xmm[2], gst->guest_XMM2 );
1809 COPY_U128( xmm[3], gst->guest_XMM3 );
1810 COPY_U128( xmm[4], gst->guest_XMM4 );
1811 COPY_U128( xmm[5], gst->guest_XMM5 );
1812 COPY_U128( xmm[6], gst->guest_XMM6 );
1813 COPY_U128( xmm[7], gst->guest_XMM7 );
1814
1815 # undef COPY_U128
1816 }
1817
1818
1819 /* CALLED FROM GENERATED CODE */
1820 /* DIRTY HELPER (writes guest state, reads guest mem) */
1821 VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
1822 {
1823 Fpu_State tmp;
1824 VexEmNote warnX87 = EmNote_NONE;
1825 VexEmNote warnXMM = EmNote_NONE;
1826 UShort* addrS = (UShort*)addr;
1827 UChar* addrC = (UChar*)addr;
1828 U128* xmm = (U128*)(addr + 160);
1829 UShort fp_tags;
1830 Int r, stno, i;
1831
1832 /* Restore %xmm0 .. %xmm7. If the host is big-endian, these need
1833 to be byte-swapped. */
1834 vassert(host_is_little_endian());
1835
1836 # define COPY_U128(_dst,_src) \
1837 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
1838 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
1839 while (0)
1840
1841 COPY_U128( gst->guest_XMM0, xmm[0] );
1842 COPY_U128( gst->guest_XMM1, xmm[1] );
1843 COPY_U128( gst->guest_XMM2, xmm[2] );
1844 COPY_U128( gst->guest_XMM3, xmm[3] );
1845 COPY_U128( gst->guest_XMM4, xmm[4] );
1846 COPY_U128( gst->guest_XMM5, xmm[5] );
1847 COPY_U128( gst->guest_XMM6, xmm[6] );
1848 COPY_U128( gst->guest_XMM7, xmm[7] );
1849
1850 # undef COPY_U128
1851
1852 /* Copy the x87 registers out of the image, into a temporary
1853 Fpu_State struct. */
1854
1855 /* LLVM on Darwin turns the following loop into a movaps plus a
1856 handful of scalar stores. This would work fine except for the
1857 fact that VEX doesn't keep the stack correctly (16-) aligned for
1858 the call, so it segfaults. Hence, split the loop into two
1859 pieces (and pray LLVM doesn't merely glue them back together) so
1860 it's composed only of scalar stores and so is alignment
1861 insensitive. Of course this is a kludge of the lamest kind --
1862 VEX should be fixed properly. */
1863 /* Code that seems to trigger the problem:
1864 for (i = 0; i < 14; i++) tmp.env[i] = 0; */
1865 for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
1866 __asm__ __volatile__("" ::: "memory");
1867 for (i = 0; i < 7; i++) tmp.env[i+7] = 0;
1868
1869 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1870 /* fill in tmp.reg[0..7] */
1871 for (stno = 0; stno < 8; stno++) {
1872 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1873 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1874 dstS[0] = srcS[0];
1875 dstS[1] = srcS[1];
1876 dstS[2] = srcS[2];
1877 dstS[3] = srcS[3];
1878 dstS[4] = srcS[4];
1879 }
1880 /* fill in tmp.env[0..13] */
1881 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1882 tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
1883
1884 fp_tags = 0;
1885 for (r = 0; r < 8; r++) {
1886 if (addrC[4] & (1<<r))
1887 fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
1888 else
1889 fp_tags |= (3 << (2*r)); /* EMPTY */
1890 }
1891 tmp.env[FP_ENV_TAG] = fp_tags;
1892
1893 /* Now write 'tmp' into the guest state. */
1894 warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );
1895
1896 { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1897 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1898 ULong w64 = x86g_check_ldmxcsr( w32 );
1899
1900 warnXMM = (VexEmNote)(w64 >> 32);
1901
1902 gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
1903 }
1904
1905 /* Prefer an X87 emwarn over an XMM one, if both exist. */
1906 if (warnX87 != EmNote_NONE)
1907 return warnX87;
1908 else
1909 return warnXMM;
1910 }
1911
1912
1913 /* CALLED FROM GENERATED CODE */
1914 /* DIRTY HELPER (reads guest state, writes guest mem) */
1915 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1916 {
1917 do_get_x87( gst, (Fpu_State*)addr );
1918 }
1919
1920 /* CALLED FROM GENERATED CODE */
1921 /* DIRTY HELPER (writes guest state, reads guest mem) */
1922 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1923 {
1924 return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
1925 }
1926
1927 /* CALLED FROM GENERATED CODE */
1928 /* DIRTY HELPER (reads guest state, writes guest mem) */
1929 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1930 {
1931 /* Somewhat roundabout, but at least it's simple. */
1932 Int i;
1933 UShort* addrP = (UShort*)addr;
1934 Fpu_State tmp;
1935 do_get_x87( gst, &tmp );
1936 for (i = 0; i < 14; i++)
1937 addrP[i] = tmp.env[i];
1938 }
1939
1940 /* CALLED FROM GENERATED CODE */
1941 /* DIRTY HELPER (writes guest state, reads guest mem) */
1942 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1943 {
1944 return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst);
1945 }
1946
1947 /* VISIBLE TO LIBVEX CLIENT */
1948 /* Do x87 save from the supplied VexGuestX86State structure, storing the
1949    result at the given address, which must point to a buffer of at least
1950    108 bytes. */
1951 void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1952 /*OUT*/UChar* x87_state )
1953 {
1954 do_get_x87 ( vex_state, (Fpu_State*)x87_state );
1955 }
1956
1957 /* VISIBLE TO LIBVEX CLIENT */
1958 /* Do x87 restore from the supplied address, writing the values read into
1959    the given VexGuestX86State structure. */
1960 VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
1961 /*MOD*/VexGuestX86State* vex_state )
1962 {
1963 return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
1964 }
1965
1966 /* VISIBLE TO LIBVEX CLIENT */
1967 /* Return mxcsr from the supplied VexGuestX86State structure. */
1968 UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
1969 {
1970 return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
1971 }
1972
1973 /* VISIBLE TO LIBVEX CLIENT */
1974 /* Modify the given VexGuestX86State structure according to the passed mxcsr
1975 value. */
1976 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
1977 /*MOD*/VexGuestX86State* vex_state)
1978 {
1979 ULong w64 = x86g_check_ldmxcsr( mxcsr );
1980 vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
1981 return (VexEmNote)(w64 >> 32);
1982 }
1983
1984 /*---------------------------------------------------------------*/
1985 /*--- Misc integer helpers, including rotates and CPUID. ---*/
1986 /*---------------------------------------------------------------*/
1987
1988 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1989 /* Calculate both flags and value result for rotate right
1990 through the carry bit. Result in low 32 bits,
1991 new flags (OSZACP) in high 32 bits.
1992 */
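/* For instance, modelling "rcr $1" applied to a 32-bit value of 1
   with CF clear on entry:
      x86g_calculate_RCR(0x00000001, 1, 0, 4)
   shifts the old CF (0) into bit 31 and the old bit 0 (1) into CF,
   so the low half of the result is 0x00000000 and the high half has
   C set and O clear. */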
1993 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1994 {
1995 UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1996
1997 switch (sz) {
1998 case 4:
1999 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2000 of = ((arg >> 31) ^ cf) & 1;
2001 while (tempCOUNT > 0) {
2002 tempcf = arg & 1;
2003 arg = (arg >> 1) | (cf << 31);
2004 cf = tempcf;
2005 tempCOUNT--;
2006 }
2007 break;
2008 case 2:
2009 while (tempCOUNT >= 17) tempCOUNT -= 17;
2010 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2011 of = ((arg >> 15) ^ cf) & 1;
2012 while (tempCOUNT > 0) {
2013 tempcf = arg & 1;
2014 arg = ((arg >> 1) & 0x7FFF) | (cf << 15);
2015 cf = tempcf;
2016 tempCOUNT--;
2017 }
2018 break;
2019 case 1:
2020 while (tempCOUNT >= 9) tempCOUNT -= 9;
2021 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2022 of = ((arg >> 7) ^ cf) & 1;
2023 while (tempCOUNT > 0) {
2024 tempcf = arg & 1;
2025 arg = ((arg >> 1) & 0x7F) | (cf << 7);
2026 cf = tempcf;
2027 tempCOUNT--;
2028 }
2029 break;
2030 default:
2031 vpanic("calculate_RCR: invalid size");
2032 }
2033
2034 cf &= 1;
2035 of &= 1;
2036 eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2037 eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2038
2039 return (((ULong)eflags_in) << 32) | ((ULong)arg);
2040 }
2041
2042
2043 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2044 /* Calculate both flags and value result for rotate left
2045 through the carry bit. Result in low 32 bits,
2046 new flags (OSZACP) in high 32 bits.
2047 */
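/* For instance, modelling "rcl $1" applied to 0x80000000 with CF
   clear on entry:
      x86g_calculate_RCL(0x80000000, 1, 0, 4)
   moves the old bit 31 (1) into CF and the old CF (0) into bit 0,
   giving a value of 0x00000000 with C set; O is also set, because
   the new top bit (0) differs from the new CF (1). */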
2048 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
2049 {
2050 UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
2051
2052 switch (sz) {
2053 case 4:
2054 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2055 while (tempCOUNT > 0) {
2056 tempcf = (arg >> 31) & 1;
2057 arg = (arg << 1) | (cf & 1);
2058 cf = tempcf;
2059 tempCOUNT--;
2060 }
2061 of = ((arg >> 31) ^ cf) & 1;
2062 break;
2063 case 2:
2064 while (tempCOUNT >= 17) tempCOUNT -= 17;
2065 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2066 while (tempCOUNT > 0) {
2067 tempcf = (arg >> 15) & 1;
2068 arg = 0xFFFF & ((arg << 1) | (cf & 1));
2069 cf = tempcf;
2070 tempCOUNT--;
2071 }
2072 of = ((arg >> 15) ^ cf) & 1;
2073 break;
2074 case 1:
2075 while (tempCOUNT >= 9) tempCOUNT -= 9;
2076 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2077 while (tempCOUNT > 0) {
2078 tempcf = (arg >> 7) & 1;
2079 arg = 0xFF & ((arg << 1) | (cf & 1));
2080 cf = tempcf;
2081 tempCOUNT--;
2082 }
2083 of = ((arg >> 7) ^ cf) & 1;
2084 break;
2085 default:
2086 vpanic("calculate_RCL: invalid size");
2087 }
2088
2089 cf &= 1;
2090 of &= 1;
2091 eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2092 eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2093
2094 return (((ULong)eflags_in) << 32) | ((ULong)arg);
2095 }
2096
2097
2098 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2099 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
2100 AX value in low half of arg, OSZACP in upper half.
2101 See guest-x86/toIR.c usage point for details.
2102 */
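/* A worked DAA example: after adding packed BCD 0x18 + 0x16, %al
   holds the binary sum 0x2E.  Calling this helper with AX = 0x002E,
   all flags clear and opcode 0x27 takes the low-nibble branch
   (0xE > 9), adds 6 to give %al == 0x34 -- the correct BCD result of
   18 + 16 -- and returns with A set and C clear. */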
2103 static UInt calc_parity_8bit ( UInt w32 ) {
2104 UInt i;
2105 UInt p = 1;
2106 for (i = 0; i < 8; i++)
2107 p ^= (1 & (w32 >> i));
2108 return p;
2109 }
2110 UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
2111 {
2112 UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2113 UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2114 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2115 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2116 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2117 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2118 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2119 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2120 UInt result = 0;
2121
2122 switch (opcode) {
2123 case 0x27: { /* DAA */
2124 UInt old_AL = r_AL;
2125 UInt old_C = r_C;
2126 r_C = 0;
2127 if ((r_AL & 0xF) > 9 || r_A == 1) {
2128 r_AL = r_AL + 6;
2129 r_C = old_C;
2130 if (r_AL >= 0x100) r_C = 1;
2131 r_A = 1;
2132 } else {
2133 r_A = 0;
2134 }
2135 if (old_AL > 0x99 || old_C == 1) {
2136 r_AL = r_AL + 0x60;
2137 r_C = 1;
2138 } else {
2139 r_C = 0;
2140 }
2141 /* O is undefined. S Z and P are set according to the
2142 result. */
2143 r_AL &= 0xFF;
2144 r_O = 0; /* let's say */
2145 r_S = (r_AL & 0x80) ? 1 : 0;
2146 r_Z = (r_AL == 0) ? 1 : 0;
2147 r_P = calc_parity_8bit( r_AL );
2148 break;
2149 }
2150 case 0x2F: { /* DAS */
2151 UInt old_AL = r_AL;
2152 UInt old_C = r_C;
2153 r_C = 0;
2154 if ((r_AL & 0xF) > 9 || r_A == 1) {
2155 Bool borrow = r_AL < 6;
2156 r_AL = r_AL - 6;
2157 r_C = old_C;
2158 if (borrow) r_C = 1;
2159 r_A = 1;
2160 } else {
2161 r_A = 0;
2162 }
2163 if (old_AL > 0x99 || old_C == 1) {
2164 r_AL = r_AL - 0x60;
2165 r_C = 1;
2166 } else {
2167 /* Intel docs are wrong: r_C = 0; */
2168 }
2169 /* O is undefined. S Z and P are set according to the
2170 result. */
2171 r_AL &= 0xFF;
2172 r_O = 0; /* let's say */
2173 r_S = (r_AL & 0x80) ? 1 : 0;
2174 r_Z = (r_AL == 0) ? 1 : 0;
2175 r_P = calc_parity_8bit( r_AL );
2176 break;
2177 }
2178 case 0x37: { /* AAA */
2179 Bool nudge = r_AL > 0xF9;
2180 if ((r_AL & 0xF) > 9 || r_A == 1) {
2181 r_AL = r_AL + 6;
2182 r_AH = r_AH + 1 + (nudge ? 1 : 0);
2183 r_A = 1;
2184 r_C = 1;
2185 r_AL = r_AL & 0xF;
2186 } else {
2187 r_A = 0;
2188 r_C = 0;
2189 r_AL = r_AL & 0xF;
2190 }
2191 /* O S Z and P are undefined. */
2192 r_O = r_S = r_Z = r_P = 0; /* let's say */
2193 break;
2194 }
2195 case 0x3F: { /* AAS */
2196 Bool nudge = r_AL < 0x06;
2197 if ((r_AL & 0xF) > 9 || r_A == 1) {
2198 r_AL = r_AL - 6;
2199 r_AH = r_AH - 1 - (nudge ? 1 : 0);
2200 r_A = 1;
2201 r_C = 1;
2202 r_AL = r_AL & 0xF;
2203 } else {
2204 r_A = 0;
2205 r_C = 0;
2206 r_AL = r_AL & 0xF;
2207 }
2208 /* O S Z and P are undefined. */
2209 r_O = r_S = r_Z = r_P = 0; /* let's say */
2210 break;
2211 }
2212 default:
2213 vassert(0);
2214 }
2215 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2216 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2217 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2218 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2219 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2220 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2221 | ( (r_AH & 0xFF) << 8 )
2222 | ( (r_AL & 0xFF) << 0 );
2223 return result;
2224 }
2225
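/* A worked AAM example: with %al holding the binary product 53
   (0x35), opcode 0xD4 splits it into decimal digits, leaving AH = 5
   and AL = 3.  S and Z are then derived from the new AL, P from its
   parity, and O, C and A are cleared. */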
2226 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
2227 {
2228 UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2229 UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2230 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2231 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2232 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2233 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2234 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2235 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2236 UInt result = 0;
2237
2238 switch (opcode) {
2239 case 0xD4: { /* AAM */
2240 r_AH = r_AL / 10;
2241 r_AL = r_AL % 10;
2242 break;
2243 }
2244 case 0xD5: { /* AAD */
2245 r_AL = ((r_AH * 10) + r_AL) & 0xff;
2246 r_AH = 0;
2247 break;
2248 }
2249 default:
2250 vassert(0);
2251 }
2252
2253 r_O = 0; /* let's say (undefined) */
2254 r_C = 0; /* let's say (undefined) */
2255 r_A = 0; /* let's say (undefined) */
2256 r_S = (r_AL & 0x80) ? 1 : 0;
2257 r_Z = (r_AL == 0) ? 1 : 0;
2258 r_P = calc_parity_8bit( r_AL );
2259
2260 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2261 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2262 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2263 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2264 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2265 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2266 | ( (r_AH & 0xFF) << 8 )
2267 | ( (r_AL & 0xFF) << 0 );
2268 return result;
2269 }
2270
2271
2272 /* CALLED FROM GENERATED CODE */
2273 /* DIRTY HELPER (non-referentially-transparent) */
2274 /* Horrible hack. On non-x86 platforms, return 1. */
2275 ULong x86g_dirtyhelper_RDTSC ( void )
2276 {
2277 # if defined(__i386__)
2278 ULong res;
2279 __asm__ __volatile__("rdtsc" : "=A" (res));
2280 return res;
2281 # else
2282 return 1ULL;
2283 # endif
2284 }
2285
2286
2287 /* CALLED FROM GENERATED CODE */
2288 /* DIRTY HELPER (modifies guest state) */
2289 /* Claim to be a P55C (Intel Pentium/MMX) */
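/* The EBX/ECX/EDX values below are the vendor string in the usual
   CPUID byte order: read as little-endian characters in the order
   EBX, EDX, ECX, they spell "Genu" "ineI" "ntel", i.e.
   "GenuineIntel". */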
2290 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2291 {
2292 switch (st->guest_EAX) {
2293 case 0:
2294 st->guest_EAX = 0x1;
2295 st->guest_EBX = 0x756e6547;
2296 st->guest_ECX = 0x6c65746e;
2297 st->guest_EDX = 0x49656e69;
2298 break;
2299 default:
2300 st->guest_EAX = 0x543;
2301 st->guest_EBX = 0x0;
2302 st->guest_ECX = 0x0;
2303 st->guest_EDX = 0x8001bf;
2304 break;
2305 }
2306 }
2307
2308 /* CALLED FROM GENERATED CODE */
2309 /* DIRTY HELPER (modifies guest state) */
2310 /* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
2311 /* But without 3DNow support (weird, but we really don't support it). */
2312 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
2313 {
2314 switch (st->guest_EAX) {
2315 /* vendor ID */
2316 case 0:
2317 st->guest_EAX = 0x1;
2318 st->guest_EBX = 0x68747541;
2319 st->guest_ECX = 0x444d4163;
2320 st->guest_EDX = 0x69746e65;
2321 break;
2322 /* feature bits */
2323 case 1:
2324 st->guest_EAX = 0x621;
2325 st->guest_EBX = 0x0;
2326 st->guest_ECX = 0x0;
2327 st->guest_EDX = 0x183f9ff;
2328 break;
2329 /* Highest Extended Function Supported (0x80000004 brand string) */
2330 case 0x80000000:
2331 st->guest_EAX = 0x80000004;
2332 st->guest_EBX = 0x68747541;
2333 st->guest_ECX = 0x444d4163;
2334 st->guest_EDX = 0x69746e65;
2335 break;
2336 /* Extended Processor Info and Feature Bits */
2337 case 0x80000001:
2338 st->guest_EAX = 0x721;
2339 st->guest_EBX = 0x0;
2340 st->guest_ECX = 0x0;
2341 st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
2342 break;
2343 /* Processor Brand String "AMD Athlon(tm) Processor" */
2344 case 0x80000002:
2345 st->guest_EAX = 0x20444d41;
2346 st->guest_EBX = 0x6c687441;
2347 st->guest_ECX = 0x74286e6f;
2348 st->guest_EDX = 0x5020296d;
2349 break;
2350 case 0x80000003:
2351 st->guest_EAX = 0x65636f72;
2352 st->guest_EBX = 0x726f7373;
2353 st->guest_ECX = 0x0;
2354 st->guest_EDX = 0x0;
2355 break;
2356 default:
2357 st->guest_EAX = 0x0;
2358 st->guest_EBX = 0x0;
2359 st->guest_ECX = 0x0;
2360 st->guest_EDX = 0x0;
2361 break;
2362 }
2363 }
2364
2365 /* CALLED FROM GENERATED CODE */
2366 /* DIRTY HELPER (modifies guest state) */
2367 /* Claim to be the following SSE1-capable CPU:
2368 vendor_id : GenuineIntel
2369 cpu family : 6
2370 model : 11
2371 model name : Intel(R) Pentium(R) III CPU family 1133MHz
2372 stepping : 1
2373 cpu MHz : 1131.013
2374 cache size : 512 KB
2375 */
2376 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2377 {
2378 switch (st->guest_EAX) {
2379 case 0:
2380 st->guest_EAX = 0x00000002;
2381 st->guest_EBX = 0x756e6547;
2382 st->guest_ECX = 0x6c65746e;
2383 st->guest_EDX = 0x49656e69;
2384 break;
2385 case 1:
2386 st->guest_EAX = 0x000006b1;
2387 st->guest_EBX = 0x00000004;
2388 st->guest_ECX = 0x00000000;
2389 st->guest_EDX = 0x0383fbff;
2390 break;
2391 default:
2392 st->guest_EAX = 0x03020101;
2393 st->guest_EBX = 0x00000000;
2394 st->guest_ECX = 0x00000000;
2395 st->guest_EDX = 0x0c040883;
2396 break;
2397 }
2398 }
2399
2400 /* Claim to be the following SSE2-capable CPU:
2401 vendor_id : GenuineIntel
2402 cpu family : 15
2403 model : 2
2404 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz
2405 stepping : 9
2406 microcode : 0x17
2407 cpu MHz : 2992.577
2408 cache size : 512 KB
2409 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
2410 pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
2411 pebs bts cid xtpr
2412 clflush size : 64
2413 cache_alignment : 128
2414 address sizes : 36 bits physical, 32 bits virtual
2415 */
2416 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2417 {
2418 switch (st->guest_EAX) {
2419 case 0:
2420 st->guest_EAX = 0x00000002;
2421 st->guest_EBX = 0x756e6547;
2422 st->guest_ECX = 0x6c65746e;
2423 st->guest_EDX = 0x49656e69;
2424 break;
2425 case 1:
2426 st->guest_EAX = 0x00000f29;
2427 st->guest_EBX = 0x01020809;
2428 st->guest_ECX = 0x00004400;
2429 st->guest_EDX = 0xbfebfbff;
2430 break;
2431 default:
2432 st->guest_EAX = 0x03020101;
2433 st->guest_EBX = 0x00000000;
2434 st->guest_ECX = 0x00000000;
2435 st->guest_EDX = 0x0c040883;
2436 break;
2437 }
2438 }
2439
2440 /* Claim to be the following SSSE3-capable CPU (2 x ...):
2441 vendor_id : GenuineIntel
2442 cpu family : 6
2443 model : 15
2444 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2445 stepping : 6
2446 cpu MHz : 2394.000
2447 cache size : 4096 KB
2448 physical id : 0
2449 siblings : 2
2450 core id : 0
2451 cpu cores : 2
2452 fpu : yes
2453 fpu_exception : yes
2454 cpuid level : 10
2455 wp : yes
2456 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2457 mtrr pge mca cmov pat pse36 clflush dts acpi
2458 mmx fxsr sse sse2 ss ht tm syscall nx lm
2459 constant_tsc pni monitor ds_cpl vmx est tm2
2460 cx16 xtpr lahf_lm
2461 bogomips : 4798.78
2462 clflush size : 64
2463 cache_alignment : 64
2464 address sizes : 36 bits physical, 48 bits virtual
2465 power management:
2466 */
2467 void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
2468 {
2469 # define SET_ABCD(_a,_b,_c,_d) \
2470 do { st->guest_EAX = (UInt)(_a); \
2471 st->guest_EBX = (UInt)(_b); \
2472 st->guest_ECX = (UInt)(_c); \
2473 st->guest_EDX = (UInt)(_d); \
2474 } while (0)
2475
2476 switch (st->guest_EAX) {
2477 case 0x00000000:
2478 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2479 break;
2480 case 0x00000001:
2481 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2482 break;
2483 case 0x00000002:
2484 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2485 break;
2486 case 0x00000003:
2487 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2488 break;
2489 case 0x00000004: {
2490 switch (st->guest_ECX) {
2491 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2492 0x0000003f, 0x00000001); break;
2493 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2494 0x0000003f, 0x00000001); break;
2495 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2496 0x00000fff, 0x00000001); break;
2497 default: SET_ABCD(0x00000000, 0x00000000,
2498 0x00000000, 0x00000000); break;
2499 }
2500 break;
2501 }
2502 case 0x00000005:
2503 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2504 break;
2505 case 0x00000006:
2506 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2507 break;
2508 case 0x00000007:
2509 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2510 break;
2511 case 0x00000008:
2512 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2513 break;
2514 case 0x00000009:
2515 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2516 break;
2517 case 0x0000000a:
2518 unhandled_eax_value:
2519 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2520 break;
2521 case 0x80000000:
2522 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2523 break;
2524 case 0x80000001:
2525 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
2526 break;
2527 case 0x80000002:
2528 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2529 break;
2530 case 0x80000003:
2531 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2532 break;
2533 case 0x80000004:
2534 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2535 break;
2536 case 0x80000005:
2537 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2538 break;
2539 case 0x80000006:
2540 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2541 break;
2542 case 0x80000007:
2543 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2544 break;
2545 case 0x80000008:
2546 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2547 break;
2548 default:
2549 goto unhandled_eax_value;
2550 }
2551 # undef SET_ABCD
2552 }
2553
2554
2555 /* CALLED FROM GENERATED CODE */
2556 /* DIRTY HELPER (non-referentially-transparent) */
2557 /* Horrible hack. On non-x86 platforms, return 0. */
2558 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
2559 {
2560 # if defined(__i386__)
2561 UInt r = 0;
2562 portno &= 0xFFFF;
2563 switch (sz) {
2564 case 4:
2565 __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
2566 : "=a" (r) : "Nd" (portno));
2567 break;
2568 case 2:
2569 __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
2570 : "=a" (r) : "Nd" (portno));
2571 break;
2572 case 1:
2573 __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
2574 : "=a" (r) : "Nd" (portno));
2575 break;
2576 default:
2577 break;
2578 }
2579 return r;
2580 # else
2581 return 0;
2582 # endif
2583 }
2584
2585
2586 /* CALLED FROM GENERATED CODE */
2587 /* DIRTY HELPER (non-referentially-transparent) */
2588 /* Horrible hack. On non-x86 platforms, do nothing. */
2589 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
2590 {
2591 # if defined(__i386__)
2592 portno &= 0xFFFF;
2593 switch (sz) {
2594 case 4:
2595 __asm__ __volatile__("outl %0, %w1"
2596 : : "a" (data), "Nd" (portno));
2597 break;
2598 case 2:
2599 __asm__ __volatile__("outw %w0, %w1"
2600 : : "a" (data), "Nd" (portno));
2601 break;
2602 case 1:
2603 __asm__ __volatile__("outb %b0, %w1"
2604 : : "a" (data), "Nd" (portno));
2605 break;
2606 default:
2607 break;
2608 }
2609 # else
2610 /* do nothing */
2611 # endif
2612 }
2613
2614 /* CALLED FROM GENERATED CODE */
2615 /* DIRTY HELPER (non-referentially-transparent) */
2616 /* Horrible hack. On non-x86 platforms, do nothing. */
2617 /* op = 0: call the native SGDT instruction.
2618 op = 1: call the native SIDT instruction.
2619 */
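/* Both instructions store a 6-byte pseudo-descriptor -- a 16-bit
   limit followed by a 32-bit base -- at the given address, which is
   why the non-x86 fallback below zeroes exactly 6 bytes. */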
2620 void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
2621 # if defined(__i386__)
2622 switch (op) {
2623 case 0:
2624 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2625 break;
2626 case 1:
2627 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2628 break;
2629 default:
2630 vpanic("x86g_dirtyhelper_SxDT");
2631 }
2632 # else
2633 /* not available on this host; just zero out the 6-byte result */
2634 UChar* p = (UChar*)address;
2635 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2636 # endif
2637 }
2638
2639 /*---------------------------------------------------------------*/
2640 /*--- Helpers for MMX/SSE/SSE2. ---*/
2641 /*---------------------------------------------------------------*/
2642
2643 static inline UChar abdU8 ( UChar xx, UChar yy ) {
2644 return toUChar(xx>yy ? xx-yy : yy-xx);
2645 }
2646
2647 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2648 return (((ULong)w1) << 32) | ((ULong)w0);
2649 }
2650
2651 static inline UShort sel16x4_3 ( ULong w64 ) {
2652 UInt hi32 = toUInt(w64 >> 32);
2653 return toUShort(hi32 >> 16);
2654 }
2655 static inline UShort sel16x4_2 ( ULong w64 ) {
2656 UInt hi32 = toUInt(w64 >> 32);
2657 return toUShort(hi32);
2658 }
2659 static inline UShort sel16x4_1 ( ULong w64 ) {
2660 UInt lo32 = toUInt(w64);
2661 return toUShort(lo32 >> 16);
2662 }
2663 static inline UShort sel16x4_0 ( ULong w64 ) {
2664 UInt lo32 = toUInt(w64);
2665 return toUShort(lo32);
2666 }
2667
2668 static inline UChar sel8x8_7 ( ULong w64 ) {
2669 UInt hi32 = toUInt(w64 >> 32);
2670 return toUChar(hi32 >> 24);
2671 }
2672 static inline UChar sel8x8_6 ( ULong w64 ) {
2673 UInt hi32 = toUInt(w64 >> 32);
2674 return toUChar(hi32 >> 16);
2675 }
2676 static inline UChar sel8x8_5 ( ULong w64 ) {
2677 UInt hi32 = toUInt(w64 >> 32);
2678 return toUChar(hi32 >> 8);
2679 }
2680 static inline UChar sel8x8_4 ( ULong w64 ) {
2681 UInt hi32 = toUInt(w64 >> 32);
2682 return toUChar(hi32 >> 0);
2683 }
2684 static inline UChar sel8x8_3 ( ULong w64 ) {
2685 UInt lo32 = toUInt(w64);
2686 return toUChar(lo32 >> 24);
2687 }
2688 static inline UChar sel8x8_2 ( ULong w64 ) {
2689 UInt lo32 = toUInt(w64);
2690 return toUChar(lo32 >> 16);
2691 }
2692 static inline UChar sel8x8_1 ( ULong w64 ) {
2693 UInt lo32 = toUInt(w64);
2694 return toUChar(lo32 >> 8);
2695 }
2696 static inline UChar sel8x8_0 ( ULong w64 ) {
2697 UInt lo32 = toUInt(w64);
2698 return toUChar(lo32 >> 0);
2699 }
2700
2701 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2702 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2703 {
2704 return
2705 mk32x2(
2706 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2707 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2708 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2709 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2710 );
2711 }
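/* For instance, with both operands holding the 16-bit lanes
   (1,2,3,4), high to low:
      x86g_calculate_mmx_pmaddwd(0x0001000200030004ULL,
                                 0x0001000200030004ULL)
   returns 0x0000000500000019ULL, since 1*1 + 2*2 == 5 for the upper
   pair of lanes and 3*3 + 4*4 == 25 (0x19) for the lower pair. */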
2712
2713 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2714 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2715 {
2716 UInt t = 0;
2717 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2718 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2719 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2720 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2721 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2722 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2723 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2724 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2725 t &= 0xFFFF;
2726 return (ULong)t;
2727 }
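/* For instance, summing absolute byte differences against zero:
      x86g_calculate_mmx_psadbw(0x0102030405060708ULL, 0ULL)
   returns 1+2+3+4+5+6+7+8 == 36 == 0x24, zero-extended to 64
   bits. */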
2728
2729
2730 /*---------------------------------------------------------------*/
2731 /*--- Helpers for dealing with segment overrides. ---*/
2732 /*---------------------------------------------------------------*/
2733
2734 static inline
2735 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2736 {
2737 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2738 UInt mid = 0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2739 UInt hi = 0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2740 return (hi << 24) | (mid << 16) | lo;
2741 }
2742
2743 static inline
2744 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2745 {
2746 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2747 UInt hi = 0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2748 UInt limit = (hi << 16) | lo;
2749 if (ent->LdtEnt.Bits.Granularity)
2750 limit = (limit << 12) | 0xFFF;
2751 return limit;
2752 }
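/* For instance, a flat 4GB segment has LimitLow == 0xFFFF,
   LimitHi == 0xF and Granularity == 1: the 20-bit limit 0xFFFFF is
   then in 4KB pages, so the computed byte limit is
   (0xFFFFF << 12) | 0xFFF == 0xFFFFFFFF. */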
2753
2754 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2755 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2756 UInt seg_selector, UInt virtual_addr )
2757 {
2758 UInt tiBit, base, limit;
2759 VexGuestX86SegDescr* the_descrs;
2760
2761 Bool verboze = False;
2762
2763 /* If this isn't true, we're in Big Trouble. */
2764 vassert(8 == sizeof(VexGuestX86SegDescr));
2765
2766 if (verboze)
2767 vex_printf("x86h_use_seg_selector: "
2768 "seg_selector = 0x%x, vaddr = 0x%x\n",
2769 seg_selector, virtual_addr);
2770
2771 /* Check for wildly invalid selector. */
2772 if (seg_selector & ~0xFFFF)
2773 goto bad;
2774
2775 seg_selector &= 0x0000FFFF;
2776
2777 /* Sanity check the segment selector. Ensure that RPL=11b (least
2778 privilege). This forms the bottom 2 bits of the selector. */
2779 if ((seg_selector & 3) != 3)
2780 goto bad;
2781
2782 /* Extract the TI bit (0 means GDT, 1 means LDT) */
2783 tiBit = (seg_selector >> 2) & 1;
2784
2785 /* Convert the segment selector into a table index */
2786 seg_selector >>= 3;
2787 vassert(seg_selector >= 0 && seg_selector < 8192);
2788
2789 if (tiBit == 0) {
2790
2791 /* GDT access. */
2792 /* Do we actually have a GDT to look at? */
2793 if (gdt == 0)
2794 goto bad;
2795
2796 /* Check for access to non-existent entry. */
2797 if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2798 goto bad;
2799
2800 the_descrs = (VexGuestX86SegDescr*)gdt;
2801 base = get_segdescr_base (&the_descrs[seg_selector]);
2802 limit = get_segdescr_limit(&the_descrs[seg_selector]);
2803
2804 } else {
2805
2806 /* All the same stuff, except for the LDT. */
2807 if (ldt == 0)
2808 goto bad;
2809
2810 if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2811 goto bad;
2812
2813 the_descrs = (VexGuestX86SegDescr*)ldt;
2814 base = get_segdescr_base (&the_descrs[seg_selector]);
2815 limit = get_segdescr_limit(&the_descrs[seg_selector]);
2816
2817 }
2818
2819 /* Do the limit check. Note, this check is just slightly too
2820 slack. Really it should be "if (virtual_addr + size - 1 >=
2821 limit)," but we don't have the size info to hand. Getting it
2822 could be significantly complex. */
2823 if (virtual_addr >= limit)
2824 goto bad;
2825
2826 if (verboze)
2827 vex_printf("x86h_use_seg_selector: "
2828 "base = 0x%x, addr = 0x%x\n",
2829 base, base + virtual_addr);
2830
2831 /* High 32 bits are zero, indicating success. */
2832 return (ULong)( ((UInt)virtual_addr) + base );
2833
2834 bad:
2835 return 1ULL << 32;
2836 }
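/* A caller can therefore separate success from failure by testing
   the upper half of the result, along these lines:

      ULong r = x86g_use_seg_selector(ldt, gdt, selector, vaddr);
      if (r >> 32)
         ... signal a segmentation-related fault ...
      else
         ... use (UInt)r as the linear address ...
*/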
2837
2838
2839 /*---------------------------------------------------------------*/
2840 /*--- Helpers for dealing with, and describing, ---*/
2841 /*--- guest state as a whole. ---*/
2842 /*---------------------------------------------------------------*/
2843
2844 /* Initialise the entire x86 guest state. */
2845 /* VISIBLE TO LIBVEX CLIENT */
2846 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2847 {
2848 vex_state->host_EvC_FAILADDR = 0;
2849 vex_state->host_EvC_COUNTER = 0;
2850
2851 vex_state->guest_EAX = 0;
2852 vex_state->guest_ECX = 0;
2853 vex_state->guest_EDX = 0;
2854 vex_state->guest_EBX = 0;
2855 vex_state->guest_ESP = 0;
2856 vex_state->guest_EBP = 0;
2857 vex_state->guest_ESI = 0;
2858 vex_state->guest_EDI = 0;
2859
2860 vex_state->guest_CC_OP = X86G_CC_OP_COPY;
2861 vex_state->guest_CC_DEP1 = 0;
2862 vex_state->guest_CC_DEP2 = 0;
2863 vex_state->guest_CC_NDEP = 0;
2864 vex_state->guest_DFLAG = 1; /* forwards */
2865 vex_state->guest_IDFLAG = 0;
2866 vex_state->guest_ACFLAG = 0;
2867
2868 vex_state->guest_EIP = 0;
2869
2870 /* Initialise the simulated FPU */
2871 x86g_dirtyhelper_FINIT( vex_state );
2872
2873 /* Initialise the SSE state. */
2874 # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2875
2876 vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2877 SSEZERO(vex_state->guest_XMM0);
2878 SSEZERO(vex_state->guest_XMM1);
2879 SSEZERO(vex_state->guest_XMM2);
2880 SSEZERO(vex_state->guest_XMM3);
2881 SSEZERO(vex_state->guest_XMM4);
2882 SSEZERO(vex_state->guest_XMM5);
2883 SSEZERO(vex_state->guest_XMM6);
2884 SSEZERO(vex_state->guest_XMM7);
2885
2886 # undef SSEZERO
2887
2888 vex_state->guest_CS = 0;
2889 vex_state->guest_DS = 0;
2890 vex_state->guest_ES = 0;
2891 vex_state->guest_FS = 0;
2892 vex_state->guest_GS = 0;
2893 vex_state->guest_SS = 0;
2894 vex_state->guest_LDT = 0;
2895 vex_state->guest_GDT = 0;
2896
2897 vex_state->guest_EMNOTE = EmNote_NONE;
2898
2899 /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2900 vex_state->guest_CMSTART = 0;
2901 vex_state->guest_CMLEN = 0;
2902
2903 vex_state->guest_NRADDR = 0;
2904 vex_state->guest_SC_CLASS = 0;
2905 vex_state->guest_IP_AT_SYSCALL = 0;
2906
2907 vex_state->padding1 = 0;
2908 vex_state->padding2 = 0;
2909 vex_state->padding3 = 0;
2910 }
2911
2912
2913 /* Figure out if any part of the guest state contained in minoff
2914 .. maxoff requires precise memory exceptions. If in doubt return
2915 True (but this generates significantly slower code).
2916
2917 By default we enforce precise exns for guest %ESP, %EBP and %EIP
2918 only. These are the minimum needed to extract correct stack
2919 backtraces from x86 code.
2920
2921 Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2922 */
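/* For example, under the default policy a put to a byte range that
   overlaps none of guest_ESP, guest_EBP or guest_EIP falls through
   all three checks and returns False; any overlap with one of those
   three registers forces True. */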
2923 Bool guest_x86_state_requires_precise_mem_exns (
2924 Int minoff, Int maxoff, VexRegisterUpdates pxControl
2925 )
2926 {
2927 Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2928 Int ebp_max = ebp_min + 4 - 1;
2929 Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2930 Int esp_max = esp_min + 4 - 1;
2931 Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2932 Int eip_max = eip_min + 4 - 1;
2933
2934 if (maxoff < esp_min || minoff > esp_max) {
2935 /* no overlap with esp */
2936 if (pxControl == VexRegUpdSpAtMemAccess)
2937 return False; // We only need to check stack pointer.
2938 } else {
2939 return True;
2940 }
2941
2942 if (maxoff < ebp_min || minoff > ebp_max) {
2943 /* no overlap with ebp */
2944 } else {
2945 return True;
2946 }
2947
2948 if (maxoff < eip_min || minoff > eip_max) {
2949 /* no overlap with eip */
2950 } else {
2951 return True;
2952 }
2953
2954 return False;
2955 }
2956
2957
2958 #define ALWAYSDEFD(field) \
2959 { offsetof(VexGuestX86State, field), \
2960 (sizeof ((VexGuestX86State*)0)->field) }
2961
2962 VexGuestLayout
2963 x86guest_layout
2964 = {
2965 /* Total size of the guest state, in bytes. */
2966 .total_sizeB = sizeof(VexGuestX86State),
2967
2968 /* Describe the stack pointer. */
2969 .offset_SP = offsetof(VexGuestX86State,guest_ESP),
2970 .sizeof_SP = 4,
2971
2972 /* Describe the frame pointer. */
2973 .offset_FP = offsetof(VexGuestX86State,guest_EBP),
2974 .sizeof_FP = 4,
2975
2976 /* Describe the instruction pointer. */
2977 .offset_IP = offsetof(VexGuestX86State,guest_EIP),
2978 .sizeof_IP = 4,
2979
2980 /* Describe any sections to be regarded by Memcheck as
2981 'always-defined'. */
2982 .n_alwaysDefd = 24,
2983
2984 /* flags thunk: OP and NDEP are always defd, whereas DEP1
2985 and DEP2 have to be tracked. See detailed comment in
2986 gdefs.h on meaning of thunk fields. */
2987 .alwaysDefd
2988 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
2989 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
2990 /* 2 */ ALWAYSDEFD(guest_DFLAG),
2991 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
2992 /* 4 */ ALWAYSDEFD(guest_ACFLAG),
2993 /* 5 */ ALWAYSDEFD(guest_EIP),
2994 /* 6 */ ALWAYSDEFD(guest_FTOP),
2995 /* 7 */ ALWAYSDEFD(guest_FPTAG),
2996 /* 8 */ ALWAYSDEFD(guest_FPROUND),
2997 /* 9 */ ALWAYSDEFD(guest_FC3210),
2998 /* 10 */ ALWAYSDEFD(guest_CS),
2999 /* 11 */ ALWAYSDEFD(guest_DS),
3000 /* 12 */ ALWAYSDEFD(guest_ES),
3001 /* 13 */ ALWAYSDEFD(guest_FS),
3002 /* 14 */ ALWAYSDEFD(guest_GS),
3003 /* 15 */ ALWAYSDEFD(guest_SS),
3004 /* 16 */ ALWAYSDEFD(guest_LDT),
3005 /* 17 */ ALWAYSDEFD(guest_GDT),
3006 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
3007 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
3008 /* 20 */ ALWAYSDEFD(guest_CMSTART),
3009 /* 21 */ ALWAYSDEFD(guest_CMLEN),
3010 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
3011 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
3012 }
3013 };
3014
3015
3016 /*---------------------------------------------------------------*/
3017 /*--- end guest_x86_helpers.c ---*/
3018 /*---------------------------------------------------------------*/
3019