1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                               guest_x86_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2017 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_x86.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41 
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_x86_defs.h"
46 #include "guest_generic_x87.h"
47 
48 
49 /* This file contains helper functions for x86 guest code.
50    Calls to these functions are generated by the back end.
51    These calls are of course in the host machine code and
52    this file will be compiled to host machine code, so that
53    all makes sense.
54 
55    Only change the signatures of these helper functions very
56    carefully.  If you change the signature here, you'll have to change
57    the parameters passed to it in the IR calls constructed by
58    guest-x86/toIR.c.
59 
60    The convention used is that all functions called from generated
61    code are named x86g_<something>, and any function whose name lacks
62    that prefix is not called from generated code.  Note that some
63    LibVEX_* functions can however be called by VEX's client, but that
64    is not the same as calling them from VEX-generated code.
65 */
66 
67 
68 /* Set to 1 to get detailed profiling info about use of the flag
69    machinery. */
70 #define PROFILE_EFLAGS 0
71 
72 
73 /*---------------------------------------------------------------*/
74 /*--- %eflags run-time helpers.                               ---*/
75 /*---------------------------------------------------------------*/
76 
/* Parity lookup table, indexed by the low 8 bits of a result.
   Entry i is X86G_CC_MASK_P when i has an even number of set bits
   (x86 PF is set on even parity of the result's low byte), else 0.
   Storing the mask directly lets the ACTIONS_* macros OR the entry
   straight into the flags word without further shifting. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
111 
112 /* generalised left-shifter */
lshift(Int x,Int n)113 inline static Int lshift ( Int x, Int n )
114 {
115    if (n >= 0)
116       return (UInt)x << n;
117    else
118       return x >> (-n);
119 }
120 
/* Identity on ULong.  Passed as the "narrow to double width"
   function for the 32-bit UMUL/SMUL cases, where the 64-bit
   product already fits in a ULong and needs no narrowing. */
static inline ULong idULong ( ULong x )
{
   return x;
}
126 
127 
/* Common preamble for the ACTIONS_* macros below.  Binds the formal
   thunk parameters to the conventional local names (CC_DEP1/2,
   CC_NDEP) and derives width-dependent masks from __data_bits. */
#define PREAMBLE(__data_bits)					\
   /* const */ UInt DATA_MASK 					\
      = __data_bits==8 ? 0xFF 					\
                       : (__data_bits==16 ? 0xFFFF 		\
                                          : 0xFFFFFFFF); 	\
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);	\
   /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
   /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
   /* const */ UInt CC_NDEP = cc_ndep_formal;			\
   /* Four bogus assignments, which hopefully gcc can     */	\
   /* optimise away, and which stop it complaining about  */	\
   /* unused variables.                                   */	\
   SIGN_MASK = SIGN_MASK;					\
   DATA_MASK = DATA_MASK;					\
   CC_DEP2 = CC_DEP2;						\
   CC_NDEP = CC_NDEP;
144 
145 
146 /*-------------------------------------------------------------*/
147 
/* ADD: DEP1 = argL, DEP2 = argR.  Computes all six flags for
   argL + argR at the given operand width. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL + argR;					\
     /* CF: unsigned wrap -- result smaller than an operand */	\
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
     pf = parity_table[(UChar)res];				\
     /* AF: carry out of bit 3 */				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: same-signed operands, differently-signed result */	\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
166 
167 /*-------------------------------------------------------------*/
168 
/* SUB: DEP1 = argL, DEP2 = argR.  Computes all six flags for
   argL - argR at the given operand width. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL - argR;					\
     /* CF: unsigned borrow */					\
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
     pf = parity_table[(UChar)res];				\
     /* AF: borrow out of bit 3 */				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: operands differ in sign, result sign != argL's */	\
     of = lshift((argL ^ argR) & (argL ^ res),	 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O; 		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
187 
188 /*-------------------------------------------------------------*/
189 
/* ADC: add with carry-in.  NDEP carries the old flags (only C is
   used); DEP2 holds argR XORed with the old carry, which the
   `CC_DEP2 ^ oldC` below decodes back to the real argR. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL + argR) + oldC;				\
     /* CF: with carry-in, res == argL also means wrap */	\
     if (oldC)							\
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
     else							\
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
212 
213 /*-------------------------------------------------------------*/
214 
/* SBB: subtract with borrow-in.  Same thunk encoding as ADC:
   NDEP carries the old flags (only C used), DEP2 = argR ^ oldC. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL - argR) - oldC;				\
     /* CF: with borrow-in, equality also borrows */		\
     if (oldC)							\
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
     else							\
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res), 			\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
237 
238 /*-------------------------------------------------------------*/
239 
/* LOGIC (AND/OR/XOR/TEST etc): DEP1 = result.  CF and OF are
   cleared; AF is left as 0 (architecturally undefined). */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = 0;							\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0;							\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     of = 0;							\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
253 
254 /*-------------------------------------------------------------*/
255 
/* INC: DEP1 = result; NDEP = old flags, from which C is preserved
   (x86 INC does not modify CF).  argL is reconstructed as res-1. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res - 1;						\
     argR = 1;							\
     /* CF passes through from the old flags */			\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: only when result lands exactly on the sign bit,	\
        i.e. max-positive was incremented */			\
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
273 
274 /*-------------------------------------------------------------*/
275 
/* DEC: DEP1 = result; NDEP = old flags, from which C is preserved
   (x86 DEC does not modify CF).  argL is reconstructed as res+1. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res + 1;						\
     argR = 1;							\
     /* CF passes through from the old flags */			\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     /* OF: only when min-negative was decremented, giving	\
        max-positive (SIGN_MASK - 1) */				\
     of = ((res & DATA_MASK) 					\
          == ((UInt)SIGN_MASK - 1)) << 11;			\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
294 
295 /*-------------------------------------------------------------*/
296 
/* SHL: DEP1 = result.  DEP2 appears to hold the value shifted by
   one place fewer, so its top bit is the last bit shifted out
   (NOTE(review): confirm against the thunk set up in toIR.c). */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     /* CF: last bit shifted out, i.e. msb of DEP2 */		\
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
312 
313 /*-------------------------------------------------------------*/
314 
/* SHR/SAR: DEP1 = result.  DEP2 appears to hold the value shifted
   by one place fewer, so its lsb is the last bit shifted out
   (NOTE(review): confirm against the thunk set up in toIR.c). */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);  					\
   { UInt cf, pf, af, zf, sf, of;				\
     /* CF: last bit shifted out, i.e. lsb of DEP2 */		\
     cf = CC_DEP2 & 1;						\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
330 
331 /*-------------------------------------------------------------*/
332 
333 /* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
334 /* DEP1 = result, NDEP = old flags */
335 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
336 {								\
337    PREAMBLE(DATA_BITS);						\
338    { UInt fl 							\
339         = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
340           | (X86G_CC_MASK_C & CC_DEP1)				\
341           | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
342                                       11-(DATA_BITS-1)) 	\
343                      ^ lshift(CC_DEP1, 11)));			\
344      return fl;							\
345    }								\
346 }
347 
348 /*-------------------------------------------------------------*/
349 
350 /* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
351 /* DEP1 = result, NDEP = old flags */
352 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
353 {								\
354    PREAMBLE(DATA_BITS);						\
355    { UInt fl 							\
356         = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
357           | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
358           | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
359                                       11-(DATA_BITS-1)) 	\
360                      ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
361      return fl;							\
362    }								\
363 }
364 
365 /*-------------------------------------------------------------*/
366 
/* UMUL: unsigned widening multiply.  DEP1/DEP2 = the multiplicands.
   Forms both the narrow product (lo) and the double-width product
   (rr); CF/OF are set iff the high half is non-zero, i.e. the
   product did not fit in DATA_BITS.  AF is undefined. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
390 
391 /*-------------------------------------------------------------*/
392 
/* SMUL: signed widening multiply.  DEP1/DEP2 = the multiplicands.
   CF/OF are set iff the high half is not the sign-extension of the
   low half, i.e. the product did not fit in DATA_BITS as a signed
   value.  AF is undefined. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     /* lo >> (DATA_BITS-1) replicates lo's sign bit */         \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
416 
417 
#if PROFILE_EFLAGS

/* Set once initCounts() has zeroed the tables below. */
static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True once every 0x400000 total helper calls; rate-limits dumps. */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
434 
435 
/* Print the profiling tables: total call counts, per-op C-flag
   fast/slow route counts, and per-(op, condition) counts for
   calculate_cond.  Counts >= 1000 are shown in units of K. */
static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      /* Tag each op with its operand width; after the initial COPY
         op, the ops come in B/W/L triples. */
      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}
475 
/* Zero all the profiling counters and mark them initialised. */
static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */
488 
489 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code.
   Dispatches on cc_op: COPY returns the stored flags directly;
   every other op recomputes the flags via the matching ACTIONS_*
   macro, which reads cc_dep1_formal/cc_dep2_formal/cc_ndep_formal
   under the names CC_DEP1/CC_DEP2/CC_NDEP (see PREAMBLE). */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      /* Flags were computed previously; dep1 holds them verbatim. */
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      /* Multiplies need both the narrow and double-width product;
         the extra arguments are the narrowing helpers to use. */
      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
571 
572 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Thin public wrapper around the worker; the extra layer exists so
   that profiling (when PROFILE_EFLAGS is enabled) can count entries
   from generated code separately from internal uses of the worker. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
588 
589 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters.
   Common ops are fast-cased inline; anything else falls through to
   the full flags computation and masks out C. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      /* Logic ops always clear C. */
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      /* SUB: C is the unsigned borrow, argL < argR at the
         relevant width. */
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      /* INC/DEC preserve C, which the thunk keeps in ndep. */
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   /* Not fast-cased after all; reclassify this call as slow. */
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}
635 
636 
637 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
638 /* returns 1 or 0 */
x86g_calculate_condition(UInt cond,UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)639 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
640                                 UInt cc_op,
641                                 UInt cc_dep1,
642                                 UInt cc_dep2,
643                                 UInt cc_ndep )
644 {
645    UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
646                                                cc_dep2, cc_ndep);
647    UInt of,sf,zf,cf,pf;
648    UInt inv = cond & 1;
649 
650 #  if PROFILE_EFLAGS
651    if (!initted) initCounts();
652    tab_cond[cc_op][cond]++;
653    n_calc_cond++;
654    if (SHOW_COUNTS_NOW) showCounts();
655 #  endif
656 
657    switch (cond) {
658       case X86CondNO:
659       case X86CondO: /* OF == 1 */
660          of = eflags >> X86G_CC_SHIFT_O;
661          return 1 & (inv ^ of);
662 
663       case X86CondNZ:
664       case X86CondZ: /* ZF == 1 */
665          zf = eflags >> X86G_CC_SHIFT_Z;
666          return 1 & (inv ^ zf);
667 
668       case X86CondNB:
669       case X86CondB: /* CF == 1 */
670          cf = eflags >> X86G_CC_SHIFT_C;
671          return 1 & (inv ^ cf);
672          break;
673 
674       case X86CondNBE:
675       case X86CondBE: /* (CF or ZF) == 1 */
676          cf = eflags >> X86G_CC_SHIFT_C;
677          zf = eflags >> X86G_CC_SHIFT_Z;
678          return 1 & (inv ^ (cf | zf));
679          break;
680 
681       case X86CondNS:
682       case X86CondS: /* SF == 1 */
683          sf = eflags >> X86G_CC_SHIFT_S;
684          return 1 & (inv ^ sf);
685 
686       case X86CondNP:
687       case X86CondP: /* PF == 1 */
688          pf = eflags >> X86G_CC_SHIFT_P;
689          return 1 & (inv ^ pf);
690 
691       case X86CondNL:
692       case X86CondL: /* (SF xor OF) == 1 */
693          sf = eflags >> X86G_CC_SHIFT_S;
694          of = eflags >> X86G_CC_SHIFT_O;
695          return 1 & (inv ^ (sf ^ of));
696          break;
697 
698       case X86CondNLE:
699       case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
700          sf = eflags >> X86G_CC_SHIFT_S;
701          of = eflags >> X86G_CC_SHIFT_O;
702          zf = eflags >> X86G_CC_SHIFT_Z;
703          return 1 & (inv ^ ((sf ^ of) | zf));
704          break;
705 
706       default:
707          /* shouldn't really make these calls from generated code */
708          vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
709                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
710          vpanic("x86g_calculate_condition");
711    }
712 }
713 
714 
/* VISIBLE TO LIBVEX CLIENT */
/* Assemble a complete %eflags image from the guest state: the six
   OSZACP flags are recomputed from the CC thunk, then D, ID and AC
   are merged in from their dedicated guest-state fields. */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   /* guest_DFLAG stores the string-op increment: +1 (D clear)
      or -1 as 0xFFFFFFFF (D set).  Any other value is a bug. */
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;

   return eflags;
}
735 
736 /* VISIBLE TO LIBVEX CLIENT */
737 void
LibVEX_GuestX86_put_eflags(UInt eflags,VexGuestX86State * vex_state)738 LibVEX_GuestX86_put_eflags ( UInt eflags,
739                              /*MOD*/VexGuestX86State* vex_state )
740 {
741    /* D flag */
742    if (eflags & X86G_CC_MASK_D) {
743       vex_state->guest_DFLAG = 0xFFFFFFFF;
744       eflags &= ~X86G_CC_MASK_D;
745    }
746    else
747       vex_state->guest_DFLAG = 1;
748 
749    /* ID flag */
750    if (eflags & X86G_CC_MASK_ID) {
751       vex_state->guest_IDFLAG = 1;
752       eflags &= ~X86G_CC_MASK_ID;
753    }
754    else
755       vex_state->guest_IDFLAG = 0;
756 
757    /* AC flag */
758    if (eflags & X86G_CC_MASK_AC) {
759       vex_state->guest_ACFLAG = 1;
760       eflags &= ~X86G_CC_MASK_AC;
761    }
762    else
763       vex_state->guest_ACFLAG = 0;
764 
765    UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
766                   X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
767    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
768    vex_state->guest_CC_DEP1 = eflags & cc_mask;
769    vex_state->guest_CC_DEP2 = 0;
770    vex_state->guest_CC_NDEP = 0;
771 }
772 
773 /* VISIBLE TO LIBVEX CLIENT */
774 void
LibVEX_GuestX86_put_eflag_c(UInt new_carry_flag,VexGuestX86State * vex_state)775 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
776                               /*MOD*/VexGuestX86State* vex_state )
777 {
778    UInt oszacp = x86g_calculate_eflags_all_WRK(
779                     vex_state->guest_CC_OP,
780                     vex_state->guest_CC_DEP1,
781                     vex_state->guest_CC_DEP2,
782                     vex_state->guest_CC_NDEP
783                  );
784    if (new_carry_flag & 1) {
785       oszacp |= X86G_CC_MASK_C;
786    } else {
787       oszacp &= ~X86G_CC_MASK_C;
788    }
789    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
790    vex_state->guest_CC_DEP1 = oszacp;
791    vex_state->guest_CC_DEP2 = 0;
792    vex_state->guest_CC_NDEP = 0;
793 }
794 
795 
796 /*---------------------------------------------------------------*/
797 /*--- %eflags translation-time function specialisers.         ---*/
798 /*--- These help iropt specialise calls the above run-time    ---*/
799 /*--- %eflags functions.                                      ---*/
800 /*---------------------------------------------------------------*/
801 
802 /* Used by the optimiser to try specialisations.  Returns an
803    equivalent expression, or NULL if none. */
804 
isU32(IRExpr * e,UInt n)805 static inline Bool isU32 ( IRExpr* e, UInt n )
806 {
807    return
808       toBool( e->tag == Iex_Const
809               && e->Iex.Const.con->tag == Ico_U32
810               && e->Iex.Const.con->Ico.U32 == n );
811 }
812 
/* Translation-time specialiser for calls to the x86 %eflags helper
   functions.  Given the helper's name and its argument expressions,
   return a cheaper, equivalent IR expression when the condition-code
   operation (and, for conditions, the condition number) is a known
   constant; return NULL when no specialisation applies, in which case
   the call to the run-time helper is left in place.  precedingStmts /
   n_precedingStmts are accepted for interface compatibility but not
   consulted here.  The small numeric comments (e.g. "4, 5") are the
   X86Condcode values being handled. */
IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   /* Count the NULL-terminated argument vector. */
   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      /* 4, 5 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      /* 12, 13 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 14, 15 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 6, 7 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 2, 3 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 8, 9 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* long sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* long sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SZ ^ OF) | ZF, but
            OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         /* Shifting the 16-bit result up to bits [31:16] makes the
            32-bit compare-with-zero equivalent to a 16-bit one. */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
1387 
1388 
1389 /*---------------------------------------------------------------*/
1390 /*--- Supporting functions for x87 FPU activities.            ---*/
1391 /*---------------------------------------------------------------*/
1392 
host_is_little_endian(void)1393 static inline Bool host_is_little_endian ( void )
1394 {
1395    UInt x = 0x76543210;
1396    UChar* p = (UChar*)(&x);
1397    return toBool(*p == 0x10);
1398 }
1399 
1400 /* 80 and 64-bit floating point formats:
1401 
1402    80-bit:
1403 
1404     S  0       0-------0      zero
1405     S  0       0X------X      denormals
1406     S  1-7FFE  1X------X      normals (all normals have leading 1)
1407     S  7FFF    10------0      infinity
1408     S  7FFF    10X-----X      snan
1409     S  7FFF    11X-----X      qnan
1410 
1411    S is the sign bit.  For runs X----X, at least one of the Xs must be
1412    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
1413    there is an explicitly represented leading 1, and a sign bit,
1414    giving 80 in total.
1415 
1416    64-bit avoids the confusion of an explicitly represented leading 1
1417    and so is simpler:
1418 
1419     S  0      0------0   zero
1420     S  0      X------X   denormals
1421     S  1-7FE  any        normals
1422     S  7FF    0------0   infinity
1423     S  7FF    0X-----X   snan
1424     S  7FF    1X-----X   qnan
1425 
1426    Exponent is 11 bits, fractional part is 52 bits, and there is a
1427    sign bit, giving 64 in total.
1428 */
1429 
1430 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1431 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   /* Classify the F64 value 'dbl' (plus its register tag) exactly as
      the x87 FXAM instruction would, returning the C3..C0 condition
      bits.  'tag' == 0 means the register is empty.  C1 always holds
      the sign bit of the value. */
   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   const UChar* bytes = (const UChar*)&dbl;
   UChar sign = toUChar( (bytes[7] >> 7) & 1 );
   UInt  c1   = ((UInt)sign) << X86G_FC_SHIFT_C1;

   /* Empty register: return 1,0,sign,1. */
   if (tag == 0) {
      return X86G_FC_MASK_C3 | c1 | X86G_FC_MASK_C0;
   }

   /* Biased exponent: bits [62:52] of the F64. */
   Int bexp = ((bytes[7] << 4) | ((bytes[6] >> 4) & 0x0F)) & 0x7FF;

   Bool mantissaIsZero
      = toBool( (bytes[6] & 0x0F) == 0
                && (bytes[5] | bytes[4] | bytes[3]
                    | bytes[2] | bytes[1] | bytes[0]) == 0 );

   if (bexp == 0) {
      return mantissaIsZero
             ? (X86G_FC_MASK_C3 | c1)                     /* zero: 1,0,sign,0 */
             : (X86G_FC_MASK_C3 | X86G_FC_MASK_C2 | c1);  /* denormal: 1,1,sign,0 */
   }

   if (bexp == 0x7FF) {
      return mantissaIsZero
             ? (X86G_FC_MASK_C2 | c1 | X86G_FC_MASK_C0)   /* infinity: 0,1,sign,1 */
             : (c1 | X86G_FC_MASK_C0);                    /* NaN: 0,0,sign,1 */
   }

   /* Otherwise it is a normal finite number: 0,1,sign,0. */
   return X86G_FC_MASK_C2 | c1;
}
1500 
1501 
1502 /* CALLED FROM GENERATED CODE */
1503 /* DIRTY HELPER (reads guest memory) */
x86g_dirtyhelper_loadF80le(Addr addrU)1504 ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
1505 {
1506    ULong f64;
1507    convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
1508    return f64;
1509 }
1510 
1511 /* CALLED FROM GENERATED CODE */
1512 /* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   /* Convert the F64 value to 80-bit extended precision and write it
      to guest memory at addrU. */
   ULong local = f64;
   convert_f64le_to_f80le( (UChar*)&local, (UChar*)addrU );
}
1517 
1518 
1519 /*----------------------------------------------*/
1520 /*--- The exported fns ..                    ---*/
1521 /*----------------------------------------------*/
1522 
1523 /* Layout of the real x87 state. */
1524 /* 13 June 05: Fpu_State and auxiliary constants was moved to
1525    g_generic_x87.h */
1526 
1527 
1528 /* CLEAN HELPER */
1529 /* fpucw[15:0] contains a x87 native format FPU control word.
1530    Extract from it the required FPROUND value and any resulting
1531    emulation warning, and return (warn << 32) | fpround value.
1532 */
x86g_check_fldcw(UInt fpucw)1533 ULong x86g_check_fldcw ( UInt fpucw )
1534 {
1535    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1536    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1537    UInt rmode = (fpucw >> 10) & 3;
1538 
1539    /* Detect any required emulation warnings. */
1540    VexEmNote ew = EmNote_NONE;
1541 
1542    if ((fpucw & 0x3F) != 0x3F) {
1543       /* unmasked exceptions! */
1544       ew = EmWarn_X86_x87exns;
1545    }
1546    else
1547    if (((fpucw >> 8) & 3) != 3) {
1548       /* unsupported precision */
1549       ew = EmWarn_X86_x87precision;
1550    }
1551 
1552    return (((ULong)ew) << 32) | ((ULong)rmode);
1553 }
1554 
1555 /* CLEAN HELPER */
1556 /* Given fpround as an IRRoundingMode value, create a suitable x87
1557    native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   /* 0x037F = all exceptions masked, 64-bit precision; the rounding
      mode occupies bits [11:10]. */
   return 0x037F | ((fpround & 3) << 10);
}
1563 
1564 
1565 /* CLEAN HELPER */
1566 /* mxcsr[15:0] contains a SSE native format MXCSR value.
1567    Extract from it the required SSEROUND value and any resulting
1568    emulation warning, and return (warn << 32) | sseround value.
1569 */
x86g_check_ldmxcsr(UInt mxcsr)1570 ULong x86g_check_ldmxcsr ( UInt mxcsr )
1571 {
1572    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1573    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1574    UInt rmode = (mxcsr >> 13) & 3;
1575 
1576    /* Detect any required emulation warnings. */
1577    VexEmNote ew = EmNote_NONE;
1578 
1579    if ((mxcsr & 0x1F80) != 0x1F80) {
1580       /* unmasked exceptions! */
1581       ew = EmWarn_X86_sseExns;
1582    }
1583    else
1584    if (mxcsr & (1<<15)) {
1585       /* FZ is set */
1586       ew = EmWarn_X86_fz;
1587    }
1588    else
1589    if (mxcsr & (1<<6)) {
1590       /* DAZ is set */
1591       ew = EmWarn_X86_daz;
1592    }
1593 
1594    return (((ULong)ew) << 32) | ((ULong)rmode);
1595 }
1596 
1597 
1598 /* CLEAN HELPER */
1599 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1600    native format MXCSR value. */
x86g_create_mxcsr(UInt sseround)1601 UInt x86g_create_mxcsr ( UInt sseround )
1602 {
1603    sseround &= 3;
1604    return 0x1F80 | (sseround << 13);
1605 }
1606 
1607 
1608 /* CALLED FROM GENERATED CODE */
1609 /* DIRTY HELPER (writes guest state) */
1610 /* Initialise the x87 FPU state as per 'finit'. */
x86g_dirtyhelper_FINIT(VexGuestX86State * gst)1611 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1612 {
1613    Int i;
1614    gst->guest_FTOP = 0;
1615    for (i = 0; i < 8; i++) {
1616       gst->guest_FPTAG[i] = 0; /* empty */
1617       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1618    }
1619    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1620    gst->guest_FC3210  = 0;
1621 }
1622 
1623 
/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
/* Copies an x87 image (environment, and optionally the registers)
   into the guest state, and returns any emulation warning arising
   from the restored control word. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/Fpu_State* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   /* TOP-of-stack lives in bits [13:11] of the status word. */
   UInt       ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87_state->env[FP_ENV_TAG];
   UInt       fpucw   = x87_state->env[FP_ENV_CTRL];
   /* 0x4700 selects the C3,C2,C1,C0 condition-code bits. */
   UInt       c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags.  The image is in ST (stack) order but
      the guest state is in physical-register order, hence the
      (stno + ftop) & 7 mapping.  Tags are 2 bits per register; 3
      means empty. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87_state->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings.  x86g_check_fldcw packs
      (warning << 32) | rounding-mode. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
1683 
1684 
1685 /* Create an x87 FPU state from the guest state, as close as
1686    we can approximate it. */
1687 static
do_get_x87(VexGuestX86State * vex_state,Fpu_State * x87_state)1688 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1689                   /*OUT*/Fpu_State* x87_state )
1690 {
1691    Int        i, stno, preg;
1692    UInt       tagw;
1693    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1694    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1695    UInt       ftop    = vex_state->guest_FTOP;
1696    UInt       c3210   = vex_state->guest_FC3210;
1697 
1698    for (i = 0; i < 14; i++)
1699       x87_state->env[i] = 0;
1700 
1701    x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
1702       = x87_state->env[13] = 0xFFFF;
1703    x87_state->env[FP_ENV_STAT]
1704       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1705    x87_state->env[FP_ENV_CTRL]
1706       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1707 
1708    /* Dump the register stack in ST order. */
1709    tagw = 0;
1710    for (stno = 0; stno < 8; stno++) {
1711       preg = (stno + ftop) & 7;
1712       if (vexTags[preg] == 0) {
1713          /* register is empty */
1714          tagw |= (3 << (2*preg));
1715          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1716                                  &x87_state->reg[10*stno] );
1717       } else {
1718          /* register is full. */
1719          tagw |= (0 << (2*preg));
1720          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1721                                  &x87_state->reg[10*stno] );
1722       }
1723    }
1724    x87_state->env[FP_ENV_TAG] = toUShort(tagw);
1725 }
1726 
1727 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* Build an 'fxsave' image at addr from the guest x87/SSE state.
   Bytes 0..159 hold the x87/MXCSR part, bytes 160.. hold
   %xmm0..%xmm7. */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160); /* start of %xmm image */
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   /* First get an FSAVE-style image, then rearrange it. */
   do_get_x87( gst, &tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   /* The fxsave FTW is an abridged tag: one bit per register, set
      iff the full 2-bit tag is not 3 (i.e. register in use). */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order.  Each image slot is 16
      bytes; only the first 10 carry data, the rest are zeroed. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
1817 
1818 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
/* Rebuild the guest x87/SSE state from an 'fxsave' image at addr.
   Returns any emulation warning; an x87 warning takes precedence
   over an XMM one. */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160); /* %xmm0..%xmm7 image */
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   /* Each register is 10 bytes (5 halfwords) of a 16-byte image slot. */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Expand the abridged FTW byte (bit set == register in use, as
      built by x86g_dirtyhelper_FXSAVE) back into a full
      2-bits-per-register tag word. */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* in use -- tag 0 ('valid'); the
                                     abridged form can't distinguish
                                     valid/zero/special */
      else
         fp_tags |= (3 << (2*r)); /* empty */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );

   /* Reassemble MXCSR from its two image halves, then extract the
      rounding mode and any warning from it. */
   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
1911 
1912 
1913 /* CALLED FROM GENERATED CODE */
1914 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FSAVE(VexGuestX86State * gst,HWord addr)1915 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1916 {
1917    do_get_x87( gst, (Fpu_State*)addr );
1918 }
1919 
1920 /* CALLED FROM GENERATED CODE */
1921 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FRSTOR(VexGuestX86State * gst,HWord addr)1922 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1923 {
1924    return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
1925 }
1926 
1927 /* CALLED FROM GENERATED CODE */
1928 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FSTENV(VexGuestX86State * gst,HWord addr)1929 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1930 {
1931    /* Somewhat roundabout, but at least it's simple. */
1932    Int       i;
1933    UShort*   addrP = (UShort*)addr;
1934    Fpu_State tmp;
1935    do_get_x87( gst, &tmp );
1936    for (i = 0; i < 14; i++)
1937       addrP[i] = tmp.env[i];
1938 }
1939 
1940 /* CALLED FROM GENERATED CODE */
1941 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FLDENV(VexGuestX86State * gst,HWord addr)1942 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1943 {
1944    return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst);
1945 }
1946 
1947 /* VISIBLE TO LIBVEX CLIENT */
1948 /* Do x87 save from the supplied VexGuestX86State structure and store the
1949    result at the given address which represents a buffer of at least 108
1950    bytes. */
LibVEX_GuestX86_get_x87(VexGuestX86State * vex_state,UChar * x87_state)1951 void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1952                                /*OUT*/UChar* x87_state )
1953 {
1954    do_get_x87 ( vex_state, (Fpu_State*)x87_state );
1955 }
1956 
1957 /* VISIBLE TO LIBVEX CLIENT */
1958 /* Do x87 restore from the supplied address and store read values to the given
1959    VexGuestX86State structure. */
LibVEX_GuestX86_put_x87(UChar * x87_state,VexGuestX86State * vex_state)1960 VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
1961                                     /*MOD*/VexGuestX86State* vex_state )
1962 {
1963    return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
1964 }
1965 
1966 /* VISIBLE TO LIBVEX CLIENT */
1967 /* Return mxcsr from the supplied VexGuestX86State structure. */
LibVEX_GuestX86_get_mxcsr(VexGuestX86State * vex_state)1968 UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
1969 {
1970    return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
1971 }
1972 
1973 /* VISIBLE TO LIBVEX CLIENT */
1974 /* Modify the given VexGuestX86State structure according to the passed mxcsr
1975    value. */
LibVEX_GuestX86_put_mxcsr(UInt mxcsr,VexGuestX86State * vex_state)1976 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
1977                                       /*MOD*/VexGuestX86State* vex_state)
1978 {
1979    ULong w64 = x86g_check_ldmxcsr( mxcsr );
1980    vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
1981    return (VexEmNote)(w64 >> 32);
1982 }
1983 
1984 /*---------------------------------------------------------------*/
1985 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
1986 /*---------------------------------------------------------------*/
1987 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   /* The rotate count is masked to 5 bits, as on real x86. */
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         /* OF is defined from the pre-rotate value: MSB xor CF. */
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            /* Rotate one position through the 33-bit {CF,arg}. */
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         /* A 16-bit rotate cycles through 17 bits (16 data + CF),
            so reduce the count mod 17. */
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         /* 8-bit rotate cycles through 9 bits; reduce mod 9. */
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   /* Merge the new C and O flags into the incoming flag set; all
      other flags are unchanged. */
   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
2041 
2042 
2043 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2044 /* Calculate both flags and value result for rotate left
2045    through the carry bit.  Result in low 32 bits,
2046    new flags (OSZACP) in high 32 bits.
2047 */
x86g_calculate_RCL(UInt arg,UInt rot_amt,UInt eflags_in,UInt sz)2048 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
2049 {
2050    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
2051 
2052    switch (sz) {
2053       case 4:
2054          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2055          while (tempCOUNT > 0) {
2056             tempcf = (arg >> 31) & 1;
2057             arg    = (arg << 1) | (cf & 1);
2058             cf     = tempcf;
2059             tempCOUNT--;
2060          }
2061          of = ((arg >> 31) ^ cf) & 1;
2062          break;
2063       case 2:
2064          while (tempCOUNT >= 17) tempCOUNT -= 17;
2065          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2066          while (tempCOUNT > 0) {
2067             tempcf = (arg >> 15) & 1;
2068             arg    = 0xFFFF & ((arg << 1) | (cf & 1));
2069             cf     = tempcf;
2070             tempCOUNT--;
2071          }
2072          of = ((arg >> 15) ^ cf) & 1;
2073          break;
2074       case 1:
2075          while (tempCOUNT >= 9) tempCOUNT -= 9;
2076          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2077          while (tempCOUNT > 0) {
2078             tempcf = (arg >> 7) & 1;
2079             arg    = 0xFF & ((arg << 1) | (cf & 1));
2080             cf     = tempcf;
2081             tempCOUNT--;
2082          }
2083          of = ((arg >> 7) ^ cf) & 1;
2084          break;
2085       default:
2086          vpanic("calculate_RCL: invalid size");
2087    }
2088 
2089    cf &= 1;
2090    of &= 1;
2091    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2092    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2093 
2094    return (((ULong)eflags_in) << 32) | ((ULong)arg);
2095 }
2096 
2097 
2098 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2099 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
2100    AX value in low half of arg, OSZACP in upper half.
2101    See guest-x86/toIR.c usage point for details.
2102 */
/* x86-style parity of w32[7:0]: 1 iff an even number of bits set. */
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt bit;
   UInt p = 1;
   for (bit = 0; bit < 8; bit++)
      p ^= (w32 >> bit) & 1;
   return p;
}
/* Implements DAA/DAS/AAA/AAS.  flags_and_AX packs AX in bits [15:0]
   and the OSZACP flags in bits [31:16]; the result is packed the
   same way.  opcode selects which of the four instructions to do. */
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   /* Unpack AL, AH and the six flag bits from the argument. */
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA: decimal-adjust AL after addition */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            /* Low BCD digit overflowed: add 6 to carry into the
               high digit. */
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            /* High BCD digit overflowed: adjust it too. */
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS: decimal-adjust AL after subtraction */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            /* Low digit needs adjusting; note whether subtracting 6
               borrows out of AL. */
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA: ASCII-adjust AL after addition */
         /* If AL is close to wrapping, the +6 below also carries
            into AH; 'nudge' accounts for that. */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS: ASCII-adjust AL after subtraction */
         /* If AL is small, the -6 below also borrows out of AH. */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   /* Repack flags and AX in the same layout as the argument. */
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
2225 
/* Implements AAM/AAD (base-10 forms only).  flags_and_AX packs AX in
   bits [15:0] and OSZACP in bits [31:16]; the result is packed the
   same way.  All six flags are recomputed from the new AL, so the
   incoming flag bits are ignored. */
UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O, r_S, r_Z, r_A, r_C, r_P;

   if (opcode == 0xD4) {
      /* AAM: split AL into two base-10 digits. */
      r_AH = r_AL / 10;
      r_AL = r_AL % 10;
   }
   else if (opcode == 0xD5) {
      /* AAD: recombine the digits back into AL, clearing AH. */
      r_AL = ((r_AH * 10) + r_AL) & 0xff;
      r_AH = 0;
   }
   else {
      vassert(0);
   }

   /* O, C and A are undefined; pick 0.  S, Z, P follow the new AL. */
   r_O = 0;
   r_C = 0;
   r_A = 0;
   r_S = (r_AL & 0x80) ? 1 : 0;
   r_Z = (r_AL == 0) ? 1 : 0;
   r_P = calc_parity_8bit( r_AL );

   return   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
          | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
          | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
          | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
          | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
          | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
          | ( (r_AH & 0xFF) << 8 )
          | ( (r_AL & 0xFF) << 0 );
}
2270 
2271 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   /* "=A" binds the 64-bit result to EDX:EAX, which is exactly
      where rdtsc deposits it. */
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   /* No host TSC to read; any nonzero constant will do. */
   return 1ULL;
#  endif
}
2285 
2286 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         /* Max basic leaf = 1, vendor string "GenuineIntel"
            (read in EBX, EDX, ECX order). */
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x756e6547; /* "Genu" */
         st->guest_ECX = 0x6c65746e; /* "ntel" */
         st->guest_EDX = 0x49656e69; /* "ineI" */
         break;
      default:
         /* Leaf 1 (and everything else): signature 0x543 --
            presumably family 5, model 4, stepping 3 (Pentium/MMX);
            EDX = feature bits, including MMX (bit 23) -- verify
            against the Intel SDM CPUID documentation. */
         st->guest_EAX = 0x543;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x8001bf;
         break;
   }
}
2307 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      /* vendor ID: "AuthenticAMD" (read in EBX, EDX, ECX order) */
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x68747541; /* "Auth" */
         st->guest_ECX = 0x444d4163; /* "cAMD" */
         st->guest_EDX = 0x69746e65; /* "enti" */
         break;
      /* feature bits */
      case 1:
         st->guest_EAX = 0x621; /* family 6, model 2, stepping 1 --
                                   presumably; verify against AMD
                                   CPUID documentation */
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x183f9ff;
         break;
      /* Highest Extended Function Supported (0x80000004 brand string) */
      case 0x80000000:
         st->guest_EAX = 0x80000004;
         st->guest_EBX = 0x68747541; /* "Auth" */
         st->guest_ECX = 0x444d4163; /* "cAMD" */
         st->guest_EDX = 0x69746e65; /* "enti" */
         break;
      /* Extended Processor Info and Feature Bits */
      case 0x80000001:
         st->guest_EAX = 0x721;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
         break;
      /* Processor Brand String "AMD Athlon(tm) Processor" */
      case 0x80000002:
         st->guest_EAX = 0x20444d41; /* "AMD " */
         st->guest_EBX = 0x6c687441; /* "Athl" */
         st->guest_ECX = 0x74286e6f; /* "on(t" */
         st->guest_EDX = 0x5020296d; /* "m) P" */
         break;
      case 0x80000003:
         st->guest_EAX = 0x65636f72; /* "roce" */
         st->guest_EBX = 0x726f7373; /* "ssor" */
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
      default:
         /* Unknown leaf: return all zeroes. */
         st->guest_EAX = 0x0;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
   }
}
2364 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
   vendor_id       : GenuineIntel
   cpu family      : 6
   model           : 11
   model name      : Intel(R) Pentium(R) III CPU family      1133MHz
   stepping        : 1
   cpu MHz         : 1131.013
   cache size      : 512 KB
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         /* Max basic leaf = 2, vendor "GenuineIntel" (EBX,EDX,ECX). */
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547; /* "Genu" */
         st->guest_ECX = 0x6c65746e; /* "ntel" */
         st->guest_EDX = 0x49656e69; /* "ineI" */
         break;
      case 1:
         /* Signature 0x6b1 matching the family/model/stepping
            above; EDX is the feature mask (includes SSE). */
         st->guest_EAX = 0x000006b1;
         st->guest_EBX = 0x00000004;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0383fbff;
         break;
      default:
         /* Leaf 2 (and anything else): cache/TLB descriptor bytes --
            presumably matching the 512 KB cache above; verify
            against the Intel SDM CPUID leaf-2 tables. */
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}
2399 
2400 /* Claim to be the following SSE2-capable CPU:
2401    vendor_id    : GenuineIntel
2402    cpu family   : 15
2403    model        : 2
2404    model name   : Intel(R) Pentium(R) 4 CPU 3.00GHz
2405    stepping     : 9
2406    microcode    : 0x17
2407    cpu MHz      : 2992.577
2408    cache size   : 512 KB
2409    flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
2410                   pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
2411                    pebs bts cid xtpr
2412    clflush size : 64
2413    cache_alignment : 128
2414    address sizes : 36 bits physical, 32 bits virtual
2415 */
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
   /* Dispatch on the requested CPUID leaf (incoming %eax). */
   UInt leaf = st->guest_EAX;

   if (leaf == 0) {
      /* Leaf 0: max supported leaf (2) and the "GenuineIntel"
         vendor string spread across EBX/EDX/ECX. */
      st->guest_EAX = 0x00000002;
      st->guest_EBX = 0x756e6547;
      st->guest_ECX = 0x6c65746e;
      st->guest_EDX = 0x49656e69;
   }
   else if (leaf == 1) {
      /* Leaf 1: family/model/stepping (P4) plus feature flags,
         including SSE2. */
      st->guest_EAX = 0x00000f29;
      st->guest_EBX = 0x01020809;
      st->guest_ECX = 0x00004400;
      st->guest_EDX = 0xbfebfbff;
   }
   else {
      /* Any other leaf: cache/TLB descriptor information. */
      st->guest_EAX = 0x03020101;
      st->guest_EBX = 0x00000000;
      st->guest_ECX = 0x00000000;
      st->guest_EDX = 0x0c040883;
   }
}
2439 
2440 /* Claim to be the following SSSE3-capable CPU (2 x ...):
2441    vendor_id       : GenuineIntel
2442    cpu family      : 6
2443    model           : 15
2444    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2445    stepping        : 6
2446    cpu MHz         : 2394.000
2447    cache size      : 4096 KB
2448    physical id     : 0
2449    siblings        : 2
2450    core id         : 0
2451    cpu cores       : 2
2452    fpu             : yes
2453    fpu_exception   : yes
2454    cpuid level     : 10
2455    wp              : yes
2456    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2457                      mtrr pge mca cmov pat pse36 clflush dts acpi
2458                      mmx fxsr sse sse2 ss ht tm syscall nx lm
2459                      constant_tsc pni monitor ds_cpl vmx est tm2
2460                      cx16 xtpr lahf_lm
2461    bogomips        : 4798.78
2462    clflush size    : 64
2463    cache_alignment : 64
2464    address sizes   : 36 bits physical, 48 bits virtual
2465    power management:
2466 */
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
   /* Write all four result registers in one go. */
#  define SET_ABCD(_a,_b,_c,_d)               \
      do { st->guest_EAX = (UInt)(_a);        \
           st->guest_EBX = (UInt)(_b);        \
           st->guest_ECX = (UInt)(_c);        \
           st->guest_EDX = (UInt)(_d);        \
      } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         /* Max basic leaf = 0xa; vendor string "GenuineIntel". */
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         /* Family/model/stepping (Core 2) and feature flags. */
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         /* Cache/TLB descriptors. */
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000004:
         /* Deterministic cache parameters; sub-leaf in %ecx. */
         switch (st->guest_ECX) {
            case 0x00000000:
               SET_ABCD(0x04000121, 0x01c0003f, 0x0000003f, 0x00000001);
               break;
            case 0x00000001:
               SET_ABCD(0x04000122, 0x01c0003f, 0x0000003f, 0x00000001);
               break;
            case 0x00000002:
               SET_ABCD(0x04004143, 0x03c0003f, 0x00000fff, 0x00000001);
               break;
            default:
               SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
               break;
         }
         break;
      case 0x00000005:
         /* MONITOR/MWAIT parameters. */
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         /* Thermal and power management. */
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000003:
      case 0x00000007:
      case 0x00000009:
      case 0x80000005:
      case 0x80000007:
         /* Leaves this CPU reports as all-zero. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         /* Max extended leaf = 0x80000008. */
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         /* Extended feature flags (NX, LM, ...). */
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      case 0x80000002:
         /* Brand string, part 1 of 3. */
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         /* Brand string, part 2 of 3. */
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         /* Brand string, part 3 of 3. */
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000006:
         /* L2 cache information. */
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000008:
         /* Physical/virtual address sizes. */
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      default:
         /* Leaf 0xa (perfmon info) doubles as the catch-all for any
            unhandled leaf value, exactly as the real CPU behaves when
            %eax exceeds the max basic leaf. */
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
   }
#  undef SET_ABCD
}
2553 
2554 
2555 /* CALLED FROM GENERATED CODE */
2556 /* DIRTY HELPER (non-referentially-transparent) */
2557 /* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   /* Perform a real I/O-port read on the host.  %eax is cleared
      before the 'in' insn so that 1- and 2-byte reads come back
      zero-extended to 32 bits. */
   UInt r = 0;
   /* Port numbers are 16 bits wide. */
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         /* Unexpected size: leave r at 0. */
         break;
   }
   return r;
#  else
   /* Not an x86 host: cannot touch I/O ports; claim a zero read. */
   return 0;
#  endif
}
2584 
2585 
2586 /* CALLED FROM GENERATED CODE */
2587 /* DIRTY HELPER (non-referentially-transparent) */
2588 /* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   /* Perform a real I/O-port write on the host, of the low sz bytes
      of |data|. */
   /* Port numbers are 16 bits wide. */
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         /* Unexpected size: silently ignore. */
         break;
   }
#  else
   /* Not an x86 host: cannot touch I/O ports; do nothing. */
#  endif
}
2613 
2614 /* CALLED FROM GENERATED CODE */
2615 /* DIRTY HELPER (non-referentially-transparent) */
2616 /* Horrible hack.  On non-x86 platforms, do nothing. */
2617 /* op = 0: call the native SGDT instruction.
2618    op = 1: call the native SIDT instruction.
2619 */
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   /* Run the real instruction and let it store its 6-byte
      pseudo-descriptor at |address|. */
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         /* The front end should only ever generate op == 0 or 1. */
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* Not an x86 host: fake the result by storing an all-zero 6-byte
      pseudo-descriptor (2-byte limit + 4-byte base) -- the same
      number of bytes sgdt/sidt would have written. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}
2638 
2639 /*---------------------------------------------------------------*/
2640 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
2641 /*---------------------------------------------------------------*/
2642 
/* Absolute difference of two unsigned bytes. */
static inline UChar abdU8 ( UChar xx, UChar yy ) {
   UChar big   = xx > yy ? xx : yy;
   UChar small = xx > yy ? yy : xx;
   return toUChar(big - small);
}
2646 
mk32x2(UInt w1,UInt w0)2647 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2648    return (((ULong)w1) << 32) | ((ULong)w0);
2649 }
2650 
sel16x4_3(ULong w64)2651 static inline UShort sel16x4_3 ( ULong w64 ) {
2652    UInt hi32 = toUInt(w64 >> 32);
2653    return toUShort(hi32 >> 16);
2654 }
sel16x4_2(ULong w64)2655 static inline UShort sel16x4_2 ( ULong w64 ) {
2656    UInt hi32 = toUInt(w64 >> 32);
2657    return toUShort(hi32);
2658 }
sel16x4_1(ULong w64)2659 static inline UShort sel16x4_1 ( ULong w64 ) {
2660    UInt lo32 = toUInt(w64);
2661    return toUShort(lo32 >> 16);
2662 }
sel16x4_0(ULong w64)2663 static inline UShort sel16x4_0 ( ULong w64 ) {
2664    UInt lo32 = toUInt(w64);
2665    return toUShort(lo32);
2666 }
2667 
sel8x8_7(ULong w64)2668 static inline UChar sel8x8_7 ( ULong w64 ) {
2669    UInt hi32 = toUInt(w64 >> 32);
2670    return toUChar(hi32 >> 24);
2671 }
sel8x8_6(ULong w64)2672 static inline UChar sel8x8_6 ( ULong w64 ) {
2673    UInt hi32 = toUInt(w64 >> 32);
2674    return toUChar(hi32 >> 16);
2675 }
sel8x8_5(ULong w64)2676 static inline UChar sel8x8_5 ( ULong w64 ) {
2677    UInt hi32 = toUInt(w64 >> 32);
2678    return toUChar(hi32 >> 8);
2679 }
sel8x8_4(ULong w64)2680 static inline UChar sel8x8_4 ( ULong w64 ) {
2681    UInt hi32 = toUInt(w64 >> 32);
2682    return toUChar(hi32 >> 0);
2683 }
sel8x8_3(ULong w64)2684 static inline UChar sel8x8_3 ( ULong w64 ) {
2685    UInt lo32 = toUInt(w64);
2686    return toUChar(lo32 >> 24);
2687 }
sel8x8_2(ULong w64)2688 static inline UChar sel8x8_2 ( ULong w64 ) {
2689    UInt lo32 = toUInt(w64);
2690    return toUChar(lo32 >> 16);
2691 }
sel8x8_1(ULong w64)2692 static inline UChar sel8x8_1 ( ULong w64 ) {
2693    UInt lo32 = toUInt(w64);
2694    return toUChar(lo32 >> 8);
2695 }
sel8x8_0(ULong w64)2696 static inline UChar sel8x8_0 ( ULong w64 ) {
2697    UInt lo32 = toUInt(w64);
2698    return toUChar(lo32 >> 0);
2699 }
2700 
2701 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* MMX PMADDWD: signed 16x16->32 multiply in each lane, then sum
   adjacent pairs of products into two 32-bit results. */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   Int p3 = ((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy));
   Int p2 = ((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy));
   Int p1 = ((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy));
   Int p0 = ((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy));
   /* High 32 bits from lanes 3+2, low 32 bits from lanes 1+0. */
   return mk32x2(p3 + p2, p1 + p0);
}
2712 
2713 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* MMX PSADBW: sum of absolute differences of the eight byte lanes,
   returned in the low 16 bits with everything above zeroed. */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt lane, sum = 0;
   for (lane = 0; lane < 8; lane++) {
      UChar bx = toUChar(0xFF & (xx >> (8 * lane)));
      UChar by = toUChar(0xFF & (yy >> (8 * lane)));
      sum += (UInt)abdU8(bx, by);
   }
   return (ULong)(sum & 0xFFFF);
}
2728 
2729 
2730 /*---------------------------------------------------------------*/
2731 /*--- Helpers for dealing with segment overrides.             ---*/
2732 /*---------------------------------------------------------------*/
2733 
2734 static inline
get_segdescr_base(VexGuestX86SegDescr * ent)2735 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2736 {
2737    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2738    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2739    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2740    return (hi << 24) | (mid << 16) | lo;
2741 }
2742 
2743 static inline
get_segdescr_limit(VexGuestX86SegDescr * ent)2744 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2745 {
2746     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2747     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2748     UInt limit = (hi << 16) | lo;
2749     if (ent->LdtEnt.Bits.Granularity)
2750        limit = (limit << 12) | 0xFFF;
2751     return limit;
2752 }
2753 
2754 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_use_seg_selector(HWord ldt,HWord gdt,UInt seg_selector,UInt virtual_addr)2755 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2756                               UInt seg_selector, UInt virtual_addr )
2757 {
2758    UInt tiBit, base, limit;
2759    VexGuestX86SegDescr* the_descrs;
2760 
2761    Bool verboze = False;
2762 
2763    /* If this isn't true, we're in Big Trouble. */
2764    vassert(8 == sizeof(VexGuestX86SegDescr));
2765 
2766    if (verboze)
2767       vex_printf("x86h_use_seg_selector: "
2768                  "seg_selector = 0x%x, vaddr = 0x%x\n",
2769                  seg_selector, virtual_addr);
2770 
2771    /* Check for wildly invalid selector. */
2772    if (seg_selector & ~0xFFFF)
2773       goto bad;
2774 
2775    seg_selector &= 0x0000FFFF;
2776 
2777    /* Sanity check the segment selector.  Ensure that RPL=11b (least
2778       privilege).  This forms the bottom 2 bits of the selector. */
2779    if ((seg_selector & 3) != 3)
2780       goto bad;
2781 
2782    /* Extract the TI bit (0 means GDT, 1 means LDT) */
2783    tiBit = (seg_selector >> 2) & 1;
2784 
2785    /* Convert the segment selector onto a table index */
2786    seg_selector >>= 3;
2787    vassert(seg_selector >= 0 && seg_selector < 8192);
2788 
2789    if (tiBit == 0) {
2790 
2791       /* GDT access. */
2792       /* Do we actually have a GDT to look at? */
2793       if (gdt == 0)
2794          goto bad;
2795 
2796       /* Check for access to non-existent entry. */
2797       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2798          goto bad;
2799 
2800       the_descrs = (VexGuestX86SegDescr*)gdt;
2801       base  = get_segdescr_base (&the_descrs[seg_selector]);
2802       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2803 
2804    } else {
2805 
2806       /* All the same stuff, except for the LDT. */
2807       if (ldt == 0)
2808          goto bad;
2809 
2810       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2811          goto bad;
2812 
2813       the_descrs = (VexGuestX86SegDescr*)ldt;
2814       base  = get_segdescr_base (&the_descrs[seg_selector]);
2815       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2816 
2817    }
2818 
2819    /* Do the limit check.  Note, this check is just slightly too
2820       slack.  Really it should be "if (virtual_addr + size - 1 >=
2821       limit)," but we don't have the size info to hand.  Getting it
2822       could be significantly complex.  */
2823    if (virtual_addr >= limit)
2824       goto bad;
2825 
2826    if (verboze)
2827       vex_printf("x86h_use_seg_selector: "
2828                  "base = 0x%x, addr = 0x%x\n",
2829                  base, base + virtual_addr);
2830 
2831    /* High 32 bits are zero, indicating success. */
2832    return (ULong)( ((UInt)virtual_addr) + base );
2833 
2834  bad:
2835    return 1ULL << 32;
2836 }
2837 
2838 
2839 /*---------------------------------------------------------------*/
2840 /*--- Helpers for dealing with, and describing,               ---*/
2841 /*--- guest state as a whole.                                 ---*/
2842 /*---------------------------------------------------------------*/
2843 
2844 /* Initialise the entire x86 guest state. */
2845 /* VISIBLE TO LIBVEX CLIENT */
/* Put every field of the x86 guest state into its power-on value. */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   /* Event-check fields used by the translation-chaining machinery. */
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   /* Integer register file. */
   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   /* Condition-code thunk: start in "copy the flags directly"
      mode with an empty flags value. */
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Put the simulated FPU into its power-on state. */
   x86g_dirtyhelper_FINIT( vex_state );

   /* SSE state: round-to-nearest, all XMM registers zeroed. */
   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;

#  define ZERO_XMM(_r)                         \
      do { (_r)[0] = 0; (_r)[1] = 0;           \
           (_r)[2] = 0; (_r)[3] = 0;           \
      } while (0)

   ZERO_XMM(vex_state->guest_XMM0);
   ZERO_XMM(vex_state->guest_XMM1);
   ZERO_XMM(vex_state->guest_XMM2);
   ZERO_XMM(vex_state->guest_XMM3);
   ZERO_XMM(vex_state->guest_XMM4);
   ZERO_XMM(vex_state->guest_XMM5);
   ZERO_XMM(vex_state->guest_XMM6);
   ZERO_XMM(vex_state->guest_XMM7);

#  undef ZERO_XMM

   /* Segment registers and descriptor-table base pointers. */
   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   /* Redirection / syscall bookkeeping. */
   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   /* Keep the padding deterministic too. */
   vex_state->padding1 = 0;
   vex_state->padding2 = 0;
   vex_state->padding3 = 0;
}
2911 
2912 
2913 /* Figure out if any part of the guest state contained in minoff
2914    .. maxoff requires precise memory exceptions.  If in doubt return
2915    True (but this generates significantly slower code).
2916 
2917    By default we enforce precise exns for guest %ESP, %EBP and %EIP
2918    only.  These are the minimum needed to extract correct stack
2919    backtraces from x86 code.
2920 
2921    Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2922 */
guest_x86_state_requires_precise_mem_exns(Int minoff,Int maxoff,VexRegisterUpdates pxControl)2923 Bool guest_x86_state_requires_precise_mem_exns (
2924         Int minoff, Int maxoff, VexRegisterUpdates pxControl
2925      )
2926 {
2927    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2928    Int ebp_max = ebp_min + 4 - 1;
2929    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2930    Int esp_max = esp_min + 4 - 1;
2931    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2932    Int eip_max = eip_min + 4 - 1;
2933 
2934    if (maxoff < esp_min || minoff > esp_max) {
2935       /* no overlap with esp */
2936       if (pxControl == VexRegUpdSpAtMemAccess)
2937          return False; // We only need to check stack pointer.
2938    } else {
2939       return True;
2940    }
2941 
2942    if (maxoff < ebp_min || minoff > ebp_max) {
2943       /* no overlap with ebp */
2944    } else {
2945       return True;
2946    }
2947 
2948    if (maxoff < eip_min || minoff > eip_max) {
2949       /* no overlap with eip */
2950    } else {
2951       return True;
2952    }
2953 
2954    return False;
2955 }
2956 
2957 
/* Describe one always-defined guest-state field by its byte offset
   and size within VexGuestX86State, for consumption by Memcheck. */
#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

/* Layout descriptor handed to the instrumentation tools: where the
   stack/frame/instruction pointers live, and which state sections
   Memcheck may treat as always-defined. */
VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'.  Must match the entry count below. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
3014 
3015 
3016 /*---------------------------------------------------------------*/
3017 /*--- end                                 guest_x86_helpers.c ---*/
3018 /*---------------------------------------------------------------*/
3019